
    h$                        S SK Jr  S SKrS SKJr  SSKJrJrJr  \(       a  S SK	J
r
  S SKJr  \R                  " \5      r1 SkrS	1r1 S
kr1 SkrSS1rSS1rSS1rS1rSS jr " S S5      rg)    )annotationsN)TYPE_CHECKING   )RequestRate	VisitTime_RuleSet)Iterator)Self>   disalowdiasllowdisallawdisallowdissalow	dissallowallow>   	useragent
user agent
user-agent>   sitemapsitemapssite-mapzcrawl-delayzcrawl delayzrequest-ratezrequest ratez
visit-timez
visit timehostc           
         [        U [        ;   U [        ;   U [        ;   U [        ;   U [
        ;   U [        ;   U [        ;   /5      $ N)any_DISALLOW_DIRECTIVE_ALLOW_DIRECTIVE_USER_AGENT_DIRECTIVE_SITEMAP_DIRECTIVE_CRAWL_DELAY_DIRECTIVE_REQUEST_RATE_DIRECTIVE_HOST_DIRECTIVE)fields    W/root/1688_scrapy/alibaba-scraper/venv/lib/python3.13/site-packages/protego/_protego.py_is_valid_directive_fieldr%   #   sR    ((%%**''++,,_$	

 
    c                      \ rS rSrSS jr\SS j5       rSS jrSS jrSS jr	SS jr
SS jrSS	 jr\SS
 j5       r\SS j5       r\SS j5       rSrg)Protego1   c                f    0 U l         S U l        / U l        0 U l        SU l        SU l        SU l        g Nr   )_user_agents_host_sitemap_list_matched_rule_set_total_line_seen_invalid_directive_seen_total_directive_seenselfs    r$   __init__Protego.__init__2   s@    13 "&
 )+ >@ !'($%&"r&   c                    U " 5       n[        U[        5      (       d!  [        S[        U5      R                   35      eUR                  U5        U$ )NzProtego.parse expects str, got )
isinstancestr
ValueErrortype__name___parse_robotstxt)clscontentos      r$   parseProtego.parseC   sG    E'3''>tG}?U?U>VWXX	7#r&   c                P   UR                  5       n/ nS nU GHZ  nU =R                  S-  sl        UR                  S5      nUS:w  a  USU R                  5       nUR                  5       nU(       d  M\  UR                  S5      S:w  a  UR	                  SS5      u  pxOuUR	                  S5      n	[        U	5      S:  a  M  U	S   n
[        S[        U	5      5       H3  n[        U
5      (       a  U
SR                  XS  5      p  OU
SX   -   -  n
M5     M  UR                  5       R                  5       nUR                  5       nU(       d  UnGM5  U(       d:  U[        ;  a0  U[        ;  a&  [        R                  SU R                   S	35        GMv  U =R                  S-  sl        U[        ;   a  U(       a  U[        ;  a  / nUR                  5       R                  5       nS nUS
:w  a  S
U;   a  UR                  S
S5      nX/nU H  nU(       d  M  U R                   R#                  US 5      nU(       a  UU;  a  UR%                  U5        U(       a  MO  ['        U 5      nUUl        UU R                   U'   UR%                  U5        M     OU[*        ;   a  U H  nUR-                  U5        M     OU[.        ;   a  U H  nUR1                  U5        M     OU[        ;   a  U R2                  R%                  U5        OwU[4        ;   a  U H
  nUUl        M     O\U[8        ;   a  U H
  nUUl        M     OAU[<        ;   a  Xl        O0U[@        ;   a  U H
  nUUl!        M     OU =RD                  S-  sl"        UnGM]     U R                   RG                  5        H  nURI                  5         M     g )Nr   #r   :    zRule at line z) without any user agent to enforce it on.* )%
splitlinesr0   findstripsplitlenranger%   joinlowerr   r   loggerdebugr2   replacer,   getappendr   
user_agentr   r   r   r   r.   r    crawl_delayr!   request_rater"   r-   _VISIT_TIME_DIRECTIVE
visit_timer1   valuesfinalize_rules)r4   r?   linescurrent_rule_setsprevious_rule_fieldlinehash_posr#   valuepartspossible_filedirX   user_agent_without_asteriskuser_agentsuarule_sets                    r$   r=   Protego._parse_robotstxtK   sh   ""$ -/ #D!!Q&! yy~H2~Ah'--/ ::<D yy~##zz#q1u 

3u:>!&qq#e*-A0@@'5sxxb	7Ju"cEHn4N	 . KKM'')EKKME &+# &!66!33#D$9$9#::cd &&!+&--'+3HH(*% #[[]002
.2+$
):2<2D2DS"2M/)G%B #0044R>HH4E$E)00:#8#+D>.0+08))"-)00: & ** 1HNN5) !2 -- 1H%%e, !2 ,,""))%000 1H+0H( !2 11 1H,1H) !2 /)"
// 1H*/H' !2 ,,1,"'] ` ))002H##% 3r&   c                  ^ U R                   (       d  gTU R                  ;   a  U R                  T   $ U4S jU R                   R                  5        5       n[        US S9u  p4U(       d  SU R                  T'   gX@R                  T'   U$ )z0Return the rule set with highest matching score.Nc              3  H   >#    U  H  oR                  T5      U4v   M     g 7fr   )
applies_to).0rsrX   s     r$   	<genexpr>1Protego._get_matching_rule_set.<locals>.<genexpr>   s!       
6P]]:&+6Ps   "c                    U S   $ r+    )ps    r$   <lambda>0Protego._get_matching_rule_set.<locals>.<lambda>   s    PQRSPTr&   )key)r,   r/   r]   max)r4   rX   score_rule_set_pairsmatch_scorematched_rule_sets    `   r$   _get_matching_rule_setProtego._get_matching_rule_set   s      ///))*55 
6:6G6G6N6N6P 
 ),,@n(U%15D"":.-=z*r&   c                V    U R                  U5      nU(       d  gUR                  U5      $ )zHReturn True if the user agent can fetch the URL, otherwise return False.T)r~   	can_fetch)r4   urlrX   r}   s       r$   r   Protego.can_fetch   s*    66zB))#..r&   c                L    U R                  U5      nU(       d  gUR                  $ )zfReturn the crawl delay specified for the user agent as a float.
If nothing is specified, return None.
N)r~   rY   r4   rX   r}   s      r$   rY   Protego.crawl_delay   s'      66zB+++r&   c                L    U R                  U5      nU(       d  gUR                  $ )zReturn the request rate specified for the user agent as a named tuple
RequestRate(requests, seconds, start_time, end_time). If nothing is
specified, return None.
N)r~   rZ   r   s      r$   rZ   Protego.request_rate   s'    
  66zB,,,r&   c                L    U R                  U5      nU(       d  gUR                  $ )zReturn the visit time specified for the user agent as a named tuple
VisitTime(start_time, end_time). If nothing is specified, return None.
N)r~   r\   r   s      r$   r\   Protego.visit_time   s'      66zB***r&   c                ,    [        U R                  5      $ )z7Get an iterator containing links to sitemaps specified.)iterr.   r3   s    r$   r   Protego.sitemaps   s     D&&''r&   c                    U R                   $ )zGet the preferred host.)r-   r3   s    r$   preferred_hostProtego.preferred_host  s     zzr&   c                4    U R                   U R                  -
  $ r   )r2   r1   r3   s    r$   _valid_directive_seenProtego._valid_directive_seen  s    ))D,H,HHHr&   )r-   r1   r/   r.   r2   r0   r,   N)returnNone)r?   r9   r   r
   )r?   r9   r   r   )rX   r9   r   z_RuleSet | None)r   r9   rX   r9   r   bool)rX   r9   r   zfloat | None)rX   r9   r   zRequestRate | None)rX   r9   r   zVisitTime | None)r   zIterator[str])r   z
str | None)r   int)r<   
__module____qualname____firstlineno__r5   classmethodrA   r=   r~   r   rY   rZ   r\   propertyr   r   r   __static_attributes__ru   r&   r$   r(   r(   1   sy    '"  {&z $/,-+ ( (   I Ir&   r(   )r#   r9   r   r   )
__future__r   loggingtypingr   _rulesetr   r   r   collections.abcr	   typing_extensionsr
   	getLoggerr<   rS   r   r   r   r   r    r!   r[   r"   r%   r(   ru   r&   r$   <module>r      s    "    6 6( ' 
		8	$  9 A 8 '7 )>: %|4 (XI XIr&   