
    h                        S SK Jr  S SKrS SKJrJr  SSKJr  SSKJ	r	J
r
JrJr  \(       a  S SKJr  SSKJr  \R"                  " \5      r " S	 S
\5      r " S S\5      r " S S\5      r " S S5      rg)    )annotationsN)TYPE_CHECKING
NamedTuple   )_URLPattern)
_hexescape_parse_time_period_quote_path_quote_pattern)time)Protegoc                  >    \ rS rSr% S\S'   S\S'   S\S'   S\S'   Srg	)
RequestRate   intrequestssecondsztime | None
start_timeend_time N__name__
__module____qualname____firstlineno____annotations____static_attributes__r       W/root/1688_scrapy/alibaba-scraper/venv/lib/python3.13/site-packages/protego/_ruleset.pyr   r      s    MLr   r   c                  *    \ rS rSr% S\S'   S\S'   Srg)	VisitTime   r   r   r   r   Nr   r   r   r   r!   r!      s    Nr   r!   c                  *    \ rS rSr% S\S'   S\S'   Srg)_Rule#   strfieldr   valuer   Nr   r   r   r   r$   r$   #   s    Jr   r$   c                     \ rS rSrSrSS jrSS jrSS jrSS jrSS jr	SS jr
\SS	 j5       r\R                  SS
 j5       r\SS j5       r\R                  SS j5       r\SS j5       r\R                  SS j5       rSrg)_RuleSet(   z3Internal class which stores rules for a user agent.c                V    S U l         / U l        S U l        S U l        S U l        Xl        g )N)
user_agent_rules_crawl_delay	_req_rate_visit_time_parser_instance)selfparser_instances     r   __init___RuleSet.__init__+   s-    &*#%*.-1-1)8r   c                    U R                   c   eUR                  5       R                  5       nU R                   S:X  a  gU R                   U;   a  [        U R                   5      $ g)zReturn matching score.*r   r   )r-   striplowerlen)r3   	robotnames     r   
applies_to_RuleSet.applies_to3   sT    ***OO%++-	??c!??i't''r   c                >   SU;   a*  U R                  UR                  S[        S5      5      5        [        U5      nU(       d  g U R                  R                  [        S[        U5      S95        UR                  S5      (       a  U R                  US S S-   5        g g )N$allowr'   r(   z/index.htmli)	rA   replacer   r   r.   appendr$   r   endswithr3   patterns     r   rA   _RuleSet.allow=   s    '>JJwsJsO<= )5wk'6JKL M**JJwt}s*+ +r   c                    SU;   a*  U R                  UR                  S[        S5      5      5        [        U5      nU(       d  g U R                  R                  [        S[        U5      S95        g )Nr@   disallowrB   )rJ   rC   r   r   r.   rD   r$   r   rF   s     r   rJ   _RuleSet.disallowJ   sR    '>MM'//#z#?@ )5zW9MNOr   c                :    U R                   R                  S SS9  g )Nc                L    U R                   R                  U R                  S:H  4$ )NrA   )r(   priorityr'   )rs    r   <lambda>)_RuleSet.finalize_rules.<locals>.<lambda>U   s    177++QWW-?@r   T)keyreverse)r.   sortr3   s    r   finalize_rules_RuleSet.finalize_rulesS   s    @$ 	 	
r   c                    [        U5      nSnU R                   H8  nUR                  R                  U5      (       d  M%  UR                  S:X  a  Sn  U$    U$ )z!Return if the url can be fetched.TrJ   F)r
   r.   r(   matchr'   )r3   urlallowedrules       r   	can_fetch_RuleSet.can_fetchX   sT    #KKDzz$$::+#G  
 r   c                    U R                   $ )z'Get & set crawl delay for the rule set.)r/   rU   s    r   crawl_delay_RuleSet.crawl_delayc   s        r   c                     [        U5      U l        g ! [         a3    [        R	                  SU R
                  R                   SU S35         g f = f)NMalformed rule at line z : cannot set crawl delay to ''. Ignoring this rule.)floatr/   
ValueErrorloggerdebugr2   _total_line_seen)r3   delays     r   r`   ra   h   sV    	 %eD 	LL)$*?*?*P*P)Q R..3W4JL	s    :AAc                    U R                   $ )z(Get & set request rate for the rule set.)r0   rU   s    r   request_rate_RuleSet.request_rates   s     ~~r   c                    UR                  5       n[        U5      S:X  a  Uu  p4OUS   SpCUR                  S5      u  pVUS   R                  5       n[        U5      [        US S 5      pUS:X  a  U	S-  n	OUS:X  a  U	S	-  n	OUS
:X  a  U	S-  n	S n
S nU(       a  [	        U5      u  p[        XX5      U l        g ! [
         a3    [        R                  SU R                  R                   SU S35         g f = f)N   r    /m<   hi  diQ rc   z" : cannot set request rate using 'rd   )splitr;   r:   r   r	   	Exceptionrg   rh   r2   ri   r   r0   )r3   r(   partsratetime_periodrequests_strseconds_str	time_unitr   r   r   r   s               r   rl   rm   x   s   	KKME5zQ$)!k$)!Hbk(,

3%L#B--/I #L 13{3B7G3HgC2c!4c!5 JH'9+'F$
 %X
M  	LL)$*?*?*P*P)Q R2278NP 	s   B#B7 7:C43C4c                    U R                   $ )z&Get & set visit time for the rule set.)r1   rU   s    r   
visit_time_RuleSet.visit_time   s     r   c                     [        USS9u  p#[        X#5      U l        g ! [         a3    [        R                  SU R                  R
                   SU S35         g f = f)N )	separatorrc   z  : cannot set visit time using 'rd   )r	   rx   rg   rh   r2   ri   r!   r1   )r3   r(   r   r   s       r   r   r      so    	#5es#K J %Z:  	LL)$*?*?*P*P)Q R005w6LN 	s    :AA)r/   r2   r0   r.   r1   r-   N)r4   r   )r<   r&   returnr   )rG   r&   r   None)r   r   )rZ   r&   r   bool)r   zfloat | None)rj   r&   r   r   )r   zRequestRate | None)r(   r&   r   r   )r   zVisitTime | None)r   r   r   r   __doc__r5   r=   rA   rJ   rV   r]   propertyr`   setterrl   r   r   r   r   r   r*   r*   (   s    =9,P

	 ! !     N NB     	; 	;r   r*   )
__future__r   loggingtypingr   r   _urlpatternr   _utilsr   r	   r
   r   datetimer   _protegor   	getLoggerr   rg   r   r!   r$   r*   r   r   r   <module>r      sh    "  , $  ! 
		8	$* 
 
J 
A; A;r   