
    !h.                       S r SSKJr  SSKrSSKJrJrJr  SSKJ	r	J
r
JrJrJr  SSKJr  SSKJrJrJr  SSKJr  SS	KJr  SS
KJr  SSKJr  SSKJr  \	(       a  SSKJrJr  SSK J!r!  SSK"J#r#  SSK$J%r%  \" S5      r&\\'\   /\'\   4   r(\\\/\\   4   r)SS jr*SS jr+SS jr,\" 5       r- " S S5      r. " S S\5      r/g)z
This modules implements the CrawlSpider which is the recommended spider to use
for scraping typical websites that requires crawling pages.

See documentation in docs/topics/spiders.rst
    )annotationsN)AsyncIterator	AwaitableCallable)TYPE_CHECKINGAnyOptionalTypeVarcast)Failure)HtmlResponseRequestResponse)Link)LinkExtractor)Spider)collect_asyncgeniterate_spider_output)IterableSequence)Self)Crawler)	CallbackT_Tc                    U $ N )xs    [/root/1688_scrapy/alibaba-scraper/venv/lib/python3.13/site-packages/scrapy/spiders/crawl.py	_identityr!   &   s    H    c                    U $ r   r   )requestresponses     r    _identity_process_requestr&   *   s    Nr"   c                j    [        U 5      (       a  U $ [        U [        5      (       a  [        XS 5      $ g r   )callable
isinstancestrgetattr)methodspiders     r    _get_methodr.   .   s/    &#vt,,r"   c                  T    \ rS rSr       S             SS jjrSS jrSrg)	Rule9   Nc                    U=(       d    [         U l        X l        Xpl        U=(       d    0 U l        U=(       d    [
        U l        U=(       d    [        U l        Ub  X@l	        g U(       + U l	        g r   )
_default_link_extractorlink_extractorcallbackerrback	cb_kwargsr!   process_linksr&   process_requestfollow)selfr4   r5   r7   r:   r8   r9   r6   s           r    __init__Rule.__init__:   sY     .<-V?V08>E)2b2?2L988 	 '-&8F(lr"   c                `   [        S[        U R                  U5      5      U l        [        [        [        /[
        4   [        U R                  U5      5      U l        [        [        [        U R                  U5      5      U l        [        [        [        U R                  U5      5      U l
        g )Nr   )r   r.   r5   r   r   r   r6   ProcessLinksTr8   ProcessRequestTr9   )r;   r-   s     r    _compileRule._compileN   s    [+dmmV*LMHgY^4k$,,PV6WX!;t'9'96B
  $[)=)=vF 
r"   )r5   r7   r6   r:   r4   r8   r9   )NNNNNNN)r4   zLinkExtractor | Noner5   zCallbackT | str | Noner7   zdict[str, Any] | Noner:   zbool | Noner8   zProcessLinksT | str | Noner9   zProcessRequestT | str | Noner6   z%Callable[[Failure], Any] | str | None)r-   r   returnNone)__name__
__module____qualname____firstlineno__r<   rA   __static_attributes__r   r"   r    r0   r0   9   so     04+/+/"488<9=K,K )K )	K
 K 2K 6K 7K(	
r"   r0   c                     ^  \ rS rSr% SrS\S'   S\S'   S\S'   SU 4S	 jjrSS
 jrSS jr      SS jr	SS jr
SS jrSS jrSS jr S         SS jjr      SS jrS S jr\S!U 4S jj5       rSrU =r$ )"CrawlSpiderZ   r   zSequence[Rule]rulesz
list[Rule]_rulesbool_follow_linksc                F   > [         TU ]  " U0 UD6  U R                  5         g r   )superr<   _compile_rules)r;   akw	__class__s      r    r<   CrawlSpider.__init___   s!    !"r"r"   c                :    U R                  UU R                  USS9$ )NT)r%   r5   r7   r:   )_parse_responseparse_start_urlr;   r%   kwargss      r    _parseCrawlSpider._parsec   s,    ##))	 $ 
 	
r"   c                    / $ r   r   r[   s      r    rZ   CrawlSpider.parse_start_urlk   s    	r"   c                    U$ r   r   )r;   r%   resultss      r    process_resultsCrawlSpider.process_resultsn   s	     r"   c                n    [        UR                  U R                  U R                  XR                  S.S9$ )N)rule	link_text)urlr5   r6   meta)r   rh   	_callback_errbacktext)r;   
rule_indexlinks      r    _build_requestCrawlSpider._build_requests   s.    ^^MM$99=	
 	
r"   c              #    #    [        U[        5      (       d  g [        5       n[        U R                  5       H  u  p4UR
                  R                  U5       Vs/ s H  nXR;  d  M
  UPM     nn[        [        UR                  5      " U5       HG  nUR                  U5        U R                  X75      n[        [        UR                  5      " X5      v   MI     M     g s  snf 7fr   )r)   r   set	enumeraterN   r4   extract_linksr   r?   r8   addro   r@   r9   )	r;   r%   seenrm   rf   lnklinksrn   r$   s	            r    _requests_to_followCrawlSpider._requests_to_follow{   s     (L11% )$++ 6J  ..<<XF!FC? F  !
 ]D,>,>?F--j??D,@,@A'TT G !7!s   AC$	C'C-A7C$c                    U R                   [        [        UR                  S   5         nU R	                  U[        SUR
                  5      0 UR                  EUEUR                  5      $ )Nrf   r   )rN   r   intri   rY   r5   r7   r:   )r;   r%   r7   rf   s       r    rj   CrawlSpider._callback   s\    {{4X]]6%:;<##dmm,+t~~++KK	
 	
r"   c                    U R                   [        [        UR                  R                  S   5         nU R                  U[        [        [        /[        4   UR                  5      5      $ )Nrf   )
rN   r   r|   r$   ri   _handle_failurer   r   r   r6   )r;   failurerf   s      r    rk   CrawlSpider._errback   sT    {{4W__%9%9&%ABC##T(G9c>2DLLA
 	
r"   c                 #    U(       a  U" U40 UD6=(       d    Sn[        U[        5      (       a  [        U5      I S h  vN nO[        U[        5      (       a
  UI S h  vN nU R	                  X5      n[        U5       H  nU7v   M
     U(       a0  U R                  (       a  U R                  U5       H  nU7v   M
     g g g  N Nf7fNr   )r)   r   r   r   rc   r   rP   ry   )r;   r%   r5   r7   r:   cb_resrequest_or_items          r    rY   CrawlSpider._parse_response   s      h4)4:F&-00/77FI..%))(;F#8#@%% $A d((#'#;#;H#E%% $F )6 8%s#   =CC CC A%CCc              #  h   #    U(       a%  U" U5      =(       d    Sn[        U5       S h  vN   g g  N7fr   r   )r;   r   r6   rb   s       r    r   CrawlSpider._handle_failure   s/      g&,"G,W555 5s   '202c                    / U l         U R                   HP  nU R                   R                  [        R                  " U5      5        U R                   S   R	                  U 5        MR     g )N)rN   rM   appendcopyrA   )r;   rf   s     r    rS   CrawlSpider._compile_rules   sG    JJDKKtyy/KKO$$T* r"   c                r   > [         TU ]  " U/UQ70 UD6nUR                  R                  SS5      Ul        U$ )NCRAWLSPIDER_FOLLOW_LINKST)rR   from_crawlersettingsgetboolrP   )clscrawlerargsr\   r-   rV   s        r    r   CrawlSpider.from_crawler   sA    %g???&//77& 
 r"   )rN   )rT   r   rU   r   )r%   r   r\   r   rC   r   )r%   r   rb   Iterable[Any]rC   r   )rm   r|   rn   r   rC   r   )r%   r   rC   zIterable[Request | None])r%   r   r7   r   rC   r   )r   r   rC   r   )T)
r%   r   r5   zCallbackT | Noner7   zdict[str, Any]r:   rO   rC   zAsyncIterator[Any])r   r   r6   zCallable[[Failure], Any] | NonerC   r   )rC   rD   )r   r   r   r   r\   r   rC   r   )rE   rF   rG   rH   rM   __annotations__r<   r]   rZ   rc   ro   ry   rj   rk   rY   r   rS   classmethodr   rI   __classcell__)rV   s   @r    rK   rK   Z   s    E>
 +8	

U

 && #& "	&
 & 
&*66)H6	6+  r"   rK   )r   r   rC   r   )r$   r   r%   r   rC   zRequest | None)r,   zCallable | str | Noner-   r   rC   zCallable | None)0__doc__
__future__r   r   collections.abcr   r   r   typingr   r   r	   r
   r   twisted.python.failurer   scrapy.httpr   r   r   scrapy.linkr   scrapy.linkextractorsr   scrapy.spidersr   scrapy.utils.asyncgenr   scrapy.utils.spiderr   r   r   typing_extensionsr   scrapy.crawlerr   scrapy.http.requestr   r   listr?   r@   r!   r&   r.   r3   r0   rK   r   r"   r    <module>r      s    #  > > > > * 7 7  / ! 2 52 '&- T]$t*tDz12GX.0AAB (/ 
 
Bg& gr"   