
    !hm                       S SK Jr  S SKrS SKrS SKJrJrJr  S SKJ	r	J
r
Jr  S SKJrJrJr  S SKJr  S SKJr  S SKJrJr  S S	KJrJr  \	(       a  S S
KJr  S SKJr  S SKJr  \R@                  " \!5      r" " S S\5      r#SS jr$SSS jjr%g)    )annotationsN)AsyncIteratorIterableSequence)TYPE_CHECKINGAnycast)RequestResponseXmlResponse)Spider)_DecompressionMaxSizeExceeded)gunzipgzip_magic_number)Sitemapsitemap_urls_from_robots)Self)Crawler)	CallbackTc                     ^  \ rS rSr% SrS\S'   S/rS\S'   S/rS	\S
'   SrS\S'   S\S'   S\S'   \	SU 4S jj5       r
SU 4S jjrSS jrSS jr    SS jrSS jrSS jrSrU =r$ ) SitemapSpider    zSequence[str]sitemap_urls) parsez7Sequence[tuple[re.Pattern[str] | str, str | CallbackT]]sitemap_rulesr   zSequence[re.Pattern[str] | str]sitemap_followFboolsitemap_alternate_linksint	_max_size
_warn_sizec                   > [         TU ]  " U/UQ70 UD6n[        USUR                  R	                  S5      5      Ul        [        USUR                  R	                  S5      5      Ul        U$ )Ndownload_maxsizeDOWNLOAD_MAXSIZEdownload_warnsizeDOWNLOAD_WARNSIZE)superfrom_crawlergetattrsettingsgetintr"   r#   )clscrawlerargskwargsspider	__class__s        ]/root/1688_scrapy/alibaba-scraper/venv/lib/python3.13/site-packages/scrapy/spiders/sitemap.pyr*   SitemapSpider.from_crawler$   so    %g???"&(>(>?Q(R
 $')?)?@S)T
     c                ^  > [         TU ]  " U0 UD6  / U l        U R                   HU  u  p4[	        U[
        5      (       a  [        S[        X5      5      nU R                  R                  [        U5      U45        MW     U R                   Vs/ s H  n[        U5      PM     snU l        g s  snf )Nr   )r)   __init___cbsr   
isinstancestrr	   r+   appendregexr   _follow)selfakwrcxr3   s         r4   r8   SitemapSpider.__init__/   s    !"r"=?	&&DA!S!!gd&67IIeAh]+ ' BFATAT.UATAuQxAT.U.Us   B*c               D   #    U R                  5        H  nU7v   M
     g 7fN)start_requests)r?   item_or_requests     r4   startSitemapSpider.start8   s     #224O!!  5s    c              #  `   #    U R                    H  n[        XR                  5      v   M     g 7frG   )r   r
   _parse_sitemap)r?   urls     r4   rH   SitemapSpider.start_requests<   s&     $$C#2233 %s   ,.c              #  $   #    U Sh  vN   g N7f)zThis method can be used to filter sitemap entries by their
attributes, for example, you can filter locs with lastmod greater
than a given date (see docs).
Nr   )r?   entriess     r4   sitemap_filterSitemapSpider.sitemap_filter@   s      s   c              #    ^#    UR                   R                  S5      (       a;  [        UR                  UR                   S9 H  n[	        X R
                  S9v   M     g U R                  U5      nUc  [        R                  SSU0SU 0S9  g [        U5      nU R                  U5      nUR                  S:X  aY  [        XPR                  5       H?  m[        U4S	 jU R                   5       5      (       d  M)  [	        TU R
                  S9v   MA     g UR                  S
:X  aW  [        XPR                  5       H=  mU R                    H*  u  pgUR#                  T5      (       d  M  [	        TUS9v     M;     M?     g g 7f)Nz/robots.txt)base_url)callbackz&Ignoring invalid sitemap: %(response)sresponser2   )extrasitemapindexc              3  D   >#    U  H  oR                  T5      v   M     g 7frG   )search).0rD   locs     r4   	<genexpr>/SitemapSpider._parse_sitemap.<locals>.<genexpr>\   s     ?,Q88C==,s    urlset)rN   endswithr   textr
   rM   _get_sitemap_bodyloggerwarningr   rR   typeiterlocr    anyr>   r9   r[   )	r?   rW   rN   bodysitrB   rC   r]   s	           @r4   rM   SitemapSpider._parse_sitemapI   s?    <<  ///Uc,?,?@@ V ))(3D|<*#T*  
 A$$Q'Bvv'"2'C'CDC?$,,???%cD4G4GHH E 8#"2'C'CDC $		88C==")#"::! !* E $s   C0E:7A(E:#E:c                   [        U[        5      (       a  UR                  $ [        U5      (       a  [	        UR                  5      nUR
                  R                  SU R                  5      nUR
                  R                  SU R                  5      n [        UR                  US9nX$s=:  a  [	        U5      ::  a+  O  U$ [        R                  U S[	        U5       SU S35        U$ UR                  R                  S5      (       d   UR                  R                  S	5      (       a  UR                  $ g! [         a     gf = f)
zcReturn the sitemap body contained in the given response,
or None if the response is not a sitemap.
r%   r'   )max_sizeNz  body size after decompression (z. B) is larger than the download warning size (z B).z.xmlz.xml.gz)r:   r   ri   r   lenmetagetr"   r#   r   r   rd   re   rN   ra   )r?   rW   uncompressed_sizern   	warn_sizeri   s         r4   rc   SitemapSpider._get_sitemap_bodye   s    h,,== X&& #HMM 2}}(();T^^LH ))*=tOIhmmh? !9D	9
 K	 j @T LAAJ4Q K <<  ((HLL,A,A),L,L== ' 1 s   D7 7
EE)r9   r>   )r/   r   r0   r   r1   r   returnr   )r@   r   rA   r   )ru   zAsyncIterator[Any])ru   Iterable[Request])rQ   Iterable[dict[str, Any]]ru   rw   )rW   r   ru   rv   )rW   r   ru   zbytes | None)__name__
__module____qualname____firstlineno__r   __annotations__r   r   r    classmethodr*   r8   rJ   rH   rR   rM   rc   __static_attributes____classcell__)r3   s   @r4   r   r      s    "$L-$NMJ  8:dN3:$)T)NO V"4/	!"8 r6   r   c                \    [        U [        5      (       a  [        R                  " U 5      $ U $ rG   )r:   r;   recompile)rD   s    r4   r=   r=      s"    !Szz!}Hr6   c              #  l   #    U  H(  nUS   v   U(       d  M  SU;   d  M  US    S h  vN   M*     g  N	7f)Nr]   	alternater   )rk   altds      r4   rg   rg      s8     h 3;!#~%% 
 &s   4442
4)rD   zre.Pattern[str] | strru   zre.Pattern[str])F)rk   rw   r   r   ru   zIterable[str])&
__future__r   loggingr   collections.abcr   r   r   typingr   r   r	   scrapy.httpr
   r   r   scrapy.spidersr   scrapy.utils._compressionr   scrapy.utils.gzr   r   scrapy.utils.sitemapr   r   typing_extensionsr   scrapy.crawlerr   scrapy.http.requestr   	getLoggerrx   rd   r   r=   rg   r   r6   r4   <module>r      sc    "  	 > = + + 6 6 ! C 5 B&&-			8	$jF jZ&r6   