
    !h=                        S r SSKJr  SSKJrJr  SSKJr  SSKr	\(       a  SSK
JrJr   " S S5      r S
     SS	 jjrg)z
Module for processing Sitemaps.

Note: The main purpose of this module is to provide support for the
SitemapSpider, its API is subject to change without notice.
    )annotations)TYPE_CHECKINGAny)urljoinN)IterableIteratorc                  ,    \ rS rSrSrSS jrSS jrSrg)	Sitemap   zPClass to parse Sitemap (type=urlset) and Sitemap Index
(type=sitemapindex) filesc                0   [         R                  R                  SSSS9n[         R                  R                  XS9U l        U R                  R
                  n[        U[        5      (       d   eSU;   a  UR                  SS5      S   U l	        g UU l	        g )NTF)recoverremove_commentsresolve_entities)parser}   )
lxmletree	XMLParser
fromstring_roottag
isinstancestrsplittype)selfxmltextxmlprts       [/root/1688_scrapy/alibaba-scraper/venv/lib/python3.13/site-packages/scrapy/utils/sitemap.py__init__Sitemap.__init__   s    zz##$ $ 
 ZZ**7*@
ZZ^^"c""""+."9BHHS!$Q'	"	    c              #  
  #    U R                   R                  5        H  n0 nUR                  5        H  nUR                  n[        U[        5      (       d   eSU;   a  UR                  SS5      S   OUnUS:X  aD  SUR                  ;   a2  UR                  S/ 5      R                  UR                  S5      5        M  M  UR                  (       a  UR                  R                  5       OSX%'   M     SU;   d  M  Uv   M     g 7f)Nr   r   linkhref	alternate loc)r   getchildrenr   r   r   r   attrib
setdefaultappendgettextstrip)r   elemdelr   names         r!   __iter__Sitemap.__iter__    s     JJ**,D "A&&(ff!#s++++/2czsyya(+s6>*["5<<RVVF^L + 24bggmmobAG ) z -s   C4D:	D)r   r   N)r   zstr | bytes)returnzIterator[dict[str, Any]])__name__
__module____qualname____firstlineno____doc__r"   r6   __static_attributes__ r$   r!   r
   r
      s    !=r$   r
   c              #    #    U R                  5        Hq  nUR                  5       R                  5       R                  S5      (       d  M7  UR	                  SS5      S   R                  5       n[        U=(       d    SU5      v   Ms     g7f)zPReturn an iterator over all sitemap urls contained in the given
robots.txt file
zsitemap::r   r)   N)
splitlineslstriplower
startswithr   r1   r   )robots_textbase_urllineurls       r!   sitemap_urls_from_robotsrJ   2   si      &&(;;= ++J77**S!$Q'--/C(.b#.. )s   AB	
?B	)N)rF   r   rG   z
str | Noner8   zIterable[str])r=   
__future__r   typingr   r   urllib.parser   
lxml.etreer   collections.abcr   r   r
   rJ   r?   r$   r!   <module>rP      sK    # %   2 @ .2	/	/ *	/	/r$   