
    !h                        S r SSKJr  SSKJrJr  SSKJrJr  SSK	J
r
Jr  SSKJr  SSKJr  SSKJrJr  SS	KJr  \(       a  SS
KJrJr   " S S\5      r " S S\5      rg)z
This module implements the XMLFeedSpider which is the recommended spider to use
for scraping from an XML feed.

See documentation in docs/topics/spiders.rst
    )annotations)TYPE_CHECKINGAny)NotConfiguredNotSupported)ResponseTextResponse)Selector)Spider)csviterxmliter_lxml)iterate_spider_output)IterableSequencec                      \ rS rSr% SrSrS\S'   SrS\S'   SrS	\S
'         SS jr	SS jr
SS jrSS jrSS jrSS jrSS jrSrg)XMLFeedSpider   a  
This class intends to be the base class for spiders that scrape
from XML feeds.

You can choose whether to parse the file using the 'iternodes' iterator, an
'xml' selector, or an 'html' selector.  In most cases, it's convenient to
use iternodes, since it's a faster and cleaner.
	iternodesstriteratoritemitertag zSequence[tuple[str, str]]
namespacesc                    U$ )aw  This overridable method is called for each result (item or request)
returned by the spider, and it's intended to perform any last time
processing required before returning the results to the framework core,
for example setting the item GUIDs. It receives a list of results and
the response which originated that results. It must return a list of
results (items or requests).
r   selfresponseresultss      Z/root/1688_scrapy/alibaba-scraper/venv/lib/python3.13/site-packages/scrapy/spiders/feed.pyprocess_resultsXMLFeedSpider.process_results%   s	         c                    U$ )zYou can override this function in order to make any changes you want
to into the feed before parsing it. This function must return a
response.
r   r   r   s     r    adapt_responseXMLFeedSpider.adapt_response1   s	    
 r#   c                R    [        U S5      (       a  U R                  X5      $ [        e)DThis method must be overridden with your custom spider functionality
parse_item)hasattrr*   NotImplementedError)r   r   selectors      r    
parse_nodeXMLFeedSpider.parse_node8   s$    4&&??866!!r#   c              #     #    U H6  n[        U R                  X5      5      nU R                  X5       Sh  vN   M8     g N	7f)a  This method is called for the nodes matching the provided tag name
(itertag). Receives the response and an Selector for each node.
Overriding this method is mandatory. Otherwise, you spider won't work.
This method must return either an item, a request, or a list
containing any of them.
N)r   r.   r!   )r   r   nodesr-   rets        r    parse_nodesXMLFeedSpider.parse_nodes>   s;      H'(KLC++H::: :s   4AA 
Ac                   [        U S5      (       d  [        S5      eU R                  U5      nU R                  S:X  a  U R	                  U5      nOU R                  S:X  aZ  [        U[        5      (       d  [        S5      e[        USS9nU R                  U5        UR                  SU R                   35      nOuU R                  S:X  aZ  [        U[        5      (       d  [        S5      e[        USS9nU R                  U5        UR                  SU R                   35      nO[        S	5      eU R                  X5      $ )
Nr.   zBYou must define parse_node method in order to scrape this XML feedr   xmlzResponse content isn't text)typez//htmlzUnsupported node iterator)r+   r   r&   r   
_iternodes
isinstancer	   
ValueErrorr
   _register_namespacesxpathr   r   r3   )r   r   kwargsr1   r-   s        r    _parseXMLFeedSpider._parseJ   s   t\**T  &&x0==K'OOH-E]]e#h55 !>??u5H%%h/NNR~#67E]]f$h55 !>??v6H%%h/NNR~#67E:;;00r#   c              #  n   #    [        XR                  5       H  nU R                  U5        Uv   M     g 7fN)r   r   r<   )r   r   nodes      r    r9   XMLFeedSpider._iternodese   s+      <<8D%%d+J 9s   35c                P    U R                    H  u  p#UR                  X#5        M     g rB   )r   register_namespace)r   r-   prefixuris       r    r<   "XMLFeedSpider._register_namespacesj   s     ??KF''4 +r#   Nr   r   r   Iterable[Any]returnrK   r   r   rL   r   )r   r   r-   r
   rL   r   )r   r   r1   Iterable[Selector]rL   r   r   r   r>   r   rL   r   )r   r   rL   rN   )r-   r
   rL   None)__name__
__module____qualname____firstlineno____doc__r   __annotations__r   r   r!   r&   r.   r3   r?   r9   r<   __static_attributes__r   r#   r    r   r      sc      HcGS,.J).
 
+8
	
"
;16
5r#   r   c                      \ rS rSr% SrSrS\S'   SrS\S'   SrS\S'         SS	 jr	SS
 jr
SS jrSS jrSS jrSrg)CSVFeedSpidero   a  Spider for parsing CSV feeds.
It receives a CSV file in a response; iterates through each of its rows,
and calls parse_row with a dict containing each field's data.

You can set some options regarding the CSV file, such as the delimiter, quotechar
and the file's headers.
Nz
str | None	delimiter	quotecharzlist[str] | Noneheadersc                    U$ z<This method has the same purpose as the one in XMLFeedSpiderr   r   s      r    r!   CSVFeedSpider.process_results   s	     r#   c                    U$ r_   r   r%   s     r    r&   CSVFeedSpider.adapt_response   s    r#   c                    [         e)r)   )r,   )r   r   rows      r    	parse_rowCSVFeedSpider.parse_row   s    !!r#   c              #     #    [        XR                  U R                  U R                  S9 H6  n[	        U R                  X5      5      nU R                  X5       Sh  vN   M8     g N	7f)zReceives a response and a dict (representing each row) with a key for
each provided (or detected) header of the CSV file.  This spider also
gives the opportunity to override adapt_response and
process_results methods for pre and post-processing purposes.
)r\   N)r   r[   r]   r\   r   re   r!   )r   r   rd   r2   s       r    
parse_rowsCSVFeedSpider.parse_rows   sW      nndlldnn
C (x(EFC++H:::	
 ;s   AA)A'
A)c                ~    [        U S5      (       d  [        S5      eU R                  U5      nU R                  U5      $ )Nre   zAYou must define parse_row method in order to scrape this CSV feed)r+   r   r&   rh   )r   r   r>   s      r    r?   CSVFeedSpider._parse   s@    t[))S  &&x0x((r#   r   rJ   rM   )r   r   rd   zdict[str, str]rL   r   )r   r   rL   r   rO   )rQ   rR   rS   rT   rU   r[   rV   r\   r]   r!   r&   re   rh   r?   rW   r   r#   r    rY   rY   o   sg     	 z  	 z  !%G$ +8	";)r#   rY   N)rU   
__future__r   typingr   r   scrapy.exceptionsr   r   scrapy.httpr   r	   scrapy.selectorr
   scrapy.spidersr   scrapy.utils.iteratorsr   r   scrapy.utils.spiderr   collections.abcr   r   r   rY   r   r#   r    <module>ru      sE    # % 9 . $ ! 8 52U5F U5p2)F 2)r#   