from __future__ import annotations

import logging
import sys
from abc import ABCMeta, abstractmethod
from typing import TYPE_CHECKING

from scrapy.utils.python import to_unicode

if TYPE_CHECKING:
    from typing_extensions import Self

    from scrapy import Spider
    from scrapy.crawler import Crawler

logger = logging.getLogger(__name__)


def decode_robotstxt(
    robotstxt_body: bytes, spider: Spider | None, to_native_str_type: bool = False
) -> str:
    try:
        if to_native_str_type:
            body_decoded = to_unicode(robotstxt_body)
        else:
            body_decoded = robotstxt_body.decode("utf-8", errors="ignore")
    except UnicodeDecodeError:
        # An undecodable robots.txt is downgraded to an empty file, which the
        # parser backends below treat as "allow all".
        logger.warning(
            "Failure while parsing robots.txt. File either contains garbage or "
            "is in an encoding other than UTF-8, treating it as an empty file.",
            exc_info=sys.exc_info(),
            extra={"spider": spider},
        )
        body_decoded = ""
    return body_decoded

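# A minimal usage sketch (illustrative comment, not part of upstream Scrapy):
# the helper never raises on bad input, so callers can hand the result
# straight to a parser backend. Undecodable bytes are dropped, and a hard
# decoding failure is logged and mapped to an empty robots.txt:
#
#     decode_robotstxt(b"User-agent: *\nDisallow: /private\n", spider=None)
#     # -> "User-agent: *\nDisallow: /private\n"
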
class RobotParser(metaclass=ABCMeta):
    @classmethod
    @abstractmethod
    def from_crawler(cls, crawler: Crawler | None, robotstxt_body: bytes) -> Self:
        """Parse the content of a robots.txt_ file as bytes. This must be a class method.
        It must return a new instance of the parser backend.

        :param crawler: crawler which made the request
        :type crawler: :class:`~scrapy.crawler.Crawler` instance

        :param robotstxt_body: content of a robots.txt_ file.
        :type robotstxt_body: bytes
        """

    @abstractmethod
    def allowed(self, url: str | bytes, user_agent: str | bytes) -> bool:
        """Return ``True`` if ``user_agent`` is allowed to crawl ``url``, otherwise return ``False``.

        :param url: Absolute URL
        :type url: str or bytes

        :param user_agent: User agent
        :type user_agent: str or bytes
        """

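# A minimal sketch of a custom backend (illustrative comment, not shipped
# with Scrapy): any class satisfying the interface above can serve as a
# parser. A real implementation would inspect ``robotstxt_body`` instead of
# unconditionally allowing every URL:
#
#     class AllowAllRobotParser(RobotParser):
#         @classmethod
#         def from_crawler(cls, crawler, robotstxt_body):
#             return cls()
#
#         def allowed(self, url, user_agent):
#             return True
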
class PythonRobotParser(RobotParser):
    # Backend based on the standard library's urllib.robotparser.
    def __init__(self, robotstxt_body: bytes, spider: Spider | None):
        from urllib.robotparser import RobotFileParser

        self.spider: Spider | None = spider
        body_decoded = decode_robotstxt(
            robotstxt_body, spider, to_native_str_type=True
        )
        self.rp: RobotFileParser = RobotFileParser()
        self.rp.parse(body_decoded.splitlines())

    @classmethod
    def from_crawler(cls, crawler: Crawler | None, robotstxt_body: bytes) -> Self:
        spider = None if not crawler else crawler.spider
        return cls(robotstxt_body, spider)

    def allowed(self, url: str | bytes, user_agent: str | bytes) -> bool:
        user_agent = to_unicode(user_agent)
        url = to_unicode(url)
        return self.rp.can_fetch(user_agent, url)

class RerpRobotParser(RobotParser):
    # Backend based on the third-party robotexclusionrulesparser package.
    def __init__(self, robotstxt_body: bytes, spider: Spider | None):
        from robotexclusionrulesparser import RobotExclusionRulesParser

        self.spider: Spider | None = spider
        self.rp: RobotExclusionRulesParser = RobotExclusionRulesParser()
        body_decoded = decode_robotstxt(robotstxt_body, spider)
        self.rp.parse(body_decoded)

    @classmethod
    def from_crawler(cls, crawler: Crawler | None, robotstxt_body: bytes) -> Self:
        spider = None if not crawler else crawler.spider
        return cls(robotstxt_body, spider)

    def allowed(self, url: str | bytes, user_agent: str | bytes) -> bool:
        user_agent = to_unicode(user_agent)
        url = to_unicode(url)
        return self.rp.is_allowed(user_agent, url)

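# Note (illustrative comment): unlike PythonRobotParser, this backend needs
# the third-party ``robotexclusionrulesparser`` distribution, which is why
# the import is deferred into ``__init__``. Its ``is_allowed()`` takes
# (user_agent, url), the reverse of Protego's ``can_fetch()`` below, but the
# ``allowed()`` signature stays identical across backends.
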
class ProtegoRobotParser(RobotParser):
    # Backend based on the Protego library.
    def __init__(self, robotstxt_body: bytes, spider: Spider | None):
        from protego import Protego

        self.spider: Spider | None = spider
        body_decoded = decode_robotstxt(robotstxt_body, spider)
        self.rp: Protego = Protego.parse(body_decoded)

    @classmethod
    def from_crawler(cls, crawler: Crawler | None, robotstxt_body: bytes) -> Self:
        spider = None if not crawler else crawler.spider
        return cls(robotstxt_body, spider)

    def allowed(self, url: str | bytes, user_agent: str | bytes) -> bool:
        user_agent = to_unicode(user_agent)
        url = to_unicode(url)
        return self.rp.can_fetch(url, user_agent)