
    !hp                    
   S r SSKJr  SSKrSSKJr  SSKJrJr  SSK	J
r
Jr  SSKJrJr  SSKJr  SS	KJr  SS
KJr  SSKJr  \(       a  SSKJr  SSKJr  SSKJr  SSKJr  SSKJ r   \RB                  " \"5      r# " S S5      r$g)z
This is a middleware to respect robots.txt policies. To activate it you must
enable this middleware and enable the ROBOTSTXT_OBEY setting.

    )annotationsN)TYPE_CHECKING)DeferredmaybeDeferred)IgnoreRequestNotConfigured)RequestResponse)NO_CALLBACK)urlparse_cached)failure_to_exc_info)load_object)Failure)Self)Spider)Crawler)RobotParserc                      \ rS rSr% SrS\S'   SS jr\SS j5       r      SS jr	        SS jr
      SS	 jrSS
 jrSS jrSS jrSrg)RobotsTxtMiddleware#   i  intDOWNLOAD_PRIORITYc                   UR                   R                  S5      (       d  [        eUR                   R                  SS5      U l        UR                   R                  SS 5      U l        Xl        0 U l        [        UR                   R                  S5      5      U l	        U R                  R                  U R                  S5        g )NROBOTSTXT_OBEY
USER_AGENTScrapyROBOTSTXT_USER_AGENTROBOTSTXT_PARSER    )settingsgetboolr   get_default_useragent_robotstxt_useragentcrawler_parsersr   _parserimplfrom_crawler)selfr%   s     m/root/1688_scrapy/alibaba-scraper/venv/lib/python3.13/site-packages/scrapy/downloadermiddlewares/robotstxt.py__init__RobotsTxtMiddleware.__init__&   s    ''(899'.'7'7';';L('S070@0@0D0D"D1
! !(VX(3  !34)

 	%%dllC8r   c                    U " U5      $ N )clsr%   s     r*   r(    RobotsTxtMiddleware.from_crawler6   s    7|r   c                0   UR                   R                  S5      (       a  g UR                  R                  S5      (       d   UR                  R                  S5      (       a  g [	        U R
                  UU5      nUR                  U R                  X5      nU$ )Ndont_obey_robotstxtzdata:zfile:)metar"   url
startswithr   robot_parseraddCallbackprocess_request_2)r)   requestspiderdd2s        r*   process_request#RobotsTxtMiddleware.process_request:   s     <<122;;!!'**gkk.D.DW.M.M*7+

 ]]4+A+A7S	r   c                   Uc  g U R                   nU(       d+  UR                  R                  SU R                  5      nUc   eUR	                  UR
                  U5      (       df  [        R                  SSU0SU0S9  U R                  R                  (       d   eU R                  R                  R                  S5        [        S5      eg )Ns
   User-Agentz$Forbidden by robots.txt: %(request)sr:   r;   )extrazrobotstxt/forbiddenzForbidden by robots.txt)r$   headersr"   r#   allowedr5   loggerdebugr%   stats	inc_valuer   )r)   rpr:   r;   	useragents        r*   r9   %RobotsTxtMiddleware.process_request_2I   s     :(,(A(A	++M4;R;RSI(((zz'++y11LL6G$(  
 <<%%%%LL(()>? 9:: 2r   c                6  ^
 [        U5      nUR                  nX@R                  ;  Ga'  [        5       U R                  U'   UR                   SUR                   S3n[        UU R                  SS0[        S9nU R                  R                  (       d   eU R                  R                  (       d   eU R                  R                  R                  U5      nUR                  U R                  XB5        UR                  U R                  Xb5        UR                  U R                   U5        U R                  R                  R#                  S5        U R                  U   n[%        U[        5      (       a%  [        5       m
SU
4S jjn	UR                  U	5        T
$ U$ )	Nz://z/robots.txtr3   T)priorityr4   callbackzrobotstxt/request_countc                *   > TR                  U 5        U $ r.   )rM   )resultr<   s    r*   cb,RobotsTxtMiddleware.robot_parser.<locals>.cbx   s    

6"r   )rO   RobotParser | NonereturnrR   )r   netlocr&   r   schemer	   r   r   r%   enginerF   downloadr8   _parse_robots
addErrback	_logerror_robots_errorrG   
isinstance)r)   r:   r;   r5   rT   	robotsurl	robotsreqdfdparserrP   r<   s             @r*   r7    RobotsTxtMiddleware.robot_parser]   sC    g&&$,JDMM&!::,c#**[AI//+T2$	I <<&&&&<<%%%%,,%%..y9COOD..?NN4>>9=NN4--v6LL(()BCv&fh''.6jA r"Hr   c                    UR                   [        La-  [        R                  SX!R                  S.[        U5      SU0S9  U$ )Nz.Error downloading %(request)s: %(f_exception)s)r:   f_exceptionr;   )exc_inforA   )typer   rD   errorvaluer   )r)   failurer:   r;   s       r*   rZ   RobotsTxtMiddleware._logerror   sC    <<},LL@#MMB,W5(	   r   c                   U R                   R                  (       d   eU R                   R                  R                  S5        U R                   R                  R                  SUR                   35        U R                  R                  U R                   UR                  5      nU R                  U   n[        U[        5      (       d   eX@R                  U'   UR                  U5        g )Nzrobotstxt/response_countz robotstxt/response_status_count/)r%   rF   rG   statusr'   r(   bodyr&   r\   r   rM   )r)   responserT   r;   rH   rp_dfds         r*   rX   !RobotsTxtMiddleware._parse_robots   s    ||!!!!$$%?@$$.x.?@	
 **4<<Gv&&(++++ "fr   c                X   UR                   [        LaQ  SUR                    3nU R                  R                  (       d   eU R                  R                  R	                  U5        U R
                  U   n[        U[        5      (       d   eS U R
                  U'   UR                  S 5        g )Nzrobotstxt/exception_count/)	re   r   r%   rF   rG   r&   r\   r   rM   )r)   rh   rT   keyrn   s        r*   r[   !RobotsTxtMiddleware._robots_error   s    <<},.w||n=C<<%%%%LL((-v&&(++++ $fr   )r#   r'   r&   r$   r%   N)r%   r   )r%   r   rS   r   )r:   r	   r;   r   rS   zDeferred[None] | None)rH   rR   r:   r	   r;   r   rS   None)r:   r	   r;   r   rS   z1RobotParser | Deferred[RobotParser | None] | None)rh   r   r:   r	   r;   r   rS   r   )rm   r
   rT   strr;   r   rS   rs   )rh   r   rT   rt   rS   rs   )__name__
__module____qualname____firstlineno__r   __annotations__r+   classmethodr(   r>   r9   r7   rZ   rX   r[   __static_attributes__r/   r   r*   r   r   #   s    !s!9   (.	;$;/6;@F;	;(!!(.!	:!F
r   r   )%__doc__
__future__r   loggingtypingr   twisted.internet.deferr   r   scrapy.exceptionsr   r   scrapy.httpr	   r
   scrapy.http.requestr   scrapy.utils.httpobjr   scrapy.utils.logr   scrapy.utils.miscr   twisted.python.failurer   typing_extensionsr   scrapyr   scrapy.crawlerr   scrapy.robotstxtr   	getLoggerru   rD   r   r/   r   r*   <module>r      s\    #    : : ) + 0 0 ). '&, 
		8	${ {r   