
    !hcM                       S SK Jr  S SKrS SKrS SKrS SKrS SKJrJrJ	r	J
r
  S SKJrJrJrJr  S SKJr  S SKJrJr  S SKJr  S SKJr  S S	KJr  S S
KJr  S SKJrJrJ r   S SK!J"r"  S SK#J$r$J%r%J&r&J'r'J(r(J)r)  S SK*J+r+J,r,  S SK-J.r.J/r/  S SK0J1r1J2r2J3r3J4r4  \(       a   S SK5J6r6J7r7  S SK8J9r9  S SK:J;r;  S SK<J=r=  S SK>J?r?  \R                  " \A5      rB\	" S5      rC " S S5      rD " S S5      rE " S S\E5      rFg)    )annotationsN)TYPE_CHECKINGAnyTypeVarcast)DeferredDeferredListinlineCallbacksmaybeDeferred)verifyClass)Spidersignals)AddonManagerExecutionEngine)ExtensionManager)ISpiderLoader)BaseSettingsSettingsoverridden_settings)SignalManager)LogCounterHandlerconfigure_loggingget_scrapy_root_handlerinstall_scrapy_root_handlerlog_reactor_infolog_scrapy_info)build_from_crawlerload_object)install_shutdown_handlerssignal_names)install_reactoris_asyncio_reactor_installed#verify_installed_asyncio_event_loopverify_installed_reactor)	GeneratorIterable)LogFormatter)SpiderLoaderProtocol)StatsCollector)RequestFingerprinterProtocol_Tc                      \ rS rSr  S     SS jjrSS jrSS jr\SS j5       rSS jr	SS jr
\SS	 j5       r\      SS
 j5       rSS jrSS jrSS jrSS jrSS jrSrg)Crawler7   Nc                   [        U[        5      (       a  [        S5      e[        U[        5      (       d  Uc  [	        U5      nXl        UR                  5       U l        U R
                  R                  U R                  5        U R                  5         [        U 5      U l        [        U 5      U l        X0l        SU l        SU l        S U l        S U l        S U l        S U l        S U l        S U l        g )Nz5The spidercls argument must be a class, not an objectF)
isinstancer   
ValueErrordictr   	spiderclscopysettingsupdate_settings_update_root_log_handlerr   addonsr   r   _init_reactorcrawling_started
extensionsstatslogformatterrequest_fingerprinterspiderengine)selfr4   r6   init_reactors       U/root/1688_scrapy/alibaba-scraper/venv/lib/python3.13/site-packages/scrapy/crawler.py__init__Crawler.__init__8   s     i((TUUh%%)9)H'0"*--/&&t}}5%%'$0$6&3D&9#/##37,0
15JN"%).2    c                F    [        5       b  [        U R                  5        g g N)r   r   r6   rC   s    rE   r8    Crawler._update_root_log_handlerW   s    "$0'6 1rH   c                  ^ U R                   R                  (       a  g U R                  R                  U R                   5        [	        U R                   S   5      " U 5      U l        [        X R                   R                  S5      S9m[        R                  R                  T5        U4S jU l        U R                  R                  U R                  [        R                  5        [	        U R                   S   5      nUR                  U 5      U l        [#        [	        U R                   S   5      U 5      U l        U R                   S   nU R                   S   nU R&                  (       a  U(       a  [)        X#5        OS	S
KJn  U(       a,  [/        U5        [1        5       (       a  U(       a  [3        U5        U R&                  (       d  U(       a
  [5        5         [6        R                  " U 5      U l        U R                   R;                  5         [=        [?        U R                   5      5      n[@        RC                  SS[D        RF                  " U5      05        g )NSTATS_CLASS	LOG_LEVEL)levelc                 B   > [         R                  R                  T 5      $ rJ   )loggingrootremoveHandler)handlers   rE   <lambda>)Crawler._apply_settings.<locals>.<lambda>g   s    (B(B7(KrH   LOG_FORMATTERREQUEST_FINGERPRINTER_CLASSTWISTED_REACTORASYNCIO_EVENT_LOOPr   reactorz!Overridden settings:
%(settings)sr6   )$r6   frozenr9   load_settingsr   r>   r   getrR   rS   
addHandler_Crawler__remove_handlerr   connectengine_stoppedfrom_crawlerr?   r   r@   r:   r"   twisted.internetr]   r%   r#   r$   r   r   r=   freezer3   r   loggerinfopprintpformat)rC   lf_clsreactor_class
event_loopr]   drU   s         @rE   _apply_settingsCrawler._apply_settings\   s   ==!!$--0 }!=>tD
#D0A0A+0NO( !LT22G4J4JK%01O%P"//5%7&CDE&
"
 "]]+<=--(<=
 :4$]3+--*3J?*77=$T]]340:v~~a?P2Q	
rH   c              /  6  #    U R                   (       a  [        S5      eU R                  (       a  [        S5      eS=U l         U l         U R                  " U0 UD6U l        U R                  5         U R                  5         U R                  5       U l        U R                  R                  U R                  5      v   U R                  R                  5       v   g ! [         a2    SU l         U R                  b  U R                  R                  5       v   e f = f7f)NzCrawling already taking placez?Cannot run Crawler.crawl() more than once on the same instance.TF)r;   RuntimeErrorr<   _create_spiderrA   rp   r8   _create_enginerB   open_spiderstart	ExceptioncloserC   argskwargss      rE   crawlCrawler.crawl   s     ==>??==Q  )-,	--t>v>DK  "))+--/DK++))$++66++##%% 	!DM{{&kk''))		s   AD
BC D<DDc                B    U R                   R                  " U /UQ70 UD6$ rJ   )r4   re   rz   s      rE   rt   Crawler._create_spider   s!    ~~**4A$A&AArH   c                $   ^  [        T U 4S j5      $ )Nc                $   > TR                  5       $ rJ   )stop)_rC   s    rE   rV   (Crawler._create_engine.<locals>.<lambda>   s    tyy{rH   r   rK   s   `rE   ru   Crawler._create_engine   s    t%:;;rH   c              #     #    U R                   (       a<  SU l         U R                  (       d   e[        U R                  R                  5      v   gg7f)zgStarts a graceful stop of the crawler and returns a deferred that is
fired when the crawler is stopped.FN)r;   rB   r   r   rK   s    rE   r   Crawler.stop   s;      ==!DM;;; 0 011 s   AAc                >    U H  n[        X 5      (       d  M  Us  $    g rJ   )r1   )component_class
components	components      rE   _get_componentCrawler._get_component   s$     $I)55   $ rH   c                L    U R                  XR                  R                  5      $ )zReturn the run-time instance of an :ref:`add-on <topics-addons>` of
the specified class or a subclass, or ``None`` if none is found.

.. versionadded:: 2.12
)r   r9   rC   clss     rE   	get_addonCrawler.get_addon   s     ""3(:(:;;rH   c                    U R                   (       d  [        S5      eU R                  XR                   R                  R                  R
                  5      $ )aH  Return the run-time instance of a :ref:`downloader middleware
<topics-downloader-middleware>` of the specified class or a subclass,
or ``None`` if none is found.

.. versionadded:: 2.12

This method can only be called after the crawl engine has been created,
e.g. at signals :signal:`engine_started` or :signal:`spider_opened`.
z_Crawler.get_downloader_middleware() can only be called after the crawl engine has been created.)rB   rs   r   
downloader
middlewaremiddlewaresr   s     rE   get_downloader_middleware!Crawler.get_downloader_middleware   sF     {{5  ""3(>(>(I(I(U(UVVrH   c                    U R                   (       d  [        S5      eU R                  XR                   R                  5      $ )a7  Return the run-time instance of an :ref:`extension
<topics-extensions>` of the specified class or a subclass,
or ``None`` if none is found.

.. versionadded:: 2.12

This method can only be called after the extension manager has been
created, e.g. at signals :signal:`engine_started` or
:signal:`spider_opened`.
zXCrawler.get_extension() can only be called after the extension manager has been created.)r=   rs   r   r   r   s     rE   get_extensionCrawler.get_extension   s:     6  ""3(C(CDDrH   c                    U R                   (       d  [        S5      eU R                  XR                   R                  R                  R
                  5      $ )a8  Return the run-time instance of a :ref:`item pipeline
<topics-item-pipeline>` of the specified class or a subclass, or
``None`` if none is found.

.. versionadded:: 2.12

This method can only be called after the crawl engine has been created,
e.g. at signals :signal:`engine_started` or :signal:`spider_opened`.
zWCrawler.get_item_pipeline() can only be called after the crawl engine has been created.)rB   rs   r   scraperitemprocr   r   s     rE   get_item_pipelineCrawler.get_item_pipeline   F     {{1  ""3(;(;(D(D(P(PQQrH   c                    U R                   (       d  [        S5      eU R                  XR                   R                  R                  R
                  5      $ )a@  Return the run-time instance of a :ref:`spider middleware
<topics-spider-middleware>` of the specified class or a subclass, or
``None`` if none is found.

.. versionadded:: 2.12

This method can only be called after the crawl engine has been created,
e.g. at signals :signal:`engine_started` or :signal:`spider_opened`.
z[Crawler.get_spider_middleware() can only be called after the crawl engine has been created.)rB   rs   r   r   spidermwr   r   s     rE   get_spider_middlewareCrawler.get_spider_middleware   r   rH   )__remove_handlerr:   r<   r9   r;   rB   r=   r?   r@   r6   r   rA   r4   r>   NF)r4   ztype[Spider]r6    dict[str, Any] | Settings | NonerD   bool)returnNone)r{   r   r|   r   r   #Generator[Deferred[Any], Any, None])r{   r   r|   r   r   r   )r   r   r   r   )r   type[_T]r   zIterable[Any]r   	_T | None)r   r   r   r   )__name__
__module____qualname____firstlineno__rF   r8   rp   r
   r}   rt   ru   r   staticmethodr   r   r   r   r   r   __static_attributes__ rH   rE   r.   r.   7   s     6:"	33 33 	3>7
-
d  ,B< 2 2 !/<	 <W"E$R"RrH   r.   c                      \ rS rSrSr\" S SS9r\SS j5       rSSS jjr	        SS	 jr
SS
 jr    SS jrSS jrSS jr\SS j5       rSrg)CrawlerRunneri  a  
This is a convenient helper class that keeps track of, manages and runs
crawlers inside an already setup :mod:`~twisted.internet.reactor`.

The CrawlerRunner object must be instantiated with a
:class:`~scrapy.settings.Settings` object.

This class shouldn't be needed (since Scrapy is responsible of using it
accordingly) unless writing scripts that manually handle the crawling
process. See :ref:`run-from-script` for an example.
c                    U R                   $ rJ   )	_crawlersrK   s    rE   rV   CrawlerRunner.<lambda>  s    T^^rH   zeSet of :class:`crawlers <scrapy.crawler.Crawler>` started by :meth:`crawl` and managed by this class.)docc                    U R                  S5      n[        U5      n[        [        U5        [	        SUR                  U R                  5       5      5      $ )z'Get SpiderLoader instance from settingsSPIDER_LOADER_CLASSr)   )r`   r   r   r   r   from_settings
frozencopy)r6   cls_path
loader_clss      rE   _get_spider_loader CrawlerRunner._get_spider_loader  sM     << 56 *
M:."J$<$<X=P=P=R$S
 	
rH   Nc                    [        U[        5      (       d  Uc  [        U5      n[        R                  " U5        Xl        U R                  U5      U l        [        5       U l	        [        5       U l
        SU l        g r   )r1   r3   r   r   load_pre_crawler_settingsr6   r   spider_loadersetr   _activebootstrap_failed)rC   r6   s     rE   rF   CrawlerRunner.__init__(  s^    h%%)9)H..x8"*373J3J83T'*u,/E %rH   c                    [        U[        5      (       a  [        S5      eU R                  U5      nU R                  " U/UQ70 UD6$ )a  
Run a crawler with the provided arguments.

It will call the given Crawler's :meth:`~Crawler.crawl` method, while
keeping track of it so it can be stopped later.

If ``crawler_or_spidercls`` isn't a :class:`~scrapy.crawler.Crawler`
instance, this method will try to create one using this parameter as
the spider class given to it.

Returns a deferred that is fired when the crawling is finished.

:param crawler_or_spidercls: already created crawler, or a spider class
    or spider's name inside the project to create it
:type crawler_or_spidercls: :class:`~scrapy.crawler.Crawler` instance,
    :class:`~scrapy.spiders.Spider` subclass or string

:param args: arguments to initialize the spider

:param kwargs: keyword arguments to initialize the spider
lThe crawler_or_spidercls argument cannot be a spider object, it must be a spider class (or a Crawler object))r1   r   r2   create_crawler_crawl)rC   crawler_or_spiderclsr{   r|   crawlers        rE   r}   CrawlerRunner.crawl2  sQ    6 *F33B  %%&:;{{74T4V44rH   c                   ^ ^^ T R                   R                  T5        TR                  " U0 UD6mT R                  R                  T5        SUUU 4S jjnTR	                  U5      $ )Nc                   > TR                   R                  T5        TR                  R                  T5        T=R                  [	        TSS 5      (       + -  sl        U $ )NrA   )crawlersdiscardr   r   getattr)resultr   ro   rC   s    rE   _done#CrawlerRunner._crawl.<locals>._doneZ  sI    MM!!'*LL  #!!(D)I%II!MrH   )r   r,   r   r,   )r   addr}   r   addBoth)rC   r   r{   r|   r   ro   s   ``   @rE   r   CrawlerRunner._crawlU  sU    '"MM4*6*	 	 yyrH   c                    [        U[        5      (       a  [        S5      e[        U[        5      (       a  U$ U R	                  U5      $ )a  
Return a :class:`~scrapy.crawler.Crawler` object.

* If ``crawler_or_spidercls`` is a Crawler, it is returned as-is.
* If ``crawler_or_spidercls`` is a Spider subclass, a new Crawler
  is constructed for it.
* If ``crawler_or_spidercls`` is a string, this function finds
  a spider with this name in a Scrapy project (using spider loader),
  then creates a Crawler instance for it.
r   )r1   r   r2   r.   _create_crawler)rC   r   s     rE   r   CrawlerRunner.create_crawlerb  sM     *F33B  *G44''##$899rH   c                    [        U[        5      (       a  U R                  R                  U5      n[	        XR
                  5      $ rJ   )r1   strr   loadr.   r6   )rC   r4   s     rE   r   CrawlerRunner._create_crawlerx  s4    i%%**//	:Iy--00rH   c                    [        [        U R                  5       Vs/ s H  oR                  5       PM     sn5      $ s  snf )zv
Stops simultaneously all the crawling jobs taking place.

Returns a deferred that is fired when they all have ended.
)r	   listr   r   )rC   cs     rE   r   CrawlerRunner.stop}  s0     tDMM/BC/B!VVX/BCDDCs   =c              #     #    U R                   (       a+  [        U R                   5      v   U R                   (       a  M*  gg7f)zm
join()

Returns a deferred that is fired when all managed :attr:`crawlers` have
completed their executions.
N)r   r	   rK   s    rE   joinCrawlerRunner.join  s(      llt||,, llls
   :A A )r   r   r   r6   r   )r6   r   r   r)   rJ   )r6   r   )r   type[Spider] | str | Crawlerr{   r   r|   r   r   Deferred[None])r   r.   r{   r   r|   r   r   r   )r   r   r   r.   )r4   zstr | type[Spider]r   r.   r   zDeferred[Any]r   )r   r   r   r   __doc__propertyr   r   r   rF   r}   r   r   r   r   r
   r   r   r   rH   rE   r   r     s    
 #3H 
 
&!5:!5 !5 	!5
 
!5F :$@:	:,1
E - -rH   r   c                     ^  \ rS rSrSr  S   SU 4S jjjrSS jrSS jrSS jr S     SS jjr	SS jr
SSS	 jjrS
rU =r$ )CrawlerProcessi  a\  
A class to run multiple scrapy crawlers in a process simultaneously.

This class extends :class:`~scrapy.crawler.CrawlerRunner` by adding support
for starting a :mod:`~twisted.internet.reactor` and handling shutdown
signals, like the keyboard interrupt command Ctrl-C. It also configures
top-level logging.

This utility should be a better fit than
:class:`~scrapy.crawler.CrawlerRunner` if you aren't running another
:mod:`~twisted.internet.reactor` within your application.

The CrawlerProcess object must be instantiated with a
:class:`~scrapy.settings.Settings` object.

:param install_root_handler: whether to install root logging handler
    (default: True)

This class shouldn't be needed (since Scrapy is responsible of using it
accordingly) unless writing scripts that manually handle the crawling
process. See :ref:`run-from-script` for an example.
c                   > [         TU ]  U5        [        U R                  U5        [	        U R                  5        SU l        g r   )superrF   r   r6   r   _initialized_reactor)rC   r6   install_root_handler	__class__s      rE   rF   CrawlerProcess.__init__  s5    
 	"$--)=>&*/!rH   c                    SSK Jn  [        U R                  5        [        U   n[
        R                  SSU05        UR                  U R                  5        g )Nr   r\   zDReceived %(signame)s, shutting down gracefully. Send again to force signame)	rf   r]   r    _signal_killr!   rh   ri   callFromThread_graceful_stop_reactorrC   signumr   r]   r   s        rE   _signal_shutdownCrawlerProcess._signal_shutdown  sJ    ,!$"3"34v&R 	
 	t::;rH   c                    SSK Jn  [        [        R                  5        [
        U   n[        R                  SSU05        UR                  U R                  5        g )Nr   r\   z4Received %(signame)s twice, forcing unclean shutdownr   )
rf   r]   r    signalSIG_IGNr!   rh   ri   r   _stop_reactorr   s        rE   r   CrawlerProcess._signal_kill  sH    ,!&..1v&BYPWDX	
 	t112rH   c                    [        U[        5      (       a  U R                  R                  U5      nU R                  (       + nSU l        [        XR                  US9$ )NT)rD   )r1   r   r   r   r   r.   r6   )rC   r4   rD   s      rE   r   CrawlerProcess._create_crawler  sL    i%%**//	:I444$(!y--lKKrH   c                   SSK Jn  U(       a=  U R                  5       nUR                  (       a  gUR	                  U R
                  5        [        U R                  S   5      n[        XPUS9nUR                  5         UR                  5       nUR                  U R                  R                  S5      S9  UR                  SSU R                  5        U(       a"  UR                  S	S
[        U R                   5        UR#                  US9  g)a  
This method starts a :mod:`~twisted.internet.reactor`, adjusts its pool
size to :setting:`REACTOR_THREADPOOL_MAXSIZE`, and installs a DNS cache
based on :setting:`DNSCACHE_ENABLED` and :setting:`DNSCACHE_SIZE`.

If ``stop_after_crawl`` is True, the reactor will be stopped after all
crawlers have finished, using :meth:`join`.

:param bool stop_after_crawl: stop or not the reactor when all
    crawlers have finished

:param bool install_signal_handlers: whether to install the OS signal
    handlers from Twisted and Scrapy (default: True)
r   r\   NDNS_RESOLVERREACTOR_THREADPOOL_MAXSIZE)
maxthreadsbeforeshutdownafterstartup)installSignalHandlers)rf   r]   r   calledr   r  r   r6   r   install_on_reactorgetThreadPooladjustPoolsizegetintaddSystemEventTriggerr   r    r  run)rC   stop_after_crawlinstall_signal_handlersr]   ro   resolver_classresolvertps           rE   rw   CrawlerProcess.start  s    " 	-		AxxIId(()$T]]>%BC &nGL##%""$
T]]%9%9:V%WX%%h
DIIF"))$=t?T?T 	*ABrH   c                \    U R                  5       nUR                  U R                  5        U$ rJ   )r   r   r  )rC   ro   s     rE   r   %CrawlerProcess._graceful_stop_reactor  s$    IIK			$$$%rH   c                    SSK Jn  [        R                  " [        5         UR                  5         S S S 5        g ! , (       d  f       g = f)Nr   r\   )rf   r]   
contextlibsuppressrs   r   )rC   r   r]   s      rE   r  CrawlerProcess._stop_reactor  s*    ,   .LLN /..s	   ;
A	)r   )NT)r6   r   r   r   )r   intr   r   r   r   )r4   ztype[Spider] | strr   r.   )TT)r  r   r  r   r   r   r   rJ   )r   r   r   r   )r   r   r   r   r   rF   r  r   r   rw   r   r  r   __classcell__)r   s   @rE   r   r     st    2 6:%)020 #0 0	<3L NR&C $&CFJ&C	&CP
 rH   r   )G
__future__r   r#  rR   rj   r  typingr   r   r   r   twisted.internet.deferr   r	   r
   r   zope.interface.verifyr   scrapyr   r   scrapy.addonsr   scrapy.core.enginer   scrapy.extensionr   scrapy.interfacesr   scrapy.settingsr   r   r   scrapy.signalmanagerr   scrapy.utils.logr   r   r   r   r   r   scrapy.utils.miscr   r   scrapy.utils.ossignalr    r!   scrapy.utils.reactorr"   r#   r$   r%   collections.abcr&   r'   scrapy.logformatterr(   scrapy.spiderloaderr)   scrapy.statscollectorsr*   scrapy.utils.requestr+   	getLoggerr   rh   r,   r.   r   r   r   rH   rE   <module>r=     s    "     4 4  . " & . - + G G .  > I  3085A 
		8	$T]QR QRhC- C-Lq] qrH   