
    !h                    $   % S SK Jr  S SKrS SKrS SKJr  S SKJr  S SKJ	r	  S SK
JrJr  S SKJrJr  S SKJr  \(       a  S S	KJr  S S
KJrJr  S SKJr  S SKJr  \R6                  " \5      r " S S5      r\R>                  " S5      r S\!S'   SS jr"g)    )annotationsN)TYPE_CHECKING)warn)html)NotConfiguredScrapyDeprecationWarning)HtmlResponseResponse)escape_ajax)Self)RequestSpider)Crawler)BaseSettingsc                  Z    \ rS rSrSrS	S jr\S
S j5       r        SS jrSS jr	Sr
g)AjaxCrawlMiddleware   zA
Handle 'AJAX crawlable' pages marked as crawlable via meta tag.
c                    UR                  S5      (       d  [        e[        S[        SS9  UR	                  SS5      U l        g )NAJAXCRAWL_ENABLEDzxscrapy.downloadermiddlewares.ajaxcrawl.AjaxCrawlMiddleware is deprecated and will be removed in a future Scrapy version.   )
stacklevelAJAXCRAWL_MAXSIZEi   )getboolr   r   r   getintlookup_bytes)selfsettingss     m/root/1688_scrapy/alibaba-scraper/venv/lib/python3.13/site-packages/scrapy/downloadermiddlewares/ajaxcrawl.py__init__AjaxCrawlMiddleware.__init__   sC     344?$		
 "*1De!L    c                &    U " UR                   5      $ )N)r   )clscrawlers     r   from_crawler AjaxCrawlMiddleware.from_crawler0   s    7##$$r!   c                f   [        U[        5      (       a  UR                  S:w  a  U$ UR                  S:w  a  U$ SUR                  ;   a  U$ U R                  U5      (       d  U$ UR                  [        UR                  S-   5      S9n[        R                  SXAS.SU0S	9  S
UR                  S'   U$ )N   GETajax_crawlablez#!)urlzHDownloading AJAX crawlable %(ajax_crawl_request)s instead of %(request)s)ajax_crawl_requestrequestspider)extraT)
isinstancer	   statusmethodmeta_has_ajax_crawlable_variantreplacer   r+   loggerdebug)r   r-   responser.   r,   s        r   process_response$AjaxCrawlMiddleware.process_response4   s     (L11X__5KO>>U"Ow||+O//99O$__W[[4=O1P_QV#5JV$ 	 	
 59 01!!r!   c                J    UR                   SU R                   n[        U5      $ )zH
Return True if a page without hash fragment could be "AJAX crawlable".
N)textr   _has_ajaxcrawlable_meta)r   r8   bodys      r   r4   /AjaxCrawlMiddleware._has_ajax_crawlable_variantN   s&     }}0t001&t,,r!   )r   N)r   r   )r$   r   returnr   )r-   r   r8   r
   r.   r   r@   zRequest | Response)r8   r
   r@   bool)__name__
__module____qualname____firstlineno____doc__r   classmethodr%   r9   r4   __static_attributes__ r!   r   r   r      sK    M" % %""*2"<B"	"4-r!   r   z8<meta\s+name=["\']fragment["\']\s+content=["\']!["\']/?>zre.Pattern[str]_ajax_crawlable_rec                    SU ;  a  gSU ;  a  g[         R                  " U S5      n [         R                  " U 5      n [         R                  " U 5      n [        R                  U 5      SL$ )a}  
>>> _has_ajaxcrawlable_meta('<html><head><meta name="fragment"  content="!"/></head><body></body></html>')
True
>>> _has_ajaxcrawlable_meta("<html><head><meta name='fragment' content='!'></head></html>")
True
>>> _has_ajaxcrawlable_meta('<html><head><!--<meta name="fragment"  content="!"/>--></head><body></body></html>')
False
>>> _has_ajaxcrawlable_meta('<html></html>')
False
fragmentFcontent)scriptnoscriptN)r   remove_tags_with_contentreplace_entitiesremove_commentsrJ   search)r<   s    r   r=   r=   [   sc     ((/EFD  &D%D$$T*$66r!   )r<   strr@   rA   )#
__future__r   loggingretypingr   warningsr   w3libr   scrapy.exceptionsr   r   scrapy.httpr	   r
   scrapy.utils.urlr   typing_extensionsr   scrapyr   r   scrapy.crawlerr   scrapy.settingsr   	getLoggerrB   r6   r   compilerJ   __annotations__r=   rI   r!   r   <module>re      sl    "  	     E . (&&&, 
		8	$9- 9-x ')jj?' O 
7r!   