
"""
This module provides some useful functions for working with
scrapy.Request objects
"""

from __future__ import annotations

import hashlib
import json
import warnings
from typing import TYPE_CHECKING, Any, Protocol
from urllib.parse import urlunparse
from weakref import WeakKeyDictionary

from w3lib.http import basic_auth_header
from w3lib.url import canonicalize_url

from scrapy import Request, Spider
from scrapy.exceptions import ScrapyDeprecationWarning
from scrapy.utils.httpobj import urlparse_cached
from scrapy.utils.misc import load_object
from scrapy.utils.python import to_bytes, to_unicode

if TYPE_CHECKING:
    from collections.abc import Iterable

    from typing_extensions import Self

    from scrapy.crawler import Crawler


_fingerprint_cache: WeakKeyDictionary[
    Request, dict[tuple[tuple[bytes, ...] | None, bool], bytes]
] = WeakKeyDictionary()


def fingerprint(
    request: Request,
    *,
    include_headers: Iterable[bytes | str] | None = None,
    keep_fragments: bool = False,
) -> bytes:
    """
    Return the request fingerprint.

    The request fingerprint is a hash that uniquely identifies the resource the
    request points to. For example, take the following two urls:
    ``http://www.example.com/query?id=111&cat=222``,
    ``http://www.example.com/query?cat=222&id=111``.

    Even though those are two different URLs both point to the same resource
    and are equivalent (i.e. they should return the same response).

    Another example are cookies used to store session ids. Suppose the
    following page is only accessible to authenticated users:
    ``http://www.example.com/members/offers.html``.

    Lots of sites use a cookie to store the session id, which adds a random
    component to the HTTP Request and thus should be ignored when calculating
    the fingerprint.

    For this reason, request headers are ignored by default when calculating
    the fingerprint. If you want to include specific headers use the
    include_headers argument, which is a list of Request headers to include.

    Also, servers usually ignore fragments in urls when handling requests,
    so they are also ignored by default when calculating the fingerprint.
    If you want to include them, set the keep_fragments argument to True
    (for instance when handling requests with a headless browser).
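
    For example, the two query URLs above yield the same fingerprint under the
    defaults (shown as an illustrative doctest):

    >>> from scrapy import Request
    >>> from scrapy.utils.request import fingerprint
    >>> fp1 = fingerprint(Request("http://www.example.com/query?id=111&cat=222"))
    >>> fp2 = fingerprint(Request("http://www.example.com/query?cat=222&id=111"))
    >>> fp1 == fp2
    True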
    """
    processed_include_headers: tuple[bytes, ...] | None = None
    if include_headers:
        processed_include_headers = tuple(
            to_bytes(h.lower()) for h in sorted(include_headers)
        )
    cache = _fingerprint_cache.setdefault(request, {})
    cache_key = (processed_include_headers, keep_fragments)
    if cache_key not in cache:
        # JSON cannot represent bytes, so header names/values and the body are
        # encoded through bytes.hex() before being hashed.
        headers: dict[str, list[str]] = {}
        if processed_include_headers:
            for header in processed_include_headers:
                if header in request.headers:
                    headers[header.hex()] = [
                        header_value.hex()
                        for header_value in request.headers.getlist(header)
                    ]
        fingerprint_data = {
            "method": to_unicode(request.method),
            "url": canonicalize_url(request.url, keep_fragments=keep_fragments),
            "body": (request.body or b"").hex(),
            "headers": headers,
        }
        fingerprint_json = json.dumps(fingerprint_data, sort_keys=True)
        cache[cache_key] = hashlib.sha1(fingerprint_json.encode()).digest()
    return cache[cache_key]


class RequestFingerprinterProtocol(Protocol):
    def fingerprint(self, request: Request) -> bytes: ...


class RequestFingerprinter:
    """Default fingerprinter.

    It takes into account a canonical version
    (:func:`w3lib.url.canonicalize_url`) of :attr:`request.url
    <scrapy.Request.url>` and the values of :attr:`request.method
    <scrapy.Request.method>` and :attr:`request.body
    <scrapy.Request.body>`. It then generates an `SHA1
    <https://en.wikipedia.org/wiki/SHA-1>`_ hash.
    """

    @classmethod
    def from_crawler(cls, crawler: Crawler) -> Self:
        return cls(crawler)

    def __init__(self, crawler: Crawler | None = None):
        if crawler:
            implementation = crawler.settings.get(
                "REQUEST_FINGERPRINTER_IMPLEMENTATION", "SENTINEL"
            )
        else:
            implementation = "SENTINEL"
        if implementation != "SENTINEL":
            message = (
                "'REQUEST_FINGERPRINTER_IMPLEMENTATION' is a deprecated setting.\n"
                "It will be removed in a future version of Scrapy."
            )
            warnings.warn(message, category=ScrapyDeprecationWarning, stacklevel=2)
        self._fingerprint = fingerprint

    def fingerprint(self, request: Request) -> bytes:
        return self._fingerprint(request)


def request_authenticate(
    request: Request,
    username: str,
    password: str,
) -> None:
    """Authenticate the given request (in place) using the HTTP basic access
    authentication mechanism (RFC 2617) and the given username and password
    """
    warnings.warn(
        "The request_authenticate function is deprecated and "
        "will be removed in a future version of Scrapy.",
        ScrapyDeprecationWarning,
        stacklevel=2,
    )
    request.headers["Authorization"] = basic_auth_header(username, password)


def request_httprepr(request: Request) -> bytes:
    """Return the raw HTTP representation (as bytes) of the given request.
    This is provided only for reference since it's not the actual stream of
    bytes that will be sent when performing the request (that's controlled
    by Twisted).
    """
    parsed = urlparse_cached(request)
    path = urlunparse(("", "", parsed.path or "/", parsed.params, parsed.query, ""))
    s = to_bytes(request.method) + b" " + to_bytes(path) + b" HTTP/1.1\r\n"
    s += b"Host: " + to_bytes(parsed.hostname or b"") + b"\r\n"
    if request.headers:
        s += request.headers.to_string() + b"\r\n"
    s += b"\r\n"
    s += request.body
    return s


def referer_str(request: Request) -> str | None:
    """Return Referer HTTP header suitable for logging."""
    referrer = request.headers.get("Referer")
    if referrer is None:
        return referrer
    return to_unicode(referrer, errors="replace")


def request_from_dict(d: dict[str, Any], *, spider: Spider | None = None) -> Request:
    """Create a :class:`~scrapy.Request` object from a dict.

    If a spider is given, it will try to resolve the callbacks looking at the
    spider for methods with the same name.
    """
    request_cls: type[Request] = load_object(d["_class"]) if "_class" in d else Request
    kwargs = {key: value for key, value in d.items() if key in request_cls.attributes}
    if d.get("callback") and spider:
        kwargs["callback"] = _get_method(spider, d["callback"])
    if d.get("errback") and spider:
        kwargs["errback"] = _get_method(spider, d["errback"])
    return request_cls(**kwargs)


def _get_method(obj: Any, name: Any) -> Any:
    """Helper function for request_from_dict"""
    name = str(name)
    try:
        return getattr(obj, name)
    except AttributeError:
        raise ValueError(f"Method {name!r} not found in: {obj}")


def request_to_curl(request: Request) -> str:
    """
    Converts a :class:`~scrapy.Request` object to a curl command.

    :param :class:`~scrapy.Request`: Request object to be converted
    :return: string containing the curl command
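
    For example (an illustrative sketch; the exact output also reflects any
    headers and cookies set on the request)::

        request_to_curl(Request("https://example.org/api", method="POST", body=b"{}"))
        # "curl -X POST https://example.org/api --data-raw '{}'"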
z--data-raw 'zutf-8'rn    c              3  v   #    U  H/  u  pS UR                  5        SUS   R                  5        S3v   M1     g7f)z-H 'z: r   r   N)decoder   kvs      r   r   "request_to_curl.<locals>.<genexpr>   s5      :Q$!$qxxzl"QqT[[]O1-:Qs   79z; c              3  4   #    U  H  u  pU S U 3v   M     g7f=Nr?   r   s      r   r   r      s     N6Mda!AaSz6Ms   z
--cookie 'c           	   3     #    U  HL  n[        [        UR                  5       5      5       S [        [        UR                  5       5      5       3v   MN     g7fr   )nextiterkeysvalues)r   cs     r   r   r      sC      (A QVVX'($tAHHJ/?*@)AB(s   AAzcurl -X )r"   r$   r   joinr%   r   r#   cookies
isinstancedictliststripsplit)r2   r"   datar%   r#   r   cookiecurl_cmds           r   request_to_curlr      s5    ^^F=D\\\',,--g67q9rDhh :A//:O:O:Q G ++CGgoot,,YYNgoo6K6K6MNNF"6(!,G..YY   F #6(!,G&3%qay'CIIKH88HNN$%%r!   )r2   r   r   zIterable[bytes | str] | Noner   boolrD   rE   )r2   r   rj   r   rk   r   rD   NonerC   )r2   r   rD   z
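

# Minimal, optional usage sketch (not part of Scrapy's public API): running
# this module directly exercises the helpers above. It relies only on names
# already defined or imported in this file; the URLs are illustrative.
if __name__ == "__main__":
    req_a = Request("http://www.example.com/query?id=111&cat=222")
    req_b = Request("http://www.example.com/query?cat=222&id=111")

    # Query-parameter order does not affect the fingerprint.
    assert fingerprint(req_a) == fingerprint(req_b)

    # Fragments are ignored unless keep_fragments=True is passed.
    frag_1 = Request("http://www.example.com/page#intro")
    frag_2 = Request("http://www.example.com/page#details")
    assert fingerprint(frag_1) == fingerprint(frag_2)
    assert fingerprint(frag_1, keep_fragments=True) != fingerprint(frag_2, keep_fragments=True)

    # The default fingerprinter class delegates to fingerprint().
    assert RequestFingerprinter().fingerprint(req_a) == fingerprint(req_a)

    print(fingerprint(req_a).hex())
    print(request_httprepr(req_a).decode("ascii"))
    print(request_to_curl(req_a))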