
    h^                       S r SSKJr  SSKrSSKrSSKrSSKrSSKrSSKrSSK	J
r
  SSKJrJrJrJr  SSKJr  SSKJrJrJrJrJrJrJrJrJrJrJrJr  SSKJrJ r   S	S
K!J"r"J#r#  S	SK$J%r%J&r&  S	SK'J(r(  S	SK)J*r*  S@S jr+\RX                  " S\+5        Sr-Sr.\-\.-   r/\R`                  \Rb                  -   S-   Re                  S5      r3Sr4\3\.-   S-   r5\/\3-   \4-   S-   r6\6Ro                  SS5      r8\5Rs                  SSS9r:\6Rs                  SSS9r;\;r<\;Rs                  SSS9r=\;r>\" V s0 s H  n \?" U 5      S_M     sn r@SAS jrA   SB         SCS jjrB\R                  " S5      rD SD       SES  jjrESFS! jrF\  SG         SHS" jj5       rG\ SI         SJS# jj5       rG  SG         SKS$ jjrG      SL               SMS% jjrHSNS& jrISOS' jrJSPS( jrKSQS) jrLSRS* jrMSSS+ jrN\O" \P" \Q\R" S,5      5      5      rSS-R                  \R                  " S.R                  \S\O" \P" \Q\R" SS/5      5      5      -
  \O" S05      -
  5      5      5      rWS1R                  \R                  " S.R                  \S1 S2k-
  5      5      \R                  " S.R                  \S5      5      5      rX\R                  " S3R                  \WS49Re                  5       5      rY\R                  " S5R                  \W\XS69Re                  5       5      rZ " S7 S8\5      r[STS9 jr\/ S:Qr] SD       SUS; jjr^   SV         SWS< jjr_SXS= jr` SY     SZS> jjra S[     S\S? jjrbgs  sn f )]zW
This module contains general purpose URL functions not found in the standard
library.
    )annotationsN)Sequence)Callable
NamedTuplecastoverload)_coerce_args)ParseResultparse_qs	parse_qslquoteunquoteunquote_to_bytes	urldefrag	urlencodeurlparseurlsplit
urlunparse
urlunsplit)pathname2urlurl2pathname   )_ASCII_TAB_OR_NEWLINE_C0_CONTROL_OR_SPACE)AnyUnicodeError
StrOrBytes)_SPECIAL_SCHEMES)
to_unicodec                    [        [        U 5      n [        [        U R                  U R
                  U R                   5      5      U R                  4$ N)r   r   r   r   objectstartend)errors    P/root/1688_scrapy/alibaba-scraper/venv/lib/python3.13/site-packages/w3lib/url.py_quote_byter&   (   s:    %(EuU\\%++		BCDeiiPP    percentencodes   :/?#[]@s   !$&'()*+,;=z-._~ascii   |   :   %   #r'   s   :;=)deletes   #[]|   'c                R    U R                  [        5      R                  [        5      $ r    )stripr   	translate'_ASCII_TAB_OR_NEWLINE_TRANSLATION_TABLE)urls    r%   _stripr5   P   s"    99)*44/ r'   c                *   [        XSS9n[        [        U5      5      nUR                  UR                  UR
                  UR                  4u  pgpSn
Uc  Ubh  Ub,  [        [        U5      [        5      nXR                  U5      -  n
Ub1  U
S-  n
[        [        U5      [        5      nXR                  U5      -  n
U
S-  n
Ub   XR                  S5      -  n
U	b"  U
S-  n
U
[        U	5      R                  U5      -  n
U
R                  5       nU(       a*  [        UR                  R                  U5      [        5      nOUR                  nUR                   ["        ;   a*  [        UR$                  R                  U5      [&        5      nO)[        UR$                  R                  U5      [(        5      n[+        UR                   UUU[        UR,                  R                  U5      [.        5      45      $ ! [         a    XR                  U5      -  n
 GN:f = f)u  Return a URL equivalent to *url* that a wide range of web browsers and
web servers consider valid.

*url* is parsed according to the rules of the `URL living standard`_,
and during serialization additional characters are percent-encoded to make
the URL valid by additional URL standards.

.. _URL living standard: https://url.spec.whatwg.org/

The returned URL should be valid by *all* of the following URL standards
known to be enforced by modern-day web browsers and web servers:

-   `URL living standard`_

-   `RFC 3986`_

-   `RFC 2396`_ and `RFC 2732`_, as interpreted by `Java 8’s java.net.URI
    class`_.

.. _Java 8’s java.net.URI class: https://docs.oracle.com/javase/8/docs/api/java/net/URI.html
.. _RFC 2396: https://www.ietf.org/rfc/rfc2396.txt
.. _RFC 2732: https://www.ietf.org/rfc/rfc2732.txt
.. _RFC 3986: https://www.ietf.org/rfc/rfc3986.txt

If a bytes URL is given, it is first converted to `str` using the given
encoding (which defaults to 'utf-8'). If quote_path is True (default),
path_encoding ('utf-8' by default) is used to encode URL path component
which is then quoted. Otherwise, if quote_path is False, path component
is not encoded or quoted. Given encoding is used for query string
or form data.

When passing an encoding, you should use the encoding of the
original page (the page from which the URL was extracted from).

Calling this function on an already "safe" URL will return the URL
unmodified.
r(   )encodingerrorsr'   r+      @idna)r   r   r5   usernamepasswordhostnameportr   r   _USERINFO_SAFEST_CHARSencodeUnicodeErrorstrdecodepath_PATH_SAFEST_CHARSschemer   query_SPECIAL_QUERY_SAFEST_CHARS_QUERY_SAFEST_CHARSr   fragment_FRAGMENT_SAFEST_CHARS)r4   r7   path_encoding
quote_pathdecodedpartsr;   r<   r=   r>   netloc_bytessafe_usernamesafe_passwordnetlocrD   rG   s                   r%   safe_url_stringrT   V   s   b HGVG_%E 	

	*&H Lx3!'("35KLM00::LD L!'("35KLM00::L	6OOF33L
 D	((22  "F UZZ&&}57IJzz||''ekk((24OPekk((24GHLL%..''13IJ	
 +  	6 OOH55L	6s   G1 1HHz
/?(\.\./)+c                   [        XU5      n[        U5      u  pEpgnU(       a\  [        R                  S[        R
                  " U5      5      nUR                  S5      (       a  UR                  S5      (       d  US-  nOSn[        XEXgS45      $ )zMake a url for download. This will call safe_url_string
and then strip the fragment, if one exists. The path will
be normalised.

If the path is outside the document root, it will be changed
to be within the document root.
 /)rT   r   _parent_dirssub	posixpathnormpathendswithr   )	r4   r7   rL   safe_urlrF   rS   rD   rG   _s	            r%   safe_download_urlr_      s     sm<H%-h%7"FDI$6$6t$<=S!!$--*<*<CKDvtB788r'   c                0    U R                  S5      S   S;   $ )Nz://r   )filehttphttps)	partition)texts    r%   is_urlrf      s    >>% #'@@@r'   c                    g r     r4   	parameterdefaultkeep_blank_valuess       r%   url_query_parameterrm      s     r'   c                    g r    rh   ri   s       r%   rm   rm      s     r'   c                n    [        [        [        U 5      5      S   [        U5      S9nX;   a  XA   S   $ U$ )ap  Return the value of a url parameter, given the url and parameter name

General case:

>>> import w3lib.url
>>> w3lib.url.url_query_parameter("product.html?id=200&foo=bar", "id")
'200'
>>>

Return a default value if the parameter is not found:

>>> w3lib.url.url_query_parameter("product.html?id=200&foo=bar", "notthere", "mydefault")
'mydefault'
>>>

Returns None if `keep_blank_values` not set or 0 (default):

>>> w3lib.url.url_query_parameter("product.html?id=", "id")
>>>

Returns an empty string if `keep_blank_values` set to 1:

>>> w3lib.url.url_query_parameter("product.html?id=", "id", keep_blank_values=1)
''
>>>

   rl   r   )r   r   rB   bool)r4   rj   rk   rl   queryparamss        r%   rm   rm      sC    D S16G1HK %a((Nr'   c                `   [        U[        [        45      (       a  U/n[        U 5      u  p[	        [        U 5      n [	        [        U5      nU R                  S5      u  pn
[        5       n/ nU
R                  U5       Hm  nU(       d  M  UR                  U5      u  n  n	U(       a  X;   a  M/  U(       a  X;   a  M=  U(       d  X;  a  MK  UR                  U5        UR                  U5        Mo     U(       a!  SR                  XR                  U5      /5      OUn U(       a  U(       a  U SU-   -  n U $ )a]  Clean URL arguments leaving only those passed in the parameterlist keeping order

>>> import w3lib.url
>>> w3lib.url.url_query_cleaner("product.html?id=200&foo=bar&name=wired", ('id',))
'product.html?id=200'
>>> w3lib.url.url_query_cleaner("product.html?id=200&foo=bar&name=wired", ['id', 'name'])
'product.html?id=200&name=wired'
>>>

If `unique` is ``False``, do not remove duplicated keys

>>> w3lib.url.url_query_cleaner("product.html?d=1&e=b&d=2&d=3&other=other", ['d'], unique=False)
'product.html?d=1&d=2&d=3'
>>>

If `remove` is ``True``, leave only those **not in parameterlist**.

>>> w3lib.url.url_query_cleaner("product.html?id=200&foo=bar&name=wired", ['id'], remove=True)
'product.html?foo=bar&name=wired'
>>> w3lib.url.url_query_cleaner("product.html?id=2&foo=bar&name=wired", ['id', 'foo'], remove=True)
'product.html?name=wired'
>>>

By default, URL fragments are removed. If you need to preserve fragments,
pass the ``keep_fragments`` argument as ``True``.

>>> w3lib.url.url_query_cleaner('http://domain.tld/?bla=123#123123', ['bla'], remove=True, keep_fragments=True)
'http://domain.tld/#123123'

?#)
isinstancerB   bytesr   r   rd   setsplitappendaddjoin)r4   parameterlistsepkvsepremoveuniquekeep_fragmentsrJ   baser^   rG   seen	querylistksvks                  r%   url_query_cleanerr     s    P -#u..&cNMC
sC.CC"H]]3'NDU5DI{{3--&1aaia(!0   4=#((D((9-.
/$C(sX~Jr'   c                   [        U 5      n[        UR                  SS9n/ n[        5       nU HK  u  pgXa;  a  UR	                  Xg45        M  Xe;  d  M%  UR	                  XaU   45        UR                  U5        MM     UR                  5        VVs/ s H  u  pgXe;  d  M  Xg4PM     nnnXH-  n[        U5      n	[        UR                  U	S95      $ s  snnf )NTrq   )rG   )
r   r   rG   ry   r{   r|   itemsr   r   _replace)
r4   paramsparsedcurrent_argsnew_argsseen_paramsnamevaluenot_modified_argsrG   s
             r%   _add_or_replace_parametersr   W  s    c]FV\\TBLH%K#OOTM*$OOT$<01OOD! $ *0)7+$4;R   !HhEfooEo233s   CCc                    [        XU05      $ )a*  Add or remove a parameter to a given url

>>> import w3lib.url
>>> w3lib.url.add_or_replace_parameter('http://www.example.com/index.php', 'arg', 'v')
'http://www.example.com/index.php?arg=v'
>>> w3lib.url.add_or_replace_parameter('http://www.example.com/index.php?arg1=v1&arg2=v2&arg3=v3', 'arg4', 'v4')
'http://www.example.com/index.php?arg1=v1&arg2=v2&arg3=v3&arg4=v4'
>>> w3lib.url.add_or_replace_parameter('http://www.example.com/index.php?arg1=v1&arg2=v2&arg3=v3', 'arg3', 'v3new')
'http://www.example.com/index.php?arg1=v1&arg2=v2&arg3=v3new'
>>>

r   )r4   r   	new_values      r%   add_or_replace_parameterr   m  s     &c)+<==r'   c                    [        X5      $ )a  Add or remove a parameters to a given url

>>> import w3lib.url
>>> w3lib.url.add_or_replace_parameters('http://www.example.com/index.php', {'arg': 'v'})
'http://www.example.com/index.php?arg=v'
>>> args = {'arg4': 'v4', 'arg3': 'v3new'}
>>> w3lib.url.add_or_replace_parameters('http://www.example.com/index.php?arg1=v1&arg2=v2&arg3=v3', args)
'http://www.example.com/index.php?arg1=v1&arg2=v2&arg3=v3new&arg4=v4'
>>>

r   )r4   new_parameterss     r%   add_or_replace_parametersr   }  s     &c::r'   c                z    [        [        R                  R                  U 5      5      nSUR	                  S5       3$ )zoConvert local filesystem path to legal File URIs as described in:
http://en.wikipedia.org/wiki/File_URI_scheme
zfile:///rW   )r   osrD   abspathlstrip)rD   xs     r%   path_to_file_urir     s0     	RWW__T*+Aahhsm_%%r'   c                B    [        U 5      R                  n[        U5      $ )zeConvert File URI to local filesystem path according to:
http://en.wikipedia.org/wiki/File_URI_scheme
)r   rD   r   )uriuri_paths     r%   file_uri_to_pathr     s     }!!H!!r'   c                    [         R                  R                  U 5      S   (       a  [        U 5      $ [	        U 5      nUR
                  (       a  U $ [        U 5      $ )zJIf given a path name, return its File URI, otherwise return it
unmodified
r   )r   rD   
splitdriver   r   rF   )uri_or_pathus     r%   
any_to_urir     sG     
ww+&q),,A((;E(8(EEr'      z[{}]+rV       z()<>@,;:\"/[]?= z(?:[{}]|(?:\\[{}]))*>   "\z{token}/{token})tokenz%;({token})=(?:({token})|"({quoted})"))r   quotedc                  8    \ rS rSr% SrS\S'   S\S'   S\S'   S	rg
)ParseDataURIResulti  z/Named tuple returned by :func:`parse_data_uri`.rB   
media_typedict[str, str]media_type_parametersrx   datarh   N)__name__
__module____qualname____firstlineno____doc____annotations____static_attributes__rh   r'   r%   r   r     s    9 O))
Kr'   r   c                   [        U [        5      (       d  [        U 5      R                  S5      n  U R	                  SS5      u  pUR                  5       S:w  a  [        S5      e[        U 5      n Sn0 n[        R                  U 5      nU(       a1  UR                  5       R                  5       nXR                  5       S n OS	US
'    [        R                  U 5      nU(       ae  UR                  5       u  pVnU(       a  [        R                   " SSU5      nUR                  5       X5R                  5       '   XR                  5       S n OOM   U R	                  SS5      u  pU(       a'  US:w  a  [        S5      e["        R$                  " U	5      n	['        X#U	5      $ ! [
         a    [        S5      ef = f! [
         a    [        S5      ef = f)z3Parse a data: URI into :class:`ParseDataURIResult`.r)   r+   r   zinvalid URIs   dataznot a data URIz
text/plainNzUS-ASCIIcharsets   \\(.)s   \1   ,zinvalid data URIs   ;base64)rw   rx   rT   r@   rz   
ValueErrorlowerr   _mediatype_patternmatchgrouprC   r#   _mediatype_parameter_patterngroupsrerY   base64	b64decoder   )
r   rF   r   media_type_paramsm	attributer   value_quoted	is_base64r   s
             r%   parse_data_urir     s   c5!!c"))'2(iia( ||~ )** 3
CJ  %AWWY%%'
%%')n'1)$
(..s3-.XXZ*Ily&,?49LLN..01eegi.C -))D!,	 
"/00%jTBBY  (''(J  -+,,-s   F F2 F/2G)r   r   r   canonicalize_urlr   rf   r   r   r_   rT   r   rm   c           
         U R                   R                  S5      R                  5       nU R                  U[        U R                  R                  U5      [        5      [        U R                  R                  U5      [        5      [        U R                  R                  U5      [        5      [        U R                  R                  U5      [        5      4$ ! [         a    U R                   n Nf = f)Nr:   )rS   r@   rC   rA   rF   r   rD   _path_safe_charsr   _safe_charsrG   rJ   )rO   r7   rL   rS   s       r%   _safe_ParseResultr     s    
$$V,335
 	ejj.0@Aell!!-0+>ekk  *K8enn##H-{;   s   )C C21C2c                &   [        U [        5      (       a  [        U 5      n  [        [	        U 5      U=(       d    SS9u  pEpgp[        X5      n
U
R                  5         [        U
5      n[        U5      n[        U[        5      =(       d    SnU(       d  SOU	n	UR                  S5      nUS   R                  5       R                  S5      US'   SR                  U5      n[!        XEXgX45      $ ! [
         a    [        [	        U 5      SS9u  pEpgp Nf = f)a  Canonicalize the given url by applying the following procedures:

- make the URL safe
- sort query arguments, first by key, then by value
- normalize all spaces (in query arguments) '+' (plus symbol)
- normalize percent encodings case (%2f -> %2F)
- remove query arguments with blank values (unless `keep_blank_values` is True)
- remove fragments (unless `keep_fragments` is True)

The url passed can be bytes or unicode, while the url returned is
always a native str (bytes in Python 2, unicode in Python 3).

>>> import w3lib.url
>>>
>>> # sorting query arguments
>>> w3lib.url.canonicalize_url('http://www.example.com/do?c=3&b=5&b=2&a=50')
'http://www.example.com/do?a=50&b=2&b=5&c=3'
>>>
>>> # UTF-8 conversion + percent-encoding of non-ASCII characters
>>> w3lib.url.canonicalize_url('http://www.example.com/r\u00e9sum\u00e9')
'http://www.example.com/r%C3%A9sum%C3%A9'
>>>

For more examples, see the tests in `tests/test_url.py`.
utf8)r7   rW   rV   @:)rw   rB   r5   r   	parse_urlUnicodeEncodeErrorparse_qsl_to_bytessortr   _unquotepathr   r   rz   r   rstripr}   r   )r4   rl   r   r7   rF   rS   rD   r   rG   rJ   keyvalsuqpnetloc_partss                r%   r   r   /  s	   H #sSk
8IcNX%79
5eF !:GLLNgE t
C&'.3D'rXH <<$L#B'--/66s;LXXl#F vtUEFFe  
8IcNV9
5eX
s    C, ,!DDc                v    S H)  nU R                  SU-   SUR                  5       -   5      n M+     [        U 5      $ )N)2f2F3f3F%z%25)replaceupperr   )rD   reserveds     r%   r   r     s9    ,||C(NEHNN4D,DE - D!!r'   c                X    [        U [        5      (       a  U $ [        [        X5      5      $ )zTReturn urlparsed url from the given argument (which could be an already
parsed url)
)rw   r
   r   r   )r4   r7   s     r%   r   r     s&     #{##
Js-..r'   c           	        [        [        S[        [        [        S[        4   4   4   [
        5      nU" U 5      u  pU R                  S5       VVs/ s H  oDR                  S5        H  oUPM     M     nnn/ nU H  nU(       d  M  UR                  SS5      n	[        U	5      S:w  a  U(       a  U	R                  S5        OMH  [        U	S   5      (       d	  U(       d  Md  U	S   R                  S	S
5      n
[        U
5      n
U" U
5      n
U	S   R                  S	S
5      n[        U5      nU" U5      nUR                  X45        M     U$ s  snnf )a  Parse a query given as a string argument.

Data are returned as a list of name, value pairs as bytes.

Arguments:

qs: percent-encoded query string to be parsed

keep_blank_values: flag indicating whether blank values in
    percent-encoded queries should be treated as blank strings.  A
    true value indicates that blanks should be retained as blank
    strings.  The default false value indicates that blank values
    are to be ignored and treated as if they were  not included.

.&;=r      rV   r   + )r   r   tuplerB   rx   r	   rz   lenr{   r   r   )qsrl   coerce_args_coerce_results1s2pairsr
name_valuenvr   r   s               r%   r   r     s$   , xU3e0D+D%E EFUK$RB((3-@-B((3-BR-R-E@
A
c1%r7a< 		"r!u::**!!u}}S#6D#D)D!$'D "1c3 7E$U+E"5)EHHd]## $ H) As   #E)r$   rA   returnztuple[str, int])r4   rB   r  rB   )r   r   T)
r4   r   r7   rB   rL   rB   rM   rr   r  rB   )r   r   )r4   r   r7   rB   rL   rB   r  rB   )re   rB   r  rr   )Nr   )
r4   r   rj   rB   rk   Nonerl   
bool | intr  
str | None)r   )
r4   r   rj   rB   rk   rB   rl   r  r  rB   )
r4   r   rj   rB   rk   r  rl   r  r  r  )rh   r   r   FTF)r4   r   r~   z!StrOrBytes | Sequence[StrOrBytes]r   rB   r   rB   r   rr   r   rr   r   rr   r  rB   )r4   rB   r   r   r  rB   )r4   rB   r   rB   r   rB   r  rB   )r4   rB   r   r   r  rB   )rD   rB   r  rB   )r   rB   r  rB   )r   rB   r  rB   )r   r   r  r   )rO   r
   r7   rB   rL   rB   r  z#tuple[str, str, str, str, str, str])TFN)
r4   StrOrBytes | ParseResultrl   rr   r   rr   r7   r  r  rB   )rD   rB   r  rx   r    )r4   r  r7   r  r  r
   )F)r   rB   rl   rr   r  zlist[tuple[bytes, bytes]])cr   
__future__r   r   codecsr   rZ   r   stringcollections.abcr   typingr   r   r   r   urllib.parser	   r
   r   r   r   r   r   r   r   r   r   r   r   urllib.requestr   r   _infrar   r   _typesr   r   _urlr   utilr   r&   register_errorRFC3986_GEN_DELIMSRFC3986_SUB_DELIMSRFC3986_RESERVEDascii_lettersdigitsr@   RFC3986_UNRESERVEDEXTRA_SAFE_CHARSRFC3986_USERINFO_SAFE_CHARSr   r   r   r2   r?   rE   rI   rH   rK   ordr3   r5   rT   compilerX   r_   rf   rm   r   r   r   r   r   r   r   ry   mapchrrange_charformatescaper}   _token_quoted_stringr   r   r   r   __all__r   r   r   r   r   )chars   0r%   <module>r'     s!  
 #   	  	  $ 7 7 %    6 ? / " Q
   o{ 3   # %(:: **V]]:VCKKGT  03EEL !336FFM&&tS1  5>>tF>S  **4*@ ( 0::4:M +  !6+ 5CItO 5+ '
 	d	dd d 	d
 	dN zz-( CI9	9"9<?99*A 
 $%		  "	
  
 

 %&		  "	
 	 
 $%	'	'' ' "	'
 'X 8: ?	?4? 
? 	?
 ? ? ? 	?D4,> ;&"F 	CU3Z ! 
II
 Cq"&'( #$%	


 )//IIbgge//01299RWWU^3L ZZ 2 9 9 9 G N N PQ !zz,33^ 4 fh   3Cl" FL"%?B(, # 	\G	!\G\G \G 	\G
 	\G~	" ;?/	!/-7// (-,, $,,u+s   $M