
    X#                         	 d dl Zd dlZd dlmZ d dlmZ d dlm	Z	 d dl
mZ d dlmZmZmZmZmZ d dlmZ dd	lmZmZmZmZmZmZ  e	e
      dedefd       Z e	e
      dedefd       Z e	e
      dedee   fd       Z e	e
      dedefd       Z dedefdZ! e	e
      dedefd       Z" e	e
      dedefd       Z# e	e
      dedefd       Z$ e	e
      dedefd       Z% e	e
      dedefd       Z&dedefdZ' e	e
      dedefd       Z( e	e
      dedefd       Z) e	e
      dedefd       Z* e	e
      dedefd       Z+ e	e
      dedefd       Z, e	 e-e      
      dedefd       Z.d1de/d e0dee   fd!Z1 e	d"
      d#edefd$       Z2de/deee   e/f   fd%Z3d&edefd'Z4d2d(ed)edefd*Z5d+edee   fd,Z6d-ed.ede7fd/Z8d-ed.edefd0Z9y# e$ r d dlZY w xY w)3    N)IncrementalDecoder)aliases)	lru_cache)findall)ListOptionalSetTupleUnion)MultibyteIncrementalDecoder   )ENCODING_MARKSIANA_SUPPORTED_SIMILARRE_POSSIBLE_ENCODING_INDICATIONUNICODE_RANGES_COMBINEDUNICODE_SECONDARY_RANGE_KEYWORDUTF8_MAXIMAL_ALLOCATION)maxsize	characterreturnc                     	 t        j                  |       }d|v xs d|v xs d|v xs d|v xs
 d|v xs d|v S # t        $ r Y yw xY w)NFz
WITH GRAVEz
WITH ACUTEzWITH CEDILLAzWITH DIAERESISzWITH CIRCUMFLEXz
WITH TILDEunicodedataname
ValueErrorr   descriptions     3platform/bq/third_party/charset_normalizer/utils.pyis_accentuatedr      s    !&&y1 	# 	';&	'[(	' {*	' +		'
 ;&  s   9 	AAc                     t        j                  |       }|s| S |j                  d      }t        t	        |d   d            S )N r      )r   decompositionsplitchrint)r   
decomposedcodess      r   remove_accentr)   *   sA    **95JS!Es58R !!    c                 b    t        |       }t        j                         D ]  \  }}||v s|c S  y)zK
    Retrieve the Unicode range official name from a single character.
    N)ordr   items)r   character_ord
range_name	ord_ranges       r   unicode_ranger1   5   s9    
 	NM!8!>!>!@
II% "A r*   c                 T    	 t        j                  |       }d|v S # t        $ r Y yw xY w)NFLATINr   r   s     r   is_latinr4   C   s8    !&&y1 k!!      	''c                 F    	 | j                  d       y# t        $ r Y yw xY w)NasciiFT)encodeUnicodeEncodeErrorr   s    r   is_asciir;   L   s.    !   s    	  c                 Z    t        j                  |       }d|v ryt        |       }|yd|v S )NPTFPunctuationr   categoryr1   r   character_categorycharacter_ranges      r   is_punctuationrD   T   s=    $--i8
  #I.OO++r*   c                 b    t        j                  |       }d|v sd|v ryt        |       }|yd|v S )NSNTFFormsr?   rA   s      r   	is_symbolrI   c   sE    $--i8
  C+=$=#I.Oo%%r*   c                 &    t        |       }|yd|v S )NF	Emoticons)r1   )r   rC   s     r   is_emoticonrL   r   s     #I.O/))r*   c                 ^    | j                         s| dv ryt        j                  |       }d|v S )N)u   ｜+,;<>TZ)isspacer   r@   r   rB   s     r   is_separatorrV   |   s6    i+KK$--i8$$$r*   c                 D    | j                         | j                         k7  S N)islowerisupperr:   s    r   is_case_variabler[      s    )"3"3"555r*   c                 6    t        j                  |       }d|k(  S )NCo)r   r@   rU   s     r   is_private_use_onlyr^      s    $--i8%%%r*   c                 T    	 t        j                  |       }d|v S # t        $ r Y yw xY w)NFCJKr   r   character_names     r   is_cjkrc      s8    $)))4 N""  r5   c                 T    	 t        j                  |       }d|v S # t        $ r Y yw xY w)NFHIRAGANAr   ra   s     r   is_hiraganarf      8    $)))4 ''  r5   c                 T    	 t        j                  |       }d|v S # t        $ r Y yw xY w)NFKATAKANAr   ra   s     r   is_katakanarj      rg   r5   c                 T    	 t        j                  |       }d|v S # t        $ r Y yw xY w)NFHANGULr   ra   s     r   	is_hangulrm      s8    $)))4 ~%%  r5   c                 T    	 t        j                  |       }d|v S # t        $ r Y yw xY w)NFTHAIr   ra   s     r   is_thairp      s8    $)))4 ^##  r5   r/   c                 &    t         D ]  }|| v s y y)NTF)r   )r/   keywords     r   is_unicode_range_secondaryrs      s    2j  3 r*   sequencesearch_zonec                 l   t        | t              st        t        |       }t	        t
        | d||k  r|n| j                  dd            }t        |      dk(  ry|D ]T  }|j                         j                  dd      }t        j                         D ]  \  }}||k(  r|c c S ||k(  s|c c S  V y)zW
    Extract using ASCII-only decoder any specified encoding in the first n-bytes.
    Nr7   ignoreerrorsr   -_)
isinstancebytes	TypeErrorlenr   r   decodelowerreplacer   r-   )rt   ru   seq_lenresultsspecified_encodingencoding_aliasencoding_ianas          r   any_specified_encodingr      s     h&(mG'Eg47+FMMH 	N 	
G 7|q%/557??SI-4]]_)NM!33$$ 22$$	 .= & r*      r   c                     | dv xs< t        t        j                  dj                  |             j                  t
              S )zQ
    Verify is a specific encoding is a multi byte one based on it IANA name
    >	   utf_7utf_8utf_16utf_32	utf_16_be	utf_16_le	utf_32_be	utf_32_le	utf_8_sigencodings.{})
issubclass	importlibimport_moduleformatr   r   )r   s    r   is_multi_byte_encodingr      sG    
  
 
  
 5 5d ;<OO#
r*   c                     t         D ]>  }t         |   }t        |t              r|g}|D ]  }| j                  |      s||fc c S  @ y)z9
    Identify and extract SIG/BOM in given sequence.
    )Nr*   )r   r|   r}   
startswith)rt   iana_encodingmarksmarks       r   identify_sig_or_bomr      sQ    
 (}-eU#GED""4($d**  ( r*   r   c                 
    | dvS )N>   r   r    )r   s    r   should_strip_sig_or_bomr     s     444r*   cp_namestrictc                     | j                         j                  dd      } t        j                         D ]  \  }}| |k(  s| |k(  s|c S  |rt	        dj                  |             | S )Nrz   r{   z Unable to retrieve IANA for '{}')r   r   r   r-   r   r   )r   r   r   r   s       r   	iana_namer     se    mmo%%c3/G)0%n$=(@   *9 ;BB7KLLNr*   decoded_sequencec                 x    t               }| D ]!  }t        |      }||j                  |       # t        |      S rX   )setr1   addlist)r   rangesr   rC   s       r   
range_scanr   #  s>    UF%	'	2"

?# & <r*   iana_name_aiana_name_bc                    t        |       st        |      ryt        j                  dj                  |             j                  }t        j                  dj                  |            j                  } |d      } |d      }d}t        dd      D ]7  }t        |g      }|j                  |      |j                  |      k(  s3|dz  }9 |dz  S )	Ng        r   rw   rx   r      r      )r   r   r   r   r   ranger}   r   )	r   r   	decoder_a	decoder_bid_aid_bcharacter_match_countito_be_decodeds	            r   cp_similarityr   1  s    k*.D[.Q''(=(=k(JK^^I''(=(=k(JK^^IH%DH%D1c]qc
;;}%])CC!Q&! 
 !3&&r*   c                 ,    | t         v xr |t         |    v S )z
    Determine if two code page are at least 80% similar. IANA_SUPPORTED_SIMILAR dict was generated using
    the function cp_similarity.
    )r   )r   r   s     r   is_cp_similarr   F  s%     	-- 	?1+>>r*   )i   )T):unicodedata2r   ImportErrorr   codecsr   encodings.aliasesr   	functoolsr   rer   typingr   r   r	   r
   r   _multibytecodecr   constantr   r   r   r   r   r   strboolr   r)   r1   r4   r;   rD   rI   rL   rV   r[   r^   rc   rf   rj   rm   rp   r   rs   r}   r&   r   r   r   r   r   r   floatr   r   r   r*   r   <module>r      s  &  % %   4 4 7  *+c d  , *+"S "S " ," *+
S 
Xc] 
 ,
 *+" " " ,"   *+,c ,d , ,, *+& & & ,& *+*3 *4 * ,* *+%C %D % ,% *+6 6 6 ,6&3 &4 & *+#c #d # ,# *+(3 (4 ( ,( *+(3 (4 ( ,( *+& & & ,& *+$s $t $ ,$ 3./03 4  1U  QT > 3   (% E(3-2F,G $53 54 5
s 
D 
C 
 c 's ' ' '*s   E
  s   G0 0	G=<G=