
                            d Z ddlZddlmZ ddlmZ ddlmZmZm	Z	m
Z
mZmZ ddlmZ ddlmZ 	 ddlmZ ddlmZmZmZmZ d	ZddlZddlZddlmZ h dZh dZ d Z!ejD                  jG                  d e!             d        Z$ejD                  jG                  d e!             d        Z%er G d de&      Z'ejD                  jP                   e ejR                  d       ejT                  g d       ejV                                ed      d                      Z, e ejR                  d       ejT                  g d       ejV                                ed      d               Z-yy# e$ r d
ZY 5w xY w)z~
Run chardet on a bunch of documents and see that we get the correct encodings.

:author: Dan Blanchard
:author: Ian Cordasco
    N)ndiff)listdir)dirnameisdirjoinrealpathrelpathsplitext)pformat)	normalize)	VerbosityassumegivensettingsTF)	LANGUAGES>   
iso-8859-2
iso-8859-6windows-1250windows-1254windows-1256>   #tests/iso-8859-9-turkish/_ude_1.txt#tests/iso-8859-9-turkish/_ude_2.txt%tests/iso-8859-9-turkish/subtitle.srt+tests/iso-8859-9-turkish/divxplanet.com.xml2tests/iso-8859-9-turkish/wikitop_tr_ISO-8859-9.txtc            	   #     K   t        t        t        t        t                    d            } t        |       D ]
  }t        | |      }t        |      s|j                         }t        t        j                               D ]<  }d|j                         z   }|j                  |      s(|j                  |      d   } n |t        v rt        |      D ]l  }t        |      d   j                         }|dvr$t        ||      }||f}|t        v r-t!        j"                  |dt         j$                  j&                  i}| n  yw)zGYields tuples of paths and encodings to use for test_encoding_detectiontests-r      )z.htmlz.txtz.xmlz.srtmarksN)r	   r   r   r   __file__r   r   lowersortedr   keysendswith
rpartitionMISSING_ENCODINGSr
   EXPECTED_FAILURESpytestparammarkxfail)		base_pathencodingpathlanguagepostfix	file_nameext	full_path	test_cases	            +platform/gsutil/third_party/chardet/test.pygen_test_paramsr7   .   s     WXh%78'BCII&Ix(T{>>#y~~/0HHNN,,G  )#..w7:	 1 (( I9%a(..0C;;T9-I!8+I--"LL)M6;;;L;LM	O '! 's   B'E*BEzfile_name, encodingc                    t        | d      5 }|j                         }t        j                  |      }	 |j	                  |      }	 |j	                  |d         }d d d        r|d   xs dj                         |k(  }nd}t        d      }t        d      }|s||k7  rdj                  t        j                  |d            dz   }dj                  t        j                  |d            dz   }	dj                  t        |j                  d      |	j                  d            D 
cg c]  }
|
j                  d	      s|
 c}
d d
       }t        j                   d      }nd}d}|g}|sJ d| d| d|  d| dt#        |       
       y # t
        $ r d}Y Uw xY w# t
        t        t        f$ r d}Y ]w xY w# 1 sw Y   cxY wc c}
w )Nrb r.   FNFKC
d   T    )ignore_threshold	Expected 
, but got  for z/.  First 20 lines with character differences: 

All encodings: openreadchardetdetectdecodeLookupErrorUnicodeDecodeError	TypeErrorr"   r   r   textwrapwrapr   
splitlines
startswith
detect_allr   r2   r.   finput_bytesresultexpected_unicodedetected_unicodeencoding_matchwrapped_expectedwrapped_detectedlinediffall_encodingss                r6   test_encoding_detectionr_   L   s   	i	!ffh,	"*11(;	"*11&2DE 
  ,299;xG !)9: )9:.2BB99X]]3CS%IJTQ99X]]3CS%IJTQww "$//57G7R7RSW7XD s+	  r
  **;N 
H:ZxuYK @//3f 5!-01	3>?  	"!	" /; 	"!	" 
	.sL   &F;FF'GFF;FF;F84F;7F88F;;Gc                    t        | d      5 }|j                         }t        j                  |d      }	 |j	                  |      }	 |j	                  |d         }d d d        r|d   xs dj                         |k(  }nd}t        d      }t        d      }|s||k7  rdj                  t        j                  |d	            dz   }dj                  t        j                  |d	            dz   }	dj                  t        |j                  d      |	j                  d            D 
cg c]  }
|
j                  d
      s|
 c}
d d       }t        j                   dd      }nd}d}|g}|sJ d| d| d|  d| dt#        |       
       y # t
        $ r d}Y Vw xY w# t
        t        t        f$ r d}Y ^w xY w# 1 sw Y   dxY wc c}
w )Nr9   T)should_rename_legacyr:   r.   FNFKDr<   r=   r>   r?   )r@   ra   rA   rB   rC   z-.  First 20 lines of character differences: 
rD   rE   rS   s                r6   %test_encoding_detection_rename_legacyrc   y   s   	i	!ffh$G	"*11(;	"*11&2DE 
  ,299;xG !)9: )9:.2BB99X]]3CS%IJTQ99X]]3CS%IJTQww "$//57G7R7RSW7XD s+	  r
  **$T
  
H:ZxuYK @--1F 3!-01	3>C  	"!	" /; 	"!	" 
	.sL   (F>FF!)GFF>FF>!F;7F>:F;;F>>Gc                       e Zd Zy)JustALengthIssueN)__name__
__module____qualname__     r6   re   re      s    rj   re   r   )min_size)asciizutf-8zutf-16zutf-32z
iso-8859-7z
iso-8859-8zwindows-1255   )max_examplesc                 ~    	  j                        }t        j                        d   }|it        j                  t              5  t        t        j                         |      t        t        j                  d       fd              }d d d        y y # t        $ r t        d       Y w xY w# 1 sw Y   y xY w)NFr.   )random2   )	verbosityrn   c                     	 | z   j                        }t        j                        }|r|d   
t               y y # t        $ r t        d       Y >w xY w)NFr.   )encodeUnicodeEncodeErrorr   rH   rI   re   )suffixextendedrV   enctxts      r6   string_poisons_following_textz^test_never_fails_to_detect_if_there_is_a_valid_encoding.<locals>.string_poisons_following_text   sb    &$'&L#8#8#= %^^H5F&"4"@.00 #Av . &u&s   ? AA)rt   ru   r   rH   rI   r)   raisesre   r   sttextr   r   quiet)ry   rx   rnddatadetectedrz   s   ``    r6   7test_never_fails_to_detect_if_there_is_a_valid_encodingr      s    $	::c?D >>$'
3/0rwwy-IOO"E1 F .1	 10  " 	5M	 10s   B AB3B0/B03B<c                    	 | j                  |      }	 t        j                        }t        j
                  |      }|d   |d   d   k(  sJ y # t        $ r t        d       Y Rw xY w# t        $ r}t         d       |d }~ww xY w)NFr.   r   z != )rt   ru   r   rH   rI   rR   	ExceptionRuntimeError)ry   rx   _r   rV   resultsexcs          r6   +test_detect_all_and_detect_one_should_agreer      s    "	::c?D	B^^D)F((.G*%J)???? " 	5M	  	B&gY78cA	Bs(   A :A) A&%A&)	B2BB).__doc__rN   difflibr   osr   os.pathr   r   r   r   r	   r
   pprintr   unicodedatar   hypothesis.strategies
strategiesr|   
hypothesisr   r   r   r   HAVE_HYPOTHESISImportErrorr)   rH   chardet.metadata.languagesr   r'   r(   r7   r+   parametrizer_   rc   r   re   r,   r}   sampled_fromrandomsr   r   ri   rj   r6   <module>r      s      E E  !&==O   0  < .0AB) C)X .0AB+ C+\ 9  [[

	
 	

 31   "1( 
	
 	

 3
B   
Bu g  Os   E/ /E:9E: