
    $                         d Z ddlZddlZddlZddlmZ ddlmZ ddlm	Z	m
Z
mZmZmZmZ ddlZ	 ddlZdZh dZh d	Zd
 Zd ZeddfdZd Zedk(  r e        yy# e$ r dZY /w xY w)z~
Run chardet on a bunch of documents and see that we get the correct encodings.

:author: Dan Blanchard
:author: Ian Cordasco
    N)defaultdict)listdir)dirnameisdirjoinrealpathrelpathsplitextTF>   
iso-8859-2
iso-8859-6windows-1250windows-1254windows-1256>   %tests/iso-8859-9-turkish/subtitle.srt&tests/iso-8859-7-greek/disabled.gr.xml+tests/iso-8859-9-turkish/divxplanet.com.xml2tests/iso-8859-9-turkish/wikitop_tr_ISO-8859-9.txtc                      t        t        d      rd} | S t        j                  j                  d      rd} | S t        j                  dk(  rd} | S d} | S )z"Return what kind of Python this ispypy_version_infoPyPyjavaJythoncli
IronPythonCPython)hasattrsysplatform
startswith)pyimpls    ,platform/gsutil/third_party/chardet/bench.pyget_py_implr"   +   s_    s'( M 
	 	 	(
 M	 
	 M M    c            	   #     K   t        t        t        t        t                    d            } t        |       D ]  }t        | |      }t        |      s|j                         }dD ])  }|j                  |      s|j                  |      d   } n |t        v rbt        |      D ]>  }t        |      d   j                         }|dvr$t        ||      }|t        v r9||f @  yw)z1Yields filenames to use for timing chardet.detecttests)z-arabicz
-bulgarianz	-cyrillicz-greekz-hebrewz
-hungarianz-turkishr      )z.htmlz.txtz.xmlz.srtN)r	   r   r   r   __file__r   r   lowerendswith
rpartitionMISSING_ENCODINGSr
   EXPECTED_FAILURES)	base_pathencodingpathpostfix	file_nameext	full_paths          r!   get_test_filesr4   8   s     WXh%78'BCII&Ix(T{>>#
G   )#..w7:
 (( I9%a(..0C;;T9-I--X%% '/ 's   A8C*;A/C*
   c           
         t        d| j                   d| j                   dt                dt        j
                          t        d       d}d}t        t              }t        t              }t               D ]  \  }}|dz  }t        |d      5 }	|	j                         }
d d d        t        j                         }t        |      D ]  }| j                  
        t        j                         |z
  }|rt        d| d	||z   d
       n+t        dd       t        j                  j!                          ||z  }||xx   |z  cc<   ||xx   dz  cc<    t        d       t#        |j%                               D ]   }|||   z  ||   z  }t        | d	|        " ||z  |z  }t        d| d| d       y # 1 sw Y   xY w)NzBenchmarking  z on zP--------------------------------------------------------------------------------r   r&   rbzAverage time for z: s. )endz$
Calls per second for each encoding:z
Total time: zs (z calls per second))print__name____version__r"   r   versionr   floatintr4   openreadtimerangedetectstdoutflushsortedkeys)chardet_modverbose	num_iters
total_time	num_filesencoding_timesencoding_num_filesr3   r.   finput_bytesstart_
bench_timecalls_per_secs                  r!   	benchmarkrY   \   s   	
,,-Q{/F/F.G Hm_Ackk]	, 
(OJI 'N$S)-/	8Q	)T"a&&(K #		y!A{+ "YY[5(
%i[:	3I2J!LM#2JJj 
x J. 8$)$  0" 

12>..01*844~h7OO 	 	
"]O,-	 2
 	)J6M	N:,c-8J
KL- #"s   F;;G	c                     t        j                  dt         j                        } | j                  dddd       | j                  dd	d
t        d       | j                  dddd       | j                         }|j                  r&t        s t        d       t        j                  d       t        |j                  rt        nt        |j                  |j                         y )NzHTimes how long it takes to process each file in test set multiple times.)descriptionformatter_classz-cz
--cchardet
store_truezCRun benchmarks for cChardet instead of chardet, if it is installed.)actionhelpz-iz--iterationsz$Number of times to process each filer5   )r_   typedefaultz-vz	--verbosez/Prints out the timing for each individual file.)r_   r^   z:You must pip install cchardet if you want to benchmark it.r&   )rL   rM   rN   )argparseArgumentParserArgumentDefaultsHelpFormatteradd_argumentrB   
parse_argscchardetHAVE_CCHARDETr=   r   exitrY   chardetrM   
iterations)parserargss     r!   mainrn      s    $$ >>F
 R	   3   >	   D}}]JK $H7//r#   __main__)__doc__rb   r   rE   collectionsr   osr   os.pathr   r   r   r   r	   r
   rj   rg   rh   	Exceptionr+   r,   r"   r4   rY   rn   r>    r#   r!   <module>rv      s     
  #  E E M  
!&H "5B "MJ#L zF c  Ms   A A&%A&