
    +h                    >    S SK Jr  S SKJr  S SKJr   " S S\5      rg)    )annotations)Any)TextSplitterc                  \   ^  \ rS rSrSr  SSS.         S	U 4S jjjjrS
S jrSrU =r$ )NLTKTextSplitter   z"Splitting text using NLTK package.F)use_span_tokenizec                 > [         TU ]  " S0 UD6  Xl        X l        X0l        U R                  (       a  U R                  S:w  a  Sn[        U5      e SSKnU R                  (       a+  UR                  R                  U R                  5      U l	        gUR                  R                  U l	        g! [         a  nSn[        U5      UeSnAff = f)zInitialize the NLTK splitter. z6When use_span_tokenize is True, separator should be ''r   NzANLTK is not installed, please install it with `pip install nltk`. )super__init__
_separator	_language_use_span_tokenize
ValueErrornltktokenize_get_punkt_tokenizer
_tokenizersent_tokenizeImportError)	self	separatorlanguager	   kwargsmsgr   err	__class__s	           M/root/34ku/venv/lib/python3.13/site-packages/langchain_text_splitters/nltk.pyr   NLTKTextSplitter.__init__   s     	"6"#!"3""t"'<JCS/!		,&&"&--"D"DT^^"T"&--"="= 	,UCc"+	,s   ?B/ B/ /
C9CCc                f   U R                   (       am  [        U R                  R                  U5      5      n/ n[	        U5       H7  u  nu  pVUS:  a  X$S-
     S   nXU XU -   nOXU nUR                  U5        M9     OU R                  XR                  S9nU R                  X0R                  5      $ )z&Split incoming text and return chunks.r      )r   )	r   listr   span_tokenize	enumerateappendr   _merge_splitsr   )	r   textspanssplitsistartendprev_endsentences	            r    
split_textNLTKTextSplitter.split_text&   s     ""66t<=EF#,U#3<Eq5$U|AH#U3doEH##Hh' $4 __TNN_CF!!&//::    )r   r   r   r   )z

english)
r   strr   r5   r	   boolr   r   returnNone)r)   r5   r7   z	list[str])	__name__
__module____qualname____firstlineno____doc__r   r1   __static_attributes____classcell__)r   s   @r    r   r      s\    ,  !,
 #(,, ,
  , , 
, ,6; ;r3   r   N)
__future__r   typingr   langchain_text_splitters.baser   r   r   r3   r    <module>rC      s    "  6-;| -;r3   