a
    `g                     @  s<   d dl mZ d dlmZmZ d dlmZ G dd deZdS )    )annotations)AnyList)TextSplitterc                      sF   e Zd ZdZdddddddd	d
 fddZdddddZ  ZS )NLTKTextSplitterz"Splitting text using NLTK package.

englishF)use_span_tokenizestrboolr   None)	separatorlanguager	   kwargsreturnc                  s   t  jf i | || _|| _|| _| jr<| jdkr<tdz6| jr^ddlm} || j| _nddlm	} || _W n t
y   t
dY n0 dS )zInitialize the NLTK splitter. z6When use_span_tokenize is True, separator should be ''r   )_get_punkt_tokenizer)sent_tokenizezANLTK is not installed, please install it with `pip install nltk`.N)super__init__
_separator	_language_use_span_tokenize
ValueErrorZnltk.tokenizer   
_tokenizerr   ImportError)selfr   r   r	   r   r   r   	__class__ k/var/www/html/cobodadashboardai.evdpl.com/venv/lib/python3.9/site-packages/langchain_text_splitters/nltk.pyr      s     	
zNLTKTextSplitter.__init__z	List[str])textr   c           	      C  s   | j rzt| j|}g }t|D ]T\}\}}|dkr`||d  d }||| |||  }n||| }|| q"n| j|| jd}| || jS )z&Split incoming text and return chunks.r      )r   )	r   listr   Zspan_tokenize	enumerateappendr   Z_merge_splitsr   )	r   r!   spansZsplitsistartendZprev_endZsentencer   r   r    
split_text(   s    zNLTKTextSplitter.split_text)r   r   )__name__
__module____qualname____doc__r   r*   __classcell__r   r   r   r    r      s     r   N)
__future__r   typingr   r   Zlangchain_text_splitters.baser   r   r   r   r   r    <module>   s   