a
    ~g\                     @  sx   d dl mZ d dlZd dlmZmZmZmZmZ d dl	m
Z
mZ G dd deZdddd	d
ddZG dd deZdS )    )annotationsN)AnyListLiteralOptionalUnion)LanguageTextSplitterc                      s>   e Zd ZdZdddddd fd	d
ZdddddZ  ZS )CharacterTextSplitterz(Splitting text that looks at characters.

Fstrboolr   None)	separatoris_separator_regexkwargsreturnc                   s"   t  jf i | || _|| _dS )Create a new TextSplitter.N)super__init__
_separator_is_separator_regex)selfr   r   r   	__class__ g/var/www/html/emsaiapi.evdpl.com/venv/lib/python3.9/site-packages/langchain_text_splitters/character.pyr      s    zCharacterTextSplitter.__init__	List[str]textr   c                 C  sB   | j r| jn
t| j}t||| j}| jr0dn| j}| ||S )&Split incoming text and return chunks. )r   r   reescape_split_text_with_regex_keep_separator_merge_splits)r   r   r   splitsr   r   r   r   
split_text   s
    z CharacterTextSplitter.split_text)r   F)__name__
__module____qualname____doc__r   r(   __classcell__r   r   r   r   r
   	   s    r
   r   $Union[bool, Literal['start', 'end']]r   )r   r   keep_separatorr   c                   s   |r|rt d| d|  |dkrF fddtdt d dD n fd	dtdt dD }t d dkr| d
d  7 }|dkr| d
 g n d g| }qt || }nt| }dd |D S )N()endc                   s    g | ]} |  |d    qS    r   .0iZ_splitsr   r   
<listcomp>(       z*_split_text_with_regex.<locals>.<listcomp>r   r4      c                   s    g | ]} |  |d    qS r3   r   r5   r8   r   r   r9   *   r:   c                 S  s   g | ]}|d kr|qS )r!   r   )r6   sr   r   r   r9   7   r:   )r"   splitrangelenlist)r   r   r/   r'   r   r8   r   r$      s     $r$   c                      s|   e Zd ZdZdddddd	d
 fddZddddddZdddddZeddd dddZe	dddddZ
  ZS )RecursiveCharacterTextSplitterzSplitting text by recursively look at characters.

    Recursively tries to split by different characters to find one
    that works.
    NTFzOptional[List[str]]r.   r   r   r   )
separatorsr/   r   r   r   c                   s.   t  jf d|i| |p g d| _|| _dS )r   r/   )r   
 r!   N)r   r   _separatorsr   )r   rC   r/   r   r   r   r   r   r   A   s    z'RecursiveCharacterTextSplitter.__init__r   r   )r   rC   r   c                 C  s(  g }|d }g }t |D ]P\}}| jr*|nt|}|dkrD|} qjt||r|}||d d } qjq| jrt|nt|}t||| j}	g }
| jrdn|}|	D ]d}| || jk r|
	| q|
r| 
|
|}|| g }
|s|	| q| ||}|| q|
r$| 
|
|}|| |S )r    r<   r!   r4   N)	enumerater   r"   r#   searchr$   r%   _length_function_chunk_sizeappendr&   extend_split_text)r   r   rC   Zfinal_chunksr   Znew_separatorsr7   Z_sr   r'   Z_good_splitsr=   Zmerged_textZ
other_infor   r   r   rM   M   s>    

z*RecursiveCharacterTextSplitter._split_textr   c                 C  s   |  || jS )zSplit the input text into smaller chunks based on predefined separators.

        Args:
            text (str): The input text to be split.

        Returns:
            List[str]: A list of text chunks obtained after splitting.
        )rM   rF   )r   r   r   r   r   r(   u   s    	z)RecursiveCharacterTextSplitter.split_textr   )languager   r   c                 K  s   |  |}| f |dd|S )a  Return an instance of this class based on a specific language.

        This method initializes the text splitter with language-specific separators.

        Args:
            language (Language): The language to configure the text splitter for.
            **kwargs (Any): Additional keyword arguments to customize the splitter.

        Returns:
            RecursiveCharacterTextSplitter: An instance of the text splitter configured
            for the specified language.
        T)rC   r   )get_separators_for_language)clsrN   r   rC   r   r   r   from_language   s    
z,RecursiveCharacterTextSplitter.from_language)rN   r   c                 C  s  | t jks| t jkrg dS | t jkr.g dS | t jkr@g dS | t jkrRg dS | t jkrdg dS | t jkrvg dS | t jkrg dS | t j	krg dS | t j
krg d	S | t jkrg d
S | t jkrg dS | t jk rg dS | t jk rg dS | t jkrg dS | t jkr g dS | t jkr4g dS | t jkrHg dS | t jkr\g dS | t jkrpg dS | t jkrg dS | t jkrg dS | t jkrg dS | t jkrg dS | t jkrg dS | t jv rtd|  dntd|  dtt  dS )a
  Retrieve a list of separators specific to the given language.

        Args:
            language (Language): The language for which to get the separators.

        Returns:
            List[str]: A list of separators appropriate for the specified language.
        )
class z
void z
int z
float z
double 
if 
for 
while 
switch 
case r   rD   rE   r!   )
func 
var 
const 
type rS   rT   rV   rW   r   rD   rE   r!   )rR   
public 
protected 	
private 
static rS   rT   rU   rV   rW   r   rD   rE   r!   )rR   r\   r]   r^   z

internal z
companion z
fun 
val rY   rS   rT   rU   z
when rW   
else r   rD   rE   r!   )

function rZ   
let rY   rR   rS   rT   rU   rV   rW   	
default r   rD   rE   r!   )
enum 
interface z
namespace r[   rR   rb   rZ   rc   rY   rS   rT   rU   rV   rW   rd   r   rD   rE   r!   )rb   rR   rS   	
foreach rU   
do rV   rW   r   rD   rE   r!   )
z	
message z	
service re   z
option 
import z
syntax r   rD   rE   r!   )rR   
def z
	def r   rD   rE   r!   )z
=+
z
-+
z
\*+
z

.. *

r   rD   rE   r!   )rj   rR   rS   
unless rU   rT   rh   z
begin z
rescue r   rD   rE   r!   )rj   z
defp z
defmodule z
defprotocol z

defmacro z
defmacrop rS   rk   rU   rW   z
cond z
with rT   rh   r   rD   rE   r!   )z
fn rZ   rc   rS   rU   rT   z
loop 
match rZ   r   rD   rE   r!   )rR   z
object rj   r`   rY   rS   rT   rU   rl   rW   r   rD   rE   r!   )rX   rR   
struct re   rS   rT   rU   rh   rV   rW   r   rD   rE   r!   )	z
#{1,6} z```
z	
\*\*\*+
z
---+
z
___+
r   rD   rE   r!   )z
\\chapter{z
\\section{z
\\subsection{z
\\subsubsection{z
\\begin{enumerate}z
\\begin{itemize}z
\\begin{description}z
\\begin{list}z
\\begin{quote}z
\\begin{quotation}z
\\begin{verse}z
\\begin{verbatim}z
\\begin{align}z$$$rE   r!   )z<bodyz<divz<pz<brz<liz<h1z<h2z<h3z<h4z<h5z<h6z<spanz<tablez<trz<tdz<thz<ulz<olz<headerz<footerz<navz<headz<stylez<scriptz<metaz<titler!   )rf   re   z
implements z

delegate 
event rR   z

abstract r\   r]   r^   r_   z
return rS   z

continue rT   rg   rU   rV   z
break rW   ra   
try z
throw 	
finally 
catch r   rD   rE   r!   )z
pragma z
using z

contract rf   z	
library z
constructor r[   rb   ro   z

modifier z
error rm   re   rS   rT   rU   z

do while z

assembly r   rD   rE   r!   )z
IDENTIFICATION DIVISION.z
ENVIRONMENT DIVISION.z
DATA DIVISION.z
PROCEDURE DIVISION.z
WORKING-STORAGE SECTION.z
LINKAGE SECTION.z
FILE SECTION.z
INPUT-OUTPUT SECTION.z
OPEN z
CLOSE z
READ z
WRITE z
IF z
ELSE z
MOVE z	
PERFORM z
UNTIL z	
VARYING z
ACCEPT z	
DISPLAY z

STOP RUN.rD   rE   r!   )
z
local rb   rS   rT   rU   z
repeat r   rD   rE   r!   )z	
main :: z
main = rc   z
in rh   z
where 
:: z
= 
data z	
newtype r[   rs   z
module ri   z
qualified z
import qualified rR   z

instance rW   z
| rt   z
= {z
, r   rD   rE   r!   )rb   z
param rS   rg   rT   rU   rV   rR   rp   rr   rq   r   rD   rE   r!   z	Language z is not implemented yet!z& is not supported! Please choose from N)r   CCPPGOJAVAKOTLINJSTSPHPPROTOPYTHONRSTRUBYELIXIRRUSTSCALASWIFTMARKDOWNLATEXHTMLCSHARPSOLCOBOLLUAHASKELL
POWERSHELL_value2member_map_
ValueErrorrA   )rN   r   r   r   rO      sn    










$&z:RecursiveCharacterTextSplitter.get_separators_for_language)NTF)r)   r*   r+   r,   r   rM   r(   classmethodrQ   staticmethodrO   r-   r   r   r   r   rB   :   s      (rB   )
__future__r   r"   typingr   r   r   r   r   langchain_text_splitters.baser   r	   r
   r$   rB   r   r   r   r   <module>   s   