a
    bgY                     @  s   d dl mZ d dlZd dlZd dlmZ d dlmZ d dlm	Z	m
Z
mZmZmZmZ d dlmZ d dlmZ d dlmZ d d	lmZmZ e Zd
dddddZG dd deZG dd deZG dd deZdS )    )annotationsN)sha1)Thread)AnyDictIterableListOptionalTuple)Document)
Embeddings)VectorStore)BaseSettingsSettingsConfigDictstrr   bool)sargsreturnc                 G  s   |D ]}|| vr dS qdS )z
    Check if a string contains multiple substrings.
    Args:
        s: string to check.
        *args: substrings to check.

    Returns:
        True if all substrings are in the string, False otherwise.
    FT )r   r   ar   r   v/var/www/html/cobodadashboardai.evdpl.com/venv/lib/python3.9/site-packages/langchain_community/vectorstores/myscale.pyhas_mul_sub_str   s    
r   c                   @  s   e Zd ZU dZdZded< dZded< dZd	ed
< dZd	ed< dZ	ded< dZ
ded< dddddZded< dZded< dZded< dZded< ddddd Zed!d"d#d$d%ZdS )&MyScaleSettingsa  MyScale client configuration.

    Attribute:
        myscale_host (str) : An URL to connect to MyScale backend.
                             Defaults to 'localhost'.
        myscale_port (int) : URL port to connect with HTTP. Defaults to 8443.
        username (str) : Username to login. Defaults to None.
        password (str) : Password to login. Defaults to None.
        index_type (str): index type string.
        index_param (dict): index build parameter.
        database (str) : Database name to find the table. Defaults to 'default'.
        table (str) : Table name to operate on.
                      Defaults to 'vector_table'.
        metric (str) : Metric to compute distance,
                       supported are ('L2', 'Cosine', 'IP'). Defaults to 'Cosine'.
        column_map (Dict) : Column type map to project column name onto langchain
                            semantics. Must have keys: `text`, `id`, `vector`,
                            must be same size to number of columns. For example:
                            .. code-block:: python

                                {
                                    'id': 'text_id',
                                    'vector': 'text_embedding',
                                    'text': 'text_plain',
                                    'metadata': 'metadata_dictionary_in_json',
                                }

                            Defaults to identity map.

    	localhostr   hosti   intportNOptional[str]usernamepasswordZMSTG
index_typezOptional[Dict[str, str]]index_paramidtextvectormetadata)r#   r$   r%   r&   zDict[str, str]
column_mapdefaultdatabaseZ	langchaintableZCosinemetricr   )itemr   c                 C  s
   t | |S N)getattr)selfr,   r   r   r   __getitem__U   s    zMyScaleSettings.__getitem__z.envutf-8Zmyscale_ignore)Zenv_fileZenv_file_encodingZ
env_prefixextra)__name__
__module____qualname____doc__r   __annotations__r   r   r    r!   r"   r'   r)   r*   r+   r0   r   Zmodel_configr   r   r   r   r   !   s,   
r   c                      sR  e Zd ZdZd?ddddd fdd	Zedd
ddZdddddZddddddZddddddZ	d@dddddddddZ
edAddd ddddd d!d"d#Zdd
d$d%ZdBd&dd'dd(d)d*ZdCddd'dd,d-d.d/ZdDd&dd'dd,d0d1d2ZdEddd'dd3d-d4d5Zdd
d6d7ZdFd8d'dd9d:d;d<Zedd
d=d>Z  ZS )GMyScalea  `MyScale` vector store.

    You need a `clickhouse-connect` python package, and a valid account
    to connect to MyScale.

    MyScale can not only search with simple vector indexes.
    It also supports a complex query with multiple conditions,
    constraints and even sub-queries.

    For more information, please visit
        [myscale official site](https://docs.myscale.com/en/overview/)
    Nr   Optional[MyScaleSettings]r   None)	embeddingconfigkwargsr   c                   s  zddl m} W n ty*   tdY n0 zddlm} || _W n ty^   dd | _Y n0 t   |durz|| _nt | _| jsJ | jj	r| jj
sJ | jjr| jjr| jjr| jjsJ dD ]}|| jjv sJ q| jj d	v sJ | jjd
v r
td t|d}| jjrBdddd | jj D  nd}d| jj d| jj d| jjd  d| jjd  d| jjd  d| jjd  d| jjd  d| d| jjd  d| jj d| jj d | d!| jjd  d"}	|| _d#| _d$| _|| _| jj d%v rd&nd'| _|f | jj	| jj
| jj| jjd(|| _z| j d) W n< t!y }
 z"t"d*| jj# d+ W Y d}
~
n
d}
~
0 0 | j d, | j |	 dS )-zMyScale Wrapper to LangChain

        embedding (Embeddings):
        config (MyScaleSettings): Configuration to MyScale Client
        Other keyword arguments will pass into
            [clickhouse-connect](https://docs.myscale.com/)
        r   )
get_clientzlCould not import clickhouse connect python package. Please install it with `pip install clickhouse-connect`.)tqdmc                 S  s   | S r-   r   )xr   r   r   <lambda>       z"MyScale.__init__.<locals>.<lambda>N)r#   r%   r$   r&   )ZIPCOSINEL2)ipZcosinel2z_Lower case metric types will be deprecated the future. Please use one of ('IP', 'Cosine', 'L2')ztry this out, ,c                 S  s"   g | ]\}}d | d| d qS )'=r   ).0kvr   r   r   
<listcomp>   rC   z$MyScale.__init__.<locals>.<listcomp> z(
            CREATE TABLE IF NOT EXISTS .z(
                r#   z String,
                r$   r%   z! Array(Float32),
                r&   zP JSON,
                CONSTRAINT cons_vec_len CHECK length(                    z) = z$,
                VECTOR INDEX vidx z                     TYPE z&(                        'metric_type=rJ   z,)
            ) ENGINE = MergeTree ORDER BY z	
        \)rR   rJ   )rD   rE   ASCZDESC)r   r   r   r    z"SET allow_experimental_json_type=1zClickhouse version=z6 - There is no allow_experimental_json_type parameter.z$SET allow_experimental_object_type=1)$Zclickhouse_connectr?   ImportErrorr@   pgbarsuper__init__r=   r   r   r   r'   r)   r*   r+   upperloggerwarninglenembed_queryr"   joinitemsr!   dimBSmust_escape_embeddings
dist_orderr   r    clientcommand	ExceptiondebugZserver_version)r/   r<   r=   r>   r?   r@   rM   r_   Zindex_paramsZschema__	__class__r   r   rW   n   s    



$





	


zMyScale.__init__r   c                 C  s   | j S r-   )rb   r/   r   r   r   
embeddings   s    zMyScale.embeddingsr   )valuer   c                   s   d  fdd|D S )NrP   c                 3  s*   | ]"}| j v r j | n|V  qd S r-   )ra   r`   )rL   crl   r   r   	<genexpr>   rC   z%MyScale.escape_str.<locals>.<genexpr>)r]   )r/   rn   r   rl   r   
escape_str   s    zMyScale.escape_strr   zIterable[str])transaccolumn_namesr   c              
     sr   d |}g }|D ].}d  fdd|D }|d| d qd jj d jj d| dd | d		}|S )
NrI   c                   s"   g | ]}d   t| d qS rJ   )rq   r   )rL   Z_nrl   r   r   rO      rC   z'MyScale._build_istr.<locals>.<listcomp>()z8
                INSERT INTO TABLE 
                    rQ   z))
                VALUES
                z
                )r]   appendr=   r)   r*   )r/   rr   rs   ks_datanZi_strr   rl   r   _build_istr   s    
zMyScale._build_istrc                 C  s   |  ||}| j| d S r-   )r{   rd   re   )r/   rr   rs   Z_i_strr   r   r   _insert   s    zMyScale._insert    zOptional[List[dict]]r   zOptional[Iterable[str]]	List[str])texts	metadatas
batch_sizeidsr>   r   c              
   K  s  |pdd |D }| j j}g }|d ||d ||d t| jj|i}|pTdd |D }ttj|||d < tt|t| dksJ t	|
  \}	}
zd	}| jt	|
 d
t|dD ]j}t||	| j jd  | jksJ || t||kr|r |  t| j||	gd}|  g }qt|dkrH|r<|  | ||	 dd |D W S  ty } z0tdt| dt| d g W  Y d	}~S d	}~0 0 d	S )a  Run more texts through the embeddings and add to the vectorstore.

        Args:
            texts: Iterable of strings to add to the vectorstore.
            ids: Optional list of ids to associate with the texts.
            batch_size: Batch size of insertion
            metadata: Optional column data to be inserted

        Returns:
            List of ids from adding the texts into the vectorstore.

        c                 S  s   g | ]}t |d  qS )r1   )r   encode	hexdigest)rL   tr   r   r   rO      rC   z%MyScale.add_texts.<locals>.<listcomp>r#   r$   r%   c                 S  s   g | ]}i qS r   r   )rL   rh   r   r   r   rO     rC   r&   r   NzInserting data...)desctotal)targetr   c                 S  s   g | ]}|qS r   r   )rL   ir   r   r   rO     rC   	[91m[1m
[0m [95m[0m)r=   r'   maprb   r\   jsondumpsr[   setzipr^   rU   indexr_   rw   r]   r   r|   startrf   rY   errortyper   )r/   r   r   r   r   r>   Zcolmap_rr   rs   keysvaluesr   rN   er   r   r   	add_texts   s@    
$
 zMyScale.add_textszOptional[List[Dict[Any, Any]]])r   r<   r   r=   text_idsr   r>   r   c           	      K  s(   | ||fi |}|j ||||d |S )aZ  Create Myscale wrapper with existing texts

        Args:
            texts (Iterable[str]): List or tuple of strings to be added
            embedding (Embeddings): Function to extract text embedding
            config (MyScaleSettings, Optional): Myscale configuration
            text_ids (Optional[Iterable], optional): IDs for the texts.
                                                     Defaults to None.
            batch_size (int, optional): Batchsize when transmitting data to MyScale.
                                        Defaults to 32.
            metadata (List[dict], optional): metadata to texts. Defaults to None.
            Other keyword arguments will pass into
                [clickhouse-connect](https://clickhouse.com/docs/en/integrations/python#clickhouse-connect-driver-api)
        Returns:
            MyScale Index
        )r   r   r   )r   )	clsr   r<   r   r=   r   r   r>   ctxr   r   r   
from_texts  s    zMyScale.from_textsc                 C  s   d| j j d| j j d}|| j j d| j j d7 }|d| j j d7 }|d7 }| jd	| j j d| j j  D ]&}|d
|d dd|d dd7 }qv|d7 }|S )zText representation for myscale, prints backends, username and schemas.
            Easy to use with `str(Myscale())`

        Returns:
            repr: string to show connection info and data schema
        z	[92m[1mrQ   z @ :z[0m

z[1musername: z[0m

Table Schema:
z4---------------------------------------------------
zDESC z|[94mnameZ24sz
[0m|[96mr   z[0m|
)	r=   r)   r*   r   r   r   rd   querynamed_results)r/   _reprrr   r   r   __repr__>  s    zMyScale.__repr__List[float]r   q_embtopk	where_strr   c                 C  s   d tt|}|r d| }nd}d| jjd  d| jjd  d| jj d	| jj d
| d| jjd  d| d| j d| d
}|S )NrI   	PREWHERE rP   
            SELECT r$   z, 
                r&   z, dist
            FROM rQ   
            
            ORDER BY distance(r%   , []) 
                AS dist 
            LIMIT )r]   r   r   r=   r'   r)   r*   rc   r/   r   r   r   Z	q_emb_strq_strr   r   r   _build_qstrR  s0    


	zMyScale._build_qstr   List[Document]r   rM   r   r>   r   c                 K  s   | j | j|||fi |S )a  Perform a similarity search with MyScale

        Args:
            query (str): query string
            k (int, optional): Top K neighbors to retrieve. Defaults to 4.
            where_str (Optional[str], optional): where condition string.
                                                 Defaults to None.

            NOTE: Please do not let end-user to fill this and always be aware
                  of SQL injection. When dealing with metadatas, remember to
                  use `{self.metadata_column}.attribute` instead of `attribute`
                  alone. The default name for it is `metadata`.

        Returns:
            List[Document]: List of Documents
        )similarity_search_by_vectorrb   r\   )r/   r   rM   r   r>   r   r   r   similarity_searchf  s
    zMyScale.similarity_searchr<   rM   r   r>   r   c              
     s|     |||}z  fdd j| D W S  tyv } z0tdt| dt| d g W  Y d}~S d}~0 0 dS )  Perform a similarity search with MyScale by vectors

        Args:
            query (str): query string
            k (int, optional): Top K neighbors to retrieve. Defaults to 4.
            where_str (Optional[str], optional): where condition string.
                                                 Defaults to None.

            NOTE: Please do not let end-user to fill this and always be aware
                  of SQL injection. When dealing with metadatas, remember to
                  use `{self.metadata_column}.attribute` instead of `attribute`
                  alone. The default name for it is `metadata`.

        Returns:
            List[Document]: List of (Document, similarity)
        c                   s0   g | ](}t | jjd   | jjd  dqS )r$   r&   Zpage_contentr&   r   r=   r'   rL   r   rl   r   r   rO     s
   z7MyScale.similarity_search_by_vector.<locals>.<listcomp>r   r   r   N	r   rd   r   r   rf   rY   r   r   r   r/   r<   rM   r   r>   r   r   r   rl   r   r   }  s    
 z#MyScale.similarity_search_by_vectorList[Tuple[Document, float]]c              
     s      j|||}z  fdd j| D W S  ty~ } z0tdt	| dt
| d g W  Y d}~S d}~0 0 dS )/  Perform a similarity search with MyScale

        Args:
            query (str): query string
            k (int, optional): Top K neighbors to retrieve. Defaults to 4.
            where_str (Optional[str], optional): where condition string.
                                                 Defaults to None.

            NOTE: Please do not let end-user to fill this and always be aware
                  of SQL injection. When dealing with metadatas, remember to
                  use `{self.metadata_column}.attribute` instead of `attribute`
                  alone. The default name for it is `metadata`.

        Returns:
            List[Document]: List of documents most similar to the query text
            and cosine distance in float for each.
            Lower score represents more similarity.
        c                   s8   g | ]0}t | jjd   | jjd  d|d fqS )r$   r&   r   distr   r   rl   r   r   rO     s   zCMyScale.similarity_search_with_relevance_scores.<locals>.<listcomp>r   r   r   Nr   rb   r\   rd   r   r   rf   rY   r   r   r   r/   r   rM   r   r>   r   r   r   rl   r   'similarity_search_with_relevance_scores  s    

 z/MyScale.similarity_search_with_relevance_scoresc                 C  s$   | j d| jj d| jj  dS )z,
        Helper function: Drop data
        zDROP TABLE IF EXISTS rQ   N)rd   re   r=   r)   r*   rl   r   r   r   drop  s    zMyScale.dropzOptional[List[str]]zOptional[bool])r   r   r>   r   c           	   
   K  s   |du r|du rJ dg }|r^t |dkr^ddd |D }|| jjd  d| d	 |rl|| t |dks|J d
|}d| jj d| jj d| }z| j| W dS  t	y } zt
t| W Y d}~dS d}~0 0 dS )a3  Delete by vector ID or other criteria.

        Args:
            ids: List of ids to delete.
            **kwargs: Other keyword arguments that subclasses might use.

        Returns:
            Optional[bool]: True if deletion is successful,
            False otherwise, None if not implemented.
        NzIYou need to specify where to be deleted! Either with `ids` or `where_str`r   rH   c                 S  s   g | ]}d | d qS rt   r   )rL   r#   r   r   r   rO     rC   z"MyScale.delete.<locals>.<listcomp>r#   z IN (rv   z AND zDELETE FROM rQ   z WHERE TF)r[   r]   rw   r=   r'   r)   r*   rd   re   rf   rY   r   r   )	r/   r   r   r>   ZcondsZid_listZwhere_str_finalZqstrr   r   r   r   delete  s0    

zMyScale.deletec                 C  s   | j jd S )Nr&   )r=   r'   rl   r   r   r   metadata_column  s    zMyScale.metadata_column)N)Nr}   N)NNNr}   )N)r   N)r   N)r   N)NN)r4   r5   r6   r7   rW   propertyrm   rq   r{   r|   r   classmethodr   r   r   r   r   r   r   r   r   __classcell__r   r   ri   r   r9   `   sD    `   8         % %
  &r9   c                      s   e Zd ZdZdg fdddddd fd	d
ZddddddddZdddddddddZd ddddddddZeddddZ	  Z
S )!MyScaleWithoutJSONzsMyScale vector store without metadata column

    This is super handy if you are working to a SQL-native table
    Nr   r:   r~   r   r;   )r<   r=   must_have_colsr>   r   c                   s    t  j||fi | || _dS )ag  Building a myscale vector store without metadata column

        embedding (Embeddings): embedding model
        config (MyScaleSettings): Configuration to MyScale Client
        must_have_cols (List[str]): column names to be included in query
        Other keyword arguments will pass into
            [clickhouse-connect](https://docs.myscale.com/)
        N)rV   rW   r   )r/   r<   r=   r   r>   ri   r   r   rW     s    zMyScaleWithoutJSON.__init__r   r   r   r   r   c                 C  s   d tt|}|r d| }nd}d| jjd  dd | j d| jj d| jj d	| d
| jjd  d| d| j d| d	}|S )NrI   r   rP   r   r$   z, dist, 
                z
            FROM rQ   r   r   r%   r   r   r   )	r]   r   r   r=   r'   r   r)   r*   rc   r   r   r   r   r     s0    


	zMyScaleWithoutJSON._build_qstrr   r   r   c              
     s|     |||}z  fdd j| D W S  tyv } z0tdt| dt| d g W  Y d}~S d}~0 0 dS )r   c                   s4   g | ], t  jjd    fddjD dqS )r$   c                   s   i | ]}| | qS r   r   rL   rM   r   r   r   
<dictcomp>A  rC   zMMyScaleWithoutJSON.similarity_search_by_vector.<locals>.<listcomp>.<dictcomp>r   r   r=   r'   r   rL   rl   r   r   rO   >  s
   zBMyScaleWithoutJSON.similarity_search_by_vector.<locals>.<listcomp>r   r   r   Nr   r   r   rl   r   r   %  s    
 z.MyScaleWithoutJSON.similarity_search_by_vectorr   r   c              
     s      j|||}z  fdd j| D W S  ty~ } z0tdt	| dt
| d g W  Y d}~S d}~0 0 dS )r   c                   s<   g | ]4 t  jjd    fddjD d d fqS )r$   c                   s   i | ]}| | qS r   r   r   r   r   r   r   d  rC   zYMyScaleWithoutJSON.similarity_search_with_relevance_scores.<locals>.<listcomp>.<dictcomp>r   r   r   r   rl   r   r   rO   `  s   zNMyScaleWithoutJSON.similarity_search_with_relevance_scores.<locals>.<listcomp>r   r   r   Nr   r   r   rl   r   r   I  s    

 z:MyScaleWithoutJSON.similarity_search_with_relevance_scoresrk   c                 C  s   dS )NrP   r   rl   r   r   r   r   n  s    z"MyScaleWithoutJSON.metadata_column)N)r   N)r   N)r4   r5   r6   r7   rW   r   r   r   r   r   r   r   r   ri   r   r     s      % %r   )
__future__r   r   logginghashlibr   	threadingr   typingr   r   r   r   r	   r
   Zlangchain_core.documentsr   Zlangchain_core.embeddingsr   Zlangchain_core.vectorstoresr   Zpydantic_settingsr   r   	getLoggerrY   r   r   r9   r   r   r   r   r   <module>   s"    ?   