a
    bgh                     @  s   d dl mZ d dlZd dlmZmZmZmZmZm	Z	m
Z
 d dlmZ d dlmZ d dlmZ erld dlmZ dd	d
d
dZeeZG dd deZdS )    )annotationsN)TYPE_CHECKINGAnyDictIterableListOptionalTuple)Document)
Embeddings)VectorStoreHippoClient	localhostZ7788Zadmin)hostportusernamepasswordc                   @  s4  e Zd ZdZdAdddd	d	d
dddddZdddddZdBddddddZdCddddddZdddd Zd
dd!d"Z	ddd#d$Z
dDd&dd'd	d(d)d*d+d,ZdEdd	dd.d'd(d/d0d1d2ZdFdd	dd.d'd(d3d0d4d5ZdGd6d	dd.d'd(d3d7d8d9Zedddedddfd)ddddd:d;d
d<d(d=d>d?d@ZdS )HHippoa  `Hippo` vector store.

    You need to install `hippo-api` and run Hippo.

    Please visit our official website for how to run a Hippo instance:
    https://www.transwarp.cn/starwarp

    Args:
        embedding_function (Embeddings): Function used to embed the text.
        table_name (str): Which Hippo table to use. Defaults to
            "test".
        database_name (str): Which Hippo database to use. Defaults to
            "default".
        number_of_shards (int): The number of shards for the Hippo table.Defaults to
            1.
        number_of_replicas (int): The number of replicas for the Hippo table.Defaults to
            1.
        connection_args (Optional[dict[str, any]]): The connection args used for
            this class comes in the form of a dict.
        index_params (Optional[dict]): Which index params to use. Defaults to
            IVF_FLAT.
        drop_old (Optional[bool]): Whether to drop the current collection. Defaults
            to False.
        primary_field (str): Name of the primary key field. Defaults to "pk".
        text_field (str): Name of the text field. Defaults to "text".
        vector_field (str): Name of the vector field. Defaults to "vector".

    The connection args used for this class comes in the form of a dict,
    here are a few of the options:
        host (str): The host of Hippo instance. Default at "localhost".
        port (str/int): The port of Hippo instance. Default at 7788.
        user (str): Use which user to connect to Hippo instance. If user and
            password are provided, we will add related header in every RPC call.
        password (str): Required when user is provided. The password
            corresponding to the user.

    Example:
        .. code-block:: python

        from langchain_community.vectorstores import Hippo
        from langchain_community.embeddings import OpenAIEmbeddings

        embedding = OpenAIEmbeddings()
        # Connect to a hippo instance on localhost
        vector_store = Hippo.from_documents(
            docs,
            embedding=embeddings,
            table_name="langchain_test",
            connection_args=HIPPO_CONNECTION
        )

    Raises:
        ValueError: If the hippo-api python package is not installed.
    testdefault   NFr   strintzOptional[Dict[str, Any]]zOptional[dict]zOptional[bool])embedding_function
table_namedatabase_namenumber_of_shardsnumber_of_replicasconnection_argsindex_paramsdrop_oldc	           
   
   C  s>  || _ || _|| _|| _|| _|| _d| _d| _d| _g | _	|d u rHt
}| || _d | _z,| j| j| jr|r| j| j| j W n> ty }	 z&td| j d|	   W Y d }	~	n
d }	~	0 0 z*| j| j| jr| j| j| j| _W n@ ty0 }	 z&td| j d|	   W Y d }	~	n
d }	~	0 0 |   d S )NpktextZvectorz+An error occurred while deleting the table z: z*An error occurred while getting the table )r   r   embedding_funcr   r   r    _primary_field_text_field_vector_fieldfieldsDEFAULT_HIPPO_CONNECTION_create_connection_aliashccolZcheck_table_existsZdelete_table	Exceptionloggingerror	get_table_get_env)
selfr   r   r   r   r   r   r    r!   e r4   t/var/www/html/cobodadashboardai.evdpl.com/venv/lib/python3.9/site-packages/langchain_community/vectorstores/hippo.py__init__P   sD    zHippo.__init__dictr   )r   returnc           	   
     s(  zddl m} W n. ty> } ztd|W Y d}~n
d}~0 0 |dd}|dd |dd}|d	d}|dur durd
|v r|d
}d
 fdd|D }qt|d t  }ntdz$t	d| d ||g||dW S  t
y" } ztd |W Y d}~n
d}~0 0 dS )z*Create the connection to the Hippo server.r   r   zQUnable to import transwarp_hipp_api, please install with `pip install hippo-api`.Nr   r   r   Zshivar   ,c                   s   g | ]}| d   qS ):r4   ).0hr   r4   r5   
<listcomp>       z2Hippo._create_connection_alias.<locals>.<listcomp>r:   z/Missing standard address type for reuse attemptzcreate HippoClient[])r   pwdzFailed to create new connection) transwarp_hippo_api.hippo_clientr   ImportErrorgetsplitjoinr   
ValueErrorloggerinfor-   r/   )	r2   r   r   r3   r   r   r   hostsZgiven_addressr4   r=   r5   r*      s0    

zHippo._create_connection_aliaszOptional[list]zOptional[List[dict]]None)
embeddings	metadatasr8   c                 C  s<   t d |d ur(t d | || |   |   d S )Nzinit ...zcreate collection)rH   rI   _create_collection_extract_fields_create_index)r2   rL   rM   r4   r4   r5   r1      s    

zHippo._get_envlistc           
   	   C  sD  ddl m} ddlm} t|d }td|  g }||| jd|j	 ||| j
d|j	 ||| jd|jd|id |r|d  D ]L\}}t|trt|}	|||d|jd|	id q|||d|j	 qtd	|  | jj| jd|| j| j| jd
 | j| j| j| _td| j d| j d d S )Nr   )
HippoField)	HippoTypez[_create_collection] dim: TF	dimension)Ztype_paramsz[_create_collection] fields: )nameZauto_idr(   r   r   r   z$[_create_collection] : create table z in z successfully)rB   rR   transwarp_hippo_api.hippo_typerS   lenrH   debugappendr%   STRINGr&   r'   ZFLOAT_VECTORitems
isinstancerQ   r+   Zcreate_tabler   r   r   r   r0   r,   rI   )
r2   rL   rM   rR   rS   dimr(   keyvalueZ	value_dimr4   r4   r5   rN      sZ    
	zHippo._create_collection)r8   c                 C  s^   ddl m} t| j|rZ| jj}td|  |D ]}| j|j	 q4td| j  dS )z,Grab the existing fields from the Collectionr   
HippoTablez[_extract_fields] schema:z04 [_extract_fields] fields:N)
rB   ra   r\   r,   schemarH   rX   r(   rY   rU   )r2   ra   rb   xr4   r4   r5   rO      s    zHippo._extract_fieldsc                 C  s   ddl m} t| j|r| j| j| j| ji }|dd}|du rLdS | j| j| j| j d D ]*}t	
d|  |d | jkrh|  S qhdS )z0Return the vector index information if it existsr   r`   embedding_indexesNz[_get_index] embedding_indexes column)rB   ra   r\   r,   r+   Zget_table_infor   r   rD   rH   rX   r'   )r2   ra   Z
table_inford   rc   r4   r4   r5   
_get_index  s&    
zHippo._get_indexc                 C  s  ddl m} ddlm}m} t| j|r|  du r|  du r| jdu rd|j	|j
dd| _| jj| j| jd | jd	 | jd
 | jd d t| j| jd  td nZ|j
|j|j|j|jd}|j|j|j	|j	d}|| jd
  | jd
< | jd	 dkrd|| jd	  | jd	< | j| j| jd | jd	 | jd
  t| j| jd  n| jd	 dks| jd	 dkr|| jd	  | jd	< | jj| j| jd | jd	 | jd
 | jdd| jddd t| j| jd  n | jd	 dkr|| jd	  | jd	< | jj| j| jd | jd	 | jd
 | jdd| jdd| jdd| jdd t| j| jd  n| jd	 dkr|| jd	  | jd	< | jj| j| jd | jd	 | jd
 | jd| jd| jdd t| j| jd  ntddS ) z Create a index on the collectionr   r`   )	IndexType
MetricTypeNZlangchain_auto_create
   )
index_namemetric_type
index_typenlistrj   rl   rk   rm   )rm   zcreate index successfully)IVF_FLATFLATIVF_SQIVF_PQHNSW)ipIPl2L2ro   rn   rp   nprobe)rm   rw   rq   nbits   m)rm   rw   rx   rz   rr   Mef_construction	ef_search)r{   r|   r}   zeIndex name does not match, please enter the correct index name. (FLAT, IVF_FLAT, IVF_PQ,IVF_SQ, HNSW))rB   ra   rV   rg   rh   r\   r,   rf   r    rv   rn   Zcreate_indexr'   rH   rX   Zactivate_indexrI   ro   rp   rq   rr   rt   rD   rG   )r2   ra   rg   rh   Z
index_dictZmetric_dictr4   r4   r5   rP     s    
	









	zHippo._create_index  zIterable[str]zOptional[int]r   z	List[str])textsrM   timeout
batch_sizekwargsr8   c                   s"  ddl m} |r"tdd |D r0td g S t|}td|  zj|}W n$ ty|   fdd|D }Y n0 t	|dkrtd g S td	t	|  t
j|sĈ|| j|j|itd
|  tdj  |dur>|D ]8}| D ](\}	}
|	jv r|	g |
 qqtj  j }t	|}djv rxjd td|  td||D ]t| |  fddjD }z j|}td|  W n8 ty } ztd| |W Y d}~n
d}~0 0 qdgS )a  
        Add text to the collection.

        Args:
            texts: An iterable that contains the text to be added.
            metadatas: An optional list of dictionaries,
            each dictionary contains the metadata associated with a text.
            timeout: Optional timeout, in seconds.
            batch_size: The number of texts inserted in each batch, defaults to 1000.
            **kwargs: Other optional parameters.

        Returns:
            A list of strings, containing the unique identifiers of the inserted texts.

        Note:
            If the collection has not yet been created,
            this method will create a new collection.
        r   r`   c                 s  s   | ]}|d kV  qdS ) Nr4   )r;   tr4   r4   r5   	<genexpr>  r?   z"Hippo.add_texts.<locals>.<genexpr>zNothing to insert, skipping.z[add_texts] texts: c                   s   g | ]} j |qS r4   )r$   embed_queryr;   rc   )r2   r4   r5   r>     r?   z#Hippo.add_texts.<locals>.<listcomp>z[add_texts] len_embeddings:z[add_texts] metadatas:z[add_texts] fields:Nr"   z[add_texts] total_count:c                   s   g | ]}|   qS r4   r4   r   )endiinsert_dictr4   r5   r>     r?   z05 [add_texts] insert z0Failed to insert batch starting at entity: %s/%sr   )rB   ra   allrH   rX   rQ   r$   Zembed_documentsNotImplementedErrorrW   r\   r,   r1   r&   r'   r(   r[   
setdefaultrY   removerangeminZinsert_rowsrI   r-   r/   )r2   r   rM   r   r   r   ra   rL   dr^   r_   vectorsZtotal_countZinsert_listresr3   r4   )r   r   r   r2   r5   	add_texts  sX    



zHippo.add_texts   zOptional[str]zList[Document])querykparamexprr   r   r8   c                 K  sB   | j du rtd g S | jf |||||d|}dd |D S )a  
        Perform a similarity search on the query string.

        Args:
            query (str): The text to search for.
            k (int, optional): The number of results to return. Default is 4.
            param (dict, optional): Specifies the search parameters for the index.
            Defaults to None.
            expr (str, optional): Filtering expression. Defaults to None.
            timeout (int, optional): Time to wait before a timeout error.
            Defaults to None.
            kwargs: Keyword arguments for Collection.search().

        Returns:
            List[Document]: The document results of the search.
        N!No existing collection to search.)r   r   r   r   r   c                 S  s   g | ]\}}|qS r4   r4   )r;   doc_r4   r4   r5   r>     r?   z+Hippo.similarity_search.<locals>.<listcomp>)r,   rH   rX   similarity_search_with_score)r2   r   r   r   r   r   r   r   r4   r4   r5   similarity_search  s    


zHippo.similarity_searchzList[Tuple[Document, float]]c           	      K  sD   | j du rtd g S | j|}| jf |||||d|}|S )a  
        Performs a search on the query string and returns results with scores.

        Args:
            query (str): The text being searched.
            k (int, optional): The number of results to return.
            Default is 4.
            param (dict): Specifies the search parameters for the index.
            Default is None.
            expr (str, optional): Filtering expression. Default is None.
            timeout (int, optional): The waiting time before a timeout error.
            Default is None.
            kwargs: Keyword arguments for Collection.search().

        Returns:
            List[float], List[Tuple[Document, any, any]]:
        Nr   )	embeddingr   r   r   r   )r,   rH   rX   r$   r   &similarity_search_with_score_by_vector)	r2   r   r   r   r   r   r   r   retr4   r4   r5   r     s    


z"Hippo.similarity_search_with_scorezList[float])r   r   r   r   r   r   r8   c                   sH  | j du rtd g S | jdd }|| j td| j  td|g  td|  td|  td|  | j j| j|g|||d td	   | jd
 }g }	d}
t fdd|D  D ]j}dd t||D }t	|
| j|d}td d |    d | |
 }|
d7 }
|	||f q|	S )a  
        Performs a search on the query string and returns results with scores.

        Args:
            embedding (List[float]): The embedding vector being searched.
            k (int, optional): The number of results to return.
            Default is 4.
            param (dict): Specifies the search parameters for the index.
            Default is None.
            expr (str, optional): Filtering expression. Default is None.
            timeout (int, optional): The waiting time before a timeout error.
            Default is None.
            kwargs: Keyword arguments for Collection.search().

        Returns:
            List[Tuple[Document, float]]: Resulting documents and scores.
        Nr   zsearch_field:zvectors:zoutput_fields:ztopk:zdsl:)Zsearch_fieldr   output_fieldsZtopkZdslz-[similarity_search_with_score_by_vector] res:z%scoresr   c                   s   g | ]} d  | qS )r   r4   )r;   fieldr   r4   r5   r>   f  r?   z@Hippo.similarity_search_with_score_by_vector.<locals>.<listcomp>c                 S  s   i | ]\}}||qS r4   r4   )r;   r   r_   r4   r4   r5   
<dictcomp>g  r?   z@Hippo.similarity_search_with_score_by_vector.<locals>.<dictcomp>)Zpage_contentmetadataz;[similarity_search_with_score_by_vector] res[0][score_col]:r   )r,   rH   rX   r(   r   r'   r   r&   zipr
   poprY   )r2   r   r   r   r   r   r   r   Z	score_colr   countr[   metar   Zscorer4   r   r5   r   .  sB    



z,Hippo.similarity_search_with_score_by_vectorzDict[str, Any]zOptional[Dict[Any, Any]]boolz'Hippo')r   r   rM   r   r   r   r    search_paramsr!   r   r8   c
              	   K  sd   |du ri }t d | f ||||||	d|
}t d|  t d|  |j||d |S )a  
        Creates an instance of the VST class from the given texts.

        Args:
            texts (List[str]): List of texts to be added.
            embedding (Embeddings): Embedding model for the texts.
            metadatas (List[dict], optional):
            List of metadata dictionaries for each text.Defaults to None.
            table_name (str): Name of the table. Defaults to "test".
            database_name (str): Name of the database. Defaults to "default".
            connection_args (dict[str, Any]): Connection parameters.
            Defaults to DEFAULT_HIPPO_CONNECTION.
            index_params (dict): Indexing parameters. Defaults to None.
            search_params (dict): Search parameters. Defaults to an empty dictionary.
            drop_old (bool): Whether to drop the old collection. Defaults to False.
            kwargs: Other arguments.

        Returns:
            Hippo: An instance of the VST class.
        Nz'00 [from_texts] init the class of Hippo)r   r   r   r   r    r!   z[from_texts] texts:z[from_texts] metadatas:)r   rM   )rH   rI   rX   r   )clsr   r   rM   r   r   r   r    r   r!   r   Z	vector_dbr4   r4   r5   
from_textss  s"    #
	zHippo.from_texts)r   r   r   r   NNF)NN)N)NNr~   )r   NNN)r   NNN)r   NNN)__name__
__module____qualname____doc__r6   r*   r1   rN   rO   rf   rP   r   r   r   r   classmethodr)   r   r4   r4   r4   r5   r      sZ   :       9"  Aw   [    %    *    Er   )
__future__r   r.   typingr   r   r   r   r   r   r	   Zlangchain_core.documentsr
   Zlangchain_core.embeddingsr   Zlangchain_core.vectorstoresr   rB   r   r)   	getLoggerr   rH   r   r4   r4   r4   r5   <module>   s   $
