a
    bgS0                     @   s*  U d Z ddlZddlZddlZddlmZmZ ddlmZm	Z	m
Z
mZmZmZmZmZ ddlmZ ddlmZ ddlmZ ddlmZ dd	lmZ dd
lmZ dZdZG dd deZG dd deZG dd deZ G dd deZ!ee e!dZ"e	e#ee f e$d< G dd de%Z&G dd deZ'dS )zWrapper around scikit-learn NearestNeighbors implementation.

The vector store can be persisted in json, bson or parquet format.
    N)ABCabstractmethod)AnyDictIterableListLiteralOptionalTupleType)uuid4)Document)
Embeddings)guard_import)VectorStore)maximal_marginal_relevance      c                   @   s\   e Zd ZdZeddddZeeedddZee	dd	d
dZ
ee	dddZdS )BaseSerializerz Base class for serializing data.Npersist_pathreturnc                 C   s
   || _ d S Nr   selfr    r   v/var/www/html/cobodadashboardai.evdpl.com/venv/lib/python3.9/site-packages/langchain_community/vectorstores/sklearn.py__init__   s    zBaseSerializer.__init__r   c                 C   s   dS )z>The file extension suggested by this serializer (without dot).Nr   clsr   r   r   	extension   s    zBaseSerializer.extensiondatar   c                 C   s   dS )z"Saves the data to the persist_pathNr   r   r$   r   r   r   save#   s    zBaseSerializer.savec                 C   s   dS )z$Loads the data from the persist_pathNr   r   r   r   r   load'   s    zBaseSerializer.load)__name__
__module____qualname____doc__strr   classmethodr   r"   r   r&   r(   r   r   r   r   r      s   r   c                   @   s@   e Zd ZdZeedddZeddddZedd	d
Z	dS )JsonSerializerzKSerialize data in JSON using the json package from python standard library.r   c                 C   s   dS )Njsonr   r    r   r   r   r"   /   s    zJsonSerializer.extensionNr#   c                 C   s<   t | jd}t|| W d    n1 s.0    Y  d S )Nw)openr   r0   dumpr   r$   fpr   r   r   r&   3   s    zJsonSerializer.savec                 C   s:   t | jd}t|W  d    S 1 s,0    Y  d S )Nr)r2   r   r0   r(   r   r5   r   r   r   r(   7   s    zJsonSerializer.load)
r)   r*   r+   r,   r.   r-   r"   r   r&   r(   r   r   r   r   r/   ,   s
   r/   c                       sX   e Zd ZdZedd fddZeedddZedd	d
dZ	edddZ
  ZS )BsonSerializerz>Serialize data in Binary JSON using the `bson` python package.Nr   c                    s   t  | td| _d S Nbson)superr   r   r:   r   	__class__r   r   r   ?   s    zBsonSerializer.__init__r   c                 C   s   dS r9   r   r    r   r   r   r"   C   s    zBsonSerializer.extensionr#   c                 C   sB   t | jd"}|| j| W d    n1 s40    Y  d S )Nwb)r2   r   writer:   dumpsr4   r   r   r   r&   G   s    zBsonSerializer.savec                 C   s@   t | jd }| j| W  d    S 1 s20    Y  d S )Nrb)r2   r   r:   loadsreadr7   r   r   r   r(   K   s    zBsonSerializer.loadr)   r*   r+   r,   r-   r   r.   r"   r   r&   r(   __classcell__r   r   r<   r   r8   <   s   r8   c                       sX   e Zd ZdZedd fddZeedddZedd	d
dZ	edddZ
  ZS )ParquetSerializerzFSerialize data in `Apache Parquet` format using the `pyarrow` package.Nr   c                    s.   t  | td| _td| _td| _d S )NZpandasZpyarrowzpyarrow.parquet)r;   r   r   pdpapqr   r<   r   r   r   S   s    

zParquetSerializer.__init__r   c                 C   s   dS )Nparquetr   r    r   r   r   r"   Y   s    zParquetSerializer.extensionr#   c              
   C   s   | j |}| jj|}tj| jrt	| jd }t
| j| z| j|| j W n6 ty } zt
|| j |W Y d }~qd }~0 0 t| n| j|| j d S )Nz-backup)rG   Z	DataFramerH   TableZfrom_pandasospathexistsr   r-   renamerI   Zwrite_table	Exceptionremove)r   r$   dftableZbackup_pathexcr   r   r   r&   ]   s    zParquetSerializer.savec                 C   s(   | j | j}| }dd | D S )Nc                 S   s   i | ]\}}||  qS r   )tolist).0colZseriesr   r   r   
<dictcomp>p       z*ParquetSerializer.load.<locals>.<dictcomp>)rI   Z
read_tabler   Z	to_pandasitems)r   rS   rR   r   r   r   r(   m   s    zParquetSerializer.loadrD   r   r   r<   r   rF   P   s   rF   r0   r:   rJ   SERIALIZER_MAPc                   @   s   e Zd ZdZdS )SKLearnVectorStoreExceptionz'Exception raised by SKLearnVectorStore.N)r)   r*   r+   r,   r   r   r   r   r]   z   s   r]   c                
   @   s  e Zd ZdZddddeee ed eedddd	Z	e
ed
ddZdd
ddZdd
ddZd+ee eee  eee  eee dddZdd
ddZedee eeeeeef  dddZedeeeeeeef  dddZefeeeee dddZefeeeeeeef  ddd Zeed!fee eeeeee d"d#d$Zeed!feeeeeee d%d&d'Zed,ee eeee  eee  ee ed d(d)d*Z dS )-SKLearnVectorStorezYSimple in-memory vector store based on the `scikit-learn` library
    `NearestNeighbors`.Nr0   Zcosine)r   
serializermetricr[   )	embeddingr   r_   r`   kwargsr   c          	      K   s   t d}t ddd}|| _|jf d|i|| _d| _|| _|| _d | _| jd urht| }|| jd| _g | _	g | _
g | _g | _|g | _| jd urtj| jr|   d S )Nnumpyzsklearn.neighborszscikit-learn)Zpip_namer`   Fr   )r   _npZNearestNeighbors
_neighbors_neighbors_fitted_embedding_functionZ_persist_path_serializerr\   _embeddings_texts
_metadatas_idsasarray_embeddings_nprL   rM   isfile_load)	r   ra   r   r_   r`   rb   npZsklearn_neighborsZserializer_clsr   r   r   r      s$    	
zSKLearnVectorStore.__init__r   c                 C   s   | j S r   )rg   r'   r   r   r   
embeddings   s    zSKLearnVectorStore.embeddingsc                 C   s8   | j d u rtd| j| j| j| jd}| j | d S )NzFYou must specify a persist_path on creation to persist the collection.)idstexts	metadatasrr   )rh   r]   rl   rj   rk   ri   r&   r%   r   r   r   persist   s    
zSKLearnVectorStore.persistc                 C   sP   | j d u rtd| j  }|d | _|d | _|d | _|d | _|   d S )NzCYou must specify a persist_path on creation to load the collection.rr   rt   ru   rs   )rh   r]   r(   ri   rj   rk   rl   _update_neighborsr%   r   r   r   rp      s    





zSKLearnVectorStore._load)rt   ru   rs   rb   r   c                 K   sl   t |}|pdd |D }| j| | j| j| | j|pPi gt|  | j| | 	  |S )Nc                 S   s   g | ]}t t qS r   )r-   r   )rV   _r   r   r   
<listcomp>   rY   z0SKLearnVectorStore.add_texts.<locals>.<listcomp>)
listrj   extendri   rg   Zembed_documentsrk   lenrl   rw   )r   rt   ru   rs   rb   rj   rl   r   r   r   	add_texts   s    zSKLearnVectorStore.add_textsc                 C   s>   t | jdkrtd| j| j| _| j| j d| _d S )Nr   (No data was added to SKLearnVectorStore.T)	r|   ri   r]   rd   rm   rn   re   fitrf   r'   r   r   r   rw      s    z$SKLearnVectorStore._update_neighbors)k)query_embeddingr   rb   r   c                K   s:   | j std| jj|g|d\}}tt|d |d S )zgSearch k embeddings similar to the query embedding. Returns a list of
        (index, distance) tuples.r~   )Zn_neighborsr   )rf   r]   re   Z
kneighborsrz   zip)r   r   r   rb   Zneigh_distsZ
neigh_idxsr   r   r   #_similarity_index_search_with_score   s    
z6SKLearnVectorStore._similarity_index_search_with_score)queryr   rb   r   c                   s4    j |} j|fd|i|} fdd|D S )Nr   c                    s:   g | ]2\}}t  j| d  j| i j| d|fqS id)Zpage_contentmetadatar   rj   rl   rk   )rV   idxdistr'   r   r   ry      s   zCSKLearnVectorStore.similarity_search_with_score.<locals>.<listcomp>)rg   embed_queryr   )r   r   r   rb   r   indices_distsr   r'   r   similarity_search_with_score   s    
z/SKLearnVectorStore.similarity_search_with_scorec                 K   s$   | j |fd|i|}dd |D S )Nr   c                 S   s   g | ]\}}|qS r   r   )rV   docrx   r   r   r   ry     rY   z8SKLearnVectorStore.similarity_search.<locals>.<listcomp>)r   )r   r   r   rb   Zdocs_scoresr   r   r   similarity_search   s    z$SKLearnVectorStore.similarity_searchc                 K   sB   | j |fd|i|}t| \}}dd |D }ttt||S )Nr   c                 S   s   g | ]}d t | qS )   )mathexp)rV   r   r   r   r   ry   	  rY   zOSKLearnVectorStore._similarity_search_with_relevance_scores.<locals>.<listcomp>)r   r   rz   )r   r   r   rb   Z
docs_distsdocsdistsZscoresr   r   r   (_similarity_search_with_relevance_scores  s    z;SKLearnVectorStore._similarity_search_with_relevance_scoresg      ?)ra   r   fetch_klambda_multrb   r   c                    sr   j |fd|i|}t| \ }j f }tjj|jjd|||d}	 fdd|	D }
fdd|
D S )a  Return docs selected using the maximal marginal relevance.
        Maximal marginal relevance optimizes for similarity to query AND diversity
        among selected documents.
        Args:
            embedding: Embedding to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            fetch_k: Number of Documents to fetch to pass to MMR algorithm.
            lambda_mult: Number between 0 and 1 that determines the degree
                        of diversity among the results with 0 corresponding
                        to maximum diversity and 1 to minimum diversity.
                        Defaults to 0.5.
        Returns:
            List of Documents selected by maximal marginal relevance.
        r   )Zdtype)r   r   c                    s   g | ]} | qS r   r   )rV   i)indicesr   r   ry   -  rY   zNSKLearnVectorStore.max_marginal_relevance_search_by_vector.<locals>.<listcomp>c                    s2   g | ]*}t  j| d  j| i j| dqS r   r   )rV   r   r'   r   r   ry   .  s
   )r   r   rn   r   rd   arrayZfloat32)r   ra   r   r   r   rb   r   rx   Zresult_embeddingsZmmr_selectedZmmr_indicesr   )r   r   r   'max_marginal_relevance_search_by_vector  s&    
z:SKLearnVectorStore.max_marginal_relevance_search_by_vector)r   r   r   r   rb   r   c                 K   s4   | j du rtd| j |}| j||||d}|S )a  Return docs selected using the maximal marginal relevance.
        Maximal marginal relevance optimizes for similarity to query AND diversity
        among selected documents.
        Args:
            query: Text to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            fetch_k: Number of Documents to fetch to pass to MMR algorithm.
            lambda_mult: Number between 0 and 1 that determines the degree
                        of diversity among the results with 0 corresponding
                        to maximum diversity and 1 to minimum diversity.
                        Defaults to 0.5.
        Returns:
            List of Documents selected by maximal marginal relevance.
        NzCFor MMR search, you must specify an embedding function on creation.)Z
lambda_mul)rg   
ValueErrorr   r   )r   r   r   r   r   rb   ra   r   r   r   r   max_marginal_relevance_search6  s    
z0SKLearnVectorStore.max_marginal_relevance_search)rt   ra   ru   rs   r   rb   r   c                 K   s(   t |fd|i|}|j|||d |S )Nr   )ru   rs   )r^   r}   )r!   rt   ra   ru   rs   r   rb   vsr   r   r   
from_textsW  s    
zSKLearnVectorStore.from_texts)NN)NNN)!r)   r*   r+   r,   r   r	   r-   r   r   r   propertyrr   rv   rp   r   r   dictr}   rw   	DEFAULT_Kfloatintr
   r   r   r   r   r   DEFAULT_FETCH_Kr   r   r.   r   r   r   r   r   r^      s   #  



-!   

r^   )(r,   r0   r   rL   abcr   r   typingr   r   r   r   r   r	   r
   r   uuidr   Zlangchain_core.documentsr   Zlangchain_core.embeddingsr   Zlangchain_core.utilsr   Zlangchain_core.vectorstoresr   Z&langchain_community.vectorstores.utilsr   r   r   r   r/   r8   rF   r\   r-   __annotations__RuntimeErrorr]   r^   r   r   r   r   <module>   s.   ($