a
    bg                     @  s   d dl mZ d dlZd dlZd dlZd dlZd dlZd dlZd dlm	Z	 d dl
mZ d dlmZmZmZmZmZmZmZmZmZ d dlmZ d dlmZ d dlmZ erd d	lmZ d d
lmZ  G dd deZ!dS )    )annotationsN)contextmanager)StringIO)	TYPE_CHECKINGAnyDict	GeneratorIterableListOptionalTupleType)
Embeddings)VectorStore)Document)
connection)cursorc                   @  s<  e Zd ZdZG dd deejZG dd dZddddd	d
d
dddddddZ	G dd dZ
dddddZdddddZdjd
ddddddZdkdd
ddddd Zdd!d"d#Zdld$d%d&d'd(d)d*Zdd+d+dd,d-d.Zedmd2d'd	d%d
d
d
dd&d d3
d4d5Zdnd6d7d&dd8d9d:Zdodd
d
dd;d<d=Zd>d?d@dAdBZdpd>dDd&dEdFdGdHZdqd
dDd&dIdJdKdLZdrd
dDd&dEdJdMdNZdsd>dDd&dIdFdOdPZdtddQddRdSdTZdd
d
ddUdVdWZddDddXdYdZZdddd[d\Zdddd]d^Zd_dd`dadbZ d_dd`dcddZ!d_d?ddedfdgZ"dd!dhdiZ#dS )uYellowbrickzYellowbrick as a vector database.
    Example:
        .. code-block:: python
            from langchain_community.vectorstores import Yellowbrick
            from langchain_community.embeddings.openai import OpenAIEmbeddings
            ...
    c                   @  s   e Zd ZdZdZdZdS )zYellowbrick.IndexTypez<Enumerator for the supported Index types within Yellowbrick.noneZlshN)__name__
__module____qualname____doc__NONELSH r   r   z/var/www/html/cobodadashboardai.evdpl.com/venv/lib/python3.9/site-packages/langchain_community/vectorstores/yellowbrick.py	IndexType*   s   r   c                   @  s6   e Zd ZdZddddddZddd	d	d
ddZdS )Yellowbrick.IndexParamsz/Parameters for configuring a Yellowbrick index.Nz!Optional['Yellowbrick.IndexType']zOptional[Dict[str, Any]])
index_typeparamsc                 C  s$   |d u rt jj}|| _|pi | _d S N)r   r   r   r   r    )selfr   r    r   r   r   __init__3   s    z Yellowbrick.IndexParams.__init__strr   )keydefaultreturnc                 C  s   | j ||S r!   )r    get)r"   r%   r&   r   r   r   	get_param=   s    z!Yellowbrick.IndexParams.get_param)NN)N)r   r   r   r   r#   r)   r   r   r   r   IndexParams0   s
     
r*   NF)schemaloggerdropr   r$   zOptional[str]zOptional[logging.Logger]boolNone)	embeddingconnection_stringtabler+   r,   r-   r'   c                C  s`  ddl m} |  |r || _nNtt| _| jtj t	 }|tj
 td}	||	 | j| t|ts| jd dS d| _d| _d| _|| _t|| j| _t| jj || _|| _|| _d| _|   | j  b}
|r(| j!| j| j|
d	 | j!| j| j | j|
d	 | "|
 | #|
 | $|
 W d   n1 sR0    Y  dS )
zInitialize with yellowbrick client.
        Args:
            embedding: Embedding operator
            connection_string: Format 'postgres://username:password@host:port/database'
            table: Table used to store / retrieve embeddings from
        r   )extrasz)%(asctime)s - %(levelname)s - %(message)sz+embeddings input must be Embeddings object.NZ
_lsh_indexZ_lsh_hyperplane_content)r2   r+   r   )%psycopg2r3   Zregister_uuidr,   logging	getLoggerr   setLevelERRORStreamHandlerDEBUG	FormattersetFormatter
addHandler
isinstancer   errorLSH_INDEX_TABLELSH_HYPERPLANE_TABLECONTENT_TABLEr1   r   DatabaseConnectionr   atexitregisterclose_connection_schema_table
_embeddingZ_max_embedding_len_check_database_utf8
get_cursorr-   _drop_lsh_index_tables_create_schema_create_table)r"   r0   r1   r2   r+   r,   r-   r3   handler	formatterr   r   r   r   r#   @   sH    





zYellowbrick.__init__c                      s   e Zd ZU dZded< dZded< ded< dddd	 fd
dZddddZddddZe	ddddZ
e	ddddZ  ZS )zYellowbrick.DatabaseConnectionNr$   _connection_stringzOptional['PgConnection']_connectionzlogging.Logger_loggerz 'Yellowbrick.DatabaseConnection')r1   r,   r'   c                   s.   | j d u r(t | | _ || j _|| j _| j S r!   )	_instancesuper__new__rR   rT   )clsr1   r,   	__class__r   r   rW      s
    
z&Yellowbrick.DatabaseConnection.__new__r/   r'   c                 C  s"   | j r| j js| j   d | _ d S r!   )rS   closedclose)r"   r   r   r   rG      s    
z/Yellowbrick.DatabaseConnection.close_connectionz'PgConnection'c                 C  s2   dd l }| jr| jjr,|| j| _d| j_| jS )Nr   F)r5   rS   r\   connectrR   Z
autocommit)r"   r5   r   r   r   get_connection   s
    z-Yellowbrick.DatabaseConnection.get_connectionz%Generator['PgConnection', None, None]c              
   c  sr   ddl m} |  }z
|V  W nF |yd } z.|  | jjddd td|W Y d }~nd }~0 0 |  d S )Nr   )DatabaseErrorz2Database error occurred, rolling back transaction.T)exc_infozDatabase transaction failed.)r5   r`   r_   rollbackrT   r@   RuntimeErrorcommit)r"   r`   conner   r   r   get_managed_connection   s    
 z5Yellowbrick.DatabaseConnection.get_managed_connectionz!Generator['PgCursor', None, None]c              	   c  sR   |   6}| }z|V  W |  n
|  0 W d    n1 sD0    Y  d S r!   )rg   r   r]   )r"   re   r   r   r   r   rL      s
    
z)Yellowbrick.DatabaseConnection.get_cursor)r   r   r   rU   __annotations__rS   rW   rG   r_   r   rg   rL   __classcell__r   r   rY   r   rD   ~   s   
		rD   z
'PgCursor')r   r'   c                 C  s6   ddl m} | jr2||dj|| jd dS )z>
        Helper function: create schema if not exists
        r   sqlzE
                    CREATE SCHEMA IF NOT EXISTS {s}
                )sN)r5   rk   rH   executeSQLformat
Identifier)r"   r   rk   r   r   r   rN      s    
zYellowbrick._create_schemac           
      C  s   ddl m} | jr| jfnd}|jg || j| j R  }|| j| j d }||dj||d | jrx| jfnd}|jg || jR  }|jg || j| j R  }|| j| j d }|| j| j d }	||d	j||||	d
 dS )z=
        Helper function: create table if not exists
        r   rj   r   
_pk_doc_ida0  
                CREATE TABLE IF NOT EXISTS {t} (
                doc_id UUID NOT NULL,
                text VARCHAR(60000) NOT NULL,
                metadata VARCHAR(1024) NOT NULL,
                CONSTRAINT {c} PRIMARY KEY (doc_id))
                DISTRIBUTE ON (doc_id) SORT ON (doc_id)
            tcZ_pk_doc_id_embedding_id
_fk_doc_ida  
                CREATE TABLE IF NOT EXISTS {t1} (
                doc_id UUID NOT NULL,
                embedding_id SMALLINT NOT NULL,
                embedding FLOAT NOT NULL,
                CONSTRAINT {c1} PRIMARY KEY (doc_id, embedding_id),
                CONSTRAINT {c2} FOREIGN KEY (doc_id) REFERENCES {t2}(doc_id))
                DISTRIBUTE ON (doc_id) SORT ON (doc_id)
            t1t2c1c2N)	r5   rk   rH   rp   rI   rC   rm   rn   ro   )
r"   r   rk   schema_prefixrs   rt   rw   rx   ry   rz   r   r   r   rO      s:    
zYellowbrick._create_tablezOptional['PgCursor'])r2   r+   r   r'   c                 C  sX   |du rD| j   }| j|||d W d   qT1 s80    Y  n| j|||d dS )z
        Helper function: Drop data. If a cursor is provided, use it;
        otherwise, obtain a new cursor for the operation.
        N)r+   )r   rL   _drop_table)r"   r2   r+   r   r   r   r   r-      s    
0zYellowbrick.drop)r   r2   r+   r'   c                 C  sF   ddl m} |r|||}n
||}|d|}|| dS )zI
        Executes the drop table command using the given cursor.
        r   rj   z1
        DROP TABLE IF EXISTS {} CASCADE
        N)r5   rk   rp   rn   ro   rm   )r"   r   r2   r+   rk   
table_nameZdrop_table_queryr   r   r   r|     s    	
zYellowbrick._drop_tabler[   c                 C  sl   | j  *}d}|| | d }W d   n1 s:0    Y  | dks\| dkr`dS tddS )	zE
        Helper function: Test the database is UTF-8 encoded
        z
                SELECT pg_encoding_to_char(encoding)
                FROM pg_database
                WHERE datname = current_database();
            r   Nutf8zutf-8TzDatabase encoding is not UTF-8F)r   rL   rm   fetchonelower	Exception)r"   r   queryencodingr   r   r   rK     s    
*z Yellowbrick._check_database_utf8zIterable[str]zOptional[List[dict]]r   z	List[str])texts	metadataskwargsr'   c              
   K  s  d}t |}| jt |}g }|s2dd |D }|dpBt }| j }t }	t }
t	j
|	ddt	jd}t	j
|
ddt	jd}d}t|D ]\}}tt }|| |||t|| g t|| D ]\}}||||g q|d	7 }||kr| ||	|
 |	d |	d |
d |
d d}q|dkrT| ||	|
 W d    n1 sj0    Y  |jtjjkr| |t| |S )
Ni'  c                 S  s   g | ]}i qS r   r   ).0_r   r   r   
<listcomp>?      z)Yellowbrick.add_texts.<locals>.<listcomp>index_params	")	delimiter	quotecharquotingr      )listrJ   Zembed_documentsr(   r   r*   r   rL   r   csvwriterQUOTE_MINIMAL	enumerater$   uuiduuid4appendwriterowjsondumps_copy_to_dbseektruncater   r   r   _update_indexUUID)r"   r   r   r   Z
batch_size
embeddingsresultsr   r   
content_ioembeddings_ioZcontent_writerZembeddings_writerZcurrent_batch_sizeitextZdoc_uuidembedding_idr0   r   r   r   	add_texts3  sH    







.zYellowbrick.add_textsr   )r   r   r   r'   c           	      C  s   | d | d ddlm} | jr.| jfnd}|jg || j| j R  }|dj|d}|	|| | jrz| jfnd}|jg || jR  }|dj|d}|	|| d S )Nr   rj   r   z
            COPY {table} (doc_id, text, metadata) FROM 
            STDIN WITH (FORMAT CSV, DELIMITER E'\t', QUOTE '"')
        )r2   z
            COPY {table} (doc_id, embedding_id, embedding) FROM 
            STDIN WITH (FORMAT CSV, DELIMITER E'\t', QUOTE '"')
        )
r   r5   rk   rH   rp   rI   rC   rn   ro   Zcopy_expert)	r"   r   r   r   rk   r{   r2   Zcontent_copy_queryZembeddings_copy_queryr   r   r   r   j  s&    

zYellowbrick._copy_to_db 	langchainpubliczType[Yellowbrick])
rX   r   r0   r   r1   r2   r+   r-   r   r'   c           
      K  s,   | |||||d}	|	j f ||d| |	S )a  Add texts to the vectorstore index.
        Args:
            texts: Iterable of strings to add to the vectorstore.
            metadatas: Optional list of metadatas associated with the texts.
            connection_string: URI to Yellowbrick instance
            embedding: Embedding function
            table: table to store embeddings
            kwargs: vectorstore specific parameters
        )r0   r1   r2   r+   r-   )r   r   )r   )
rX   r   r0   r   r1   r2   r+   r-   r   Zvssr   r   r   
from_texts  s    zYellowbrick.from_textszOptional[List[str]]zOptional[bool])ids
delete_allr   r'   c                   sn  ddl m  |r d}nJ|dur^t fdd|D } d|} dj|d	}ntd
| jrt| jfnd}| j	 } j
g || j| j R  }	 dj|	|d}
||
  j
g || jR  }	 dj|	|d}
||
 | j|| j| j g|R  rJ j
g || j| j R  }	 dj|	|d}
||
 W d   n1 s`0    Y  dS )zxDelete vectors by uuids.

        Args:
            ids: List of ids to delete, where each id is a uuid string.
        r   rj   z'
                WHERE 1=1
            Nc                 3  s   | ]}  |V  qd S r!   )Literal)r   idrj   r   r   	<genexpr>  r   z%Yellowbrick.delete.<locals>.<genexpr>z, z5
                WHERE doc_id IN ({ids})
            )r   z*Either ids or delete_all must be provided.r   zDELETE FROM {table} {where_sql})r2   	where_sql)r5   rk   rn   tuplejoinro   
ValueErrorrH   r   rL   rp   rI   rC   rm   _table_existsrA   )r"   r   r   r   r   ZuuidsZids_formattedr{   r   Ztable_identifierr   r   rj   r   delete  sZ    







*zYellowbrick.delete)r   r}   r+   r'   c                 C  sJ   ddl m} ||}||}||dj||d | d dkS )z>
        Checks if a table exists in the given schema
        r   rj   z
                SELECT COUNT(*)
                FROM sys.table t INNER JOIN sys.schema s ON t.schema_id = s.schema_id
                WHERE s.name = {schema} AND t.name = {table_name}
            )r+   r}   )r5   rk   r   rm   rn   ro   r   )r"   r   r}   r+   rk   r   r   r   r     s    

zYellowbrick._table_existszList[float]z	uuid.UUID)vectorr'   c                 C  sF   dd l }dtt|}|| }| }tj|d d d}|S )Nr   ,   )bytes)	hashlibr   mapr$   sha1encodedigestr   r   )r"   r   r   Z
vector_strZhash_objectZhash_digestZvector_uuidr   r   r   _generate_vector_uuid  s    z!Yellowbrick._generate_vector_uuid   intzList[Tuple[Document, float]])r0   kr   r'   c                   s,  ddl m} ddlm} |dp(t }| j }d| j	 }| 
| |d||}	||	  fddt|D }
|d	||}||||
 ||}| jr| jfnd
}|jg || j	R  }|jg || j	| j R  }|jtjjkr| j	d }| ||| | jr*| jfnd
}|jg || j	| j R  }||}|dj|||||||ddd}|||f | }n,|dj|||d}|||f | }W d   n1 s0    Y  g }|D ]<}t|d p i }t|d |d}|||d f q|S )a  Perform a similarity search with Yellowbrick with vector

        Args:
            embedding (List[float]): query embedding
            k (int, optional): Top K neighbors to retrieve. Defaults to 4.

            NOTE: Please do not let end-user fill this and always be aware
                  of SQL injection.

        Returns:
            List[Document, float]: List of Documents and scores
        r   rj   )execute_valuesr   Ztmp_z 
                CREATE TEMPORARY TABLE {} (
                doc_id UUID,
                embedding_id SMALLINT,
                embedding FLOAT)
                ON COMMIT DROP
                DISTRIBUTE REPLICATE
            c                   s   g | ]\}}t  ||fqS r   )r$   )r   r   Zembedding_valueZ
tmp_doc_idr   r   r   %  s   zFYellowbrick.similarity_search_with_score_by_vector.<locals>.<listcomp>z:INSERT INTO {} (doc_id, embedding_id, embedding) VALUES %sr   Z	_tmp_hasha/  
                    WITH index_docs AS (
                    SELECT
                        t1.doc_id,
                        SUM(ABS(t1.hash-t2.hash)) as hamming_distance
                    FROM
                        {lsh_index} t1
                    INNER JOIN
                        {input_hash_table} t2
                    ON t1.hash_index = t2.hash_index
                    GROUP BY t1.doc_id
                    HAVING hamming_distance <= {hamming_distance}
                    )
                    SELECT
                        text,
                        metadata,
                       SUM(v1.embedding * v2.embedding) /
                        (SQRT(SUM(v1.embedding * v1.embedding)) *
                       SQRT(SUM(v2.embedding * v2.embedding))) AS score
                    FROM
                        {v1} v1
                    INNER JOIN
                        {v2} v2
                    ON v1.embedding_id = v2.embedding_id
                    INNER JOIN
                        {v3} v3
                    ON v2.doc_id = v3.doc_id
                    INNER JOIN
                        index_docs v4
                    ON v2.doc_id = v4.doc_id
                    GROUP BY v3.doc_id, v3.text, v3.metadata
                    ORDER BY score DESC
                    LIMIT %s
                hamming_distance)v1v2v3	lsh_indexinput_hash_tabler   a  
                    SELECT 
                        text,
                        metadata,
                        score
                    FROM
                        (SELECT
                            v2.doc_id doc_id,
                            SUM(v1.embedding * v2.embedding) /
                            (SQRT(SUM(v1.embedding * v1.embedding)) *
                            SQRT(SUM(v2.embedding * v2.embedding))) AS score
                        FROM
                            {v1} v1
                        INNER JOIN
                            {v2} v2
                        ON v1.embedding_id = v2.embedding_id
                        GROUP BY v2.doc_id
                        ORDER BY score DESC LIMIT %s
                        ) v4
                    INNER JOIN
                        {v3} v3
                    ON v4.doc_id = v3.doc_id
                    ORDER BY score DESC
                )r   r   r   Nr   )Zpage_contentmetadata   )r5   rk   Zpsycopg2.extrasr   r(   r   r*   r   rL   rI   r   rn   ro   rp   rm   r   rH   rC   r   r   r   _generate_tmp_lsh_hashesrA   r   r)   Zfetchallr   loadsr   r   )r"   r0   r   r   rk   r   r   r   Ztmp_embeddings_tableZcreate_table_queryZ
data_inputinsert_queryr   r{   r   r   tmp_hash_tabler   r   Z	sql_queryr   	documentsresultr   docr   r   r   &similarity_search_with_score_by_vector  s    

	






$
-
(z2Yellowbrick.similarity_search_with_score_by_vectorzList[Document])r   r   r   r'   c                 K  s0   | j |}| jf ||d|}dd |D S )ae  Perform a similarity search with Yellowbrick

        Args:
            query (str): query string
            k (int, optional): Top K neighbors to retrieve. Defaults to 4.

            NOTE: Please do not let end-user fill this and always be aware
                  of SQL injection.

        Returns:
            List[Document]: List of Documents
        r0   r   c                 S  s   g | ]\}}|qS r   r   r   r   r   r   r   r   r     r   z1Yellowbrick.similarity_search.<locals>.<listcomp>rJ   Zembed_queryr   r"   r   r   r   r0   r   r   r   r   similarity_search  s    zYellowbrick.similarity_searchc                 K  s&   | j |}| jf ||d|}|S )ar  Perform a similarity search with Yellowbrick

        Args:
            query (str): query string
            k (int, optional): Top K neighbors to retrieve. Defaults to 4.

            NOTE: Please do not let end-user fill this and always be aware
                  of SQL injection.

        Returns:
            List[Document]: List of (Document, similarity)
        r   r   r   r   r   r   similarity_search_with_score  s    z(Yellowbrick.similarity_search_with_scorec                 K  s$   | j f ||d|}dd |D S )a  Perform a similarity search with Yellowbrick by vectors

        Args:
            embedding (List[float]): query embedding
            k (int, optional): Top K neighbors to retrieve. Defaults to 4.

            NOTE: Please do not let end-user fill this and always be aware
                  of SQL injection.

        Returns:
            List[Document]: List of documents
        r   c                 S  s   g | ]\}}|qS r   r   r   r   r   r   r     r   z;Yellowbrick.similarity_search_by_vector.<locals>.<listcomp>)r   )r"   r0   r   r   r   r   r   r   similarity_search_by_vector  s    z'Yellowbrick.similarity_search_by_vectorzOptional[uuid.UUID])r   doc_idr'   c                 C  s   ddl m} | jr| jfnd}|jg || j| j R  }|jg || j| j R  }|jg || jR  }|d|}|r|dj|	t
|dn|d}	|d}
|d	j||||	|
d
}|| dS )zAdd hashes to LSH indexr   rj   r   zINSERT INTO {}zWHERE e.doc_id = {doc_id})r   r   zGROUP BY 1, 2av  
            {query_prefix}
            SELECT
                e.doc_id as doc_id,
                h.id as hash_index,
                CASE WHEN SUM(e.embedding * h.hyperplane) > 0 THEN 1 ELSE 0 END as hash
            FROM {embedding_table} e
            INNER JOIN {hyperplanes} h ON e.embedding_id = h.hyperplane_id
            {condition}
            {group_by}
        )query_prefixembedding_tablehyperplanes	conditiongroup_byN)r5   rk   rH   rp   rI   rB   rA   rn   ro   r   r$   rm   )r"   r   r   rk   r{   lsh_hyperplane_tableZlsh_index_table_idZembedding_table_idZquery_prefix_idr   r   input_queryr   r   r   _update_lsh_hashes  s:    


zYellowbrick._update_lsh_hashes)r   tmp_embedding_tabler   r'   c                 C  s   ddl m} | jr| jfnd}|jg || j| j R  }||}||}|d|}	|d}
|dj|	|||
d}|| dS )	zGenerate temp LSHr   rj   r   z+CREATE TEMPORARY TABLE {} ON COMMIT DROP ASz
GROUP BY 1a[  
            {query_prefix}
            SELECT
                h.id as hash_index,
                CASE WHEN SUM(e.embedding * h.hyperplane) > 0 THEN 1 ELSE 0 END as hash
            FROM {embedding_table} e
            INNER JOIN {hyperplanes} h ON e.embedding_id = h.hyperplane_id
            {group_by}
            DISTRIBUTE REPLICATE
        )r   r   r   r   N)	r5   rk   rH   rp   rI   rB   rn   ro   rm   )r"   r   r   r   rk   r{   r   Ztmp_embedding_table_idZtmp_hash_table_idr   r   r   r   r   r   r     s,    




z$Yellowbrick._generate_tmp_lsh_hashes)r   num_hyperplanesr'   c           	      C  s   ddl m} | jr| jfnd}|jg || j| j R  }||dj|d |	 d dkrfdS |jg || jR  }||dj|d |	 d }|d7 }|d	j|
||
||d
}|| dS )z4Generate random hyperplanes and store in Yellowbrickr   rj   r   zSELECT COUNT(*) FROM {t})rs   Nz!SELECT MAX(embedding_id) FROM {t}r   a2  
            WITH parameters AS (
                SELECT {num_hyperplanes} AS num_hyperplanes,
                    {dims_per_hyperplane} AS dims_per_hyperplane
            )
            INSERT INTO {hyperplanes_table} (id, hyperplane_id, hyperplane)
                SELECT id, hyperplane_id, (random() * 2 - 1) AS hyperplane
                FROM
                (SELECT range-1 id FROM sys.rowgenerator
                    WHERE range BETWEEN 1 AND
                    (SELECT num_hyperplanes FROM parameters) AND
                    worker_lid = 0 AND thread_id = 0) a,
                (SELECT range-1 hyperplane_id FROM sys.rowgenerator
                    WHERE range BETWEEN 1 AND
                    (SELECT dims_per_hyperplane FROM parameters) AND
                    worker_lid = 0 AND thread_id = 0) b
        )r   Zdims_per_hyperplanehyperplanes_table)r5   rk   rH   rp   rI   rB   rm   rn   ro   r   r   )	r"   r   r   rk   r{   r   rs   Znum_dimensionsr   r   r   r   _populate_hyperplanes*  s,    
z!Yellowbrick._populate_hyperplanesc           
      C  s  ddl m} | jr| jfnd}|jg || j| j R  }|jg || j| j R  }|| j| j d }|| j| j d }||dj	||||d | jr| jfnd}|jg || j| j
 R  }|| j| j
 d }	||d	j	||	d
 dS )z&Create LSH index and hyperplane tablesr   rj   r   rq   ru   a  
                CREATE TABLE IF NOT EXISTS {t1} (
                doc_id UUID NOT NULL,
                hash_index SMALLINT NOT NULL,
                hash SMALLINT NOT NULL,
                CONSTRAINT {c1} PRIMARY KEY (doc_id, hash_index),
                CONSTRAINT {c2} FOREIGN KEY (doc_id) REFERENCES {t2}(doc_id))
                DISTRIBUTE ON (doc_id) SORT ON (doc_id)
            rv   Z_pk_id_hp_ida2  
                CREATE TABLE IF NOT EXISTS {t} (
                id SMALLINT NOT NULL,
                hyperplane_id SMALLINT NOT NULL,
                hyperplane FLOAT NOT NULL,
                CONSTRAINT {c} PRIMARY KEY (id, hyperplane_id))
                DISTRIBUTE REPLICATE SORT ON (id)
            rr   N)r5   rk   rH   rp   rI   rA   rC   rm   rn   ro   rB   )
r"   r   rk   r{   rw   rx   ry   rz   rs   rt   r   r   r   _create_lsh_index_tablesT  s6    
z$Yellowbrick._create_lsh_index_tablesc                 C  s8   | j | j| j| j |d | j | j| j| j |d dS )zDrop LSH index tables)r+   r2   r   N)r-   rH   rI   rA   rB   )r"   r   r   r   r   rM     s    
z"Yellowbrick._drop_lsh_index_tablesr   )r   r'   c                 C  sn   |j tjjkrj| j B}| | | | | ||	dd | 
| W d   n1 s`0    Y  dS )z"Create index from existing vectorsr      N)r   r   r   r   r   rL   rM   r   r   r)   r   r"   r   r   r   r   r   create_index  s    

zYellowbrick.create_indexc                 C  sF   |j tjjkrB| j }| | W d   n1 s80    Y  dS )zDrop an indexN)r   r   r   r   r   rL   rM   r   r   r   r   
drop_index  s    zYellowbrick.drop_index)r   r   r'   c                 C  sH   |j tjjkrD| j }| || W d   n1 s:0    Y  dS )zHUpdate an index with a new or modified embedding in the embeddings tableN)r   r   r   r   r   rL   r   )r"   r   r   r   r   r   r   r     s    zYellowbrick._update_indexc              
   C  s8  ddl m} z| j }| jr(| jfnd}|jg || jR  }|jg || jd R  }|jg || j| j R  }|dj	||d}|
| | | |dj	||d}|
| |dj	||d}	|
|	 W d    n1 s0    Y  W n6 ty2 }
 ztd	|
 |
W Y d }
~
n
d }
~
0 0 d S )
Nr   rj   r   Z_v1zALTER TABLE {t1} RENAME TO {t2})rw   rx   z
                    INSERT INTO {t1} (doc_id, embedding_id, embedding) 
                    SELECT id, embedding_id, embedding FROM {t2}
                z
                    INSERT INTO {t1} (doc_id, text, metadata) 
                    SELECT DISTINCT id, text, metadata FROM {t2}
                zFailed to migrate schema: )r5   rk   r   rL   rH   rp   rI   rC   rn   ro   rm   rO   r   rc   )r"   rk   r   r{   r   Zold_embeddingscontentZalter_table_queryr   Zinsert_content_queryrf   r   r   r   migrate_schema_v1_to_v2  s@    



	
,z#Yellowbrick.migrate_schema_v1_to_v2)NN)N)N)Nr   r   r   F)NN)r   )r   )r   )r   )r   )N)$r   r   r   r   r$   enumEnumr   r*   r#   rD   rN   rO   r-   r|   rK   r   r   classmethodr   r   r   r   r   r   r   r   r   r   r   r   rM   r   r   r   r   r   r   r   r   r   !   sf   >66    7     $   > 
      -$*/r   )"
__future__r   rE   r   r   r   r6   r   
contextlibr   ior   typingr   r   r   r   r	   r
   r   r   r   Zlangchain_core.embeddingsr   Zlangchain_core.vectorstoresr   Z%langchain_community.docstore.documentr   Zpsycopg2.extensionsr   ZPgConnectionr   ZPgCursorr   r   r   r   r   <module>   s    ,