from typing import Any, Dict, List, Optional

from langchain_core.embeddings import Embeddings
from pydantic import BaseModel, ConfigDict


class QuantizedBiEncoderEmbeddings(BaseModel, Embeddings):
    """Quantized bi-encoders embedding models.

    Please ensure that you have installed optimum-intel and ipex.

    Input:
        model_name: str = Model name.
        max_seq_len: int = The maximum sequence length for tokenization. (default 512)
        pooling_strategy: str =
            "mean" or "cls", pooling strategy for the final layer. (default "mean")
        query_instruction: Optional[str] =
            An instruction to add to the query before embedding. (default None)
        document_instruction: Optional[str] =
            An instruction to add to each document before embedding. (default None)
        padding: Optional[bool] =
            Whether to add padding during tokenization or not. (default True)
        model_kwargs: Optional[Dict] =
            Parameters to add to the model during initialization. (default {})
        encode_kwargs: Optional[Dict] =
            Parameters to add during the embedding forward pass. (default {})

    Example:

    from langchain_community.embeddings import QuantizedBiEncoderEmbeddings

    model_name = "Intel/bge-small-en-v1.5-rag-int8-static"
    encode_kwargs = {'normalize_embeddings': True}
    hf = QuantizedBiEncoderEmbeddings(
        model_name,
        encode_kwargs=encode_kwargs,
        query_instruction="Represent this sentence for searching relevant passages: "
    )
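
    Once constructed, queries and documents are embedded the same way
    (the strings below are illustrative, not from a real corpus):

    query_vec = hf.embed_query("What is a bi-encoder?")
    doc_vecs = hf.embed_documents(["First document.", "Second document."])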
    """

    def __init__(
        self,
        model_name: str,
        max_seq_len: int = 512,
        pooling_strategy: str = "mean",
        query_instruction: Optional[str] = None,
        document_instruction: Optional[str] = None,
        padding: Optional[bool] = True,
        model_kwargs: Optional[Dict] = None,
        encode_kwargs: Optional[Dict] = None,
        **kwargs: Any,
    ) -> None:
        super().__init__(**kwargs)

        self.model_name_or_path = model_name
        self.max_seq_len = max_seq_len
        self.pooling = pooling_strategy
        self.padding = padding
        self.encode_kwargs = encode_kwargs or {}
        self.model_kwargs = model_kwargs or {}

        # Normalization and batch size are read from encode_kwargs.
        self.normalize = self.encode_kwargs.get("normalize_embeddings", False)
        self.batch_size = self.encode_kwargs.get("batch_size", 32)

        self.query_instruction = query_instruction
        self.document_instruction = document_instruction

        self.load_model()
    def load_model(self) -> None:
        try:
            from transformers import AutoTokenizer
        except ImportError as e:
            raise ImportError(
                "Unable to import transformers, please install with "
                "`pip install -U transformers`."
            ) from e
        try:
            from optimum.intel import IPEXModel

            self.transformer_model = IPEXModel.from_pretrained(
                self.model_name_or_path, **self.model_kwargs
            )
        except Exception as e:
            raise Exception(
                f"""
Failed to load model {self.model_name_or_path}, due to the following error:
{e}
Please ensure that you have installed optimum-intel and ipex correctly, using:

pip install optimum[neural-compressor]
pip install intel_extension_for_pytorch

For more information, please visit:
* Install optimum-intel as shown here: https://github.com/huggingface/optimum-intel.
* Install IPEX as shown here: https://intel.github.io/intel-extension-for-pytorch/index.html#installation?platform=cpu&version=v2.2.0%2Bcpu.
"""
            )
        self.transformer_tokenizer = AutoTokenizer.from_pretrained(
            pretrained_model_name_or_path=self.model_name_or_path
        )
        # Put the model in evaluation mode (disables dropout etc.).
        self.transformer_model.eval()

    model_config = ConfigDict(extra="allow", protected_namespaces=())
    def _embed(self, inputs: Any) -> Any:
        try:
            import torch
        except ImportError as e:
            raise ImportError(
                "Unable to import torch, please install with `pip install -U torch`."
            ) from e
        with torch.inference_mode():
            outputs = self.transformer_model(**inputs)
            if self.pooling == "mean":
                emb = self._mean_pooling(outputs, inputs["attention_mask"])
            elif self.pooling == "cls":
                emb = self._cls_pooling(outputs)
            else:
                raise ValueError("pooling method not supported")

            if self.normalize:
                # L2-normalize each embedding vector.
                emb = torch.nn.functional.normalize(emb, p=2, dim=1)
            return emb
    @staticmethod
    def _cls_pooling(outputs: Any) -> Any:
        if isinstance(outputs, dict):
            token_embeddings = outputs["last_hidden_state"]
        else:
            token_embeddings = outputs[0]
        # Use the embedding of the first ([CLS]) token of each sequence.
        return token_embeddings[:, 0]

    @staticmethod
    def _mean_pooling(outputs: Any, attention_mask: Any) -> Any:
        try:
            import torch
        except ImportError as e:
            raise ImportError(
                "Unable to import torch, please install with `pip install -U torch`."
            ) from e
        if isinstance(outputs, dict):
            token_embeddings = outputs["last_hidden_state"]
        else:
            # The first element of the model output holds the token embeddings.
            token_embeddings = outputs[0]
        # Average the token embeddings, weighting by the attention mask so that
        # padding tokens do not contribute to the sentence embedding.
        input_mask_expanded = (
            attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
        )
        sum_embeddings = torch.sum(token_embeddings * input_mask_expanded, 1)
        sum_mask = torch.clamp(input_mask_expanded.sum(1), min=1e-9)
        return sum_embeddings / sum_mask
    def _embed_text(self, texts: List[str]) -> List[List[float]]:
        inputs = self.transformer_tokenizer(
            texts,
            max_length=self.max_seq_len,
            truncation=True,
            padding=self.padding,
            return_tensors="pt",
        )
        return self._embed(inputs).tolist()

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Embed a list of text documents using the Optimized Embedder model.

        Input:
            texts: List[str] = List of text documents to embed.
        Output:
            List[List[float]] = The embeddings of each text document.
        """
        try:
            import pandas as pd
        except ImportError as e:
            raise ImportError(
                "Unable to import pandas, please install with `pip install -U pandas`."
            ) from e
        try:
            from tqdm import tqdm
        except ImportError as e:
            raise ImportError(
                "Unable to import tqdm, please install with `pip install -U tqdm`."
            ) from e

        # Prepend the document instruction to each text, if one was provided.
        docs = [
            self.document_instruction + d if self.document_instruction else d
            for d in texts
        ]

        # Split the documents into batches of self.batch_size rows.
        text_list_df = pd.DataFrame(docs, columns=["texts"]).reset_index()
        text_list_df["batch_index"] = text_list_df["index"] // self.batch_size
        batches = text_list_df.groupby(["batch_index"])["texts"].apply(list)

        vectors: List[List[float]] = []
        for batch in tqdm(batches, desc="Batches"):
            vectors += self._embed_text(batch)
        return vectors
    def embed_query(self, text: str) -> List[float]:
        if self.query_instruction:
            text = self.query_instruction + text
        return self._embed_text([text])[0]
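

if __name__ == "__main__":
    # Minimal smoke-test sketch, assuming optimum-intel, IPEX, transformers,
    # pandas, and tqdm are installed and the quantized model below can be
    # fetched from the Hugging Face Hub; the query string is illustrative.
    embedder = QuantizedBiEncoderEmbeddings(
        model_name="Intel/bge-small-en-v1.5-rag-int8-static",
        query_instruction="Represent this sentence for searching relevant passages: ",
    )
    vec = embedder.embed_query("What is quantization?")
    print(f"embedding dimension: {len(vec)}")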