from typing import Any, Callable, Dict, Iterator, List, Optional

from langchain_core.callbacks import CallbackManagerForLLMRun
from langchain_core.language_models import LLM
from langchain_core.outputs import GenerationChunk
from langchain_core.utils import pre_init
from pydantic import Field


class ExLlamaV2(LLM):
    """ExllamaV2 API.

    - Works only with GPTQ models for now.
    - LoRA models are not supported yet.

    To use, you should have the exllamav2 library installed, and provide the
    path to the Llama model as a named parameter to the constructor.

    Example:
        .. code-block:: python

            from langchain_community.llms import ExLlamaV2

            llm = ExLlamaV2(model_path="/path/to/llama/model")

    #TODO:
    - Add loras support
    - Add support for custom settings
    - Add support for custom stop sequences
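
    Note: a ``settings`` object from exllamav2 is currently required;
    constructing with only ``model_path`` (as above) raises
    ``NotImplementedError``. A minimal sketch, assuming the exllamav2
    sampler API (the field values below are illustrative only):

        .. code-block:: python

            from exllamav2.generator import ExLlamaV2Sampler

            from langchain_community.llms import ExLlamaV2

            settings = ExLlamaV2Sampler.Settings()
            settings.temperature = 0.85  # illustrative value
            settings.top_p = 0.8  # illustrative value

            llm = ExLlamaV2(
                model_path="/path/to/llama/model",
                settings=settings,
            )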
    Nclient
model_pathexllama_cacheconfig	generator	tokenizersettingslogfuncstop_sequences   max_new_tokensT	streamingverbosedisallowed_tokens)valuesreturnc              
        try:
            import torch
        except ImportError as e:
            raise ImportError(
                "Unable to import torch, please install with `pip install torch`."
            ) from e

        # exllamav2 runs on CUDA only.
        if not torch.cuda.is_available():
            raise EnvironmentError("CUDA is not available. ExllamaV2 requires CUDA.")

        try:
            from exllamav2 import (
                ExLlamaV2,
                ExLlamaV2Cache,
                ExLlamaV2Config,
                ExLlamaV2Tokenizer,
            )
            from exllamav2.generator import (
                ExLlamaV2BaseGenerator,
                ExLlamaV2StreamingGenerator,
            )
        except ImportError:
            raise ImportError(
                "Could not import the exllamav2 library. Please install it "
                "(CUDA 12.1 is required), for example: "
                "pip install https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+cu121-cp311-cp311-linux_x86_64.whl"
            )

        # Route log output through logfunc; silence it unless verbose is set.
        if not values["verbose"]:
            values["logfunc"] = lambda *args, **kwargs: None
        logfunc = values["logfunc"]
 }|d r|d }||j ntd| }|d |_|  ||}||dd}|| ||}|d r0|	|||}n||||}dd |d D |d< t|d|d  |d|d   |d}|r||| ||d< ||d< ||d< ||d< ||d< |S )Nr   z@Unable to import torch, please install with `pip install torch`.z/CUDA is not available. ExllamaV2 requires CUDA.)r   ExLlamaV2CacheExLlamaV2ConfigExLlamaV2Tokenizer)ExLlamaV2BaseGeneratorExLlamaV2StreamingGeneratorzCould not import exllamav2 library. Please install the exllamav2 library with (cuda 12.1 is required)example : !python -m pip install https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+cu121-cp311-cp311-linux_x86_64.whlr   c                  _   s   d S )N )argskwargsr#   r#   p/var/www/html/cobodadashboardai.evdpl.com/venv/lib/python3.9/site-packages/langchain_community/llms/exllamav2.py<lambda>_       z0ExLlamaV2.validate_environment.<locals>.<lambda>r   r   z<settings is required. Custom settings are not supported yet.r   T)Zlazyr   c                 S   s   g | ]}|   qS r#   )striplower).0xr#   r#   r&   
<listcomp>z   r(   z2ExLlamaV2.validate_environment.<locals>.<listcomp>r   zstop_sequences r   r   r   r   r   r   )torchImportErrorcudaZis_availableEnvironmentErrorZ	exllamav2r   r   r   r    Zexllamav2.generatorr!   r"   __dict__NotImplementedErrorZ	model_dirprepareZload_autosplitsetattrgetZdisallow_tokens)clsr   r.   er   r   r   r    r!   r"   r   r   r   r   modelr   r   r   Z
disallowedr#   r#   r&   validate_environment>   sb    





zExLlamaV2.validate_environment)r   c                 C   s   dS )zReturn type of llm.r   r#   )selfr#   r#   r&   	_llm_type   s    zExLlamaV2._llm_type)textr   c                 C   s   | j j|S )z-Get the number of tokens present in the text.)r   r   
num_tokens)r;   r=   r#   r#   r&   get_num_tokens   s    zExLlamaV2.get_num_tokens)promptstoprun_managerr%   r   c           	      K   sd   | j }| jr8d}| j||||dD ]}|t|7 }q"|S |j|| j| jd}|t|d  }|S d S )N )r@   rA   rB   r%   )r@   Zgen_settingsr>   )r   r   _streamstrZgenerate_simpler   r   len)	r;   r@   rA   rB   r%   r   Zcombined_text_outputchunkoutputr#   r#   r&   _call   s    
zExLlamaV2._callc           
      k   s~   | j |}| j  | jg  | j|| j d}| j \}}}	|d7 }|rb|j|| j	d |V  |sz|| j
kr6qzq6d S )Nr      )tokenr   )r   encoder   ZwarmupZset_stop_conditionsZbegin_streamr   streamZon_llm_new_tokenr   r   )
r;   r@   rA   rB   r%   Z	input_idsZgenerated_tokensrG   Zeos_r#   r#   r&   rD      s     
zExLlamaV2._stream)NN)NN)%__name__