import importlib.util
from typing import Any, List, Mapping, Optional

from langchain_core.callbacks.manager import CallbackManagerForLLMRun
from langchain_core.language_models.llms import LLM
from pydantic import ConfigDict

from langchain_community.llms.utils import enforce_stop_tokens

DEFAULT_MODEL_ID = "google/flan-t5-large"
DEFAULT_TASK = "text2text-generation"
VALID_TASKS = ("text2text-generation", "text-generation", "summarization")


class WeightOnlyQuantPipeline(LLM):
    """Weight only quantized model.

    To use, you should have the `intel-extension-for-transformers` package and
    the `transformers` package installed.
    intel-extension-for-transformers:
        https://github.com/intel/intel-extension-for-transformers
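
    Both packages can be installed with pip (the same commands suggested by
    this module's import error):

    .. code-block:: bash

        pip install transformers
        pip install intel-extension-for-transformers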

    Example using from_model_id:
        .. code-block:: python

            from langchain_community.llms import WeightOnlyQuantPipeline
            from intel_extension_for_transformers.transformers import (
                WeightOnlyQuantConfig
            )
            config = WeightOnlyQuantConfig()
            hf = WeightOnlyQuantPipeline.from_model_id(
                model_id="google/flan-t5-large",
                task="text2text-generation",
                pipeline_kwargs={"max_new_tokens": 10},
                quantization_config=config,
            )

    Example passing a pipeline in directly:
        .. code-block:: python

            from langchain_community.llms import WeightOnlyQuantPipeline
            from intel_extension_for_transformers.transformers import (
                AutoModelForSeq2SeqLM,
                WeightOnlyQuantConfig,
            )
            from transformers import AutoTokenizer, pipeline

            model_id = "google/flan-t5-large"
            tokenizer = AutoTokenizer.from_pretrained(model_id)
            config = WeightOnlyQuantConfig()
            model = AutoModelForSeq2SeqLM.from_pretrained(
                model_id,
                quantization_config=config,
            )
            pipe = pipeline(
                "text2text-generation",
                model=model,
                tokenizer=tokenizer,
                max_new_tokens=10,
            )
            hf = WeightOnlyQuantPipeline(pipeline=pipe)
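
    Example enforcing stop sequences (`stop` truncates the generated text at
    the first match via `enforce_stop_tokens`; the prompt and stop values
    below are illustrative only):
        .. code-block:: python

            text = hf.invoke("Tell me a joke.", stop=["\n\n"])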
    Npipelinemodel_idmodel_kwargspipeline_kwargsZallow)extraF)r   taskdevice
device_mapr   r   load_in_4bitload_in_8bitquantization_configkwargsreturnc
              
   K   s$  |dur"t |tr"|dkr"tdtjddu r:tdz8ddlm}m} ddl	m
} dd	lm} dd
lm} W n ty   tdY n0 t |tr|dkr| stddt| }nt |tr|dk rd}|du r|du rd}|pi }|j|fi |}zn|dkr,|j|f|||	d|d|}n@|dv rV|j|f|||	d|d|}ntd| dt dW n8 ty } ztd| d|W Y d}~n
d}~0 0 d|v rdd | D }|pi }|f |||||d|}|jtvrtd|j dt d| f ||||d|
S )z5Construct the pipeline object from model_id and task.Nr   z7`Device` and `device_map` cannot be set simultaneously!Ztorchz;Weight only quantization pipeline only support PyTorch now!r   )AutoModelForCausalLMAutoModelForSeq2SeqLM)is_ipex_available)AutoTokenizer)r   zCould not import transformers python package. Please install it with `pip install transformers` and `pip install intel-extension-for-transformers`.z)Don't find out Intel GPU on this machine!zxpu:cpur   F)r   r   r   Zuse_llm_runtimer   )r
   r   Got invalid task , currently only  are supportedzCould not load the z# model due to missing dependencies.trust_remote_codec                 S   s   i | ]\}}|d kr||qS )r$    ).0kvr%   r%   /var/www/html/cobodadashboardai.evdpl.com/venv/lib/python3.9/site-packages/langchain_community/llms/weight_only_quantization.py
<dictcomp>   s   z9WeightOnlyQuantPipeline.from_model_id.<locals>.<dictcomp>)r   model	tokenizerr   r   )r   r   r   r   )
isinstanceint
ValueError	importlibutil	find_specZ-intel_extension_for_transformers.transformersr   r   Z,intel_extension_for_transformers.utils.utilsr   Ztransformersr   r   ImportErrorstrZfrom_pretrainedVALID_TASKSitemsr   )clsr   r   r   r   r   r   r   r   r   r   r   r   r   r   Zhf_pipelineZ_model_kwargsr,   r+   eZ_pipeline_kwargsr   r%   r%   r)   from_model_idO   s    

	





z%WeightOnlyQuantPipeline.from_model_id)r   c                 C   s   | j | j| jdS )zGet the identifying parameters.r   r   r   r:   selfr%   r%   r)   _identifying_params   s    z+WeightOnlyQuantPipeline._identifying_paramsc                 C   s   dS )zReturn type of llm.Zweight_only_quantizationr%   r;   r%   r%   r)   	_llm_type   s    z!WeightOnlyQuantPipeline._llm_type)promptstoprun_managerr   r   c                 K   s   |  |}| j jdkr0|d d t|d }nN| j jdkrJ|d d }n4| j jdkrd|d d }ntd| j j d	t d
|rt||}|S )ab  Call the HuggingFace model and return the output.

        Args:
            prompt: The prompt to use for generation.
            stop: A list of strings to stop generation when encountered.

        Returns:
            The generated text.

        Example:
            .. code-block:: python

                from langchain_community.llms import WeightOnlyQuantPipeline
                llm = WeightOnlyQuantPipeline.from_model_id(
                    model_id="google/flan-t5-large",
                    task="text2text-generation",
                )
                llm.invoke("This is a prompt.")
        """
        response = self.pipeline(prompt)
        if self.pipeline.task == "text-generation":
            # The text-generation task returns the prompt followed by the
            # completion, so strip the prompt off the front.
            text = response[0]["generated_text"][len(prompt) :]
        elif self.pipeline.task == "text2text-generation":
            text = response[0]["generated_text"]
        elif self.pipeline.task == "summarization":
            text = response[0]["summary_text"]
        else:
            raise ValueError(
                f"Got invalid task {self.pipeline.task}, "
                f"currently only {VALID_TASKS} are supported"
            )
        if stop:
            # enforce_stop_tokens truncates the text at the first occurrence
            # of any of the stop sequences.
            text = enforce_stop_tokens(text, stop)
        return text