from enum import Enum
from typing import Any, Iterator, List, Optional

from langchain_core.callbacks import CallbackManagerForLLMRun
from langchain_core.language_models.llms import LLM
from langchain_core.outputs import GenerationChunk
from pydantic import BaseModel, ConfigDict

from langchain_community.llms.utils import enforce_stop_tokens


class Device(str, Enum):
    """The device to use for inference, cuda or cpu"""

    cuda = "cuda"
    cpu = "cpu"


class ReaderConfig(BaseModel):
    """Configuration for the reader to be deployed in Titan Takeoff API."""

    model_config = ConfigDict(protected_namespaces=())

    model_name: str
    device: Device = Device.cuda
    consumer_group: str = "primary"
    tensor_parallel: Optional[int] = None
    max_seq_length: int = 512
    max_batch_size: int = 4


class TitanTakeoff(LLM):
    """Titan Takeoff API LLMs.

    Titan Takeoff is a wrapper to interface with Takeoff Inference API for
    generative text to text language models.

    You can use this wrapper to send requests to a generative language model
    and to deploy readers with Takeoff.

    Examples:
        This is an example of how to deploy a generative language model and send
        requests.

        .. code-block:: python

            # Import the TitanTakeoff class from community package
            import time
            from langchain_community.llms import TitanTakeoff

            # Specify the generative model reader you'd like to deploy
            reader_1 = {
                "model_name": "TheBloke/Llama-2-7b-Chat-AWQ",
                "device": "cuda",
                "tensor_parallel": 1,
                "consumer_group": "llama"
            }

            # For every reader you pass into the models arg, Takeoff will spin
            # up a reader according to the specs you provide. If you don't
            # specify the arg, no models are spun up and it is assumed you have
            # already done this separately.
            llm = TitanTakeoff(models=[reader_1])

            # Wait for the reader to be deployed, time needed depends on the
            # model size and your internet speed
            time.sleep(60)

            # Returns the generated text from the `llama` consumer group,
            # where we just spun up the Llama 7B model
            print(llm.invoke(
                "Where can I see football?", consumer_group="llama"
            ))

            # You can also send generation parameters to the model, any of the
            # following can be passed in as kwargs:
            # https://docs.titanml.co/docs/next/apis/Takeoff%20inference_REST_API/generate#request
            # for instance:
            print(llm.invoke(
                "Where can I see football?", consumer_group="llama", max_new_tokens=100
            ))
    zhttp://localhostbase_urli  porti  	mgmt_portF	streamingNclient)r"   r#   r$   r%   modelsc                    sp   t  j||||d zddlm} W n ty>   tdY n0 || j| j| jd| _|D ]}| j	| qZdS )a  Initialize the Titan Takeoff language wrapper.

        Args:
            base_url (str, optional): The base URL where the Takeoff
                Inference Server is listening. Defaults to `http://localhost`.
            port (int, optional): What port is Takeoff Inference API
                listening on. Defaults to 3000.
            mgmt_port (int, optional): What port is Takeoff Management API
                listening on. Defaults to 3001.
            streaming (bool, optional): Whether you want to by default use the
                generate_stream endpoint over generate to stream responses.
                Defaults to False. In reality, this is not significantly different
                as the streamed response is buffered and returned similar to the
                non-streamed response, but the run manager is applied per token
                generated.
            models (List[ReaderConfig], optional): Any readers you'd like to
                spin up on. Defaults to [].

        Raises:
            ImportError: If you haven't installed takeoff-client, you will
            get an ImportError. To remedy run `pip install 'takeoff-client==0.4.0'`
        """
        super().__init__(
            base_url=base_url, port=port, mgmt_port=mgmt_port, streaming=streaming
        )
        try:
            from takeoff_client import TakeoffClient
        except ImportError:
            raise ImportError(
                "takeoff-client is required for TitanTakeoff. "
                "Please install it with `pip install 'takeoff-client>=0.4.0'`."
            )
        self.client = TakeoffClient(
            self.base_url, port=self.port, mgmt_port=self.mgmt_port
        )
        for model in models:
            self.client.create_reader(model)

    @property
    def _llm_type(self) -> str:
        """Return type of llm."""
        return "titan_takeoff"

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        """Call out to Titan Takeoff (Pro) generate endpoint.

        Args:
            prompt: The prompt to pass into the model.
            stop: Optional list of stop words to use when generating.
            run_manager: Optional callback manager to use when streaming.

        Returns:
            The string generated by the model.

        Example:
            .. code-block:: python

                model = TitanTakeoff()

                prompt = "What is the capital of the United Kingdom?"

                # Use of model(prompt), i.e. `__call__`, was deprecated in LangChain 0.1.7;
                # use model.invoke(prompt) instead.
                response = model.invoke(prompt)

        """
        if self.streaming:
            text_output = ""
            for chunk in self._stream(
                prompt=prompt, stop=stop, run_manager=run_manager
            ):
                text_output += chunk.text
            return text_output

        response = self.client.generate(prompt, **kwargs)
        text = response["text"]

        if stop is not None:
            text = enforce_stop_tokens(text, stop)
        return text

    def _stream(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> Iterator[GenerationChunk]:
        """Call out to Titan Takeoff (Pro) stream endpoint.

        Args:
            prompt: The prompt to pass into the model.
            stop: Optional list of stop words to use when generating.
            run_manager: Optional callback manager to use when streaming.

        Yields:
            A dictionary like object containing a string token.

        Example:
            .. code-block:: python

                model = TitanTakeoff()

                prompt = "What is the capital of the United Kingdom?"
                response = model.stream(prompt)

                # OR

                model = TitanTakeoff(streaming=True)

                response = model.invoke(prompt)

        """
        response = self.client.generate_stream(prompt, **kwargs)
        buffer = ""
        for text in response:
            buffer += text.data
            if "data:" in buffer:
                # Remove the first instance of "data:" from the buffer.
                if buffer.startswith("data:"):
                    buffer = ""
                if len(buffer.split("data:", 1)) == 2:
                    content, _ = buffer.split("data:", 1)
                    buffer = content.rstrip("\n")
                # Only emit a chunk once there is content to process.
                if buffer:
                    chunk = GenerationChunk(text=buffer)
                    buffer = ""  # Reset buffer for the next set of data.

                    if run_manager:
                        run_manager.on_llm_new_token(token=chunk.text)
                    yield chunk

        # Yield any remaining content, stripping the end-of-sequence token.
        if buffer:
            chunk = GenerationChunk(text=buffer.replace("</s>", ""))
            if run_manager:
                run_manager.on_llm_new_token(token=chunk.text)
            yield chunk
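
# ---------------------------------------------------------------------------
# Illustrative usage sketch (not part of the module above). It assumes a Takeoff
# server is already running on the default ports (3000/3001) and simply mirrors
# the example in the class docstring; the reader spec and consumer group are
# taken from there rather than from any particular deployment.
#
#     import time
#     from langchain_community.llms import TitanTakeoff
#     from langchain_community.llms.titan_takeoff import Device, ReaderConfig
#
#     reader = ReaderConfig(
#         model_name="TheBloke/Llama-2-7b-Chat-AWQ",
#         device=Device.cuda,
#         consumer_group="llama",
#     )
#     llm = TitanTakeoff(models=[reader])  # deploys the reader via the Management API
#     time.sleep(60)  # wait for the reader to come up
#
#     print(llm.invoke("Where can I see football?", consumer_group="llama"))
#
#     # Stream tokens as they are generated instead of waiting for the full text.
#     for token in TitanTakeoff().stream("Where can I see football?"):
#         print(token, end="", flush=True)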