from __future__ import annotations

from typing import (
    TYPE_CHECKING,
    Any,
    Dict,
    Generator,
    Iterator,
    List,
    Mapping,
    Optional,
    Union,
)

from langchain_core.callbacks import CallbackManagerForLLMRun
from langchain_core.language_models.llms import LLM
from langchain_core.outputs import GenerationChunk

if TYPE_CHECKING:
    from xinference.client import RESTfulChatModelHandle, RESTfulGenerateModelHandle
    from xinference.model.llm.core import LlamaCppGenerateConfig


class Xinference(LLM):
    """`Xinference` large-scale model inference service.

    To use, you should have the xinference library installed:

    .. code-block:: bash

       pip install "xinference[all]"

    If you are only using the services provided by Xinference, you can use the
    xinference_client package instead:

    .. code-block:: bash

        pip install xinference_client

    Check out: https://github.com/xorbitsai/inference

    To run, you need to start a Xinference supervisor on one server and
    Xinference workers on the other servers.

    Example:
        To start a local instance of Xinference, run

        .. code-block:: bash

           $ xinference

        You can also deploy Xinference in a distributed cluster. Here are the steps:

        Starting the supervisor:

        .. code-block:: bash

           $ xinference-supervisor

        Starting the worker:

        .. code-block:: bash

           $ xinference-worker

    Then, launch a model using the command line interface (CLI).

    Example:

    .. code-block:: bash

       $ xinference launch -n orca -s 3 -q q4_0

    It will return a model UID. Then, you can use Xinference with LangChain.

    Example:

    .. code-block:: python

        from langchain_community.llms import Xinference

        llm = Xinference(
            server_url="http://0.0.0.0:9997",
            model_uid={model_uid},  # replace model_uid with the model UID returned from launching the model
        )

        llm.invoke(
            prompt="Q: where can we visit in the capital of France? A:",
            generate_config={"max_tokens": 1024, "stream": True},
        )
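
    The class also implements LangChain's streaming interface, so tokens can be
    consumed as they arrive. A minimal sketch, assuming the same running server;
    ``<model_uid>`` below is a placeholder for the UID returned by
    ``xinference launch``:

    .. code-block:: python

        from langchain_community.llms import Xinference

        llm = Xinference(
            server_url="http://0.0.0.0:9997",
            model_uid="<model_uid>",  # placeholder: use your launched model UID
        )

        # Tokens are yielded one by one as the server produces them.
        for token in llm.stream("Q: where can we visit in the capital of France? A:"):
            print(token, end="", flush=True)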

    Example:

    .. code-block:: python

        from langchain_community.llms import Xinference
        from langchain.prompts import PromptTemplate

        llm = Xinference(
            server_url="http://0.0.0.0:9997",
            model_uid={model_uid},  # replace model_uid with the model UID returned from launching the model
            stream=True
        )
        prompt = PromptTemplate(
            input_variables=['country'],
            template="Q: where can we visit in the capital of {country}? A:"
        )
        chain = prompt | llm
        chain.stream(input={'country': 'France'})
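
    Keyword arguments passed to the constructor are stored as ``model_kwargs`` and
    merged into the ``generate_config`` of every call, so they act as default
    generation settings. A minimal sketch; the parameter values are illustrative
    and ``<model_uid>`` is a placeholder:

    .. code-block:: python

        from langchain_community.llms import Xinference

        llm = Xinference(
            server_url="http://0.0.0.0:9997",
            model_uid="<model_uid>",   # placeholder: use your launched model UID
            max_tokens=256,            # kept in model_kwargs and applied to every call
        )

        # A per-call generate_config is merged on top of model_kwargs.
        llm.invoke(
            "Q: where can we visit in the capital of France? A:",
            generate_config={"max_tokens": 1024},
        )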


    To view all the supported builtin models, run:

    .. code-block:: bash

        $ xinference list --all

    NzOptional[Any]clientzOptional[str]
server_url	model_uidzDict[str, Any]model_kwargsr   r   r   r   c                   s   zddl m} W nR tyb   zddlm} W n. ty\ } ztd|W Y d }~n
d }~0 0 Y n0 |pji }t jf i |||d | jd u rtd| jd u rtd||| _	d S )Nr   )RESTfulClientzCould not import RESTfulClient from xinference. Please install it with `pip install xinference` or `pip install xinference_client`.r   zPlease provide server URLzPlease provide the model UID)
xinference.clientr   ImportErrorZxinference_clientsuper__init__r   
ValueErrorr   r   )selfr   r   r   r   e	__class__ q/var/www/html/cobodadashboardai.evdpl.com/venv/lib/python3.9/site-packages/langchain_community/llms/xinference.pyr   }   s.    


zXinference.__init__str)returnc                 C  s   dS )zReturn type of llm.Z
xinferencer"   r   r"   r"   r#   	_llm_type   s    zXinference._llm_typezMapping[str, Any]c                 C  s"   i d| j id| jid| jiS )zGet the identifying parameters.r   r   r   r   r&   r"   r"   r#   _identifying_params   s    zXinference._identifying_paramszOptional[List[str]]z"Optional[CallbackManagerForLLMRun])promptstoprun_managerkwargsr%   c           
      K  s   | j du rtd| j | j}|di }i | j|}|rF||d< |r||dr|d}| j||||dD ]}||7 }qj|S |j||d}	|	d	 d
 d S dS )aq  Call the xinference model and return the output.

        Args:
            prompt: The prompt to use for generation.
            stop: Optional list of stop words to use when generating.
            generate_config: Optional dictionary for the configuration used for
                generation.

        Returns:
            The generated string by the model.
        NClient is not initialized!generate_configr*   stream )modelr)   r+   r.   r)   r.   choicesr   text)r   r   	get_modelr   getr   _stream_generategenerate)
r   r)   r*   r+   r,   r1   r.   Zcombined_text_outputtoken
completionr"   r"   r#   _call   s&    


zXinference._callz=Union['RESTfulGenerateModelHandle', 'RESTfulChatModelHandle']z"Optional['LlamaCppGenerateConfig']zGenerator[str, None, None])r1   r)   r+   r.   r%   c                 c  sz   |j ||d}|D ]b}t|tr|dg }|r|d }t|tr|dd}	|d}
|rn|j|	| j|
d |	V  qdS )	a^  
        Args:
            prompt: The prompt to use for generation.
            model: The model used for generation.
            stop: Optional list of stop words to use when generating.
            generate_config: Optional dictionary for the configuration used for
                generation.

        Yields:
            A string token.
        r2   r3   r   r4   r0   logprobs)r9   verbose	log_probsN)r8   
isinstancedictr6   on_llm_new_tokenr=   )r   r1   r)   r+   r.   Zstreaming_responsechunkr3   choicer9   r>   r"   r"   r#   r7      s     


zXinference._stream_generatezIterator[GenerationChunk]c                 k  sf   | di }i | j|}|r&||d< | ||D ].}|r2| |}|rZ|j|j| jd |V  q2d S )Nr.   r*   )r=   )r6   r   _create_generate_stream$_stream_response_to_generation_chunkrA   r4   r=   )r   r)   r*   r+   r,   r.   Zstream_resprB   r"   r"   r#   _stream   s    
zXinference._streamzOptional[Dict[str, List[str]]]zIterator[str])r)   r.   r%   c                 c  s8   | j d u rtd| j | j}|j||dE d H  d S )Nr-   r2   )r   r   r5   r   r8   )r   r)   r.   r1   r"   r"   r#   rD     s    
z"Xinference._create_generate_streamr   )stream_responser%   c                 C  s   d}t | trv| dg }|rj|d }t |tr`|dd}t|t|dd|dddd	S td
q~t|dS ntddS )z0Convert a stream response to a generation chunk.r0   r3   r   r4   finish_reasonNr<   )rH   r<   )r4   Zgeneration_infozchoice type error!)r4   zstream_response type error!)r?   r@   r6   r   	TypeError)rG   r9   r3   rC   r"   r"   r#   rE     s"    




z/Xinference._stream_response_to_generation_chunk)NN)NN)NN)NN)N)__name__
__module____qualname____doc__r   __annotations__r   propertyr'   r(   r;   r7   rF   rD   staticmethodrE   __classcell__r"   r"   r    r#   r      s2   
\  #  0  &   r   N)
__future__r   typingr   r   r   r   r   r   r	   r
   r   Zlangchain_core.callbacksr   Z#langchain_core.language_models.llmsr   Zlangchain_core.outputsr   r   r   r   Zxinference.model.llm.corer   r   r"   r"   r"   r#   <module>   s   ,