from __future__ import annotations

import json
from io import StringIO
from typing import Any, Dict, Iterator, List, Optional

import requests
from langchain_core.callbacks.manager import CallbackManagerForLLMRun
from langchain_core.language_models.llms import LLM
from langchain_core.outputs import GenerationChunk
from langchain_core.utils import get_pydantic_field_names
from pydantic import ConfigDict


class Llamafile(LLM):
    """Llamafile lets you distribute and run large language models with a
    single file.

    To get started, see: https://github.com/Mozilla-Ocho/llamafile

    To use this class, you will need to first:

    1. Download a llamafile.
    2. Make the downloaded file executable: `chmod +x path/to/model.llamafile`
    3. Start the llamafile in server mode:

        `./path/to/model.llamafile --server --nobrowser`

    Example:
        .. code-block:: python

            from langchain_community.llms import Llamafile
            llm = Llamafile()
            llm.invoke("Tell me a joke.")
    """

    base_url: str = "http://localhost:8080"
    """Base url where the llamafile server is listening."""

    request_timeout: Optional[int] = None
    """Timeout (in seconds) for requests to the llamafile server."""

    streaming: bool = False
    """If True, receive each predicted token in real time instead of waiting
    for the whole completion to finish."""

    # Generation options forwarded to the llamafile server's /completion
    # endpoint.
    seed: int = -1
    temperature: float = 0.8
    top_k: int = 40
    top_p: float = 0.95
    min_p: float = 0.05
    n_predict: int = -1
    n_keep: int = 0
    tfs_z: float = 1.0
    typical_p: float = 1.0
    repeat_penalty: float = 1.1
    repeat_last_n: int = 64
    penalize_nl: bool = True
    presence_penalty: float = 0.0
    frequency_penalty: float = 0.0
    mirostat: int = 0
    mirostat_tau: float = 5.0
    mirostat_eta: float = 0.1

    model_config = ConfigDict(
        extra="forbid",
    )

    @property
    def _llm_type(self) -> str:
        return "llamafile"

    @property
    def _param_fieldnames(self) -> List[str]:
        # Field names that map to llamafile generation options; fields that
        # only configure the client (connection, callbacks, ...) are excluded.
        ignore_keys = [
            "base_url",
            "cache",
            "callback_manager",
            "callbacks",
            "metadata",
            "name",
            "request_timeout",
            "streaming",
            "tags",
            "verbose",
            "custom_get_token_ids",
        ]
        attrs = [
            k
            for k in get_pydantic_field_names(self.__class__)
            if k not in ignore_keys
        ]
        return attrs

    @property
    def _default_params(self) -> Dict[str, Any]:
        params = {}
        for fieldname in self._param_fieldnames:
            params[fieldname] = getattr(self, fieldname)
        return params

    def _get_parameters(
        self, stop: Optional[List[str]] = None, **kwargs: Any
    ) -> Dict[str, Any]:
        params = self._default_params

        # Only update keys that are already present in params, so unknown or
        # unhandled options are never posted to the llamafile server.
        for k, v in kwargs.items():
            if k in params:
                params[k] = v

        if stop is not None and len(stop) > 0:
            params["stop"] = stop

        if self.streaming:
            params["stream"] = True

        return params

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        """Request prompt completion from the llamafile server and return the
        output.

        Args:
            prompt: The prompt to use for generation.
            stop: A list of strings to stop generation when encountered.
            run_manager:
            **kwargs: Any additional options to pass as part of the
            generation request.

        Returns:
            The string generated by the model.

        """
        if self.streaming:
            # Stream the completion and concatenate the chunks into one string.
            with StringIO() as buff:
                for chunk in self._stream(
                    prompt, stop=stop, run_manager=run_manager, **kwargs
                ):
                    buff.write(chunk.text)

                text = buff.getvalue()

            return text
        else:
            params = self._get_parameters(stop=stop, **kwargs)
            payload = {"prompt": prompt, **params}

            try:
                response = requests.post(
                    url=f"{self.base_url}/completion",
                    headers={"Content-Type": "application/json"},
                    json=payload,
                    stream=False,
                    timeout=self.request_timeout,
                )
            except requests.exceptions.ConnectionError:
                raise requests.exceptions.ConnectionError(
                    "Could not connect to Llamafile server. Please make sure "
                    f"that a server is running at {self.base_url}."
                )

            response.raise_for_status()
            response.encoding = "utf-8"

            text = response.json()["content"]

            return text

    def _stream(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> Iterator[GenerationChunk]:
        """Yields results objects as they are generated in real time.

        It also calls the callback manager's on_llm_new_token event with
        similar parameters to the OpenAI LLM class method of the same name.

        Args:
            prompt: The prompts to pass into the model.
            stop: Optional list of stop words to use when generating.
            run_manager:
            **kwargs: Any additional options to pass as part of the
            generation request.

        Returns:
            A generator representing the stream of tokens being generated.

        Yields:
            Dictionary-like objects each containing a token

        Example:
        .. code-block:: python

            from langchain_community.llms import Llamafile
            llm = Llamafile(
                temperature = 0.0
            )
            for chunk in llm.stream("Ask 'Hi, how are you?' like a pirate:'",
                    stop=["'","\n"]):
                result = chunk["choices"][0]
                print(result["text"], end='', flush=True)

        """
        params = self._get_parameters(stop=stop, **kwargs)
        if "stream" not in params:
            params["stream"] = True

        payload = {"prompt": prompt, **params}

        try:
            response = requests.post(
                url=f"{self.base_url}/completion",
                headers={"Content-Type": "application/json"},
                json=payload,
                stream=True,
                timeout=self.request_timeout,
            )
        except requests.exceptions.ConnectionError:
            raise requests.exceptions.ConnectionError(
                "Could not connect to Llamafile server. Please make sure "
                f"that a server is running at {self.base_url}."
            )

        response.encoding = "utf8"

        # The server sends one 'data: {...}' line per predicted token.
        for raw_chunk in response.iter_lines(decode_unicode=True):
            content = self._get_chunk_content(raw_chunk)
            chunk = GenerationChunk(text=content)

            if run_manager:
                run_manager.on_llm_new_token(token=chunk.text)
            yield chunk

    def _get_chunk_content(self, chunk: str) -> str:
        """When streaming is turned on, llamafile server returns lines like:

        'data: {"content":" They","multimodal":true,"slot_id":0,"stop":false}'

        Here, we convert this to a dict and return the value of the 'content'
        field
        """
        if chunk.startswith("data:"):
            cleaned = chunk.lstrip("data: ")
            data = json.loads(cleaned)
            return data["content"]
        else:
            return chunk