a
    dg?                     @   s   d dl Z d dlmZmZmZmZmZmZ d dlm	Z	 d dl
mZ d dlmZ d dlmZ d dlmZ d dlmZmZmZ d d	lmZ G d
d deZdS )    N)AnyDictListOptionalUnioncast)Chain)LLMChain)BaseLanguageModel)FewShotPromptTemplate)is_basemodel_instance)	BaseModel
ConfigDictmodel_validator)Selfc                   @   s   e Zd ZU dZeed< dZee ed< g Z	e
ed< dZee ed< dZeed< ed	d
ZeddedddZeeedddZeeeeef ef ddddZeeeeee dddZdeeeeeee dddZdS )SyntheticDataGeneratora  Generate synthetic data using the given LLM and few-shot template.

    Utilizes the provided LLM to produce synthetic data based on the
    few-shot prompt template.

    Attributes:
        template (FewShotPromptTemplate): Template for few-shot prompting.
        llm (Optional[BaseLanguageModel]): Large Language Model to use for generation.
        llm_chain (Optional[Chain]): LLM chain with the LLM and few-shot template.
        example_input_key (str): Key to use for storing example inputs.

    Usage Example:
        >>> template = FewShotPromptTemplate(...)
        >>> llm = BaseLanguageModel(...)
        >>> generator = SyntheticDataGenerator(template=template, llm=llm)
        >>> results = generator.generate(subject="climate change", runs=5)
    templateNllmresults	llm_chainexampleexample_input_keyT)Zvalidate_assignmentafter)mode)returnc                 C   s@   | j }| j}| j}|s<|d u s&|d u r.tdt||d| _ | S )NzJBoth llm and few_shot_template must be provided if llm_chain is not given.)r   prompt)r   r   r   
ValueErrorr	   )selfr   r   Zfew_shot_template r   /var/www/html/cobodadashboardai.evdpl.com/venv/lib/python3.9/site-packages/langchain_experimental/tabular_synthetic_data/base.pyset_llm_chain*   s    z$SyntheticDataGenerator.set_llm_chain)
input_dictr   c                 C   s   d dd |  D }|S )Nz, c                 S   s   g | ]\}}| d | qS )z: r   ).0keyvaluer   r   r   
<listcomp>=       zASyntheticDataGenerator._format_dict_to_string.<locals>.<listcomp>)joinitems)r!   Zformatted_strr   r   r   _format_dict_to_string:   s    z-SyntheticDataGenerator._format_dict_to_string)r   r   c                 C   sp   | j rl| j jrlt|r,| tt| }nt|trB| |}nt|}| j j	d | j j
| j|i dS )zYPrevents duplicates by adding previously generated examples to the few shot
        list.r   N)r   Zexamplesr   r)   r   r   dict
isinstancestrpopappendr   )r   r   Zformatted_exampler   r   r   _update_examplesA   s    
z'SyntheticDataGenerator._update_examples)subjectrunsargskwargsr   c                 O   sR   | j du rtdt|D ]0}| j j|d|i|}| j| | | q| jS )a  Generate synthetic data using the given subject string.

        Args:
            subject (str): The subject the synthetic data will be about.
            runs (int): Number of times to generate the data.
            extra (str): Extra instructions for steerability in data generation.

        Returns:
            List[str]: List of generated synthetic data.

        Usage Example:
            >>> results = generator.generate(subject="climate change", runs=5,
            extra="Focus on environmental impacts.")
        NzOllm_chain is none, either set either llm_chain or llm at generator constructionr0   )r   r   rangerunr   r.   r/   )r   r0   r1   r2   r3   _resultr   r   r   generateP   s    
zSyntheticDataGenerator.generate )r0   r1   extrar2   r3   r   c                    sH   dt t ttddfddtj fddt|D  I dH  jS )	a  Generate synthetic data using the given subject asynchronously.

        Note: Since the LLM calls run concurrently,
        you may have fewer duplicates by adding specific instructions to
        the "extra" keyword argument.

        Args:
            subject (str): The subject the synthetic data will be about.
            runs (int): Number of times to generate the data asynchronously.
            extra (str): Extra instructions for steerability in data generation.

        Returns:
            List[str]: List of generated synthetic data for the given subject.

        Usage Example:
            >>> results = await generator.agenerate(subject="climate change", runs=5,
            extra="Focus on env impacts.")
        r9   N)r0   r:   r2   r3   r   c                    s8    j d ur4 j j|| |d|I d H } j| d S )Nr0   r:   )r   Zarunr   r.   )r0   r:   r2   r3   r7   )r   r   r   	run_chain   s    
z3SyntheticDataGenerator.agenerate.<locals>.run_chainc                 3   s   | ]} d V  qdS )r;   Nr   )r"   r6   )r:   r<   r0   r   r   	<genexpr>   r&   z3SyntheticDataGenerator.agenerate.<locals>.<genexpr>)r9   )r,   r   asynciogatherr4   r   )r   r0   r1   r:   r2   r3   r   )r:   r<   r   r0   r   	ageneratej   s     	
z SyntheticDataGenerator.agenerate)r9   )__name__
__module____qualname____doc__r   __annotations__r   r   r
   r   listr   r   r   r,   r   Zmodel_configr   r   r    staticmethodr   r)   r   r   r   r/   intr   r8   r@   r   r   r   r   r      s(   
" 
r   )r>   typingr   r   r   r   r   r   Zlangchain.chains.baser   Zlangchain.chains.llmr	   Zlangchain_core.language_modelsr
   Zlangchain_core.prompts.few_shotr   Zlangchain_core.utils.pydanticr   Zpydanticr   r   r   Ztyping_extensionsr   r   r   r   r   r   <module>   s    