import asyncio
import importlib
from typing import Any, List, Optional

from langchain_experimental.comprehend_moderation.base_moderation_exceptions import (
    ModerationToxicityError,
)


class ComprehendToxicity:
    """Class to handle toxicity moderation."""

    def __init__(
        self,
        client: Any,
        callback: Optional[Any] = None,
        unique_id: Optional[str] = None,
        chain_id: Optional[str] = None,
    ) -> None:
        self.client = client
        self.moderation_beacon = {
            "moderation_chain_id": chain_id,
            "moderation_type": "Toxicity",
            "moderation_status": "LABELS_NOT_FOUND",
        }
        self.callback = callback
        self.unique_id = unique_id

    def _toxicity_init_validate(self, max_size: int) -> Any:
        """
        Validate and initialize toxicity processing configuration.

        Args:
            max_size (int): Maximum sentence size defined in the
            configuration object.

        Raises:
            Exception: If the maximum sentence size exceeds the 5KB limit.

        Note:
            This function ensures that the NLTK punkt tokenizer is downloaded
            if not already present.

        Returns:
            None
        """
        if max_size > 1024 * 5:
            raise Exception("The sentence length should not exceed 5KB.")
        try:
            nltk = importlib.import_module("nltk")
            nltk.data.find("tokenizers/punkt")
            return nltk
        except ImportError:
            raise ModuleNotFoundError(
                "Could not import nltk python package. "
                "Please install it with `pip install nltk`."
            )
        except LookupError:
            nltk.download("punkt")

    def _split_paragraph(
        self, prompt_value: str, max_size: int = 1024
    ) -> List[List[str]]:
        """
        Split a paragraph into chunks of sentences, respecting the maximum size limit.

        Args:
            paragraph (str): The input paragraph to be split into chunks.
            max_size (int, optional): The maximum size limit in bytes for
            each chunk. Defaults to 1024.

        Returns:
            List[List[str]]: A list of chunks, where each chunk is a list
            of sentences.

        Note:
            This function validates the maximum sentence size based on service
            limits using the '_toxicity_init_validate' function. It uses the NLTK
            sentence tokenizer to split the paragraph into sentences.

        Example:
            paragraph = "This is a sample paragraph. It
            contains multiple sentences. ..."
            chunks = split_paragraph(paragraph, max_size=2048)
        """
        # Validate the maximum sentence size against service limits and ensure
        # the NLTK punkt tokenizer is available.
        nltk = self._toxicity_init_validate(max_size)
        sentences = nltk.sent_tokenize(prompt_value)

        chunks: List[List[str]] = []
        current_chunk: List[str] = []
        current_size = 0

        for sentence in sentences:
            sentence_size = len(sentence.encode("utf-8"))
            # Start a new chunk if adding this sentence would exceed max_size
            # or the current chunk already holds 10 sentences.
            if (current_size + sentence_size > max_size) or (
                len(current_chunk) == 10
            ):
                if current_chunk:  # Avoid appending empty chunks
                    chunks.append(current_chunk)
                current_chunk = []
                current_size = 0
            current_chunk.append(sentence)
            current_size += sentence_size

        # Add any remaining sentences as the final chunk.
        if current_chunk:
            chunks.append(current_chunk)
        return chunks

    def validate(self, prompt_value: str, config: Any = None) -> str:
        """
        Check the toxicity of a given text prompt using the AWS
        Comprehend service and apply actions based on configuration.

        Args:
            prompt_value (str): The text content to be checked for toxicity.
            config (Dict[str, Any]): Configuration for toxicity checks and actions.

        Returns:
            str: The original prompt_value if allowed or no toxicity found.

        Raises:
            ModerationToxicityError: If the prompt contains toxic labels and
            cannot be processed based on the configuration.
        """
        chunks = self._split_paragraph(prompt_value=prompt_value)
        for sentence_list in chunks:
            segments = [{"Text": sentence} for sentence in sentence_list]
            response = self.client.detect_toxic_content(
                TextSegments=segments, LanguageCode="en"
            )
            if self.callback and self.callback.toxicity_callback:
                self.moderation_beacon["moderation_input"] = segments
                self.moderation_beacon["moderation_output"] = response

            toxicity_found = False
            threshold = config.get("threshold")
            toxicity_labels = config.get("labels")

            if not toxicity_labels:
                # No specific labels configured: flag any label at or above the threshold.
                for item in response["ResultList"]:
                    for label in item["Labels"]:
                        if label["Score"] >= threshold:
                            toxicity_found = True
                            break
            else:
                # Flag only the configured labels at or above the threshold.
                for item in response["ResultList"]:
                    for label in item["Labels"]:
                        if (
                            label["Name"] in toxicity_labels
                            and label["Score"] >= threshold
                        ):
                            toxicity_found = True
                            break

            if self.callback and self.callback.toxicity_callback:
                if toxicity_found:
                    self.moderation_beacon["moderation_status"] = "LABELS_FOUND"
                asyncio.create_task(
                    self.callback.on_after_toxicity(
                        self.moderation_beacon, self.unique_id
                    )
                )
            if toxicity_found:
                raise ModerationToxicityError
        return prompt_value
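

# --- Minimal usage sketch (not part of the module above) ---
# This illustrates how the class might be wired up, assuming a boto3 Comprehend
# client whose region supports the `detect_toxic_content` API, and a config dict
# carrying the "threshold" and "labels" keys read by `validate`. The region name
# and prompt text below are illustrative only.
if __name__ == "__main__":
    import boto3

    comprehend_client = boto3.client("comprehend", region_name="us-east-1")
    toxicity_check = ComprehendToxicity(client=comprehend_client)

    try:
        # Returns the prompt unchanged when no label meets the threshold;
        # raises ModerationToxicityError otherwise. An empty "labels" list
        # means every returned label is checked against the threshold.
        checked_prompt = toxicity_check.validate(
            "Some user-supplied text to screen before it reaches the LLM.",
            config={"threshold": 0.5, "labels": []},
        )
        print(checked_prompt)
    except ModerationToxicityError:
        print("Toxic content detected; prompt was blocked.")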