a
    dgE4                     @   s   d dl mZ d dlmZmZmZmZmZmZm	Z	 d dl
Z
d dlmZ d dlmZmZmZ d dlmZ G dd deeZeed	d
dZG dd dZg dZg dZG dd dZG dd dZdS )    )Enum)AnyDictListOptionalSequenceTupleUnionN)get_from_env)GraphDocumentNodeRelationship)Documentc                   @   s   e Zd ZdZdZdZdS )
TypeOptionfactsentities	sentimentN)__name__
__module____qualname__FACTSZENTITIESZ	SENTIMENT r   r   /var/www/html/cobodadashboardai.evdpl.com/venv/lib/python3.9/site-packages/langchain_experimental/graph_transformers/diffbot.pyr   
   s   r   )sreturnc                 C   sB   |   }|s| S |d  }dd |dd D }d|g| S )z.Formats a string to be used as a property key.r   c                 S   s   g | ]}|  qS r   )
capitalize).0wordr   r   r   
<listcomp>       z'format_property_key.<locals>.<listcomp>   N )splitlowerjoin)r   wordsZ
first_wordZcapitalized_wordsr   r   r   format_property_key   s    r&   c                   @   sZ   e Zd ZdZddddZeeeef ef e	ee
f ddddZee dd	d
ZdS )	NodesLista
  List of nodes with associated properties.

    Attributes:
        nodes (Dict[Tuple, Any]): Stores nodes as keys and their properties as values.
            Each key is a tuple where the first element is the
            node ID and the second is the node type.
    Nr   c                 C   s   t  | _d S )N)dictnodesselfr   r   r   __init__$   s    zNodesList.__init__)node
propertiesr   c                 C   s*   || j vr|| j |< n| j | | dS )a  
        Adds or updates node properties.

        If the node does not exist in the list, it's added along with its properties.
        If the node already exists, its properties are updated with the new values.

        Args:
            node (Tuple): A tuple containing the node ID and node type.
            properties (Dict): A dictionary of properties to add or update for the node.
        N)r*   update)r,   r.   r/   r   r   r   add_node_property'   s    
zNodesList.add_node_propertyc                    s    fdd j D }|S )z
        Returns the nodes as a list of Node objects.

        Each Node object will have its ID, type, and properties populated.

        Returns:
            List[Node]: A list of Node objects.
        c                    s(   g | ] }t |d  |d  j| dqS )r   r    )idtyper/   )r   r*   )r   keyr+   r   r   r   B   s   z.NodesList.return_node_list.<locals>.<listcomp>)r*   )r,   r*   r   r+   r   return_node_list9   s    	
zNodesList.return_node_list)r   r   r   __doc__r-   r   r	   strintr   r   r1   r   r   r5   r   r   r   r   r'      s   r'   )DateNumberz	Job titlezCause of deathzOrganization typezAcademic title))ZHEADQUARTERSZORGANIZATION_LOCATIONS)Z	RESIDENCEPERSON_LOCATION)ZALL_PERSON_LOCATIONSr;   )ZCHILDZ	HAS_CHILD)ZPARENTZ
HAS_PARENT)Z	CUSTOMERSZHAS_CUSTOMER)Z
SKILLED_ATZINTERESTED_INc                   @   s.   e Zd ZdZddddZeedddZdS )	SimplifiedSchemazSimplified schema mapping.

    Attributes:
        schema (Dict): A dictionary containing the mapping to simplified schema types.
    Nr(   c                 C   s(   t  | _tD ]}|d | j|d < qdS )z?Initializes the schema dictionary based on the predefined list.r    r   N)r)   schemaschema_mapping)r,   rowr   r   r   r-   f   s    zSimplifiedSchema.__init__)r3   r   c                 C   s(   z| j | W S  ty"   | Y S 0 dS )a5  
        Retrieves the simplified schema type for a given original type.

        Args:
            type (str): The original schema type to find the simplified type for.

        Returns:
            str: The simplified schema type if it exists;
                 otherwise, returns the original type.
        N)r=   KeyError)r,   r3   r   r   r   get_typel   s    zSimplifiedSchema.get_type)r   r   r   r6   r-   r7   rA   r   r   r   r   r<   _   s   r<   c                   @   s   e Zd ZdZdddddejgfddee ee	e	e	e
e e	dddd	Zeeeef d
ddZeeef eedddZee e
e dddZdS )DiffbotGraphTransformera  Transform documents into graph documents using Diffbot NLP API.

    A graph document transformation system takes a sequence of Documents and returns a
    sequence of Graph Documents.

    Example:
        .. code-block:: python
          from langchain_experimental.graph_transformers import DiffbotGraphTransformer
          from langchain_core.documents import Document

          diffbot_api_key = "DIFFBOT_API_KEY"
          diffbot_nlp = DiffbotGraphTransformer(diffbot_api_key=diffbot_api_key)

          document = Document(page_content="Mike Tunge is the CEO of Diffbot.")
          graph_documents = diffbot_nlp.convert_to_graph_documents([document])

    Ngffffff?TF)include_confidence)diffbot_api_keyfact_confidence_thresholdinclude_qualifiersinclude_evidencesimplified_schemaextract_typesrC   r   c                C   sP   |pt dd| _|| _|| _|| _|| _d| _|r:t | _|sFtd|| _	dS )a  
        Initialize the graph transformer with various options.

        Args:
            diffbot_api_key (str):
               The API key for Diffbot's NLP services.

            fact_confidence_threshold (float):
                Minimum confidence level for facts to be included.
            include_qualifiers (bool):
                Whether to include qualifiers in the relationships.
            include_evidence (bool):
                Whether to include evidence for the relationships.
            simplified_schema (bool):
                Whether to use a simplified schema for relationships.
            extract_types (List[TypeOption]):
                A list of data types to extract. Facts, entities, and
                sentiment are supported. By default, the option is
                set to facts. A fact represents a combination of
                source and target nodes with a relationship type.
            include_confidence (bool):
                Whether to include confidence scores on nodes and rels
        rD   ZDIFFBOT_API_KEYNzZ`extract_types` cannot be an empty array. Allowed values are 'facts', 'entities', or both.)
r
   rD   fact_threshold_confidencerF   rG   rC   rH   r<   
ValueErrorrI   )r,   rD   rE   rF   rG   rH   rI   rC   r   r   r   r-      s    "z DiffbotGraphTransformer.__init__)textr   c                 C   sJ   |dd}d | j}d}d| d| d| j d}tj||d	}| S )
z
        Make an API request to the Diffbot NLP endpoint.

        Args:
            text (str): The text to be processed.

        Returns:
            Dict[str, Any]: The JSON response from the API.
        en)contentlang,znl.diffbot.comzhttps://z/v1/?fields=z&token=z&language=en)data)r$   rI   rD   requestspostjson)r,   rL   payloadZFIELDSZHOSTurlresultr   r   r   nlp_request   s    z#DiffbotGraphTransformer.nlp_request)rU   documentr   c                 C   s2  d|vs|d s.d|vs |d s.t g g |dS t }d|v r|d r|d D ]}|d sZqL|d rn|d d n|d }|d d d  }|d }|||fd|i |dd	ur|||fd|di | jrL|||fd
|d
i qLt }d|v r |d r |d D ]
}|d
 | jk r,q|d d s>q|d d r\|d d d n
|d d }|d d d d  }|d d }t||d}	|||fd|i |d d r|d d d n
|d d }
|d d d d  }|d d }|t	v r&|||ft
|d d |i nt|
|d}||
|fd|i |d d dd }| jrr| j|}t }dd |d D d }| jr|d|i | jr|d
|d
 i | jr|dr|d D ]&}t
|d d }|d d ||< qt|	|||d}|| qt | ||dS )a3  
        Transform the Diffbot NLP response into a GraphDocument.

        Args:
            payload (Dict[str, Any]): The JSON response from Diffbot's NLP API.
            document (Document): The original document.

        Returns:
            GraphDocument: The transformed document as a graph.
        r   r   )r*   relationshipssourceZallTypesZallUrisr   namer   N
confidencevalueentity)r2   r3   property _c                 S   s   g | ]}|d  qS )Zpassager   )r   elr   r   r   r   B  s   z<DiffbotGraphTransformer.process_response.<locals>.<listcomp>ZevidenceZ
qualifiers)r[   targetr3   r/   )r   r'   r   r1   getrC   listrJ   r   FACT_TO_PROPERTY_TYPEr&   replaceupperrH   rA   r)   rG   r0   rF   r   appendr5   )r,   rU   rY   Z
nodes_listrecordZ	source_idZsource_labelZsource_namerZ   Zsource_nodeZ	target_idZtarget_labelZtarget_nameZtarget_nodeZrel_typeZrel_propertiesZrelationship_evidencer`   Zprop_keyZrelationshipr   r   r   process_response   s    




z(DiffbotGraphTransformer.process_response)	documentsr   c                 C   s4   g }|D ]&}|  |j}| ||}|| q|S )a  Convert a sequence of documents into graph documents.

        Args:
            documents (Sequence[Document]): The original documents.
            kwargs: Additional keyword arguments.

        Returns:
            Sequence[GraphDocument]: The transformed documents as graphs.
        )rX   Zpage_contentrl   rj   )r,   rm   resultsrY   Zraw_resultsZgraph_documentr   r   r   convert_to_graph_documents\  s    z2DiffbotGraphTransformer.convert_to_graph_documents)r   r   r   r6   r   r   r   r7   floatboolr   r-   r   r   rX   r   r   rl   r   ro   r   r   r   r   rB   }   s4   	4rB   )enumr   typingr   r   r   r   r   r   r	   rR   Zlangchain.utilsr
   Z)langchain_community.graphs.graph_documentr   r   r   Zlangchain_core.documentsr   r7   r   r&   r'   rg   r>   r<   rB   r   r   r   r   <module>   s   $/
