a
    bg                     @   s\  d dl mZ d dlmZmZmZmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ dZdd	gZd
gZdZdZdZdZdZdZdZeddddeedddZeddddeedddZeddddeeeddd Zeddd!deed"d#d$Zeddd%deeed&d'd(Zeddd)deedd*d+Z eddd,dG d-d. d.eZ!d/S )0    )md5)AnyDictListOptional)
deprecated)get_from_dict_or_env)GraphDocument)
GraphStoreZ
__Entity__Z_Bloom_Perspective_Z_Bloom_Scene_Z_Bloom_HAS_SCENE_i'     
   a4  
CALL apoc.meta.data()
YIELD label, other, elementType, type, property
WHERE NOT type = "RELATIONSHIP" AND elementType = "node" 
  AND NOT label IN $EXCLUDED_LABELS
WITH label AS nodeLabels, collect({property:property, type:type}) AS properties
RETURN {labels: nodeLabels, properties: properties} AS output

a<  
CALL apoc.meta.data()
YIELD label, other, elementType, type, property
WHERE NOT type = "RELATIONSHIP" AND elementType = "relationship"
      AND NOT label in $EXCLUDED_LABELS
WITH label AS nodeLabels, collect({property:property, type:type}) AS properties
RETURN {type: nodeLabels, properties: properties} AS output
a8  
CALL apoc.meta.data()
YIELD label, other, elementType, type, property
WHERE type = "RELATIONSHIP" AND elementType = "node"
UNWIND other AS other_node
WITH * WHERE NOT label IN $EXCLUDED_LABELS
    AND NOT other_node IN $EXCLUDED_LABELS
RETURN {start: label, type: property, end: toString(other_node)} AS output
zuMERGE (d:Document {id:$document.metadata.id}) SET d.text = $document.page_content SET d += $document.metadata WITH d z0.3.8z1.0z6langchain_neo4j.graphs.neo4j_graph.clean_string_values)ZsinceZremovalZalternative_import)textreturnc                 C   s   |  dd ddS )zClean string values for schema.

    Cleans the input text by replacing newline and carriage return characters.

    Args:
        text (str): The input text to clean.

    Returns:
        str: The cleaned text.
    
 replacer    r   t/var/www/html/cobodadashboardai.evdpl.com/venv/lib/python3.9/site-packages/langchain_community/graphs/neo4j_graph.pyclean_string_values7   s    r   z1langchain_neo4j.graphs.neo4j_graph.value_sanitize)dr   c                 C   s   t | tri }|  D ]d\}}t |trBt|}|durz|||< qt |trrt|tk rzt|}|durz|||< q|||< q|S t | trt| tk rdd | D S dS n| S dS )a  Sanitize the input dictionary or list.

    Sanitizes the input by removing embedding-like values,
    lists with more than 128 elements, that are mostly irrelevant for
    generating answers in a LLM context. These properties, if left in
    results, can occupy significant context space and detract from
    the LLM's performance by introducing unnecessary noise and cost.

    Args:
        d (Any): The input dictionary or list to sanitize.

    Returns:
        Any: The sanitized dictionary or list.
    Nc                 S   s    g | ]}t |d urt |qS )Nvalue_sanitize).0itemr   r   r   
<listcomp>t   s   z"value_sanitize.<locals>.<listcomp>)
isinstancedictitemsr   listlen
LIST_LIMIT)r   Znew_dictkeyvalueZsanitized_valuer   r   r   r   J   s.    






r   z9langchain_neo4j.graphs.neo4j_graph._get_node_import_query)baseEntityLabelinclude_sourcer   c                 C   sN   | r*|rt nd dt d|r dnd dS |r2t nd d|r@dnd dS d S )	N #UNWIND $data AS row MERGE (source:`z-` {id: row.id}) SET source += row.properties z MERGE (d)-[:MENTIONS]->(source) zmWITH source, row CALL apoc.create.addLabels( source, [row.type] ) YIELD node RETURN distinct 'done' AS resultzbUNWIND $data AS row CALL apoc.merge.node([row.type], {id: row.id}, row.properties, {}) YIELD node zMERGE (d)-[:MENTIONS]->(node) z RETURN distinct 'done' AS result)include_docs_queryBASE_ENTITY_LABEL)r&   r'   r   r   r   _get_node_import_query}   s    

r,   z8langchain_neo4j.graphs.neo4j_graph._get_rel_import_query)r&   r   c                 C   s   | rdt  dt  dS dS d S )Nr)   z#` {id: row.source}) MERGE (target:`z` {id: row.target}) WITH source, target, row CALL apoc.merge.relationship(source, row.type, {}, row.properties, target) YIELD rel RETURN distinct 'done'a-  UNWIND $data AS row CALL apoc.merge.node([row.source_label], {id: row.source},{}, {}) YIELD node as source CALL apoc.merge.node([row.target_label], {id: row.target},{}, {}) YIELD node as target CALL apoc.merge.relationship(source, row.type, {}, row.properties, target) YIELD rel RETURN distinct 'done'r+   )r&   r   r   r   _get_rel_import_query   s    r.   z1langchain_neo4j.graphs.neo4j_graph._format_schema)schemais_enhancedr   c              
   C   s  g }g }|r| d   D ]\\}}|d| d |D ]:}d}|d dkr|dr|dd	tkr|d rd
t|d d  dnd}n$|d rddd |d D  nd}n|d dv r|dd urd|d  d|d  }n$|drd
|d d  dnd}n@|d dkrR|dr:|d tkr:q:d|d  d|d  }|d|d  d|d  d|  q:q| d   D ]^\}}|d| d |D ]:}d}|d dkr |dd	tkr|d rd
t|d d  dnd}n&|d rddd |d D  nd}n|d dv rx|drTd|d  d |d  }n"|d rrd
|d d  dnd}nD|d dkr|dr|d tkrqd|d  d|d  }|d|d  d!|d  d"|  qqn| d   D ]4\}	}
d#d$d |
D }||	 d%| d& q| d   D ]4\}}
d#d'd |
D }|| d%| d& q8d(d | d) D }d*d+d*|d,d*|d-d*|gS ).N
node_propsz- **z**r(   typeSTRINGvaluesZdistinct_count   z
Example: "r   "zAvailable options: c                 S   s   g | ]}t |qS r   r   r   elr   r   r   r          z"_format_schema.<locals>.<listcomp>ZINTEGERFLOATZDATEZ	DATE_TIMEZLOCAL_DATE_TIMEminzMin: z, Max: maxLISTZmin_sizez
Min Size: z, Max Size: max_sizez  - `property`: r   	rel_propsc                 S   s   g | ]}t |qS r   r7   r8   r   r   r   r      r:   z, Max:  : z` , c                 S   s"   g | ]}|d   d|d  qS rA   rD   r2   r   r   propr   r   r   r     r:   z {}c                 S   s"   g | ]}|d   d|d  qS rF   r   rG   r   r   r   r   &  r:   c              	   S   s0   g | ](}d |d  d|d  d|d  dqS )z(:startz)-[:r2   z]->(:end)r   r8   r   r   r   r   +  s   relationshipsr   zNode properties:zRelationship properties:zThe relationships:)r    appendgetDISTINCT_VALUE_LIMITr   r#   join)r/   r0   Zformatted_node_propsZformatted_rel_propsZ	node_type
propertiesrH   ZexampleZrel_typelabelpropsZ	props_strr2   Zformatted_relsr   r   r   _format_schema   s    
	
 rU   z4langchain_neo4j.graphs.neo4j_graph._remove_backticksc                 C   s   |  ddS )N`r(   r   r   r   r   r   _remove_backticks<  s    rW   zlangchain_neo4j.Neo4jGraphc                   @   s   e Zd ZdZddddee ee ee ee ee eeee edd
ddZ	e
ed	d
dZe
eeef d	ddZi feeeeeef  dddZdd	ddZdee eeddddZdeeeeef  eeedddZdS )
Neo4jGrapha  Neo4j database wrapper for various graph operations.

    Parameters:
    url (Optional[str]): The URL of the Neo4j database server.
    username (Optional[str]): The username for database authentication.
    password (Optional[str]): The password for database authentication.
    database (str): The name of the database to connect to. Default is 'neo4j'.
    timeout (Optional[float]): The timeout for transactions in seconds.
            Useful for terminating long-running queries.
            By default, there is no timeout set.
    sanitize (bool): A flag to indicate whether to remove lists with
            more than 128 elements from results. Useful for removing
            embedding-like properties from database responses. Default is False.
    refresh_schema (bool): A flag whether to refresh schema information
            at initialization. Default is True.
    enhanced_schema (bool): A flag whether to scan the database for
            example values and use them in the graph schema. Default is False.
    driver_config (Dict): Configuration passed to Neo4j Driver.

    *Security note*: Make sure that the database connection uses credentials
        that are narrowly-scoped to only include necessary permissions.
        Failure to do so may result in data corruption or loss, since the calling
        code may attempt commands that would result in deletion, mutation
        of data if appropriately prompted or reading sensitive data if such
        data is present in the database.
        The best way to guard against such negative outcomes is to (as appropriate)
        limit the permissions granted to the credentials used with this tool.

        See https://python.langchain.com/docs/security for more information.
    NFT)driver_configenhanced_schema)
urlusernamepassworddatabasetimeoutsanitizerefresh_schemarY   rZ   r   c             
   C   sp  zddl }
W n ty&   tdY n0 td|idd}|dkrN|dkrNd}n(td|idd}td	|id	d
}||f}td|iddd}|
jj|fd|i|pi | _|| _|| _|| _|	| _	d| _
i | _z| j  W n< |
jjy   tdY n  |
jjy   tdY n0 |rlz|   W nB |
jjyj } z$|jdkrRtd|W Y d}~n
d}~0 0 dS )z*Create a new Neo4j graph wrapper instance.r   NzRCould not import neo4j python package. Please install it with `pip install neo4j`.r[   Z	NEO4J_URIr(   r\   ZNEO4J_USERNAMEr]   ZNEO4J_PASSWORDr^   ZNEO4J_DATABASEneo4jauthzJCould not connect to Neo4j database. Please ensure that the url is correctz]Could not connect to Neo4j database. Please ensure that the username and password are correctz+Neo.ClientError.Procedure.ProcedureNotFoundzCould not use APOC procedures. Please ensure the APOC plugin is installed in Neo4j and that 'apoc.meta.data()' is allowed in Neo4j configuration )rb   ImportErrorr   ZGraphDatabaseZdriver_driver	_databaser_   r`   _enhanced_schemar/   structured_schemaZverify_connectivity
exceptionsZServiceUnavailable
ValueErrorZ	AuthErrorra   ClientErrorcode)selfr[   r\   r]   r^   r_   r`   ra   rY   rZ   rb   rc   er   r   r   __init__j  sn    

zNeo4jGraph.__init__)r   c                 C   s   | j S )zReturns the schema of the Graph)r/   rm   r   r   r   
get_schema  s    zNeo4jGraph.get_schemac                 C   s   | j S )z*Returns the structured schema of the Graph)rh   rp   r   r   r   get_structured_schema  s    z Neo4jGraph.get_structured_schema)queryparamsr   c           
   
   C   s<  ddl m} ddlm} zL| jj||| jd| j|d\}}}dd |D }| jr`dd |D }|W S  |y } zJ|j	d	ks|j	d
krd|j
v s|j	dkrd|j
v sd|j
v s W Y d}~n
d}~0 0 | jj| jdN}	|	||| jd|}dd |D }| jrdd |D }|W  d   S 1 s.0    Y  dS )a  Query Neo4j database.

        Args:
            query (str): The Cypher query to execute.
            params (dict): The parameters to pass to the query.

        Returns:
            List[Dict[str, Any]]: The list of dictionaries containing the query results.
        r   )Query)
Neo4jError)r   r_   )Z	database_Zparameters_c                 S   s   g | ]}|  qS r   datar   rr   r   r   r     r:   z$Neo4jGraph.query.<locals>.<listcomp>c                 S   s   g | ]}t |qS r   r   r8   r   r   r   r     r:   z+Neo.DatabaseError.Statement.ExecutionFailedz4Neo.DatabaseError.Transaction.TransactionStartFailedzin an implicit transactionz'Neo.ClientError.Statement.SemanticErrorz&in an open transaction is not possiblez+tried to execute in an explicit transactionN)r^   c                 S   s   g | ]}|  qS r   rw   ry   r   r   r   r     r:   c                 S   s   g | ]}t |qS r   r   r8   r   r   r   r     r:   )rb   ru   neo4j.exceptionsrv   re   Zexecute_queryr_   rf   r`   rl   messagesessionrun)
rm   rs   rt   ru   rv   rx   _Z	json_datarn   r}   r   r   r   rs     sD    
zNeo4jGraph.queryc              	   C   sj  ddl m}m} dd | jtdttg idD }dd | jtdtidD }dd | jt	dttg idD }z| d	}| d
}W n |y   g }g }Y n0 dd |D dd |D |||dd| _
| jrR| d}|d d D ]}	|	d tv rq| j
d |	d }
|
sq| |	d |
|	d tk }zB| |d d }|
D ]&}|d |v rL|||d   qLW q |y   Y qY q0 q|d d D ]}|d tv rq| j
d |d }|sҐq| j|d ||d tk dd}zB| |d d }|D ]&}|d |v r|||d   qW n |yL   Y qY n0 qt| j
| j}|| _dS )z?
        Refreshes the Neo4j graph schema information.
        r   )rk   CypherTypeErrorc                 S   s   g | ]}|d  qS outputr   r8   r   r   r   r     s   z-Neo4jGraph.refresh_schema.<locals>.<listcomp>EXCLUDED_LABELS)rt   c                 S   s   g | ]}|d  qS r   r   r8   r   r   r   r     s   c                 S   s   g | ]}|d  qS r   r   r8   r   r   r   r   
  s   zSHOW CONSTRAINTSzCALL apoc.schema.nodes() YIELD label, properties, type, size, valuesSelectivity WHERE type = 'RANGE' RETURN *, size * valuesSelectivity as distinctValuesc                 S   s   i | ]}|d  |d qS )labelsrR   r   r8   r   r   r   
<dictcomp>!  r:   z-Neo4jGraph.refresh_schema.<locals>.<dictcomp>c                 S   s   i | ]}|d  |d qS )r2   rR   r   r8   r   r   r   r   "  r:   )
constraintindex)r1   rC   rM   metadatazCALL apoc.meta.graphSample() YIELD nodes, relationships RETURN nodes, [rel in relationships | {name:apoc.any.property(rel, 'type'), count: apoc.any.property(rel, 'count')}] AS relationshipsnodesnamer1   countr   rA   rM   rC   T)is_relationshipN)r{   rk   r   rs   node_properties_queryr   r+   rel_properties_queryEXCLUDED_RELS	rel_queryrh   rg   rO   _enhanced_schema_cypherEXHAUSTIVE_SEARCH_LIMITupdaterU   r/   )rm   rk   r   Znode_propertiesZrel_propertiesrM   r   r   Zschema_countsnoder1   Zenhanced_cypherZenhanced_inforH   relrC   r/   r   r   r   ra     s    	


zNeo4jGraph.refresh_schema)graph_documentsr'   r&   r   c           	      C   s   |rFt dd | jdi dg D }|sF| dt d |   t||}t|}|D ]}|jj	dst
|jjd |jj	d< |jD ]}t|j|_q| |d	d |jD |jjd
 | |ddd |jD i q\dS )aR  
        This method constructs nodes and relationships in the graph based on the
        provided GraphDocument objects.

        Parameters:
        - graph_documents (List[GraphDocument]): A list of GraphDocument objects
        that contain the nodes and relationships to be added to the graph. Each
        GraphDocument should encapsulate the structure of part of the graph,
        including nodes, relationships, and the source document information.
        - include_source (bool, optional): If True, stores the source document
        and links it to nodes in the graph using the MENTIONS relationship.
        This is useful for tracing back the origin of data. Merges source
        documents based on the `id` property from the source document metadata
        if available; otherwise it calculates the MD5 hash of `page_content`
        for merging process. Defaults to False.
        - baseEntityLabel (bool, optional): If True, each newly created node
        gets a secondary __Entity__ label, which is indexed and improves import
        speed and performance. Defaults to False.
        c                 S   s(   g | ] }|d  t gko"|d dgkqS )ZlabelsOrTypesrR   idr-   r8   r   r   r   r   v  s   z2Neo4jGraph.add_graph_documents.<locals>.<listcomp>r   r   z'CREATE CONSTRAINT IF NOT EXISTS FOR (b:z) REQUIRE b.id IS UNIQUE;r   zutf-8c                 S   s   g | ]
}|j qS r   )__dict__r8   r   r   r   r     r:   )rx   documentrx   c                 S   sJ   g | ]B}|j jt|j j|jjt|jjt|jd d |jdqS )r   r   )sourceZsource_labeltargetZtarget_labelr2   rR   )r   r   rW   r2   r   r   upperrR   r8   r   r   r   r     s   

N)anyrh   rO   rs   r+   ra   r,   r.   r   r   r   Zpage_contentencode	hexdigestr   rW   r2   r   rM   )	rm   r   r'   r&   Zconstraint_existsZnode_import_queryZrel_import_queryr   r   r   r   r   add_graph_documents[  sF    



zNeo4jGraph.add_graph_documents)label_or_typerR   
exhaustiver   r   c                    s  |rd  d}nd  d}g }g }i }|rh|D ],}	|	d |	d }
|
dkr| d d	 d
 | d dt d d n|
dv r| d d d | d d d | d d d | d d d d nT|
dkrB| d d d d d	 | d d  d n|
d!v rNq4d"|  d# |< q4n|d$7 }|D ]}	|	d |	d }
 fd%d&| jd' d( D }|
dkrH|r|d) d*d)kr|d) d+tkr| d,  d- d.d) d/ }| d0| d1t|  n*| d d	 d
 | d2 d
 n|
dv r|s| d3 d d
 | d2 d
 nf| d d d | d d d | d d d | d d d d nV|
dkr4| d d d d d	 | d d  d n|
d!v rBqtd"|  d# |< qtd4d5| }d6d7d8d9 | D  d: }d;|||g}|S )<NzMATCH ()-[n:`z`]->()z
MATCH (n:`z`)rA   r2   r3   z'collect(distinct substring(toString(n.`z`), 0, 50)) AS `z_values`zvalues:`z_values`[..z], distinct_count: size(`z	_values`)r;   zmin(n.`z`) AS `z_min`zmax(n.`z_max`zcount(distinct n.`z
_distinct`zmin: toString(`z_min`), max: toString(`z_max`), distinct_count: `r?   zmin(size(n.`z`)) AS `z_size_min`, max(size(n.`z
_size_max`zmin_size: `z_size_min`, max_size: `)ZBOOLEANZPOINTZDURATION{rI   z WITH n LIMIT 5c                    s6   g | ].}|d   kr|d gkr|d dkr|qS )rS   rR   r2   RANGEr   r8   r   Z	prop_namer   r   r     s
   z6Neo4jGraph._enhanced_schema_cypher.<locals>.<listcomp>r   r   r   sizeZdistinctValuesz&CALL apoc.schema.properties.distinct('z', 'z') YIELD valuer%   zvalues: z, distinct_count: z	values: `zcollect(distinct toString(n.`zWITH z,
     zRETURN {rE   c                 s   s"   | ]\}}d | d| V  qdS )rV   rB   Nr   )r   kvr   r   r   	<genexpr>J  r:   z5Neo4jGraph._enhanced_schema_cypher.<locals>.<genexpr>z} AS outputr   )	rN   rP   poprh   rO   rs   r"   rQ   r    )rm   r   rR   r   r   Zmatch_clauseZwith_clausesZreturn_clausesZoutput_dictrH   Z	prop_typeZ
prop_indexZdistinct_valuesZwith_clauseZreturn_clauseZcypher_queryr   r   r   r     s   







z"Neo4jGraph._enhanced_schema_cypher)NNNNNFT)FF)F)__name__
__module____qualname____doc__r   strfloatboolr   ro   rA   rq   r   rr   r   r   rs   ra   r	   r   r   r   r   r   r   rX   E  sd   !       
M6g  X rX   N)"hashlibr   typingr   r   r   r   Zlangchain_core._api.deprecationr   Zlangchain_core.utilsr   Z)langchain_community.graphs.graph_documentr	   Z&langchain_community.graphs.graph_storer
   r+   r   r   r   r#   rP   r   r   r   r*   r   r   r   r   r,   r.   rU   rW   rX   r   r   r   r   <module>   sv   
	. 