a
    bgD"                     @   sl   d dl Z d dlmZ d dlmZ d dlmZmZmZm	Z	m
Z
mZ d dlmZ d dlmZ G dd deZdS )	    N)PathLike)Path)AnyCallableDictIteratorOptionalUnion)Document)
BaseLoaderc                	   @   s   e Zd ZdZdeeef eee ee ee	e
e
ge
f  eedddZee dd	d
Zeeee dddZeedddZe
eef ee
eef dddZeddddZdS )
JSONLoadera  
    Load a `JSON` file using a `jq` schema.

    Setup:
        .. code-block:: bash

            pip install -U jq

    Instantiate:
        .. code-block:: python

            from langchain_community.document_loaders import JSONLoader
            import json
            from pathlib import Path

            file_path='./sample_quiz.json'
            data = json.loads(Path(file_path).read_text())
            loader = JSONLoader(
                     file_path=file_path,
                     jq_schema='.quiz',
                     text_content=False)

    Load:
        .. code-block:: python

            docs = loader.load()
            print(docs[0].page_content[:100])
            print(docs[0].metadata)

        .. code-block:: python

            {"sport": {"q1": {"question": "Which one is correct team name in
            NBA?", "options": ["New York Bulls"
            {'source': '/sample_quiz
            .json', 'seq_num': 1}

    Async load:
        .. code-block:: python

            docs = await loader.aload()
            print(docs[0].page_content[:100])
            print(docs[0].metadata)

        .. code-block:: python

            {"sport": {"q1": {"question": "Which one is correct team name in
            NBA?", "options": ["New York Bulls"
            {'source': '/sample_quizg
            .json', 'seq_num': 1}

    Lazy load:
        .. code-block:: python

            docs = []
            docs_lazy = loader.lazy_load()

            # async variant:
            # docs_lazy = await loader.alazy_load()

            for doc in docs_lazy:
                docs.append(doc)
            print(docs[0].page_content[:100])
            print(docs[0].metadata)

        .. code-block:: python

            {"sport": {"q1": {"question": "Which one is correct team name in
            NBA?", "options": ["New York Bulls"
            {'source': '/sample_quiz
            .json', 'seq_num': 1}
    NFT)	file_path	jq_schemacontent_keyis_content_key_jq_parsablemetadata_functext_content
json_linesc           	      C   sj   zddl }|| _ W n ty,   tdY n0 t| | _||| _|| _|| _|| _	|| _
|| _dS )a~  Initialize the JSONLoader.

        Args:
            file_path (Union[str, PathLike]): The path to the JSON or JSON Lines file.
            jq_schema (str): The jq schema to use to extract the data or text from
                the JSON.
            content_key (str): The key to use to extract the content from
                the JSON if the jq_schema results to a list of objects (dict).
                If is_content_key_jq_parsable is True, this has to be a jq compatible
                schema. If is_content_key_jq_parsable is False, this should be a simple
                string key.
            is_content_key_jq_parsable (bool): A flag to determine if
                content_key is parsable by jq or not. If True, content_key is
                treated as a jq schema and compiled accordingly. If False or if
                content_key is None, content_key is used as a simple string.
                Default is False.
            metadata_func (Callable[Dict, Dict]): A function that takes in the JSON
                object extracted by the jq_schema and the default metadata and returns
                a dict of the updated metadata.
            text_content (bool): Boolean flag to indicate whether the content is in
                string format, default to True.
            json_lines (bool): Boolean flag to indicate whether the input is in
                JSON Lines format.
        r   Nz=jq package not found, please install it with `pip install jq`)jqImportErrorr   resolver   compile
_jq_schema_is_content_key_jq_parsable_content_key_metadata_func_text_content_json_lines)	selfr   r   r   r   r   r   r   r    r   ~/var/www/html/cobodadashboardai.evdpl.com/venv/lib/python3.9/site-packages/langchain_community/document_loaders/json_loader.py__init__T   s    "

zJSONLoader.__init__)returnc                 c   s   d}| j rp| jjddF}|D ]0}| }|r| ||D ]}|V  |d7 }q:qW d   q1 sd0    Y  n*| | jjdd|D ]}|V  |d7 }qdS )z-Load and return documents from the JSON file.r   zutf-8)encoding   N)r   r   openstrip_parse	read_text)r   indexflinedocr   r   r    	lazy_load   s    ,zJSONLoader.lazy_load)contentr)   r"   c                 c   sr   | j t|}| jdur&| | t||d D ]8\}}| j|d}| j|t	| j
|d}t||dV  q4dS )z#Convert given content to documents.Nr$   )sample)r/   sourceZseq_num)Zpage_contentmetadata)r   inputjsonloadsr   _validate_content_key	enumerate	_get_text_get_metadatastrr   r
   )r   r.   r)   datair/   textr1   r   r   r    r'      s    

zJSONLoader._parse)r/   r"   c                 C   s   | j dur:| jr.| j| j }|| }q>|| j  }n|}| jrlt|tsl|durlt	dt
| dnBt|trz|S t|ttfr|rt|S dS |durt|S dS dS )zConvert sample to string formatNz%Expected page_content is string, got z instead.                     Set `text_content=False` if the desired input for                     `page_content` is not a string )r   r   r   r   r2   firstr   
isinstancer9   
ValueErrortypedictlistr3   dumps)r   r/   Zcompiled_content_keyr.   r   r   r    r7      s    

zJSONLoader._get_text)r/   additional_fieldsr"   c                 K   s@   | j dur8|  ||}t|ts4tdt| d|S |S dS )z
        Return a metadata dictionary base on the existence of metadata_func
        :param sample: single data payload
        :param additional_fields: key-word arguments to be added as metadata values
        :return:
        NzUExpected the metadata_func to return a dict but got                                 ``)r   r?   rB   r@   rA   )r   r/   rE   resultr   r   r    r8      s    	

zJSONLoader._get_metadata)r:   r"   c                 C   s   |  }t|ts&tdt| d| jsN|| jdu rNtd| j d| jr| j	| j
| du rtd| j ddS )zCheck if a content key is validztExpected the jq schema to result in a list of objects (dict),                     so sample must be a dict but got `rF   Nz_Expected the jq schema to result in a list of objects (dict)                     with the key `z ` which should be parsable by jq)r>   r?   rB   r@   rA   r   getr   r   r   r2   r<   )r   r:   r/   r   r   r    r5      s2    
z JSONLoader._validate_content_key)NFNTF)__name__
__module____qualname____doc__r	   r9   r   r   boolr   r   r!   r   r
   r-   intr'   r   r7   r8   r5   r   r   r   r    r      s,   L     
3
r   )r3   osr   pathlibr   typingr   r   r   r   r   r	   Zlangchain_core.documentsr
   Z)langchain_community.document_loaders.baser   r   r   r   r   r    <module>   s    