a
    bg                     @   s   d dl Z d dlmZ d dlmZ d dlmZmZmZm	Z	m
Z
mZmZ d dlmZ d dlmZ d dlmZ d dlmZmZ G d	d
 d
eZG dd deZdS )    N)TextIOWrapper)Path)AnyDictIteratorListOptionalSequenceUnion)Document)
BaseLoader)detect_file_encodings)UnstructuredFileLoadervalidate_unstructured_versionc                
   @   st   e Zd ZdZdddeeef ee ee ee	 ee e
ee dddZee d	d
dZeee dddZdS )	CSVLoadera  Load a `CSV` file into a list of Documents.

    Each document represents one row of the CSV file. Every row is converted
    into a key/value pair and outputted to a new line in the document's
    page_content.

    The source for each document loaded from csv is set to the value of the
    `file_path` argument for all documents by default.
    You can override this by setting the `source_column` argument to the
    name of a column in the CSV file.
    The source of each document will then be set to the value of the column
    with the name specified in `source_column`.

    Output Example:
        .. code-block:: txt

            column1: value1
            column2: value2
            column3: value3

    Instantiate:
        .. code-block:: python

            from langchain_community.document_loaders import CSVLoader

            loader = CSVLoader(file_path='./hw_200.csv',
                csv_args={
                'delimiter': ',',
                'quotechar': '"',
                'fieldnames': ['Index', 'Height', 'Weight']
            })

    Load:
        .. code-block:: python

            docs = loader.load()
            print(docs[0].page_content[:100])
            print(docs[0].metadata)

        .. code-block:: python

            Index: Index
            Height: Height(Inches)"
            Weight: "Weight(Pounds)"
            {'source': './hw_200.csv', 'row': 0}

    Async load:
        .. code-block:: python

            docs = await loader.aload()
            print(docs[0].page_content[:100])
            print(docs[0].metadata)

        .. code-block:: python

            Index: Index
            Height: Height(Inches)"
            Weight: "Weight(Pounds)"
            {'source': './hw_200.csv', 'row': 0}

    Lazy load:
        .. code-block:: python

            docs = []
            docs_lazy = loader.lazy_load()

            # async variant:
            # docs_lazy = await loader.alazy_load()

            for doc in docs_lazy:
                docs.append(doc)
            print(docs[0].page_content[:100])
            print(docs[0].metadata)

        .. code-block:: python

            Index: Index
            Height: Height(Inches)"
            Weight: "Weight(Pounds)"
            {'source': './hw_200.csv', 'row': 0}
    N F)content_columns)	file_pathsource_columnmetadata_columnscsv_argsencodingautodetect_encodingr   c                C   s2   || _ || _|| _|| _|pi | _|| _|| _dS )a  

        Args:
            file_path: The path to the CSV file.
            source_column: The name of the column in the CSV file to use as the source.
              Optional. Defaults to None.
            metadata_columns: A sequence of column names to use as metadata. Optional.
            csv_args: A dictionary of arguments to pass to the csv.DictReader.
              Optional. Defaults to None.
            encoding: The encoding of the CSV file. Optional. Defaults to None.
            autodetect_encoding: Whether to try to autodetect the file encoding.
            content_columns: A sequence of column names to use for the document content.
                If not present, use all columns that are not part of the metadata.
        N)r   r   r   r   r   r   r   )selfr   r   r   r   r   r   r   r   r   }/var/www/html/cobodadashboardai.evdpl.com/venv/lib/python3.9/site-packages/langchain_community/document_loaders/csv_loader.py__init__c   s    
zCSVLoader.__init__returnc                 c   sF  zFt | jd| jd }| |E d H  W d    n1 s:0    Y  W n ty
 } z| jrt| j}|D ]t}zXt | jd|jd2}| |E d H  W d    W  qW d    n1 s0    Y  W ql ty   Y qlY ql0 qlntd| j |W Y d }~n@d }~0  ty@ } ztd| j |W Y d }~n
d }~0 0 d S )N )newliner   zError loading )	openr   r   _CSVLoader__read_fileUnicodeDecodeErrorr   r   RuntimeError	Exception)r   csvfileeZdetected_encodingsr   r   r   r   	lazy_load   s&    2

4&zCSVLoader.lazy_load)r%   r   c           	   
   #   s   t j|fi  j}t|D ]\}}z" jd ur:| j nt j}W n$ tyj   td j dY n0 d	 fdd|
 D }||d} jD ]8}z|| ||< W q ty   td| dY q0 qt||dV  qd S )	NzSource column 'z' not found in CSV file.
c                 3   sz   | ]r\}} j r| j v rn
| jvr|d ur6| n| dt|trN| nt|trjdttj|n| V  qd S )Nz: ,)r   r   strip
isinstancestrlistjoinmap).0kvr   r   r   	<genexpr>   s   

z(CSVLoader.__read_file.<locals>.<genexpr>)sourcerowzMetadata column ')Zpage_contentmetadata)csv
DictReaderr   	enumerater   r,   r   KeyError
ValueErrorr.   itemsr   r   )	r   r%   Z
csv_readerir6   r5   contentr7   colr   r3   r   Z__read_file   s*    


zCSVLoader.__read_file)Nr   NNF)__name__
__module____qualname____doc__r
   r,   r   r   r	   r   boolr   r   r   r'   r   r!   r   r   r   r   r      s&   U     	
!r   c                       s:   e Zd ZdZd	eeed fddZedddZ  Z	S )
UnstructuredCSVLoadera|  Load `CSV` files using `Unstructured`.

    Like other
    Unstructured loaders, UnstructuredCSVLoader can be used in both
    "single" and "elements" mode. If you use the loader in "elements"
    mode, the CSV file will be a single Unstructured Table element.
    If you use the loader in "elements" mode, an HTML representation
    of the table will be available in the "text_as_html" key in the
    document metadata.

    Examples
    --------
    from langchain_community.document_loaders.csv_loader import UnstructuredCSVLoader

    loader = UnstructuredCSVLoader("stanley-cups.csv", mode="elements")
    docs = loader.load()
    single)r   modeunstructured_kwargsc                    s&   t dd t jf ||d| dS )a  

        Args:
            file_path: The path to the CSV file.
            mode: The mode to use when loading the CSV file.
              Optional. Defaults to "single".
            **unstructured_kwargs: Keyword arguments to pass to unstructured.
        z0.6.8)Zmin_unstructured_version)r   rH   N)r   superr   )r   r   rH   rI   	__class__r   r   r      s    
zUnstructuredCSVLoader.__init__r   c                 C   s"   ddl m} |f d| ji| jS )Nr   )partition_csvfilename)Zunstructured.partition.csvrM   r   rI   )r   rM   r   r   r   _get_elements   s    z#UnstructuredCSVLoader._get_elements)rG   )
rA   rB   rC   rD   r,   r   r   r   rO   __classcell__r   r   rK   r   rF      s    rF   )r8   ior   pathlibr   typingr   r   r   r   r   r	   r
   Zlangchain_core.documentsr   Z)langchain_community.document_loaders.baser   Z,langchain_community.document_loaders.helpersr   Z1langchain_community.document_loaders.unstructuredr   r   r   rF   r   r   r   r   <module>   s   $ +