a
    bgi                     @  sp   d dl mZ d dlZd dlZd dlZd dlmZmZmZm	Z	m
Z
mZ d dlmZ d dlmZ G dd deZdS )    )annotationsN)AnyDictIteratorListOptionalTuple)Document)
BaseLoaderc                   @  s   e Zd ZdZdddddddddZd	d
ddZddddddZddddddZdddddZd	ddddZ	dd
ddZ
dS )AthenaLoaderaY  Load documents from `AWS Athena`.

    Each document represents one row of the result.
    - By default, all columns are written into the `page_content` of the document
    and none into the `metadata` of the document.
    - If `metadata_columns` are provided then these columns are written
    into the `metadata` of the document while the rest of the columns
    are written into the `page_content` of the document.

    To authenticate, the AWS client uses this method to automatically load credentials:
    https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html

    If a specific credential profile should be used, you must pass
    the name of the profile from the ~/.aws/credentials file that is to be used.

    Make sure the credentials / roles used have the required policies to
    access the Amazon Textract service.
    NstrzOptional[str]zOptional[List[str]])querydatabases3_output_uriprofile_namemetadata_columnsc           	   
   C  s   || _ || _|| _|dur|ng | _zddl}W n tyJ   tdY n0 z |durb|j|dn| }W n. ty } ztd|W Y d}~n
d}~0 0 |	d| _
|	d| _dS )ag  Initialize Athena document loader.

        Args:
            query: The query to run in Athena.
            database: Athena database.
            s3_output_uri: Athena output path.
            profile_name: Optional. AWS credential profile, if profiles are being used.
            metadata_columns: Optional. Columns written to Document `metadata`.
        Nr   zRCould not import boto3 python package. Please install it with `pip install boto3`.)r   zCould not load credentials to authenticate with AWS client. Please check that credentials in the specified profile name are valid.ZathenaZs3)r   r   r   r   boto3ImportErrorSession	Exception
ValueErrorclientathena_client	s3_client)	selfr   r   r   r   r   r   sessione r   y/var/www/html/cobodadashboardai.evdpl.com/venv/lib/python3.9/site-packages/langchain_community/document_loaders/athena.py__init__!   s.    
zAthenaLoader.__init__zList[Dict[str, Any]])returnc                 C  s   | j j| jd| jid| jid}|d }| j j|d}|d d d }|d	krRqn@|d
kr|d d }|d }d| }t|n|dkrtdtd q(| 	|}t
|jddS )NZDatabaseZOutputLocation)ZQueryStringZQueryExecutionContextZResultConfigurationQueryExecutionId)r!   ZQueryExecutionStatusStateZ	SUCCEEDEDZFAILEDZStateChangeReasonzQuery Failed: 	CANCELLEDz Query was cancelled by the user.   records)Zorient)r   Zstart_query_executionr   r   r   Zget_query_executionr   timesleep_get_result_setjsonloadsto_json)r   responsequery_execution_idstateZresp_statusZstate_change_reasonerrZ
result_setr   r   r   _execute_queryO   s,    


zAthenaLoader._execute_query)input_stringsuffixr    c                 C  s$   |r | |r |d t|  S |S N)endswithlenr   r2   r3   r   r   r   _remove_suffixi   s    zAthenaLoader._remove_suffixc                 C  s"   |r| |r|t|d  S |S r4   )
startswithr6   r7   r   r   r   _remove_prefixn   s    zAthenaLoader._remove_prefixr   )r.   r    c           	      C  s   zdd l }W n ty&   tdY n0 | j}| | |ddd}|d }d|dd  |g d }| jj||d}|j	t
|d  d	d
}|S )Nr   zTCould not import pandas python package. Please install it with `pip install pandas`./zs3://r%   z.csv)ZBucketKeyZBodyutf8)encoding)Zpandasr   r   r:   r8   splitjoinr   Z
get_objectZread_csvioBytesIOread)	r   r.   pdZ
output_uritokensZbucketkeyobjZdfr   r   r   r)   s   s"    
zAthenaLoader._get_result_setzTuple[List[str], List[str]])query_resultr    c                 C  sJ   g }g }t |d  }|D ]$}|| jv r6|| q|| q||fS )Nr   )listkeysr   append)r   rH   content_columnsr   Zall_columnsrF   r   r   r   _get_columns   s    
zAthenaLoader._get_columnszIterator[Document]c                 #  sh   |   }| |\ |D ]H}d fdd| D }fdd| D }t||d}|V  qd S )N
c                 3  s(   | ] \}}| v r| d | V  qdS )z: Nr   .0kv)rL   r   r   	<genexpr>   s   z)AthenaLoader.lazy_load.<locals>.<genexpr>c                   s&   i | ]\}}| v r|d ur||qS r4   r   rO   )r   r   r   
<dictcomp>   s   z*AthenaLoader.lazy_load.<locals>.<dictcomp>)page_contentmetadata)r1   rM   r@   itemsr	   )r   rH   rowrU   rV   docr   )rL   r   r   	lazy_load   s    
zAthenaLoader.lazy_load)NN)__name__
__module____qualname____doc__r   r1   r8   r:   r)   rM   rZ   r   r   r   r   r      s     .r   )
__future__r   rA   r*   r'   typingr   r   r   r   r   r   Zlangchain_core.documentsr	   Z)langchain_community.document_loaders.baser
   r   r   r   r   r   <module>   s    