a
    bg<                     @  s   d dl mZ d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
mZ d dlmZmZmZmZmZmZmZ d dlmZ d dlmZ d dlmZ erd dlmZ eeZG d	d
 d
eZG dd dZ G dd deZ!G dd deZ"dS )    )annotationsN)
HTMLParser)TYPE_CHECKINGAnyDictListOptionalTupleUnion)
BaseLoader)Document)TextSplitter)
Connectionc                      sV   e Zd ZdZdd fddZddddd	d
ZdddddZddddZ  ZS )ParseOracleDocMetadatazParse Oracle doc metadata...Nonereturnc                   s"   t    |   d| _i | _d S )NF)super__init__resetmatchmetadataself	__class__ {/var/www/html/cobodadashboardai.evdpl.com/venv/lib/python3.9/site-packages/langchain_community/document_loaders/oracleai.pyr   %   s    
zParseOracleDocMetadata.__init__strzList[Tuple[str, Optional[str]]])tagattrsr   c                 C  sP   |dkr>d}|D ]*\}}|dkr$|}|dkr|r|| j |< qn|dkrLd| _d S )Nmeta namecontenttitleT)r   r   )r   r   r    entryr#   valuer   r   r   handle_starttag+   s    z&ParseOracleDocMetadata.handle_starttag)datar   c                 C  s   | j r|| jd< d| _ d S )Nr%   F)r   r   )r   r)   r   r   r   handle_data7   s    
z"ParseOracleDocMetadata.handle_dataDict[str, Any]c                 C  s   | j S N)r   r   r   r   r   get_metadata<   s    z#ParseOracleDocMetadata.get_metadata)	__name__
__module____qualname____doc__r   r(   r*   r-   __classcell__r   r   r   r   r   "   s
   r   c                   @  s>   e Zd ZdZeddddddZeddd	d
dddZdS )OracleDocReaderzRead a fileNzUnion[str, None]r   )input_stringr   c           	      C  s   d}d}| d u r$d tjddd} tt }td|}t| 	 
 }|d | }tdtd}|| | }| }||}|d | }|S )N       r"   Z>abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789   )kz>I)joinrandomchoicesinttimestructpackhashlibsha256encodedigestgetrandbitshexzfill)	r4   Z
out_lengthZhash_len	timestampZtimestamp_binZhashval_binZcounter_binZ	object_idZobject_id_hexr   r   r   generate_object_idF   s*    z"OracleDocReader.generate_object_idr   dictzUnion[Document, None])conn	file_pathparamsr   c              
   C  s  i }zddl }W n. ty> } ztd|W Y d}~n
d}~0 0 z2d|j_|  }t|d}| }W d   n1 s|0    Y  |du rtd|dW S ||j	}	||j	}
|j
d|t||	|
d	 |  |	du ri }n<t|	 }|d
s|dr t }|| | }t| jd | }||d< ||d< |
du r\td|dW S tt|
 |dW S W nP ty } z6td|  td|  |  W Y d}~dS d}~0 0 dS )zRead a file using OracleReader
        Args:
            conn: Oracle Connection,
            file_path: Oracle Directory,
            params: ONNX file name.
        Returns:
            Plain text and metadata as Langchain Document.
        r   NIUnable to import oracledb, please install with `pip install -U oracledb`.Frbr"   Zpage_contentr   a  
                declare
                    input blob;
                begin
                    input := :blob;
                    :mdata := dbms_vector_chain.utl_to_text(input, json(:pref));
                    :text := dbms_vector_chain.utl_to_text(input);
                end;)ZblobZprefmdatatext<!DOCTYPE html<HTML>$_oid_fileAn exception occurred :: zSkip processing )oracledbImportErrordefaults
fetch_lobscursoropenreadr   varZDB_TYPE_CLOBexecutejsondumpscloser   getvalue
startswithr   feedr-   r3   rH   username	Exceptionloggerinfo)rJ   rK   rL   r   rX   er\   fr)   rP   rQ   Zdoc_datapdoc_idexr   r   r   	read_filei   s\    &

zOracleDocReader.read_file)N)r.   r/   r0   r1   staticmethodrH   rp   r   r   r   r   r3   C   s
   "r3   c                      s8   e Zd ZdZdddd fddZdd	d
dZ  ZS )OracleDocLoaderzwRead documents using OracleDocLoader
    Args:
        conn: Oracle Connection,
        params: Loader parameters.
    r   r+   r   )rJ   rL   kwargsc                   s.   || _ tt|| _t jf i | d S r,   )rJ   ra   loadsrb   rL   r   r   )r   rJ   rL   rs   r   r   r   r      s    zOracleDocLoader.__init__zList[Document]r   c              
   C  s  zddl }W n. ty: } ztd|W Y d}~n
d}~0 0 d}g }i }ddi}z| jdur| jd| _| jd| _| jd| _| jd	| _| jd
| _nt	dd|j
_| jrt| j| j|}|du r|W S || | jr`d}t| jD ]`}	tj| j|	}
tj|
rt| j|
|}|du rT|d }td| d q|| q| jrfz| jdu s| jdu rt	d| j }| jd| _| jdur"t| jdkrt	dd}|j|| j | j d | }|D ],}|d | jv r|d dvrt	dqd| _| jdurR| jD ]}| jd | | _q:d| j d t| d | j d | j d | j d | j d  }|| |D ]r}i }|du rt | jj!d! | j d! | j d! | j }||d"< |t"d#|d$ q|d durLt#|d }|$d%s4|$d&rLt% }|&| |' }t | jj!d! | j d! | j d! | j d! t#|d'  }||d"< |d' |d(< | jdurt| j}t(d|D ]}||d'  || j| < q|d du r|t"d#|d$ n|t"t#|d |d$ qW nH t	yd } z.td)|  t)*  |+   W Y d}~n
d}~0 0 |W S  t	y } z&td)|  t)*   W Y d}~n
d}~0 0 dS )*z,Load data into LangChain Document objects...r   NrM   Z	plaintextfalsefiledirowner	tablenamecolnamezMissing loader parametersF   zTotal skipped: 
z%Missing owner or column name or both.
mdata_cols   z?Exceeds the max number of columns you can request for metadata.zgselect column_name, data_type from all_tab_columns where owner = :ownername and table_name = :tablename)Z	ownernamery   )NUMBERZBINARY_DOUBLEZBINARY_FLOATLONGZDATE	TIMESTAMPZVARCHAR2zDThe datatype for the column requested for metadata is not supported.z, rowidz, z'select dbms_vector_chain.utl_to_text(t.z, json('z+')) mdata, dbms_vector_chain.utl_to_text(t.z) textz from .z trT   rU   r"   rO   rR   rS      Z_rowidrW   ),rX   rY   rL   getrv   rw   rx   ry   rz   rh   rZ   r[   r3   rp   rJ   appendoslistdirpathr9   isfileri   rj   r\   r}   lenr`   upperZfetchallZmdata_cols_sqlra   rb   rH   rg   r   r   re   r   rf   r-   range	traceback	print_excrc   )r   rX   rk   Zncolsresultsr   Zm_paramsdocZ
skip_count	file_namerK   r\   sqlrowsrowcolrn   r)   rm   iro   r   r   r   load   sH   



	
	



	



zOracleDocLoader.load)r.   r/   r0   r1   r   r   r2   r   r   r   r   rr      s   rr   c                      s<   e Zd ZdZddddd fddZd	d
dddZ  ZS )OracleTextSplitterz$Splitting text using Oracle chunker.r   r+   r   r   )rJ   rL   rs   r   c              
     s   || _ || _t jf i | zTddl}zddl}W n. tyb } ztd|W Y d}~n
d}~0 0 || _|| _W n ty   tdY n0 dS )zInitialize.r   NrM   zjoracledb or json or both are not installed. Please install them. Recommendations: `pip install oracledb`. )	rJ   rL   r   r   ra   rX   rY   	_oracledb_json)r   rJ   rL   rs   ra   rX   rk   r   r   r   r     s&    
zOracleTextSplitter.__init__r   z	List[str])rQ   r   c           	   
   C  s   zddl }W n. ty: } ztd|W Y d}~n
d}~0 0 g }ztd| jj_| j }|j|jd |j	d|| j
| jd | }|du rq| j
|d }||d  q~|W S  ty } z&td	|  t   W Y d}~n
d}~0 0 dS )
z&Split incoming text and return chunks.r   NrM   F)r$   zUselect t.column_value from dbms_vector_chain.utl_to_chunks(:content, json(:params)) t)r$   rL   Z
chunk_datarW   )rX   rY   r   rZ   r[   rJ   r\   ZsetinputsizesZCLOBr`   r   rb   rL   Zfetchonert   r   rh   ri   rj   r   r   )	r   rQ   rX   rk   Zsplitsr\   r   dro   r   r   r   
split_text  s8    

zOracleTextSplitter.split_text)r.   r/   r0   r1   r   r   r2   r   r   r   r   r   |  s   r   )#
__future__r   r@   ra   loggingr   r:   r>   r=   r   html.parserr   typingr   r   r   r   r   r	   r
   Zlangchain_core.document_loadersr   Zlangchain_core.documentsr   Zlangchain_text_splittersr   rX   r   	getLoggerr.   ri   r   r3   rr   r   r   r   r   r   <module>	   s*   $
!u E