a
    bg                     @   sr   d Z ddlZddlmZmZmZmZmZ er<ddlm	Z	m
Z
 ddlmZ ddlmZ eeZG dd deZdS )	zRLoader that uses Selenium to load a page, then uses unstructured to load the html.    N)TYPE_CHECKINGListLiteralOptionalUnionChromeFirefox)Document)
BaseLoaderc                	   @   s   e Zd ZdZdddddg fee eed ee ee eee dddZ	e
d	 d
ddZee
d	 edddZee d
ddZdS )SeleniumURLLoadera  Load `HTML` pages with `Selenium` and parse with `Unstructured`.

    This is useful for loading pages that require javascript to render.

    Attributes:
        urls (List[str]): List of URLs to load.
        continue_on_failure (bool): If True, continue loading other URLs on failure.
        browser (str): The browser to use, either 'chrome' or 'firefox'.
        binary_location (Optional[str]): The location of the browser binary.
        executable_path (Optional[str]): The path to the browser executable.
        headless (bool): If True, the browser will run in headless mode.
        arguments [List[str]]: List of arguments to pass to the browser.
    TchromeN)r   firefox)urlscontinue_on_failurebrowserbinary_locationexecutable_pathheadless	argumentsc           
      C   s~   zddl }W n ty&   tdY n0 zddl}	W n tyN   tdY n0 || _|| _|| _|| _|| _|| _|| _	dS )z4Load a list of URLs using Selenium and unstructured.r   NzIselenium package not found, please install it with `pip install selenium`zQunstructured package not found, please install it with `pip install unstructured`)
seleniumImportErrorunstructuredr   r   r   r   r   r   r   )
selfr   r   r   r   r   r   r   r   r    r   /var/www/html/cobodadashboardai.evdpl.com/venv/lib/python3.9/site-packages/langchain_community/document_loaders/url_selenium.py__init__   s&    

zSeleniumURLLoader.__init__r   )returnc           	      C   sN  | j  dkrddlm} ddlm} ddlm} | }| jD ]}|	| q>| j
rh|	d |	d | jdurz| j|_| jdu r||d	S |||| jd
dS | j  dkrBddlm} ddlm} ddlm} | }| jD ]}|	| q| j
r|	d | jdur| j|_| jdu r.||d	S |||| jd
dS tddS )a  Create and return a WebDriver instance based on the specified browser.

        Raises:
            ValueError: If an invalid browser is specified.

        Returns:
            Union[Chrome, Firefox]: A WebDriver instance for the specified browser.
        r   r   )r   )Options)Servicez
--headlessz--no-sandboxN)options)r   )r    Zservicer   )r	   z5Invalid browser specified. Use 'chrome' or 'firefox'.)r   lowerselenium.webdriverr   Z!selenium.webdriver.chrome.optionsr   Z!selenium.webdriver.chrome.servicer   r   add_argumentr   r   r   r	   Z"selenium.webdriver.firefox.optionsZ"selenium.webdriver.firefox.service
ValueError)	r   r   ZChromeOptionsr   Zchrome_optionsargr	   ZFirefoxOptionsZfirefox_optionsr   r   r   _get_driverB   sH    	










zSeleniumURLLoader._get_driver)urldriverr   c           	      C   s   ddl m} ddlm} |dddd}|j }r8||d< z(||jd	 }r^|d
pXd|d< W n |yr   Y n0 z(||jd }r|dpd|d< W n |y   Y n0 |S )Nr   )NoSuchElementException)ByzNo title found.zNo description found.zNo language found.)sourcetitledescriptionlanguager,   z//meta[@name="description"]contentr-   htmllangr.   )	Zselenium.common.exceptionsr)   Zselenium.webdriver.common.byr*   r,   Zfind_elementZXPATHZget_attributeZTAG_NAME)	r   r'   r(   r)   r*   metadatar,   r-   Zhtml_tagr   r   r   _build_metadataw   s0    


z!SeleniumURLLoader._build_metadatac           
      C   s   ddl m} t }|  }| jD ]}zP|| |j}||d}ddd |D }| ||}|	t
||d W q  ty }	 z.| jrtd| d	|	  n|	W Y d
}	~	q d
}	~	0 0 q |  |S )zLoad the specified URLs using Selenium and create Document instances.

        Returns:
            List[Document]: A list of Document instances with loaded content.
        r   )partition_html)textz

c                 S   s   g | ]}t |qS r   )str).0elr   r   r   
<listcomp>       z*SeleniumURLLoader.load.<locals>.<listcomp>)page_contentr2   zError fetching or processing z, exception: N)Zunstructured.partition.htmlr4   listr&   r   getZpage_sourcejoinr3   appendr
   	Exceptionr   loggererrorquit)
r   r4   docsr(   r'   r;   elementsr5   r2   er   r   r   load   s"    


zSeleniumURLLoader.load)__name__
__module____qualname____doc__r   r6   boolr   r   r   r   r&   dictr3   r
   rG   r   r   r   r   r      s&   #5r   )rK   loggingtypingr   r   r   r   r   r"   r   r	   Zlangchain_core.documentsr
   Z)langchain_community.document_loaders.baser   	getLoggerrH   rA   r   r   r   r   r   <module>   s   
