a
    ag>                     @   s   U d dl Z d dlmZ d dlmZmZmZmZmZm	Z	m
Z
mZmZmZ erbd dlmZmZmZmZ h dZe
e ed< G dd deZG d	d
 d
ZdS )    N)platform)
TYPE_CHECKINGAnyDictIterableListOptionalSetTuple	TypedDictUnion)Browser
CDPSessionPagesync_playwright>   htmlmetabrZiframebodystylepathheadtitlescriptz::markersvgblack_listed_elementsc                   @   sn   e Zd ZU dZeed< eed< ee ed< ee ed< ee ed< e	ed< eed< eed	< eed
< eed< dS )ElementInViewPortzIA typed dictionary containing information about elements in the viewport.
node_indexbackend_node_id	node_name
node_value	node_metais_clickableorigin_xorigin_ycenter_xcenter_yN)
__name__
__module____qualname____doc__str__annotations__intr   r   bool r/   r/   m/var/www/html/cobodadashboardai.evdpl.com/venv/lib/python3.9/site-packages/langchain/chains/natbot/crawler.pyr   $   s   
r   c                   @   s   e Zd ZdZddddZeddddZedd	d
dZeee	f ddddZ
eee	f eddddZddddZee dddZdS )Crawlera   A crawler for web pages.

    **Security Note**: This is an implementation of a crawler that uses a browser via
        Playwright.

        This crawler can be used to load arbitrary webpages INCLUDING content
        from the local file system.

        Control access to who can submit crawling requests and what network access
        the crawler has.

        Make sure to scope permissions to the minimal permissions necessary for
        the application.

        See https://python.langchain.com/docs/security for more information.
    N)returnc                 C   sl   zddl m} W n ty*   tdY n0 |  jjdd| _| j | _| j	ddd |  |  d S )	Nr   )r   z\Could not import playwright python package. Please install it with `pip install playwright`.F)Zheadlessi   i8  )widthheight)
playwright.sync_apir   ImportErrorstartZchromiumZlaunchZbrowserZnew_pagepageZset_viewport_size)selfr   r/   r/   r0   __init__E   s    
zCrawler.__init__)urlr2   c                 C   s:   | j jd|v r|nd| d | j j| j | _i | _d S )Nz://zhttp://)r;   )r8   ZgotocontextZnew_cdp_sessionclientpage_element_buffer)r9   r;   r/   r/   r0   
go_to_pageU   s    zCrawler.go_to_page)	directionr2   c                 C   s.   |dkr| j d n|dkr*| j d d S )Nupz(document.scrollingElement || document.body).scrollTop = (document.scrollingElement || document.body).scrollTop - window.innerHeight;Zdownz(document.scrollingElement || document.body).scrollTop = (document.scrollingElement || document.body).scrollTop + window.innerHeight;)r8   evaluate)r9   r@   r/   r/   r0   scrollZ   s    zCrawler.scroll)idr2   c                 C   sR   d}| j | | jt|}|rF|d }|d }| j j|| ntd d S )Nz
		links = document.getElementsByTagName("a");
		for (var i = 0; i < links.length; i++) {
			links[i].removeAttribute("target");
		}
		r%   r&   zCould not find element)r8   rB   r>   getr-   mouseclickprint)r9   rD   jselementxyr/   r/   r0   rG   d   s    zCrawler.click)rD   textr2   c                 C   s   |  | | jj| d S )N)rG   r8   keyboardtype)r9   rD   rM   r/   r/   r0   rO   w   s    
zCrawler.typec                 C   s   | j jd d S )NZEnter)r8   rN   Zpress)r9   r/   r/   r0   enter{   s    zCrawler.enterc           K         s  | j }| j}t }g }|d}tdkr6|dkr6d}|d}|d}|d}|d}	|| }
||	 }d}d}|d	d	d
t|dt|d | j	dg ddd}|d |d d	 }|d }|d }|d }|d }|d |d t
|d d }|d }|d }|d }|d }|d }|d }d	}i }g }ddi}ddi}tt tt td d!d"} tttf tt tttf d#fd$d%}!tttttt f f tttt ttttt f d& fd'd( tD ]\}"}#|" }$|#  }% |d)|"|%|$\}&}' |d*|"|%|$\}(})z||"}W n   Y qY n0 |%tv rXq|| \}*}+},}-|*| }*|+| }+|,| },|-| }-|*}.|+}/|*|, }0|+|- }1|.|
k o|0|ko|/|k o|1|k}2|2sАqg }3|!||" g d+}4|&p|(}5|5sd n|&rt|'nt|)}6|5sd n|t|6g }7|%d,krv|5rv|7rv||"  }8|8d-ks|8d.krdq|7d/|8d0 nr|%d1kr|4d/d2ks|%d*krd*}%|4d/d  |4D ]8}9|5r|7r|7d3|9|4|9 d4 n|3|4|9  qd }:||" d	kr||"  }:|:d-krbqnL|%d1krb|"|v rb|:d u rb||"};||; }<|;d	krb|<d	krb|< }:|5r|%d)kr|%d*krq|t|"||" |%|:|3|"|v t|*t|+t|*|,d  t|+|-d  d5
 qg }=d	}>|D ]}?|?d6}@|?d7}%|?d8}:|?d9}A|?d:}B|:r$|: d;nd<}Cd<}D|@|v r||@ D ]Z}E|Ed/}F|Ed}G|Fd3kr|Br|Ed=}H|B|H d>|G d? n|C|G d;7 }Cq>|Brd;|B}Id;|I }D|Cd<kr|C  }C| |%|A}J|Jd*ks|Dd<kr |Jd@kr |Jd1kr |JdAkr |JdBkr |C d<kr q|?||>< |Cd<kr\|=dC|J dD|> |D dE|C dF|J dE
 n|=dC|J dD|> |D dG |>d7 }>qtdHt |  |=S )INzwindow.devicePixelRatiodarwin      zwindow.pageYOffsetzwindow.pageXOffsetzwindow.screen.widthzwindow.screen.heightr   z[scrollbar {:0.2f}-{:0.2f}%])rK   rL   rM   zDOMSnapshot.captureSnapshotT)ZcomputedStylesZincludeDOMRectsZincludePaintOrderstringsZ	documentsnodesZbackendNodeId
attributesZ	nodeValueZparentIndexZnodeNameZisClickableindexZ
inputValuevaluelayoutZ	nodeIndexboundsz-1FN)r   has_click_handlerr2   c                 S   s<   | dkrdS | dkrdS | dkr$dS | dks0|r4dS dS d S )NalinkinputimgbuttonrM   r/   )r   r\   r/   r/   r0   convert_name   s    z#Crawler.crawl.<locals>.convert_name)rV   keysr2   c                    sd   i }t t| fd  D ]H\}}|dk r(q | } | }||v r|||< || |s|  S q|S )NrS   r   )zipiterremove)rV   rc   valuesZ	key_indexZvalue_indexkeyrX   )rT   r/   r0   find_attributes   s    

z&Crawler.crawl.<locals>.find_attributes)	hash_treetagnode_idr   	parent_idr2   c                    sx   t |}|| vr8|   }| } | |||| | | \}}	||krVd|f}
n|rdd|	f}
nd}
|
| t |< |
S )NTr[   )r+   lower)rj   rk   rl   r   rm   Zparent_id_strparent_nameZgrand_parent_idZis_parent_desc_anchor	anchor_idrX   add_to_hash_treeZ
node_namesparentrT   r/   r0   rr      s     


z'Crawler.crawl.<locals>.add_to_hash_treer]   ra   )rO   placeholderz
aria-labelr   Zaltz#text|u   •rO   )rO   rX   r_   submit	attribute)rO   rh   rX   )
r   r   r   r    r!   r"   r#   r$   r%   r&   r   r   r    r"   r!     rh   z=""r^   r`   Ztextarea<z id=>z</z/>zParsing time: {:0.2f} seconds)r8   r>   timerB   r   appendformatroundr=   sendsetr   r+   r.   r   r-   r   r   r
   	enumeratern   rW   r   
setdefaultrE   popjoinstriprH   )Kr9   r8   r>   r7   Zpage_state_as_textZdevice_pixel_ratioZwin_upper_boundZwin_left_boundZ	win_widthZ
win_heightZwin_right_boundZwin_lower_boundZpercentage_progress_startZpercentage_progress_endtreeZdocumentrU   r   rV   r    r"   Zinput_valueZinput_value_indexZinput_value_valuesrY   Zlayout_node_indexrZ   cursorZchild_nodesZelements_in_view_portZanchor_ancestryZbutton_ancestryrb   ri   rW   Znode_name_indexZnode_parentr   Zis_ancestor_of_anchorrp   Zis_ancestor_of_buttonZ	button_idrK   rL   r3   r4   Zelem_left_boundZelem_top_boundZelem_right_boundZelem_lower_boundZpartially_is_in_viewportZ	meta_dataZelement_attributesZancestor_exceptionZancestor_node_keyZancestor_noderM   rh   Zelement_node_valueZnode_input_text_indexZ
text_indexZelements_of_interestZ
id_counterrJ   r   Znode_is_clickableZnode_meta_dataZ
inner_textr   childZ
entry_typeZentry_valueZ	entry_keyZmeta_stringZconverted_node_namer/   rq   r0   crawl~   s   







#




	

















 zCrawler.crawl)r'   r(   r)   r*   r:   r+   r?   rC   r   r-   rG   rO   rP   r   r   r/   r/   r/   r0   r1   3   s   
r1   )r}   sysr   typingr   r   r   r   r   r   r	   r
   r   r   r5   r   r   r   r   r   r+   r,   r   r1   r/   r/   r/   r0   <module>   s   
0