a
    ^g:                     @   s   d dl Zd dlZd dlZd dlZd dlmZ dd Zdd Zd"dd	Z	d#d
dZ
dd Zdd Zd$ddZdd Zd%ddZdd ZG dd dZG dd deZG dd dZG d d! d!ZdS )&    N)
ThreadPoolc                    sB    j \}}j ||fksJ t fddt|D }| j S )z< computes the intersection measure of two result tables
    c                 3   s$   | ]}t  | | jV  qd S N)npintersect1dsize).0iI1I2 f/var/www/html/cobodadashboardai.evdpl.com/venv/lib/python3.9/site-packages/faiss/contrib/evaluation.py	<genexpr>   s   z+knn_intersection_measure.<locals>.<genexpr>)shapesumranger   )r
   r   nqZrankninterr   r	   r   knn_intersection_measure   s    
r   c                 C   sh   | j d }||k }t| }t|D ]0}|| || | | |d     ||d < q$||| || fS )z select a set of results    )r   r   Z
zeros_liker   r   )limsDIZthreshr   maskZnew_limsr   r   r   r   filter_range_results   s    

.r   overallc                    s   fdd fddj d }j d |ks8J tj|ddfdd	}td
}||t| tdd dd  dd dd  |dS )zucompute the precision and recall of range search results. The
    function does not take the distances into account. c                    s    |  | d   S Nr   r   r   Ireflims_refr   r   ref_result_for,   s    z range_PR.<locals>.ref_result_forc                    s    |  | d   S r   r   r   )Inewlims_newr   r   new_result_for/   s    z range_PR.<locals>.new_result_forr   int64Zdtypec                    s,   | } | }t ||}t|| < d S r   )r   r   len)qgt_idsZnew_idsinter)r$   r   r!   r   r   compute_PR_for7   s    z range_PR.<locals>.compute_PR_for   Nmode)r   r   zerosr   mapr   counts_to_PR)r    r   r#   r"   r/   r   r+   poolr   )r"   r   r#   r    r$   r   r!   r   range_PR(   s    
r4   c           	      C   s   |dkrf|   |  |    } }}|dkr6|| }nd}| dkrL||  }n|dkrZd}nd}||fS |dkr| dk}d| |< ||  }|| dkt||< |dk}t|| dksJ d||< d||< || }| | fS t dS )z computes a  precision-recall for a ser of queries.
    ngt = nb of GT results per query
    nres = nb of found results per query
    ninter = nb of correct results per query (smaller than nres of course)
    r   r         ?        Zaverager   N)r   Zastypefloatr   allmeanAssertionError)	Zngtnresr   r/   	precisionZrecallr   recalls
precisionsr   r   r   r2   P   s.    

r2   c                 C   s   t |}t |}t| d }t|D ]Z}| | | |d   }}||| }	||| }
|
 }|	| |||< |
| |||< q(||fS )z& sort 2 arrays using the first as key r   )r   
empty_liker'   r   argsort)r   r   r   r   ZD2r   r   l0l1iiZdior   r   r   sort_range_res_2~   s    

rE   c                 C   sb   t |}t| d }t|D ]>}| | | |d   }}||| |||< |||   q|S r   )r   r?   r'   r   sort)r   r   r   r   r   rA   rB   r   r   r   sort_range_res_1   s    
rG   ref,newc                    s"  d|v rt d|v r*t \ fdd fddjd }jd |ksdJ t}	tj||	dfd	d
fdd}
td}||
t| t|	}t|	}t|	D ]R}t	dd|df dd|df dd|df |d\}}|||< |||< q||fS )z compute precision-recall values for range search results
    for several thresholds on the "new" results.
    This is to plot PR curves
    refnewc                    s    |  | d   S r   r   r   r   r   r   r!      s    z4range_PR_multiple_thresholds.<locals>.ref_result_forc                    s.   |  | d   }}||  || fS r   r   )r   rA   rB   )Dnewr"   r#   r   r   r$      s    z4range_PR_multiple_thresholds.<locals>.new_result_forr      r%   r&   c                    s   | }| \}}t | | d d df< |jdkr8d S t|}| | d d df< |jdkrdd S t||}d||t |k< t|| |k}tdg|f}||  | d d df< d S )Nr   r   r-      )r'   r   r   searchsortedZcumsumhstack)r(   r)   Zres_idsZres_disr;   rC   Zn_ok)countsr$   r!   
thresholdsr   r   r+      s    

z4range_PR_multiple_thresholds.<locals>.compute_PR_forr,   Nr   rM   r.   )
rG   rE   r   r'   r   r0   r   r1   r   r2   )r    r   r#   rK   r"   rQ   r/   Zdo_sortr   ntr+   r3   r>   r=   tprr   )	rK   r"   r   rP   r#   r    r$   r!   rQ   r   range_PR_multiple_thresholds   s.    



0

rV   c           	      C   s   t | |g}|  t|}t |}|dd |dd  |dd< |||k }t j|| ddd }t j||ddd }||fS )zt for two tables, cluster them by merging values closer than thr.
    Returns the cluster ids for each table element r   Nr-   right)Zside)r   rO   rF   r'   ZonesrN   )	Ztab1Ztab2ZthrtabnZdiffsZunique_valsZidx1Zidx2r   r   r   _cluster_tables_with_tolerance   s    
 rZ   h㈵>c              	   C   s   t jj| ||d t }tt|D ]}t || || krBq&|| |   }t	| | || |\}}	t 
|D ]>}
|
|d krqt||
k}|t|||f t|||f  qtq&dS )zS test that knn search results are identical, with possible ties.
    Raise if not. )rtolr-   N)r   testingZassert_allcloseunittestZTestCaser   r'   r8   maxrZ   uniqueassertEqualset)Drefr   rK   r"   r\   Ztestcaser   rU   ZDrefCZDnewCdisr   r   r   r   check_ref_knn_with_draws   s    re   c                 C   s   t j| | t| d }t|D ]}| | | |d   }}	|||	 }
|||	 }|||	 }|||	 }t |
|kr|n2dd }||
|\}
}|||\}}t j|
| t jj||dd q"dS )zM compare range search results wrt. a reference result,
    throw if it fails r   c                 S   s   |   }| | || fS r   )r@   )r   r   rD   r   r   r   sort_by_ids  s    z,check_ref_range_results.<locals>.sort_by_ids   )decimalN)r   r]   Zassert_array_equalr'   r   r8   Zassert_array_almost_equal)ZLrefrc   r   ZLnewrK   r"   r   r   rA   rB   ZIi_refZIi_newZDi_refZDi_newrf   r   r   r   check_ref_range_results	  s    ri   c                   @   sH   e Zd ZdZdd Zdd Zdd Zdd	 Zd
d Zdd Z	dd Z
dS )OperatingPointszw
    Manages a set of search parameters with associated performance and time.
    Keeps the Pareto optimal points.
    c                 C   s   g | _ g | _d S r   )operating_pointssuboptimal_pointsselfr   r   r   __init__,  s    zOperatingPoints.__init__c                 C   s   t dS )z1 return -1 if k1 > k2, 1 if k2 > k1, 0 otherwise NNotImplementedrn   Zk1Zk2r   r   r   compare_keys3  s    zOperatingPoints.compare_keysc                 C   s   t dS )zC parameters to say we do noting, takes 0 time and has 0 performanceNrp   rm   r   r   r   do_nothing_key7  s    zOperatingPoints.do_nothing_keyc                 C   s,   | j D ] \}}}||kr||kr dS qdS )NFT)rk   )rn   Zperf_newZt_new_perfrS   r   r   r   is_pareto_optimal;  s    z!OperatingPoints.is_pareto_optimalc                 C   s\   d}d}| j | j D ]>\}}}| ||}|dkr>||kr>|}|dk r||k r|}q||fS )z, predicts the bound on time and performance r6   r5   r   )rk   rl   rs   )rn   keymin_timemax_perfkey2rv   rS   cmpr   r   r   predict_boundsA  s    zOperatingPoints.predict_boundsc                 C   s   |  |\}}| ||S r   )r}   rw   )rn   rx   rz   ry   r   r   r   should_run_experimentO  s    z%OperatingPoints.should_run_experimentc                 C   s   |  ||rtd}|t| jk r^| j| \}}}||krT||k rT| j| j| q|d7 }q| j|||f dS | j|||f dS d S )Nr   r   TF)rw   r'   rk   rl   appendpop)rn   rx   rv   rS   r   Zop_LsZperf2t2r   r   r   add_operating_pointS  s    

z#OperatingPoints.add_operating_pointN)__name__
__module____qualname____doc__ro   rs   rt   rw   r}   r~   r   r   r   r   r   rj   &  s   rj   c                   @   s^   e Zd ZdZdd Zdd Zdd Zdd	 Zd
d Ze	j
fddZdd Zdd Zdd ZdS )OperatingPointsWithRangesz
    Set of parameters that are each picked from a discrete range of values.
    An increase of each parameter is assumed to make the operation slower
    and more accurate.
    A key = int array of indices in the ordered set of parameters.
    c                 C   s   t |  g | _d S r   )rj   ro   rangesrm   r   r   r   ro   m  s    
z"OperatingPointsWithRanges.__init__c                 C   s   | j ||f d S r   )r   r   )rn   namevaluesr   r   r   	add_ranger  s    z#OperatingPointsWithRanges.add_rangec                 C   s(   t ||krdS t ||kr$dS dS )Nr   r-   r   )r   r8   rr   r   r   r   rs   u  s
    z&OperatingPointsWithRanges.compare_keysc                 C   s   t jt| jtdS )Nr&   )r   r0   r'   r   intrm   r   r   r   rt   |  s    z(OperatingPointsWithRanges.do_nothing_keyc                 C   s   t tdd | jD S )Nc                 S   s   g | ]\}}t |qS r   )r'   )r   r   r   r   r   r   
<listcomp>      z=OperatingPointsWithRanges.num_experiments.<locals>.<listcomp>)r   r   prodr   rm   r   r   r   num_experiments  s    z)OperatingPointsWithRanges.num_experimentsc                 C   s~   |dks|dksJ |   }tjd}|dks8||k rH||d }n|j|d |d dd}d|d gdd |D  }|S )	z} sample a set of experiments of max size n_autotune
        (run all experiments in random order if n_autotune is 0)
        r   rM   {   F)r   replacer   c                 S   s   g | ]}t |d  qS )r   )r   )r   cnor   r   r   r     r   z@OperatingPointsWithRanges.sample_experiments.<locals>.<listcomp>)r   r   randomZRandomStateZpermutationchoice)rn   Z
n_autotunersZtotexZexperimentsr   r   r   sample_experiments  s    z,OperatingPointsWithRanges.sample_experimentsc                 C   sX   t jt| jtd}t| jD ](\}\}}|t| ||< |t| }q|dksTJ |S )z/Convert a sequential experiment number to a keyr&   r   )r   r0   r'   r   r   	enumerate)rn   r   kr   r   r   r   r   r   
cno_to_key  s    z$OperatingPointsWithRanges.cno_to_keyc                    s    fddt | jD S )z3Convert a key to a dictionary with parameter valuesc                    s"   i | ]\}\}}|| |  qS r   r   )r   r   r   r   r   r   r   
<dictcomp>  s   
z<OperatingPointsWithRanges.get_parameters.<locals>.<dictcomp>)r   r   )rn   r   r   r   r   get_parameters  s    
z(OperatingPointsWithRanges.get_parametersc                    sP   | j D ]4\}}||kr fdd|D }||dd<  dS qtd| ddS )z% remove too large values from a rangec                    s   g | ]}| k r|qS r   r   )r   vmax_valr   r   r     r   z<OperatingPointsWithRanges.restrict_range.<locals>.<listcomp>Nz
parameter z
 not found)r   RuntimeError)rn   r   r   Zname2r   Zval2r   r   r   restrict_range  s    z(OperatingPointsWithRanges.restrict_rangeN)r   r   r   r   ro   r   rs   rt   r   r   r   r   r   r   r   r   r   r   r   r   e  s   	r   c                   @   s   e Zd Zdd Zdd ZdS )	TimerIterc                 C   s.   g | _ |j| _|| _|jdkr*t|j d S )Nr   )tsrunstimerrR   faissomp_set_num_threads)rn   r   r   r   r   ro     s
    
zTimerIter.__init__c                 C   s   | j }|  jd8  _| jt  t| jdkrF| jd | jd  nd}| jdks^||jkr|jdkrtt	|j
 t| j}|dd  |d d  }t||jkr||jd  |_n|d d  |_td S )Nr   rM   r-   r   )r   r   r   r   timer'   max_secsrR   r   r   remember_ntr   arraywarmuptimesStopIteration)rn   r   
total_timer   r   r   r   r   __next__  s    &
zTimerIter.__next__N)r   r   r   ro   r   r   r   r   r   r     s   r   c                   @   sD   e Zd ZdZdddejfddZdd Zd	d
 Zdd Z	dd Z
dS )RepeatTimeru!  
    This is yet another timer object. It is adapted to Faiss by
    taking a number of openmp threads to set on input. It should be called
    in an explicit loop as:

    timer = RepeatTimer(warmup=1, nt=1, runs=6)

    for _ in timer:
        # perform operation

    print(f"time={timer.get_ms():.1f} ± {timer.get_ms_std():.1f} ms")

    the same timer can be re-used. In that case it is reset each time it
    enters a loop. It focuses on ms-scale times because for second scale
    it's usually less relevant to repeat the operation.
    r   r-   r   c                 C   s2   ||k sJ || _ || _|| _|| _t | _d S r   )r   rR   r   r   r   Zomp_get_max_threadsr   )rn   r   rR   r   r   r   r   r   ro     s    zRepeatTimer.__init__c                 C   s   t | S r   )r   rm   r   r   r   __iter__  s    zRepeatTimer.__iter__c                 C   s   t | jd S )N  )r   r9   r   rm   r   r   r   ms  s    zRepeatTimer.msc                 C   s"   t | jdkrt| jd S dS )Nr   r   r6   )r'   r   r   Zstdrm   r   r   r   ms_std  s    zRepeatTimer.ms_stdc                 C   s
   t | jS )zJ effective number of runs (may be lower than runs - warmup due to timeout))r'   r   rm   r   r   r   nruns  s    zRepeatTimer.nrunsN)r   r   r   r   r   infro   r   r   r   r   r   r   r   r   r     s   r   )r   )r   )r   rH   )r[   )numpyr   r^   r   r   Zmultiprocessing.poolr   r   r   r4   r2   rE   rG   rV   rZ   re   ri   rj   r   r   r   r   r   r   r   <module>   s&   

(
. 
N
?J