a
    ]gP                     @   s  d dl Zd dlT d dlZd dlZdd Zdd Zed fddZ	d2d
dZ
d3ddZeZd4ddZdd ZeZd5ddZdd Zdd ZeZd6ddZeZd7ddZG dd dZd8d d!ZG d"d# d#Zed$fd%d&Zd9d(d)ZG d*d+ d+Zd,d- ZeZd.d/ Ze Z!d:d0d1Z dS );    N)*c                 C   s   t j| dd} | j\}}t j||fdd}t j||fdd}t }t||_t||_||_	||_
|  ||t|  |  ||fS )zPreturn k smallest values (and their indices) of the lines of a
    float32 arrayfloat32dtypeint64)npascontiguousarrayshapezerosfaissfloat_maxheap_array_tswig_ptridsvalnhkheapifyaddnreorderarrayr   mnIDZha r   b/var/www/html/cobodadashboardai.evdpl.com/venv/lib/python3.9/site-packages/faiss/extra_wrappers.pykmin   s    


r   c                 C   s   t j| dd} | j\}}t j||fdd}t j||fdd}t }t||_t||_||_	||_
|  ||t|  |  ||fS )zOreturn k largest values (and their indices) of the lines of a
    float32 arrayr   r   r   )r   r   r	   r
   r   float_minheap_array_tr   r   r   r   r   r   r   r   r   r   r   r   kmax+   s    


r   c           	   
   C   s   t j| dd} t j|dd}| j\}}|j\}}||ks<J t j||fdd}|tkrvt||t| |t|t| n>|tkr| |j |dd< n"t	||t| |t|||t| |S )zJcompute the whole pairwise distance matrix between two sets of
    vectorsr   r   N)
r   r   r	   empty	METRIC_L2Zpairwise_L2sqrr   METRIC_INNER_PRODUCTTZpairwise_extra_distances)	xqxbmetric
metric_argnqdnbd2disr   r   r   pairwise_distances=   s*    



r-   90  c                 C   s$   t j| dd}tt||j| |S Nr   r   )r   r    Z
float_randr   sizer   seedresr   r   r   randV   s    r4   c                 C   sB   t j| dd}|d u r*tt||j| ntt||j|| |S Nr   r   )r   r    Z
int64_randr   r0   Zint64_rand_max)r   r2   Zvmaxr3   r   r   r   randint\   s
    r6   c                 C   s$   t j| dd}tt||j| |S r/   )r   r    Zfloat_randnr   r0   r1   r   r   r   randnh   s    r7   c                 C   sV   |  d} | jdkr$t| jt| S | j\}}tj|dd}t||t| t| |S )z> compute a checksum for quick-and-dirty comparisons of arrays uint8   uint64r   )	viewndimZbvec_checksumr0   r   r	   r   r
   Zbvecs_checksum)ar   r)   csr   r   r   checksumn   s    


r?     c                 C   s(   t j| |fdd}t| |t|| |S r/   )r   r    rand_smooth_vectors_cr   )r   r)   r2   r3   r   r   r   rand_smooth_vectorsz   s    rB   c              	   C   s   t j| dd} t j|dd}| jd }|jd |ks8J | jd |jd  }}d}t|D ]&}|t|t| | |t|| 7 }qZ|S )z< size of intersection between each line of two result tablesr   r   r   r9   )r   r   r	   rangeZranklist_intersection_sizer   )ZI1ZI2r   Zk1Zk2Zninterir   r   r   eval_intersection   s    
rE   c                 C   s    t | jd | jd t|  d S )Nr9   r   )Zfvec_renorm_L2r	   r   xr   r   r   normalize_L2   s    rH   c                 C   s|   t j| dd} |du r&t|  d }t j|d dd}t j| jdd}t| jt| 	d|t|t|| ||fS )a  Perform a bucket sort on a table of integers.

    Parameters
    ----------
    tab : array_like
        elements to sort, max value nbucket - 1
    nbucket : integer
        number of buckets, None if unknown
    nt : integer
        number of threads to use (0 = use unthreaded codepath)

    Returns
    -------
    lims : array_like
        cumulative sum of bucket sizes (size vmax + 1)
    perm : array_like
        perm[lims[i] : lims[i + 1]] contains the indices of bucket #i (size tab.size)
    r   r   Nr9   r:   )
r   r   intmaxr    r0   bucket_sort_cr   r   r;   )tabnbucketntlimspermr   r   r   bucket_sort   s    rQ   c                 C   sn   | j dks| j dksJ | j\}}|du r:t|  d }tj|d dd}t||t| |t|| |S )a  Perform a bucket sort on a matrix, recording the original
    row of each element.

    Parameters
    ----------
    tab : array_like
        array of size (N, ncol) that contains the bucket ids, maximum
        value nbucket - 1.
        On output, it the elements are shuffled such that the flat array
        tab.ravel()[lims[i] : lims[i + 1]] contains the row numbers
        of each bucket entry.
    nbucket : integer
        number of buckets (the maximum value in tab should be nbucket - 1)
    nt : integer
        number of threads to use (0 = use unthreaded codepath)

    Returns
    -------
    lims : array_like
        cumulative sum of bucket sizes (size vmax + 1)
    int32r   Nr9   r   )	r   r	   rI   rJ   r   r    matrix_bucket_sort_inplace_cr   r   )rL   rM   rN   ZnrowZncolrO   r   r   r   matrix_bucket_sort_inplace   s    

rT   c                   @   s2   e Zd ZdZdddZdd Zdd Zd	d
 ZdS )
ResultHeapz_Accumulate query results from a sliced dataset. The final result will
    be in self.D, self.I.Fc                 C   s~   t j||fdd| _t j||fdd| _|| | _| _|rBt }nt }||_||_t	| j|_
t	| j|_|  || _dS )z
        nq: number of query vectors,
        k: number of results per query
        keep_max: keep the top-k maximum values instead of the minima
        r   r   r   N)r   r
   r   r   r(   r   r   r   r   r   r   r   r   heaps)selfr(   r   keep_maxrV   r   r   r   __init__   s    zResultHeap.__init__c                 C   sd   |j \}}tj|dd}tj|dd}|j ||fks8J || jksFJ | j|t|t|| dS )z
        Add results for all heaps
        D, I should be of size (nh, nres)
        D, I do not need to be in a particular order (heap or sorted)
        r   r   r   N)r	   r   r   r(   rV   Zaddn_with_idsr   )rW   r   r   r(   kdr   r   r   
add_result   s    
zResultHeap.add_resultc                 C   s   |j \}}|t|ksJ |jdkr0|j |j ksJ|jdkrF|j |fksJJ tj|dd}tj|dd}tj|dd}|jdkrdn|}| j|t||t|t|| dS )z
        Add results for a subset of heaps.
        D, I should hold resutls for all the subset
        as a special case, if I is 1D, then all ids are assumed to be the same
           r9   r   r   r   r   N)r	   lenr<   r   r   rV   Zaddn_query_subset_with_idsr   )rW   Zsubsetr   r   ZnsubsetrZ   Z	id_strider   r   r   add_result_subset  s$    


zResultHeap.add_result_subsetc                 C   s   | j   d S N)rV   r   )rW   r   r   r   finalize  s    zResultHeap.finalizeN)F)__name__
__module____qualname____doc__rY   r[   r^   r`   r   r   r   r   rU      s
   
rU   Fc           	   	   C   s|   |j | j ksJ | j \}}}tj||f| jd}tj||f|jd}|rLtnt}||||t| t|t|t| ||fS )z
    Merge a set of sorted knn-results obtained from different shards in a dataset
    Dall and Iall are of size (nshard, nq, k) each D[i, j] should be sorted
    returns D, I of size (nq, k) as the merged result set
    r   )r	   r   r    r   Zmerge_knn_results_CMaxZmerge_knn_results_CMinr   )	ZDallZIallrX   Znshardr   r   ZDnewZInewfuncr   r   r   merge_knn_results  s    rf   c                   @   s$   e Zd Zdd Zdd Zdd ZdS )MapInt64ToInt64c                 C   sX   t t|| _|d| j ks&J d|| _tj|dfdd| _t| jt	| j d S )Nr\   zneed power of 2 capacityr   r   )
rI   r   log2log2_capacitycapacityr    rL   r   Zhashtable_int64_to_int64_initr   )rW   rj   r   r   r   rY   3  s
    zMapInt64ToInt64.__init__c                 C   s>   |j \}|j |fksJ t| jt| j|t|t| d S r_   )r	   r   Zhashtable_int64_to_int64_addri   r   rL   )rW   keysvalsr   r   r   r   add:  s    zMapInt64ToInt64.addc                 C   s>   |j \}tj|fdd}t| jt| j|t|t| |S r5   )r	   r   r    r   Zhashtable_int64_to_int64_lookupri   r   rL   )rW   rk   r   rl   r   r   r   lookupA  s    zMapInt64ToInt64.lookupN)ra   rb   rc   rY   rm   rn   r   r   r   r   rg   1  s   rg           c                 C   s   t j| dd} t j|dd}| j\}}|j\}}||ks<J t j||fdd}	t j||fdd}
|tkrtt| t|||||t|
t|	 nZ|tkrtt| t|||||t|
t|	 n*t	t| t|||||||t|
t|	
 |
|	fS )a  
    Compute the k nearest neighbors of a vector without constructing an index


    Parameters
    ----------
    xq : array_like
        Query vectors, shape (nq, d) where the dimension d is that same as xb
        `dtype` must be float32.
    xb : array_like
        Database vectors, shape (nb, d) where dimension d is the same as xq
        `dtype` must be float32.
    k : int
        Number of nearest neighbors.
    metric : MetricType, optional
        distance measure to use (either METRIC_L2 or METRIC_INNER_PRODUCT)

    Returns
    -------
    D : array_like
        Distances of the nearest neighbors, shape (nq, k)
    I : array_like
        Labels of the nearest neighbors, shape (nq, k)
    r   r   r   )
r   r   r	   r    r!   Z	knn_L2sqrr   r"   Zknn_inner_productZknn_extra_metrics)r$   r%   r   r&   r'   r(   r)   r*   r+   r   r   r   r   r   knnM  s.    

rp   hcc                 C   s   | j \}}|j \}}||ks J tj||fdd}tj||fdd}	|dkrt }
||
_||
_t|	|
_t||
_	t
|
t| t|||d n>|dkrtt| t|||||t|t|	 nt||	fS )a  
    Compute the k nearest neighbors of a set of vectors without constructing an index.

    Parameters
    ----------
    xq : array_like
        Query vectors, shape (nq, d) where d is the number of bits / 8
        `dtype` must be uint8.
    xb : array_like
        Database vectors, shape (nb, d) where d is the number of bits / 8
        `dtype` must be uint8.
    k : int
        Number of nearest neighbors.
    variant : string
        Function variant to use, either "mc" (counter) or "hc" (heap)

    Returns
    -------
    D : array_like
        Distances of the nearest neighbors, shape (nq, k)
    I : array_like
        Labels of the nearest neighbors, shape (nq, k)
    rR   r   r   rq   r9   Zmc)r	   r   r    r   Zint_maxheap_array_tr   r   r   r   r   Zhammings_knn_hcZhammings_knn_mcNotImplementedError)r$   r%   r   variantr(   r)   r*   r+   r   r   heapr   r   r   knn_hamming  s,    

ru   c                   @   s<   e Zd ZdZdd Zdd ZdddZdd	d
Zdd ZdS )Kmeansa  Object that performs k-means clustering and manages the centroids.
    The `Kmeans` class is essentially a wrapper around the C++ `Clustering` object.

    Parameters
    ----------
    d : int
       dimension of the vectors to cluster
    k : int
       number of clusters
    gpu: bool or int, optional
       False: don't use GPU
       True: use all GPUs
       number: use this many GPUs
    progressive_dim_steps:
        use a progressive dimension clustering (with that number of steps)

    Subsequent parameters are fields of the Clustring object. The most important are:

    niter: int, optional
       clustering iterations
    nredo: int, optional
       redo clustering this many times and keep best
    verbose: bool, optional
    spherical: bool, optional
       do we want normalized centroids?
    int_centroids: bool, optional
       round centroids coordinates to integer
    seed: int, optional
       seed for the random number generator

    c                 K   s   || _ | | d| _d|v r(t | _nt | _| D ]H\}}|dkrf|dksX|dkr^t }|| _q8t| j| t	| j|| q8| 
  dS )zd: input dimension, k: nb of centroids. Additional
         parameters are passed on the ClusteringParameters object,
         including niter=25, verbose=False, spherical = False
        FZprogressive_dim_stepsgpuTN)r)   resetrw   Z"ProgressiveDimClusteringParameterscpClusteringParametersitemsZget_num_gpusgetattrsetattr	set_index)rW   r)   r   kwargsvr   r   r   rY     s    

zKmeans.__init__c                 C   sp   | j }| jjtkrL| jjr&t|| _n
t|| _| jrlt	j
| j| jd| _n | jr`t| jd}nt }|| _d S )N)Zngpu)r)   rz   	__class__r{   	sphericalZIndexFlatIPindexZIndexFlatL2rw   r   Zindex_cpu_to_all_gpusZGpuProgressiveDimIndexFactoryZProgressiveDimIndexFactoryfac)rW   r)   r   r   r   r   r     s    
zKmeans.set_indexNc                 C   s(   |durt || _d| _d| _d| _dS )zg prepare k-means object to perform a new clustering, possibly
        with another number of centroids N)rI   r   	centroidsobjiteration_stats)rW   r   r   r   r   ry     s
    
zKmeans.resetc           
         sV  t j|dd}|j\}}|| jks&J | jjtkrt|| j| j}|durr|j\}}||ks`J t	
| |j ||| j| nH|du sJ |du sJ | jjrJ t|| j| j}||t|| j t	|j}	|	| j|| _|jfddt D t dd D | _d   fddD | _| jjd	krR| jd
 S dS )a   Perform k-means clustering.
        On output of the function call:

        - the centroids are in the centroids field of size (`k`, `d`).

        - the objective value at each iteration is in the array obj (size `niter`)

        - detailed optimization statistics are in the array iteration_stats.

        Parameters
        ----------
        x : array_like
            Training vectors, shape (n, d), `dtype` must be float32 and n should
            be larger than the number of clusters `k`.
        weights : array_like
            weight associated to each vector, shape `n`
        init_centroids : array_like
            initial set of centroids, shape (n, d)

        Returns
        -------
        final_obj: float
            final optimization objective

        r   r   Nc                    s   g | ]}  |qS r   )at).0rD   )statsr   r   
<listcomp>>      z Kmeans.train.<locals>.<listcomp>c                 S   s   g | ]
}|j qS r   )r   )r   str   r   r   r   ?  r   z,obj time time_search imbalance_factor nsplitc                    s   g | ]  fd dD qS )c                    s   i | ]}|t  |qS r   )r}   )r   fieldr   r   r   
<dictcomp>C  r   z+Kmeans.train.<locals>.<listcomp>.<dictcomp>r   )r   )stat_fieldsr   r   r   B  s   r   rx   ro   )r   r   r	   r)   rz   r   r{   Z
Clusteringr   r   Zcopy_array_to_vectorravelr   trainr   r   ZProgressiveDimClusteringr   r   Zvector_float_to_arrayZreshaper   rC   r0   r   r   split)
rW   rG   weightsZinit_centroidsr   r)   Zclusncr+   r   r   )r   r   r   r     s2    


zKmeans.trainc                 C   sZ   t j|dd}| jd us J d| j  | j| j | j|d\}}| | fS )Nr   r   zshould train before assigningr9   )r   r   r   r   ry   rm   searchr   )rW   rG   r   r   r   r   r   assignH  s    
zKmeans.assign)N)NN)	ra   rb   rc   rd   rY   r   ry   r   r   r   r   r   r   rv     s    
	
<rv   c                 C   s   t | tjjS r_   )
isinstancecollectionsabcSequencerF   r   r   r   is_sequenceU  s    r   c                 C   s   | j \}}tj| dd} t|rtj|dd}|j |fks>J t| d d }tj||fdd}t||t|t| t|| n<|| d d }tj||fdd}t|||t| t|| |S )a>  
    Pack a set integers (i, j) where i=0:n and j=0:M into
    n bitstrings.
    Output is an uint8 array of size (n, code_size), where code_size is
    such that at most 7 bits per code are wasted.

    If nbit is an integer: all entries takes nbit bits.
    If nbit is an array: entry (i, j) takes nbit[j] bits.
    rR   r         r8   )	r	   r   r   r   rI   sumr    pack_bitstrings_cr   )r=   nbitr   M	code_sizebr   r   r   pack_bitstringsZ  s    

r   c                 C   s   | j \}}|du rztj|dd}t|}t| d d }||ksHJ tj||fdd}t||t|t| |t| nL|}|| d d }||ksJ tj||fdd}t|||t| |t| |S )a  
    Unpack a set integers (i, j) where i=0:n and j=0:M from
    n bitstrings (encoded as uint8s).
    Input is an uint8 array of size (n, code_size), where code_size is
    such that at most 7 bits per code are wasted.

    Two forms:
    - when called with (array, M, nbit): there are M entries of size
      nbit per row
    - when called with (array, nbits): element (i, j) is encoded in
      nbits[j] bits
    NrR   r   r   r   )	r	   r   r   r]   rI   r   r    unpack_bitstrings_cr   )r   Z
M_or_nbitsr   r   r   r   Zmin_code_sizer=   r   r   r   unpack_bitstringsu  s&    

r   )r.   )r.   N)r.   )r@   )Nr   )Nr   )F)rq   )N)"numpyr   Zfaiss.loaderr   collections.abcr   r   r   r!   r-   r4   r6   Zlrandr7   r?   rB   rA   rE   rH   rQ   rK   rT   rS   rU   rf   rg   rp   ru   rv   r   r   r   r   r   r   r   r   r   <module>   s>   

	




'@
6
8 