o
    $j%                     @  s  d Z ddlmZ ddlZddlmZ ddlZddlm	Z	 ddl
mZ ddlmZ ddlmZmZmZmZmZ erVdd	lmZmZmZ dd
lmZmZ ddlmZmZmZmZ dZ d-ddZ!dde dfd.d!d"Z"de fd/d%d&Z#de dfd0d(d)Z$de dfd1d+d,Z%dS )2z"
data hash pandas / numpy objects
    )annotationsN)TYPE_CHECKING)hash_object_array)is_list_like)CategoricalDtype)ABCDataFrameABCExtensionArrayABCIndexABCMultiIndex	ABCSeries)HashableIterableIterator)	ArrayLikenpt)	DataFrameIndex
MultiIndexSeriesZ0123456789123456arraysIterator[np.ndarray]	num_itemsintreturnnpt.NDArray[np.uint64]c           	      C  s   zt | }W n ty   tjg tjd Y S w t|g| } td}t|td }d}t| D ]\}}|| }||N }||9 }|td| | 7 }|}q4|d |ks\J d|td7 }|S )	z
    Parameters
    ----------
    arrays : Iterator[np.ndarray]
    num_items : int

    Returns
    -------
    np.ndarray[uint64]

    Should be the same as CPython's tupleobject.c
    )dtypeiCB ixV4 r   iXB    zFed in wrong num_itemsi| )	nextStopIterationnparrayuint64	itertoolschainZ
zeros_like	enumerate)	r   r   firstZmultoutZlast_iiaZ	inverse_i r)   Y/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/pandas/core/util/hashing.pycombine_hash_arrays/   s$   
r+   Tutf8objIndex | DataFrame | Seriesindexboolencodingstrhash_key
str | None
categorizer   c                   s|  ddl m} du rtttr|tdddS ttr8tj j	ddd}||ddd}|S tt
rotj j	ddd}|rd fd	d
dD }t|g|}	t|	d}||jddd}|S ttr fdd
 D }
tj}|r fdd
dD }|d7 }t|
|}dd
 |D }
t|
|}||jddd}|S tdt )a>  
    Return a data hash of the Index/Series/DataFrame.

    Parameters
    ----------
    obj : Index, Series, or DataFrame
    index : bool, default True
        Include the index in the hash (if Series/DataFrame).
    encoding : str, default 'utf8'
        Encoding for data & key when strings.
    hash_key : str, default _default_hash_key
        Hash_key for string key to encode.
    categorize : bool, default True
        Whether to first categorize object arrays before hashing. This is more
        efficient when the array contains duplicate values.

    Returns
    -------
    Series of uint64, same length as the object

    Examples
    --------
    >>> pd.util.hash_pandas_object(pd.Series([1, 2, 3]))
    0    14639053686158035780
    1     3869563279212530728
    2      393322362522515241
    dtype: uint64
    r   )r   Nr!   F)r   copyr6   )r/   r   r6   c                 3  &    | ]}t jd  djV  qdS F)r/   r1   r3   r5   Nhash_pandas_objectr/   _values.0_r5   r1   r3   r-   r)   r*   	<genexpr>       
z%hash_pandas_object.<locals>.<genexpr>N   c                 3  s$    | ]\}}t |j V  qd S rC   )
hash_arrayr<   )r>   r?   Zseries)r5   r1   r3   r)   r*   rA      s
    
c                 3  r8   r9   r:   r=   r@   r)   r*   rA      rB   r   c                 s  s    | ]}|V  qd S rC   r)   )r>   xr)   r)   r*   rA      s    zUnexpected type for hashing )pandasr   _default_hash_key
isinstancer
   hash_tuplesr	   rE   r<   astyper   r"   r#   r+   r/   r   itemslencolumns	TypeErrortype)r-   r/   r1   r3   r5   r   hZserZ
index_iterr   hashesr   Zindex_hash_generator_hashesr)   r@   r*   r;   S   sN   #

2






r;   vals+MultiIndex | Iterable[tuple[Hashable, ...]]c                   sz   t | stdddlm m} t| ts|| n|  fddtj	D }fdd|D }t
|t|}|S )a  
    Hash an MultiIndex / listlike-of-tuples efficiently.

    Parameters
    ----------
    vals : MultiIndex or listlike-of-tuples
    encoding : str, default 'utf8'
    hash_key : str, default _default_hash_key

    Returns
    -------
    ndarray[np.uint64] of hashed values
    z'must be convertible to a list-of-tuplesr   )Categoricalr   c              	     s,   g | ]}  j| tj| d dqS )F
categoriesordered)_simple_newcodesr   Zlevels)r>   level)rV   mir)   r*   
<listcomp>   s    zhash_tuples.<locals>.<listcomp>c                 3  s     | ]}|j  d dV  qdS )Fr1   r3   r5   N)_hash_pandas_object)r>   cat)r1   r3   r)   r*   rA      s
    
zhash_tuples.<locals>.<genexpr>)r   rO   rG   rV   r   rI   r
   from_tuplesrangeZnlevelsr+   rM   )rT   r1   r3   r   Zcat_valsrR   rQ   r)   )rV   r1   r3   r]   r*   rJ      s   
	rJ   r   c                 C  s\   t | ds	tdt| tr| j|||dS t| tjs'tdt| j dt	| |||S )a  
    Given a 1d array, return an array of deterministic integers.

    Parameters
    ----------
    vals : ndarray or ExtensionArray
    encoding : str, default 'utf8'
        Encoding for data & key when strings.
    hash_key : str, default _default_hash_key
        Hash_key for string key to encode.
    categorize : bool, default True
        Whether to first categorize object arrays before hashing. This is more
        efficient when the array contains duplicate values.

    Returns
    -------
    ndarray[np.uint64, ndim=1]
        Hashed values, same length as the vals.

    Examples
    --------
    >>> pd.util.hash_array(np.array([1, 2, 3]))
    array([ 6238072747940578789, 15839785061582574730,  2185194620014831856],
      dtype=uint64)
    r   zmust pass a ndarray-liker_   z6hash_array requires np.ndarray or ExtensionArray, not z!. Use hash_pandas_object instead.)
hasattrrO   rI   r   r`   r   ZndarrayrP   __name___hash_ndarray)rT   r1   r3   r5   r)   r)   r*   rE      s   

rE   
np.ndarrayc                 C  s  | j }t|tjr t| j|||}t| j|||}|d|  S |tkr*| d} nwt	|j
tjtjfr?| djddd} nbt	|j
tjrY|jdkrY| d| j j d} nH|rdd	lm}m}m}	 |	| dd
\}
}t||dd}||
|}|j||ddS zt| ||} W n ty   t| tt||} Y nw | | d? N } | td9 } | | d? N } | td9 } | | d? N } | S )z!
    See hash_array.__doc__.
       u8i8Fr7      ur   )rV   r   	factorize)sortrW   r_      l   e9z    l   b&&&	    )r   r   Z
issubdtypeZ
complex128rf   realimagr0   rK   
issubclassrP   Z
datetime64Ztimedelta64viewnumberitemsizerG   rV   r   rm   r   rZ   r`   r   rO   r2   objectr!   )rT   r1   r3   r5   r   Z	hash_realZ	hash_imagrV   r   rm   r[   rX   ra   r)   r)   r*   rf     s@   	rf   )r   r   r   r   r   r   )r-   r.   r/   r0   r1   r2   r3   r4   r5   r0   r   r   )rT   rU   r1   r2   r3   r2   r   r   )
rT   r   r1   r2   r3   r2   r5   r0   r   r   )
rT   rg   r1   r2   r3   r2   r5   r0   r   r   )&__doc__
__future__r   r"   typingr   numpyr   Zpandas._libs.hashingr   Zpandas.core.dtypes.commonr   Zpandas.core.dtypes.dtypesr   Zpandas.core.dtypes.genericr   r   r	   r
   r   collections.abcr   r   r   Zpandas._typingr   r   rG   r   r   r   r   rH   r+   r;   rJ   rE   rf   r)   r)   r)   r*   <module>   s>    	
&f43