o
    $jD                     @  st  d dl mZ d dlmZmZ d dlZd dlmZ d dl	m
Z
 d dlmZ d dlmZ d dlmZ d dlZd d	lmZmZ d d
lmZ d dlmZmZ d dlmZmZmZmZ d dlm Z m!Z!m"Z" erhd dlm#Z# ej$ej%ej&ej'ej(ej)ej)dZ*ej&ej+dfej)ej,e
fej$ej-dfej%ej-dfej'ej-dfej.ej,dfej(ej/d fiZ0ej-dej+dej,diZ1G dd deZ2dS )    )annotations)TYPE_CHECKINGAnyN)infer_dtype)iNaT)NoBufferPresent)cache_readonly)BaseMaskedDtype)
ArrowDtypeDatetimeTZDtype)is_string_dtype)PandasBufferPandasBufferPyarrow)ColumnColumnBuffersColumnNullType	DtypeKind)ArrowCTypes
Endiannessdtype_to_arrow_c_fmt)Buffer)iufbUMmzThis column is non-nullablezThis column uses NaN as nullz!This column uses a sentinel valuec                   @  s   e Zd ZdZd1d2d	d
Zd3ddZed3ddZed4ddZ	d4ddZ
edd Zedd Zed3ddZed5ddZd3ddZd6d7d#d$Zd8d&d'Zd9d)d*Zd:d,d-Zd;d/d0Zd S )<PandasColumna  
    A column object, with only the methods and properties required by the
    interchange protocol defined.
    A column can contain one or more chunks. Each chunk can contain up to three
    buffers - a data buffer, a mask buffer (depending on null representation),
    and an offsets buffer (if variable-size binary; e.g., variable-length
    strings).
    Note: this Column object can only be produced by ``__dataframe__``, so
          doesn't need its own version or ``__column__`` protocol.
    Tcolumn	pd.Series
allow_copyboolreturnNonec                 C  sN   t |tjrtd|j dt |tjstdt| d|| _|| _	dS )zu
        Note: doesn't deal with extension arrays yet, just assume a regular
        Series/ndarray for now.
        zExpected a Series, got a DataFrame. This likely happened because you called __dataframe__ on a DataFrame which, after converting column names to string, resulted in duplicated names: zD. Please rename these columns before using the interchange protocol.zColumns of type  not handled yetN)

isinstancepdZ	DataFrame	TypeErrorcolumnsSeriesNotImplementedErrortype_col_allow_copy)selfr    r"    r1   _/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/pandas/core/interchange/column.py__init__T   s   
zPandasColumn.__init__intc                 C  s   | j jS )z2
        Size of the column, in elements.
        )r.   sizer0   r1   r1   r2   r5   h   s   zPandasColumn.sizec                 C     dS )z7
        Offset of first element. Always zero.
        r   r1   r6   r1   r1   r2   offsetn   s   zPandasColumn.offsettuple[DtypeKind, int, str, str]c                 C  s~   | j j}t|tjr!| j jj}| |j\}}}}tj	||t
jfS t|r:t| j dv r6tjdt|t
jfS td| |S )N)stringempty   z.Non-string object dtypes are not supported yet)r.   dtyper'   r(   ZCategoricalDtypevaluescodes_dtype_from_pandasdtyper   CATEGORICALr   NATIVEr   r   STRINGr   r,   )r0   r=   r?   _ZbitwidthZc_arrow_dtype_f_strr1   r1   r2   r=   v   s.   


zPandasColumn.dtypec                 C  s   t |jd}|du rtd| dt|tr|jj}nt|tr'|j	j}nt|t
r1|jj}n|j}|dkr@||jtj|fS ||jd t||fS )z/
        See `self.dtype` for details.
        N
Data type z& not supported by interchange protocolzbool[pyarrow]r<   )	_NP_KINDSgetkind
ValueErrorr'   r
   Znumpy_dtype	byteorderr   baser	   itemsizer   BOOLr   )r0   r=   rH   rJ   r1   r1   r2   r@      s"   





z$PandasColumn._dtype_from_pandasdtypec                 C  s:   | j d tjkstd| jjjdtt	| jjj
dS )a:  
        If the dtype is categorical, there are two options:
        - There are only values in the data buffer.
        - There is a separate non-categorical Column encoding for categorical values.

        Raises TypeError if the dtype is not categorical

        Content of returned dict:
            - "is_ordered" : bool, whether the ordering of dictionary indices is
                             semantically meaningful.
            - "is_dictionary" : bool, whether a dictionary-style mapping of
                                categorical values to other objects exists
            - "categories" : Column representing the (implicit) mapping of indices to
                             category values (e.g. an array of cat1, cat2, ...).
                             None if not a dictionary-style categorical.
        r   zCdescribe_categorical only works on a column with categorical dtype!T)Z
is_orderedZis_dictionary
categories)r=   r   rA   r)   r.   catorderedr   r(   r+   rN   r6   r1   r1   r2   describe_categorical   s   z!PandasColumn.describe_categoricalc                 C  s   t | jjtrtj}d}||fS t | jjtr/| jjjj	d 
 d d u r*tjd fS tjdfS | jd }zt| \}}W ||fS  tyN   td| dw )N   r   rE   z not yet supported)r'   r.   r=   r	   r   USE_BYTEMASKr
   array	_pa_arraychunksbuffersNON_NULLABLEZUSE_BITMASK_NULL_DESCRIPTIONKeyErrorr,   )r0   Zcolumn_null_dtypeZ
null_valuerH   nullvaluer1   r1   r2   describe_null   s   


zPandasColumn.describe_nullc                 C  s   | j    S )zB
        Number of null elements. Should always be known.
        )r.   Zisnasumitemr6   r1   r1   r2   
null_count   s   zPandasColumn.null_countdict[str, pd.Index]c                 C  s   d| j jiS )z8
        Store specific metadata of the column.
        zpandas.index)r.   indexr6   r1   r1   r2   metadata   s   zPandasColumn.metadatac                 C  r7   )zE
        Return the number of chunks the column consists of.
        rR   r1   r6   r1   r1   r2   
num_chunks   s   zPandasColumn.num_chunksNn_chunks
int | Nonec                 c  sv    |r6|dkr6t | j}|| }|| dkr|d7 }td|| |D ]}t| jj|||  | jV  q"dS | V  dS )zy
        Return an iterator yielding the chunks.
        See `DataFrame.get_chunks` for details on ``n_chunks``.
        rR   r   N)lenr.   ranger   Zilocr/   )r0   re   r5   stepstartr1   r1   r2   
get_chunks   s   

zPandasColumn.get_chunksr   c                 C  s\   |   ddd}z|  |d< W n	 ty   Y nw z	|  |d< W |S  ty-   Y |S w )a`  
        Return a dictionary containing the underlying buffers.
        The returned dictionary has the following contents:
            - "data": a two-element tuple whose first element is a buffer
                      containing the data and whose second element is the data
                      buffer's associated dtype.
            - "validity": a two-element tuple whose first element is a buffer
                          containing mask values indicating missing data and
                          whose second element is the mask value buffer's
                          associated dtype. None if the null representation is
                          not a bit or byte mask.
            - "offsets": a two-element tuple whose first element is a buffer
                         containing the offset values for variable-size binary
                         data (e.g., variable-length strings) and whose second
                         element is the offsets buffer's associated dtype. None
                         if the data buffer does not have an associated offsets
                         buffer.
        N)datavalidityoffsetsrm   rn   )_get_data_buffer_get_validity_bufferr   _get_offsets_buffer)r0   rW   r1   r1   r2   get_buffers  s    zPandasColumn.get_buffers.tuple[Buffer, tuple[DtypeKind, int, str, str]]c           	      C  s  | j d tjtjtjtjtjfv ri| j }| j d tjkr/t| j d dkr/| jj	
d }n/| jj}t| jj tr>|j}n t| jj tr[|jjd }t| d t|d}||fS |j}t|| jd}||fS | j d tjkr| jjj}t|| jd}| |j }||fS | j d tjkr| j }t }|D ]}t|tr||j dd	 qtt!j"|d
d}| j }||fS t#d| jj  d)zZ
        Return the buffer containing the data and the buffer's associated dtype.
        r         NrR   length)r"   utf-8encodingZuint8)r=   rE   r&   )$r=   r   INTUINTFLOATrM   DATETIMErg   r.   dtZ
tz_convertto_numpyrT   r'   r	   _datar
   rU   rV   r   rW   Z_ndarrayr   r/   rA   r>   _codesr@   rC   	bytearraystrextendencodenpZ
frombufferr,   )	r0   r=   Znp_arrarrbufferr?   bufr   objr1   r1   r2   ro   0  sN   	"



zPandasColumn._get_data_buffertuple[Buffer, Any] | Nonec                 C  s`  | j \}}t| jjtr7| jjjjd }tj	dt
j	tjf}| d du r'dS t| d t|d}||fS t| jjtrT| jjj}t|}tj	dt
j	tjf}||fS | jd tjkr| j }|dk}| }tjt|ftjd}t|D ]\}	}
t|
tr|n|||	< qwt|}tj	dt
j	tjf}||fS zt|  d}W t| ty   tdw )	z
        Return the buffer containing the mask values indicating missing data and
        the buffer's associated dtype.
        Raises NoBufferPresent if null representation is not a bit or byte mask.
        r   rR   Nrv   r<   shaper=   z! so does not have a separate maskzSee self.describe_null)r]   r'   r.   r=   r
   rT   rU   rV   r   rM   r   r   rB   rW   r   rg   r	   Z_maskr   rC   r   r   zerosZbool_	enumerater   _NO_VALIDITY_BUFFERrZ   r,   r   )r0   r[   invalidr   r=   r   maskr   validr   r   msgr1   r1   r2   rp   n  s@   



z!PandasColumn._get_validity_buffertuple[PandasBuffer, Any]c           	      C  s   | j d tjkrM| j }d}tjt|d ftjd}t	|D ]\}}t
|tr5|jdd}|t|7 }|||d < q t|}tjdtjtjf}||fS td)a  
        Return the buffer containing the offset values for variable-size binary
        data (e.g., variable-length strings) and the buffer's associated dtype.
        Raises NoBufferPresent if the data buffer does not have an associated
        offsets buffer.
        r   rR   r   rx   ry   @   zJThis column has a fixed-length dtype so it does not have an offsets buffer)r=   r   rC   r.   r   r   r   rg   Zint64r   r'   r   r   r   r{   r   ZINT64r   rB   r   )	r0   r>   ptrrn   r   vr   r   r=   r1   r1   r2   rq     s&   

z PandasColumn._get_offsets_buffer)T)r    r!   r"   r#   r$   r%   )r$   r4   )r$   r9   )r$   ra   )N)re   rf   )r$   r   )r$   rs   )r$   r   )r$   r   )__name__
__module____qualname____doc__r3   r5   propertyr8   r   r=   r@   rQ   r]   r`   rc   rd   rk   rr   ro   rp   rq   r1   r1   r1   r2   r   H   s.    

!




%
>9r   )3
__future__r   typingr   r   numpyr   Zpandas._libs.libr   Zpandas._libs.tslibsr   Zpandas.errorsr   Zpandas.util._decoratorsr   Zpandas.core.dtypes.dtypesr	   Zpandasr(   r
   r   Zpandas.api.typesr   Zpandas.core.interchange.bufferr   r   Z*pandas.core.interchange.dataframe_protocolr   r   r   r   Zpandas.core.interchange.utilsr   r   r   r   r{   r|   r}   rM   rC   r~   rF   ZUSE_NANZUSE_SENTINELrX   rA   rS   rY   r   r   r1   r1   r1   r2   <module>   sJ    