o
    $j                     @   sX  d Z ddlZddlZddlmZmZmZmZm	Z	m
Z
mZmZmZ ddlmZ ddlmZ dd Zdd Zejjejd	d
dgejdddgejdddgejjdddddggedejjddddggedejdd
dgejdddgejdd
dgejdd
dgejdd
dgdd Zejd d
dgd!d" Zejd#d$d%gg d&gd'd( Zejd#d$d%gg d&gd)d* Zd+d, Zd-d. Zejd/d0 Z d1d2 Z!d3d4 Z"d5d6 Z#d7d8 Z$ejd9g d:ejdddgejd;g d<ejd=d
dgejd>d
dgd?d@ Z%ejdAd
dgejdBddg dCg dDg dEfd
dg dFg dGg dHfd
d
g dIg dJg dKfgdLdM Z&ejdNdO Z'ejdPddddQg dRg dSfd
d
ddQg dTg dUfd
dddQg dVg dWfd
dd
dXg dYg dWfgdZd[ Z(ejd\d] Z)ejd^ddg d_g d`fdd
g dag dbfd
dg dcg ddfd
d
g deg dffgdgdh Z*ejdidj Z+ejdkd
dldlge
j,g dmg dndofdg dpe
e	dlge	g dqe	drdsej-ggg dtg dug dvgg dndwfgejdddgdxdy Z.ejd=dd
gejdzdd
gejd{ddQej/g d|ej0d}fd
dXe/g d~fgdd Z1dd Z2ejd=d
dgejd{ddQej/g d|ej0d}fd
dXe/g d~fgdd Z3ejd=d
dgejd{ddQej/g dej0d}fd
dXe/g dfgdd Z4ejd=d
dgejddg dfd
g dfgejd{ddQej/g dej0d}fd
dXe/g dfgdd Z5ejd=dd
gejdzdd
gejd{ddQej/g d|ej0d}fd
dXe/g d~fgdd Z6ejdddQg dfd
dXg dffgdd Z7ejdde8dg dfde8ddg g dfgejd=dd
gdd Z9ejdddgdd Z:dd Z;dd Z<dd Z=dd Z>dd Z?ejd d
dgdd Z@dd ZAejdd
dgejdAd
dgdd ZBejdd
dgejdAd
dgdd ZCdS )z
these are systematically testing all of the args to value_counts
with different size combinations. This is to ensure stability of the sorting
and proper parameter handling
    N)	CategoricalCategoricalIndex	DataFrameGrouperIndex
MultiIndexSeries
date_rangeto_datetime)Versionc                  C   s   t dgdgd} | d d| d< | dd  }t ddggddgd}|d d|d< t|}tdg|d	d
}t|| d S )NfemaleUS)gendercountryr   categoryr   columns   countindexname)	r   astypegroupbyvalue_countsr   
from_framer   tmassert_series_equal)dfresultZdf_mi_expectedZmi_expectedexpected r!   o/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/pandas/tests/groupby/methods/test_value_counts.py.tests_value_counts_index_names_category_column   s   
r#   c                 C   s   t ddd}ttjdtd|tjd||tjdd|d |d}| rm|d d	|d< tj	|j
dd d
df< tj	|j
dd ddf< tj	|j
dd ddf< tj	|j
dd ddf< tj	|j
dd ddf< |S )Nz
2015-08-24
   )Zperiods   abcdr   )1st2nd3rdr)   float   r'         r(            	   )r	   r   nprandomZdefault_rngchoicelistZintegersr   nanloc)	seed_nansnmdaysframer!   r!   r"   seed_df.   s   r=   r8   TFnum_rowsr$   2   max_int      keysr'   r(   )Zidsbinsisortznormalize, name)T
proportion)Fr   sort	ascendingdropnac                 C   s   t | ||}dd }|||	|
|d}|j||d}|d jdi |}|j||d}|d jtjfi |}|jjd d dg |j_||}t|||f\}}t	
| |  d S )Nc                 S   s2   t t| jjt| jj}tj|| jjd| _| S )Nnames)	r5   mapr   Zget_level_valuesrangeZnlevelsr   from_arraysrK   )r   Zarrr!   r!   r"   rebuild_index_   s   z7test_series_groupby_value_counts.<locals>.rebuild_index)	normalizerG   rH   rI   rD   rG   r)   r!   )r=   r   r   applyr   r   rK   renamerL   r   r   
sort_index)r8   r>   r@   rC   rD   rE   rP   r   rG   rH   rI   r   rO   kwargsgrleftrightr!   r!   r"    test_series_groupby_value_countsE   s    
rZ   utcc                 C   s   t g dg dddg}t|d | dd|d< |td	dd
}|d   }|d tj }|j	j
|j	_
|d}t|| d S )NiGI]i)J]iJ]iK]i)<M]iU=M]iN]appler^   bananar_   oranger`   pear	TimestampFoodr,   rc   sr[   unitDatetime1Dfreqkeyrd   r   )r   dropr
   r   r   r   rU   rS   r   r   rK   rT   r   r   )r[   r   dfgr   r    r!   r!   r"   -test_series_groupby_value_counts_with_grouperz   s   	
ro   r   AB)rp   rq   Cc                 C   sf   t | d}|| d d }|| d   }tg |jdd}tjg gt|  | d|_t	
|| d S )Nr   rR   r   )dtyper   rJ   )r   r   r   r   rs   r   rN   lenr   r   r   r   r   rn   r   r    r!   r!   r"   &test_series_groupby_value_counts_empty   s   
rv   c                 C   sP   t tt| g| d}|| d d }|| d   }| }t|| d S )N)datar   rR   )r   rM   rt   r   r   r   r   ru   r!   r!   r"   (test_series_groupby_value_counts_one_row   s
   rx   c                  C   sp   t tdgddgd} | dg }t ddgttddgtddgddgdddgd	d
}t	
|| d S )Nab)
categoriesr   r   Fr   )r{   orderedrs   r   rw   r   r   )r   r   r   r   r   rN   r2   arrayr   r   r   )re   r   r    r!   r!   r"   /test_series_groupby_value_counts_on_categorical   s   r   c                  C   s   t g dg dg dd} | jddgddd	 }|jdd}td
dgddgg dgg dg dg dgg dd}tg d|dd}t|| d S )Nmaler   r   r   r   r   lowmediumhighr   r   r   r   FRr   r   r   r   r   	educationr   r   r   FrQ   r   r   r   r   r   )r   r   r   )r   r   r   r   r   )r   r   r   r   r   )r   r   r%   r   r%   r   r   r   levelscodesrK   r   r   r   r%   r   r   r   )r   r   r   r   r   r   r   )r   gbr   r   r    r!   r!   r"   (test_series_groupby_value_counts_no_sort   s   r   c                   C   s   t g dg dg ddS )Nr   r   r   r   r   r!   r!   r!   r"   education_df   s   r   c                 C   s|   d}t jt|d | jddd}W d    n1 sw   Y  tjtdd |  W d    d S 1 s7w   Y  d S )Nz+DataFrame.groupby with axis=1 is deprecatedmatchr   r   axisr   )r   assert_produces_warningFutureWarningr   pytestraisesNotImplementedErrorr   )r   msggpr!   r!   r"   	test_axis   s   
"r   c                 C   sL   |  d}tjtdd |jdgd W d    d S 1 sw   Y  d S )Nr   subsetr   r   )r   r   r   
ValueErrorr   )r   r   r!   r!   r"   test_bad_subset   s   
"r   c                 C   sv   t tjt dkr|tjjddd | dddg jdd	}t	g d
t
jg dg dddd}t|| d S )N1.25Ypandas default unstable sorting of duplicatesissue with numpy>=1.25 with AVX instructionsFreasonstrictr   r   r   TrP   )      ?      ?r   r   r   )r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rJ   rF   r}   )r   r2   __version__applymarkerr   markxfailr   r   r   r   from_tuplesr   r   )r   requestr   r    r!   r!   r"   
test_basic   s&   	
r   c                 C   s   | | j |||dS )NrP   rG   rH   )r   )r   rC   rP   rG   rH   r!   r!   r"   _frame_value_counts  s   r   r   columnr~   functionzsort, ascending))FN)TT)TFas_indexr<   c
                    s  t tjt dkr|r|r|r|tjjddd d d j fddd| }
 j|
|d	}|d
dg j	|||d}|r|dkrDt
nd }d}tj||d |td
dg|||}W d    n1 sew   Y  |rtt|| d S |rxdnd}| jd|idd}|dkr|jddidd}t|d dd|d< n|dkr|d dk|d< nt|d dd|d< t|| d S  d
 d  d   d< |d j	|||d}||_|r,|jjdd}|d jdjd|d
< |d jdjd|d< |d= |jdd idd}t||_|j d r$d g|jjdd   |j_t|| d S |dd
|d jdjd |dd|d jdjd |	rY| ddd}|d= t|| d S )Nr   r   Fr   r   c                    s    d |  dkS )Nr   r   r!   )xr   r!   r"   <lambda>K      z6test_against_frame_and_seriesgroupby.<locals>.<lambda>r   )byr   r   r   r   r   z7DataFrameGroupBy.apply operated on the grouping columnsr   rF   r   r   r   r   level_0r   r   r   -Zbothr   r%   str)r   r   )!r   r2   r   r   r   r   r   valuesr   r   r   r   r   rS   r   r   reset_indexrT   whereassert_frame_equalr   r   Zto_framer   splitgetr   r   r   ZisnarK   insertr   )r   r   rP   r   rG   rH   r   r<   r   using_infer_stringr   r   r   warnr   r    Zindex_frameZindex_frame2r!   r   r"   $test_against_frame_and_seriesgroupby  sr    

""r   rP   zCsort, ascending, expected_rows, expected_count, expected_group_size)r   r   r%   r,      r   )r   r,   r   r,   r   )r,   r   r   r%   r   )r%   r   r   r   r   )r,   r   r,   r   r   )r   r   r%   r   r,   )r   r   r   r   r%   )r   r,   r   r   r,   c	                    s  |}	 |	j |	_jddgddd}
|
d j|||d}t }dD ]  fdd	|D | < | |	}|j |	|_q'|r]||d
< |d
  |  < |	dkr\|d
  |d
< n||d< |	dkrm|d  |d< |r||	tkr|| dddd}t|| d S )Nr   r   Fr   rG   r   r   r   c                       g | ]}  | qS r!   r!   .0rowr   r   r!   r"   
<listcomp>      z!test_compound.<locals>.<listcomp>rF   zstring[pyarrow]r   r   )	r   r   r   r   r   Zconvert_dtypesobjectr   r   )r   rP   rG   rH   expected_rowsZexpected_countZexpected_group_sizeZany_string_dtyper   rs   r   r   r    r!   r   r"   test_compound  s4   


r   c                   C   s$   t g dg dg ddg ddS )Nr   r   r   r   )r%   r   r      )r%   r   r   r   rl   Znum_legsZ	num_wings)Zfalcondogcatantr   r   r!   r!   r!   r"   
animals_df  s   r   z?sort, ascending, normalize, name, expected_data, expected_indexr   r   r%   r   )r   r   r   )r%   r   r   r%   r   r   r   r   r%   )r   )r%   r   r   r   r%   r   r   )r   )r   r%   r   )r   r%   r   rF   )r   r   r   c           
      C   s^   | j |||d}t|tj|g dd|d}t|| | dj |||d}	t|	| d S )N)rG   rH   rP   r   rJ   r}   rl   )r   r   r   rN   r   r   r   )
r   rG   rH   rP   r   expected_dataexpected_indexresult_framer    result_frame_groupbyr!   r!   r"   test_data_frame_value_counts  s   
r   c                  C   s`   t j} tdd| d| ddddg	ddd| | ddddg	dddddd| d| g	ddddddd| | g	d	S )
Nr   r   r   r,   r%   rA   r0   r.   )rp   rq   rr   D)r2   r6   r   )r9   r!   r!   r"   nulls_df  s   r   z:group_dropna, count_dropna, expected_rows, expected_values)	r   r   r,   rA   r.   r   r0   r%   r   )	r   r         ?r   r   r   r   r   r   )r   r   r,   rA   r%   r   )r   r   r   r   r   r   )r   r   rA   r.   r   r0   )r   r   r   r   r   r   )r   r   rA   )r   r   r   c                    s   t tjt dkr|s|tjjddd jddg|d}|jdd|d	}t	 }j
D ]  fd
d|D | < q-t|}	t||	dd}
t||
 d S )Nr   r   Fr   rp   rq   )rI   T)rP   rG   rI   c                    r   r!   r!   r   r   r   r!   r"   r     r   z,test_dropna_combinations.<locals>.<listcomp>rF   r}   )r   r2   r   r   r   r   r   r   r   r   r   r   r   r   r   r   )r   Zgroup_dropnaZcount_dropnar   expected_valuesr   r   r   r   r   r    r!   r   r"   test_dropna_combinations  s   	

r   c                 C   s    t g dg dd| | dgdS )Nr   )JohnAnner   BethSmithLouiserl   Z
first_nameZmiddle_namer   )Znulls_fixturer!   r!   r"   names_with_nulls_df  s   
r   z%dropna, expected_data, expected_indexr   )r   r   )r   r   )r   r   r   rJ   r   )r   r   r   r   r   )r   r   r   r   )r   r   r%   r%   )r%   r   r   r%   r   c           	      C   s`   | j ||d}t|||d}|r|tt| }t|| | dj ||d}t|| d S )N)rI   rP   r}   rl   )r   r   r*   rt   r   r   r   )	r   rI   rP   r   r   r   r   r    r   r!   r!   r"   #test_data_frame_value_counts_dropna&  s   !
r   observedznormalize, name, expected_data)r%   r   r   r   r   r   r   r   r   r   r   r   rs   )r   r   r           r   r   r   r   r   r   r   r   c                 C   s   t tjt dkr|tjjddd | djd||d}|j	|d}t
jg d	g d
d}	t||	|d}
tdD ]}|
jjt|
jj| |d|
_q;|rWt||
 d S |
j|r]dndd}t|| d S )Nr   r   Fr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rJ   r}   r,   levelrF   r   r   )r   r2   r   r   r   r   r   r   r   r   r   r   r   rM   r   
set_levelsr   r   r   r   r   r   r   r   r   rP   r   r   r   r   r   r   expected_seriesir    r!   r!   r"   =test_categorical_single_grouper_with_only_observed_categoriesY  s<   



r  c                 C   s   |   d} | d jdg| d< | jd||d}|j|d}t|tj|g dd|d}	t	d	D ] }
t
|	jj|
 }|
d
krI|| d jj}|	jj||
d|	_q2|r]t||	 d S |	j|d}t|| d S )Nr   r   ASIAr   r   r   rJ   r}   r,   r   r  r
  )copyr   r   Zadd_categoriesr   r   r   r   r   rM   r   r   r   Zset_categoriesr{   r  r   r   r   r   )r   r   r   r   rP   r   r   r   r   r  r  Zindex_levelr    r!   r!   r"   !assert_categorical_single_grouper  s.   
r  c              	   C   sJ   t tjt dkr|tjjddd g d}t| |d||||d d S )Nr   r   Fr   r   Tr   r   r   r   rP   r   r   r   r2   r   r   r   r   r   r  r   r   rP   r   r   r   r   r!   r!   r"   -test_categorical_single_grouper_observed_true  s"   

r  )r%   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   )r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   c              	   C   sJ   t tjt dkr|tjjddd g d}t| |d||||d d S )Nr   r   Fr   )r   r   r   r  r  r  r   r   r  r  r  r  )r  r   r   )r  r   r   )r  r   r   )r  r   r   )r  r   r   )r  r   r   r  r  r  r!   r!   r"   .test_categorical_single_grouper_observed_false  s"   ,

r  zobserved, expected_index)r   r   r   )r   r   r   r   r   r   )r   r   r   r   r   r   )r   r   r   r   r   r   )r   r   r   r   r   r   )r   r   r   )r   r   r   )r   r   r   )r  r  r  r  r  )r   r   r%   r   r   r   r   r   r   r   r   r   )r   r   r   r   r   r   r   r   r   r   r   r   c                 C   s   |   } | d d| d< | d d| d< | jddg||d}|j|d}t|r/||dk n|tj|g dd|d	}	td
D ]}
|	jj	t
|	jj|
 |
d|	_q@|r\t||	 d S |	j|rbdndd}t|| d S )Nr   r   r   r   r   r   )r   r   r   rJ   r}   r%   r  rF   r   r
  )r  r   r   r   r   r   r   rM   r   r  r   r   r   r   r   r   )r   r   r   r   rP   r   r   r   r   r  r  r    r!   r!   r"   "test_categorical_multiple_groupersX  s2   7


r  c                 C   s   t tjt dkr|tjjddd |  } | d d| d< | d d| d< | j	d||d	}|j
|d
}g d}	t|tj|	g dd|d}
tddD ]}|
jjt|
jj| |d|
_qO|rkt||
 d S |
j|rqdndd}t|| d S )Nr   r   Fr   r   r   r   r   r   r   r   r   rJ   r}   r   r,   r  rF   r   r
  )r   r2   r   r   r   r   r   r  r   r   r   r   r   r   rM   r   r  r   r   r   r   r   r   r  r!   r!   r"   test_categorical_non_groupers  s>   


r  z*normalize, expected_label, expected_valuesr   c                 C   s   t g dg dd}|jg dddd gdd	}|jd
| d}t dtjg dtddg ddg ddg d||i}t|| d S )Nr   r   r%   r,   )rp   rq   )r   rA   r   rp   c                 S   s   | dkrdS dS )Nr   r.   r0   r!   )r  r!   r!   r"   r     r   z&test_mixed_groupings.<locals>.<lambda>Fr   TrG   rP   r   )r   r   rA   r   r   Zlevel_2)r0   r0   r.   rq   )r   r,   r%   )r   r   r   r2   r~   intr   r   )rP   expected_labelr   r   r   r   r    r!   r!   r"   test_mixed_groupings  s   		r$  ztest, columns, expected_namesrepeatZabbde)ry   Ndrz   rz   er	  r&   level_1)ry   Nr&  rz   cr(  c           
      C   s   t g dg dg|d}ddg}dtjddgtjd	d
g}|j||d }|r<tdtj||ddd}t	
|| d S dd |D }t|}	d|	d< |	d t ||	d}t	|| d S )N)r   r,   rA   r.   r1   )r%   r   r   r0   r$   r   )r   r   r.   r,   rA   r1   )r%   r   r0   r   r   r$   ry   r   r   r   r&  r   r   rJ   r   r}   c                 S   s   g | ]	}t |d g qS )r   )r5   r   r!   r!   r"   r   .  s    z0test_column_label_duplicates.<locals>.<listcomp>r(  )r   r2   r~   int64r   r   r   r   r   r   r   r5   appendr   )
testr   Zexpected_namesr   r   r   rC   r   r    Zexpected_columnsr!   r!   r"   test_column_label_duplicates  s(   
r-  znormalize, expected_labelc                 C   sn   t g dgdd|gdjddd}d| d}tjt|d	 |j| d
 W d    d S 1 s0w   Y  d S )Nr  ry   rz   r   Fr   zColumn label 'z' is duplicate of result columnr   r   )r   r   r   r   r   r   )rP   r#  r   r   r!   r!   r"   test_result_label_duplicates6  s   	"r.  c                  C   sf   t dddgi} | tjddgtjd}| }tdgtjddggd dgddd}t	
|| d S )Nry   r   r   r%   rJ   r   r   )r   r   r2   r~   r*  r   r   r   r   r   r   )r   r   r   r    r!   r!   r"   test_ambiguous_groupingG  s   r/  c                  C   sj   t g dg ddg dd} d}tjt|d | djdgd	 W d    d S 1 s.w   Y  d S )
Nry   rz   r)  r   yr2  c1c2r   r   r   r   z;Keys {'c1'} in subset cannot be in the groupby column keys.r   r4  r   r   r   r   r   r   r   r   r   r!   r!   r"   "test_subset_overlaps_gb_key_raisesR  
   "r9  c                  C   sj   t g dg ddg dd} d}tjt|d | djd	gd
 W d    d S 1 s.w   Y  d S )Nr0  r1  r3  r6  r   z4Keys {'c3'} in subset do not exist in the DataFrame.r   r4  c3r   r7  r8  r!   r!   r"   !test_subset_doesnt_exist_in_frameZ  r:  r<  c                  C   sp   t g dg ddg dd} | jddjdgd	}td
dgtjdd
gddggd dgddd}t|| d S )Nr0  r1  r3  r6  r   r   r  r5  r   r   r%   r   r2  rJ   r   r   r   r   r   r   r   rN   r   r   r   r   r    r!   r!   r"   test_subsetb  s   r?  c                  C   s   t g dg dg dgg dg dd} | jddjdgd	}td
dgtjdd
gddgddggg dddd}t|| d S )N)ry   r   r   )rz   r2  r2  r6  )r4  r5  r5  )r   r   r   r  r5  r   r   r%   r   r2  )Nr5  r5  rJ   r   r   r=  r>  r!   r!   r"   test_subset_duplicate_columnsn  s   r@  c           	      C   s   t g dg dddg}t|d | ddj||d< |td	dd
}| }tg d| d|}|d  }t	||g dgg dt
dg dgg dd}td|dd}t|| d S )Nr\   r]   rb   r,   rc   re   rf   rh   ri   rj   )z
2019-08-06z
2019-08-07z
2019-08-09z
2019-08-10)r[   )r^   r_   r`   ra   )r   r   r   r%   r%   r,   r   )r   r   r   r%   r%   r,   )rh   rc   rd   r   r   r   r   )r   rm   r
   dtZas_unitr   r   r   uniquer   rM   r   r   r   )	r[   rg   r   r   r   datesZ
timestampsr   r    r!   r!   r"   test_value_counts_time_grouper  s.   	rD  c                  C   sj   t g dg dg dd} | jddgddd}|d	  }t g dg dg ddd
}t|| d S )N)ry   ry   ry   )ry   ry   r&  r0  r  r   r%   Fr   r,   )r   r%   r,   r   )r   r   r   r   r   )r   r   r   r    r!   r!   r"   !test_value_counts_integer_columns  s   rE  vc_sortc           
      C   s   t g dg dd}|jd| d}|j||d}|r g d}ng d}td	d
gddggg dg dgddgd}t|||r@dndd}| rM|rMg d}	n| rV|sVg d}	n| s_|r_g d}	ng d}	||	}t|| d S )Nr%   r   r   r   r,   r   r,   r,   ry   r   ry   rQ   r!  )UUUUUU?UUUUUU?r   r   r   r%   r,   r   )r   r   r   )r   r   r   r   r   rF   r   r   )r   r   r%   )r   r%   r   )r%   r   r   )r   r   r   r   r   taker   r   )
rG   rF  rP   r   r   r   r   r   r    takerr!   r!   r"   test_value_counts_sort  s&   
"



rN  c           
      C   s   t g dg dddd}|jd| dd}|j||d	}|r#g d
}ng d}|r+dnd}t dtg ddtg d||iddg| }| rO|rOg d}	n| rX|sXg d}	n| sa|rag d}	ng d}	||	}t|| d S )NrG  rH  rI  r   r   ry   T)rG   r   r!  )rJ  rK  r   r   )r%   r   r   r   rF   r   )r   r   r%   r%   r   )r,   r   r,   r   )r   r   r%   r,   )r   r%   r   r,   )r%   r,   r   r   )r   r   r   r   Z	set_indexrL  r   r   )
rG   rF  rP   r   r   r   r   r   r    rM  r!   r!   r"   "test_value_counts_sort_categorical  s0   




rO  )D__doc__numpyr2   r   Zpandasr   r   r   r   r   r   r   r	   r
   Zpandas._testingZ_testingr   Zpandas.util.versionr   r#   r=   r   ZslowZparametrizereprrZ   ro   rv   rx   r   r   Zfixturer   r   r   r   r   r   r   r   r   r   r   r   rN   r6   r   r~   r*  r  r  r  r  r  r  r$  r5   r-  r.  r/  r9  r<  r?  r@  rD  rE  rN  rO  r!   r!   r!   r"   <module>   s   ,*




 \*





	
:#*
&0 %<



!