o
    #j/                     @  s
  U d dl mZ d dlZd dlZd dlZd dlmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ d dlmZ d d	lmZ d
dlmZmZmZmZmZmZmZmZmZmZmZmZm Z m!Z!m"Z"m#Z#m$Z$ eedduddZ%eeddvddZ&eeddwddZ'e(dd e) D Z*de+d< dd e*D Z,de+d< eeddxd!d"Z-eeddvd#d$Z.eeddvd%d&Z/eeddvd'd(Z0eeddvd)d*Z1eeddvd+d,Z2eeddvd-d.Z3eeddvd/d0Z4eeddvd1d2Z5eeddvd3d4Z6eeddvd5d6Z7eeddvd7d8Z8eeddvd9d:Z9eeddvd;d<Z:eeddvd=d>Z;ee<eddyd@dAZ=eeddvdBdCZ>	Ddzd{dHdIZ?edJdd|dLdMZ@d}dOdPZAd~dRdSZBdddWdXZCdd\d]ZDdd^d_ZEd`ejFdafddedfZG	dddsdtZHdS )    )annotationsN)bisect_right)IncrementalDecoder)aliases)	lru_cache)findall)	Generator)MultibyteIncrementalDecoder   )ENCODING_MARKSIANA_SUPPORTED_SIMILARRE_POSSIBLE_ENCODING_INDICATIONUNICODE_RANGES_COMBINEDUNICODE_SECONDARY_RANGE_KEYWORDUTF8_MAXIMAL_ALLOCATIONCOMMON_CJK_CHARACTERS_LATIN_CJK_HANGUL	_KATAKANA	_HIRAGANA_THAI_ARABIC_ARABIC_ISOLATED_FORM_ACCENT_KEYWORDS_ACCENTUATED)maxsize	characterstrreturnintc                 C  s   zt | }W n
 ty   Y dS w d}d|v r|tO }d|v r$|tO }d|v r,|tO }d|v r4|tO }d|v r<|tO }d|v rD|tO }d|v rT|t	O }d	|v rT|t
O }tD ]}||v rc|tO } |S qV|S )
zRCompute all name-based classification flags with a single unicodedata.name() call.r   ZLATINZCJKZHANGULZKATAKANAZHIRAGANAZTHAIZARABICzISOLATED FORM)unicodedataname
ValueErrorr   r   r   r   r   r   r   r   r   r   )r   descflagskw r'   Y/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/charset_normalizer/utils.py_character_flags&   s:   r)   boolc                 C     t t| t@ S N)r*   r)   r   r   r'   r'   r(   is_accentuatedI      r.   c                 C  s.   t | }|s	| S |d}tt|d dS )N r      )r!   decompositionsplitchrr    )r   Z
decomposedcodesr'   r'   r(   remove_accentN   s
   

r6   c                 c  s"    | ]\}}|j |j|fV  qd S r,   )startstop).0r"   Z	ord_ranger'   r'   r(   	<genexpr>[   s
    
r:   zlist[tuple[int, int, str]]_UNICODE_RANGES_SORTEDc                 C  s   g | ]}|d  qS )r   r'   )r9   er'   r'   r(   
<listcomp>_   s    r=   z	list[int]_UNICODE_RANGE_STARTS
str | Nonec                 C  s<   t | }tt|d }|dkrt| \}}}||k r|S dS )zK
    Retrieve the Unicode range official name from a single character.
    r
   r   N)ordr   r>   r;   )r   Zcharacter_ordidxr7   r8   r"   r'   r'   r(   unicode_rangeb   s   rB   c                 C  r+   r,   )r*   r)   r   r-   r'   r'   r(   is_latins   r/   rC   c                 C  s2   t | }d|v rdS t| }|d u rdS d|v S )NPTFPunctuationr!   categoryrB   r   character_categorycharacter_ranger'   r'   r(   is_punctuationx   s   
rK   c                 C  sB   t | }d|v sd|v rdS t| }|d u rdS d|v o |dkS )NSNTFZFormsZLorF   rH   r'   r'   r(   	is_symbol   s   
rN   c                 C  s$   t | }|d u r
dS d|v pd|v S )NFZ	EmoticonsZPictographs)rB   )r   rJ   r'   r'   r(   is_emoticon   s   rO   c                 C  s.   |   s| dv r
dS t| }d|v p|dv S )N>   <+>u   ｜TZ>   ZPdZPcZPo)isspacer!   rG   )r   rI   r'   r'   r(   is_separator   s   
rU   c                 C  s   |   |  kS r,   )islowerisupperr-   r'   r'   r(   is_case_variable   r/   rX   c                 C  r+   r,   )r*   r)   r   r-   r'   r'   r(   is_cjk   r/   rY   c                 C  r+   r,   )r*   r)   r   r-   r'   r'   r(   is_hiragana   r/   rZ   c                 C  r+   r,   )r*   r)   r   r-   r'   r'   r(   is_katakana   r/   r[   c                 C  r+   r,   )r*   r)   r   r-   r'   r'   r(   	is_hangul   r/   r\   c                 C  r+   r,   )r*   r)   r   r-   r'   r'   r(   is_thai   r/   r]   c                 C  r+   r,   )r*   r)   r   r-   r'   r'   r(   	is_arabic   r/   r^   c                 C  r+   r,   )r*   r)   r   r-   r'   r'   r(   is_arabic_isolated_form   r/   r_   c                 C  s   | t vS r,   )r   r-   r'   r'   r(   is_cjk_uncommon   s   r`   
range_namec                   s   t  fddtD S )Nc                 3  s    | ]}| v V  qd S r,   r'   )r9   keywordra   r'   r(   r:      s    z-is_unicode_range_secondary.<locals>.<genexpr>)anyr   rc   r'   rc   r(   is_unicode_range_secondary   s   re   c                 C  s(   |   du o|  du o| dko| dkS )NFu   ﻿)rT   isprintabler-   r'   r'   r(   is_unprintable   s   
rh       sequencebytes | bytearraysearch_zonec                 C  s   t | ttfs	tt| }tt| dt|| jddd}t|dkr&dS |D ]'}|	 
dd}t D ]\}}||krD|    S ||krN|    S q6q(dS )zW
    Extract using ASCII-only decoder any specified encoding in the first n-bytes.
    Nasciiignoreerrorsr   -_)
isinstancebytes	bytearray	TypeErrorlenr   r   mindecodelowerreplacer   items)rj   rl   Zseq_lenresultsZspecified_encodingencoding_aliasencoding_ianar'   r'   r(   any_specified_encoding   s&   r      r"   c                 C  s    | dv pt td|  jtS )zQ
    Verify is a specific encoding is a multi byte one based on it IANA name
    >	   utf_16	utf_16_beutf_32	utf_16_leutf_8	utf_32_beZ	utf_8_sigutf_7	utf_32_le
encodings.)
issubclass	importlibimport_moduler   r	   )r"   r'   r'   r(   is_multi_byte_encoding	  s   
r   tuple[str | None, bytes]c                 C  sJ   t D ] }t | }t|tr|g}|D ]}| |r!||f    S qqdS )z9
    Identify and extract SIG/BOM in given sequence.
    )N    )r   rs   rt   
startswith)rj   iana_encodingZmarksmarkr'   r'   r(   identify_sig_or_bom  s   

r   r   c                 C  s   | dvS )N>   r   r   r'   )r   r'   r'   r(   should_strip_sig_or_bom0  s   r   Tcp_namestrictc                 C  sN   |   dd} t D ]\}}| ||fv r|  S q|r%td|  d| S )zIReturns the Python normalized encoding name (Not the IANA official name).rq   rr   zUnable to retrieve IANA for '')rz   r{   r   r|   r#   )r   r   r~   r   r'   r'   r(   	iana_name4  s   r   iana_name_aiana_name_bfloatc           	      C  s   t | st |r
dS td|  j}td| j}|dd}|dd}d}tdD ]}t|g}||||krA|d7 }q,|d S )Ng        r   rn   ro   r      r
   )r   r   r   r   rangert   ry   )	r   r   Z	decoder_aZ	decoder_bZid_aZid_bZcharacter_match_countiZto_be_decodedr'   r'   r(   cp_similarityE  s   


r   c                 C  s   | t v o	|t |  v S )z
    Determine if two code page are at least 80% similar. IANA_SUPPORTED_SIMILAR dict was generated using
    the function cp_similarity.
    )r   )r   r   r'   r'   r(   is_cp_similarY  s   
r   Zcharset_normalizerz)%(asctime)s | %(levelname)s | %(message)slevelformat_stringNonec                 C  s:   t | }|| t  }|t | || d S r,   )logging	getLoggersetLevelStreamHandlersetFormatter	Formatter
addHandler)r"   r   r   loggerhandlerr'   r'   r(   set_logging_handlerd  s
   

r   	sequencesr   offsetsr   
chunk_sizebom_or_sig_availablestrip_sig_or_bomsig_payloadrt   is_multi_byte_decoderdecoded_payloadGenerator[str, None, None]c	                 c  s&   |r|du r|D ]}	||	|	|  }
|
s d S |
V  q	d S |D ]p}	|	| }|t | d kr/q | |	|	|  }|rA|du rA|| }|j||rHdndd}
|r|	dkrt|d}|r|
d | |vrt|	|	d d	D ]#}| || }|r{|du r{|| }|j|dd}
|
d | |v r nqi|
V  q d S )
NF   rn   r   ro   r   r1      )rw   ry   rx   r   )r   r   r   r   r   r   r   r   r   r   chunkZ	chunk_endZcut_sequenceZchunk_partial_size_chkjr'   r'   r(   cut_sequence_chunksq  sD   

r   )r   r   r   r    )r   r   r   r*   )r   r   r   r   )r   r   r   r?   )ra   r   r   r*   )ri   )rj   rk   rl   r    r   r?   )r"   r   r   r*   )rj   rk   r   r   )r   r   r   r*   )T)r   r   r   r*   r   r   )r   r   r   r   r   r   )r   r   r   r   r   r*   )r"   r   r   r    r   r   r   r   r,   )r   rk   r   r   r   r   r   r    r   r*   r   r*   r   rt   r   r*   r   r?   r   r   )I
__future__r   r   r   r!   bisectr   codecsr   Zencodings.aliasesr   	functoolsr   rer   typingr   Z_multibytecodecr	   Zconstantr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r)   r.   r6   sortedr|   r;   __annotations__r>   rB   rC   rK   rN   rO   rU   rX   rY   rZ   r[   r\   r]   r^   r_   r`   rw   re   rh   r   r   r   r   r   r   r   INFOr   r   r'   r'   r'   r(   <module>   s    L"		"



