o
    #j8                     @  s  U d Z ddlmZ ddlZddlmZ ddlmZ ddlm	Z	 i de
dd	hd
e
ddhde
dhde
dhde
dhde
ddhde
ddhde
dhde
dhde
dhde
dhde
dhde
h d d!e
d"d#hd$e
d%hd&e
d	hd'e
d(he
d)he
d*he
d+he
d,he
d-he
d.he
d/d0he
d1he
d1he
d2d3he
d4he
d4hd5Zd6ed7< d	dd	d(d)d*d+d,d-dd.dd8Zd9ed:< dd?d@ZddAdBZeZi dddCdDdEdFddGdddHdddIdJdKd"ddLdMdNd&dOdPdQdRdSdTdUdVdWdXdYdZi d[d\d]d^dd_d`d
d1dad4dbdcddd)ded	dfd+dgd-dhd,didjdkdldmdndodpdqdrdsZd9edt< ddudvZdwZdxedy< dzZdxed{< d|d} fdddZeeZd6ed< dddZi Zd6ed< e D ] \ZZe	epeZe
dd eD Zeee
 eB ee< qeedd} dZd6ed< dddZ dddZ!e
ddhZ"ded< dddZ#dddZ$dS )a$  Encoding equivalences and name remapping.

This module defines:

1. **Directional supersets** for accuracy evaluation: detecting a superset
   encoding when the expected encoding is a subset is correct (e.g., detecting
   UTF-8 when expected is ASCII), but not the reverse.

2. **Bidirectional equivalents**: groups of encodings where detecting any
   member when another member was expected is considered correct.  This
   includes UTF-16/UTF-32 endian variants (which encode the same text with
   different byte order) and ISO-2022-JP branch variants (which are
   compatible extensions of the same base encoding).

3. **Preferred superset mapping** for the ``prefer_superset`` API option:
   replaces detected ISO/subset encoding names with their Windows/CP superset
   equivalents that modern software actually uses.

4. **Compatibility names** for the default ``compat_names=True`` mode: maps
   internal Python codec names to the names chardet 5.x/6.x returned,
   preserving backward compatibility for callers that compare encoding
   strings directly.
    )annotationsN)Callable)DetectionDictlookup_encodingASCIIzutf-8cp1252zTIS-620
iso8859-11Zcp874zISO-8859-11ZGB2312gb18030ZGBKZBig5	big5hkscscp950Z	Shift_JIScp932shift_jis_2004zShift-JISX0213zEUC-JPeuc_jis_2004zEUC-JISX0213zEUC-KRcp949ZCP037cp1140zISO-2022-JP>   iso2022_jp_2iso2022_jp_extiso2022_jp_2004zISO2022-JP-1r   r   zISO2022-JP-3r   z
ISO-8859-1z
ISO-8859-2cp1250cp1251cp1256cp1253cp1255cp1254cp1257z	utf-16-lez	utf-16-bezutf-16z	utf-32-lez	utf-32-bezutf-32)
ISO-8859-5z
ISO-8859-6
ISO-8859-7
ISO-8859-8
ISO-8859-9zISO-8859-13UTF-16z	UTF-16-LEz	UTF-16-BEUTF-32z	UTF-32-LEz	UTF-32-BEdict[str, frozenset[str]]	SUPERSETS)asciieuc_kr	iso8859-1z	iso8859-2	iso8859-5z	iso8859-6	iso8859-7	iso8859-8	iso8859-9r	   z
iso8859-13tis-620dict[str, str]PREFERRED_SUPERSETresultr   mappingreturnc                 C  s(   |  d}t|tr| ||| d< | S )zGReplace the encoding name using *mapping*, modifying *result* in-place.encoding)get
isinstancestr)r.   r/   enc r6   U/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/chardet/equivalences.py_remap_encodingl   s   

r8   c                 C  
   t | tS )a7  Replace the encoding name with its preferred Windows/CP superset.

    Modifies the ``"encoding"`` value in *result* in-place and returns *result*
    for fluent chaining.

    :param result: A detection result dict containing an ``"encoding"`` key.
    :returns: The same *result* dict, modified in-place.
    )r8   r-   r.   r6   r6   r7   apply_preferred_supersett      
r;   cp855ZIBM855cp866ZIBM866ZCP949r%   ZGB18030hzz
HZ-GB-2312
iso2022_krzISO-2022-KRr&   r'   r   r(   r   r)   r   r*   r   johabZJohabzkoi8-rzKOI8-Rzmac-cyrillicZMacCyrillicz	mac-romanZMacRomanZ	SHIFT_JISr+   r    r!   z	utf-8-sigz	UTF-8-SIGzWindows-1251zWindows-1252zWindows-1253zWindows-1254zWindows-1255kz1048ZKZ1048z	mac-greekZMacGreekzmac-icelandZ
MacIcelandz
mac-latin2Z	MacLatin2zmac-turkishZ
MacTurkish_COMPAT_NAMESc                 C  r9   )a7  Convert internal codec names to chardet 5.x/6.x compatible names.

    Modifies the ``"encoding"`` value in *result* in-place and returns *result*
    for fluent chaining.

    :param result: A detection result dict containing an ``"encoding"`` key.
    :returns: The same *result* dict, modified in-place.
    )r8   rC   r:   r6   r6   r7   apply_compat_names   r<   rD   ))r   r   r   tuple[tuple[str, ...], ...]BIDIRECTIONAL_GROUPS))skcs)ukrubgbe)msid)nodasvLANGUAGE_EQUIVALENCESc                 C  s   | S Nr6   )xr6   r6   r7   <lambda>   s    rU   groups	normalizeCallable[[str], str]c                   s>   i }| D ]}t  fdd|D }|D ]}|| |< qq|S )zJBuild a lookup: key -> frozenset of all equivalent keys in the same group.c                 3  s    | ]} |V  qd S rS   r6   ).0nrW   r6   r7   	<genexpr>   s    z%_build_group_index.<locals>.<genexpr>)	frozenset)rV   rW   r.   groupZnormednamer6   r[   r7   _build_group_index   s   r`   _LANGUAGE_EQUIVexpectedr4   detectedboolc                 C  s&   | |krdS t | }|duo||v S )a  Check whether *detected* is an acceptable language for *expected*.

    Returns ``True`` when *expected* and *detected* are the same ISO 639-1
    code, or belong to the same equivalence group in
    :data:`LANGUAGE_EQUIVALENCES`.

    :param expected: Expected ISO 639-1 language code.
    :param detected: Detected ISO 639-1 language code.
    :returns: ``True`` if the languages are equivalent.
    TN)ra   r2   )rb   rc   r^   r6   r6   r7   is_language_equivalent   s   
re   _NORMALIZED_SUPERSETSc                 c  s    | ]	}t |p	|V  qd S rS   r   )rY   sr6   r6   r7   r\   	  s    r\   c                 C  s   t | p| S rS   r   )rZ   r6   r6   r7   rU     s    r[   _NORMALIZED_BIDIR
str | Nonec                 C  st   | du r|du S |du rdS t | p|  }t |p| }||kr$dS |tv r0|t| v r0dS |tv o9|t| v S )a  Check whether *detected* is an acceptable answer for *expected*.

    Acceptable means:

    1. Exact match (after normalization), OR
    2. Both belong to the same bidirectional byte-order group, OR
    3. *detected* is a known superset of *expected*.

    :param expected: The expected encoding name, or ``None`` for binary files.
    :param detected: The detected encoding name, or ``None``.
    :returns: ``True`` if the detection is acceptable.
    NFT)r   lowerrh   rf   )rb   rc   norm_expnorm_detr6   r6   r7   
is_correct  s   
rm   textc                 C  s    t d| }ddd |D S )z4NFKD-normalize *text* and strip all combining marks.ZNFKD c                 s  s    | ]
}t |s|V  qd S rS   )unicodedata	combining)rY   cr6   r6   r7   r\   8      z#_strip_combining.<locals>.<genexpr>)rp   rW   join)rn   Znfkdr6   r6   r7   _strip_combining5  s   ru   )   ¤   €)rw   rv   zfrozenset[tuple[str, str]]_EQUIVALENT_SYMBOL_PAIRSabc                 C  s,   | |krdS | |ft v rdS t| t|kS )u   Return True if characters *a* and *b* are functionally equivalent.

    Equivalent means:
    - Same character, OR
    - Same base letter after stripping combining marks, OR
    - An explicitly listed symbol equivalence (e.g. ¤ ↔ €)
    T)rx   ru   )ry   rz   r6   r6   r7   _chars_equivalentE  s
   r{   databytesc              	   C  s   |du r|du S |du rdS t |p| }t |p| }||kr$dS z| |}| |}W n ttfy<   Y dS w ||krCdS t|t|krMdS tdd t||ddD S )u  Check whether *detected* produces functionally identical text to *expected*.

    Returns ``True`` when:

    1. *detected* is not ``None`` and both encoding names normalize to the same
       codec, OR
    2. Decoding *data* with both encodings yields identical strings, OR
    3. Every differing character pair is functionally equivalent: same base
       letter after stripping combining marks, or an explicitly listed symbol
       equivalence (e.g. ¤ ↔ €).

    Returns ``False`` if *detected* is ``None``, either encoding is unknown,
    or either encoding cannot decode *data*.

    :param data: The raw byte data that was detected.
    :param expected: The expected encoding name, or ``None`` for binary files.
    :param detected: The detected encoding name, or ``None``.
    :returns: ``True`` if decoding with *detected* yields functionally identical
        text to decoding with *expected*.
    NFTc                 s  s    | ]
\}}t ||V  qd S rS   )r{   )rY   ry   rz   r6   r6   r7   r\     rs   z*is_equivalent_detection.<locals>.<genexpr>)strict)r   rj   decodeUnicodeDecodeErrorLookupErrorlenallzip)r|   rb   rc   rk   rl   Ztext_expZtext_detr6   r6   r7   is_equivalent_detectionU  s&   
r   )r.   r   r/   r,   r0   r   )r.   r   r0   r   )rV   rE   rW   rX   r0   r"   )rb   r4   rc   r4   r0   rd   )rb   ri   rc   ri   r0   rd   )rn   r4   r0   r4   )ry   r4   rz   r4   r0   rd   )r|   r}   rb   ri   rc   ri   r0   rd   )%__doc__
__future__r   rp   collections.abcr   Zchardet.pipeliner   Zchardet.registryr   r]   r#   __annotations__r-   r8   r;   Zapply_legacy_renamerC   rD   rF   rR   r`   ra   re   rf   itemsZ_subsetZ
_supersets_keyZ_normedr2   rh   rm   ru   rx   r{   r   r6   r6   r6   r7   <module>   sF   




	








3

	
 !"#$
(


#
