o
    #j2                     @  s   d dl mZ d dlmZ d dlmZ d dlmZ d dlm	Z	m
Z
mZmZ ddlmZmZ ddlmZmZmZ G d	d
 d
ZG dd dZeeef Zee ZG dd dZdS )    )annotations)aliases)dumps)sub)AnyIteratorListTuple   )RE_POSSIBLE_ENCODING_INDICATIONTOO_BIG_SEQUENCE)	iana_nameis_multi_byte_encodingunicode_rangec                   @  sH  e Zd Z		dIdJddZdKddZdKddZedLddZdMddZdMddZ	dNdd Z
edMd!d"ZedOd$d%ZedPd&d'ZedPd(d)ZedOd*d+ZedMd,d-ZedLd.d/ZedLd0d1ZedLd2d3ZedLd4d5ZedQd6d7ZedRd9d:ZedPd;d<ZedOd=d>ZedOd?d@ZdSdTdDdEZedUdGdHZdS )VCharsetMatchNpayloadbytes | bytearrayguessed_encodingstrmean_mess_ratiofloathas_sig_or_bombool	languagesCoherenceMatchesdecoded_payload
str | Nonepreemptive_declarationc                 C  sL   || _ || _|| _|| _|| _d | _g | _d| _d | _d | _	|| _
|| _d S )N        )_payload	_encoding_mean_mess_ratio
_languages_has_sig_or_bom_unicode_ranges_leavesZ_mean_coherence_ratio_output_payload_output_encoding_string_preemptive_declaration)selfr   r   r   r   r   r   r    r+   Z/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/charset_normalizer/models.py__init__   s   

zCharsetMatch.__init__otherobjectreturnc                 C  s>   t |tst |trt|| jkS dS | j|jko| j|jkS )NF)
isinstancer   r   r   encodingfingerprintr*   r.   r+   r+   r,   __eq__)   s
   

zCharsetMatch.__eq__c                 C  s   t |tstt| j|j }t| j|j }|dk r%|dkr%| j|jkS |dk r@|dkr@t| jtkr:| j|jk S | j	|j	kS | j|jk S )zQ
        Implemented to make sorted available upon CharsetMatches items.
        g{Gzt?g{Gz?)
r1   r   
ValueErrorabschaos	coherencelenr   r   multi_byte_usage)r*   r.   Zchaos_differenceZcoherence_differencer+   r+   r,   __lt__0   s   
zCharsetMatch.__lt__c                 C  s   dt t| t | j  S )Ng      ?)r:   r   rawr*   r+   r+   r,   r;   F   s   zCharsetMatch.multi_byte_usagec                 C  sV   | j d u r(t| j| jd| _ | jr(| jdkr(| j r(| j d dkr(| j dd  | _ | j S )Nstrictutf_7r   u   ﻿r
   )r(   r   r   r    r#   r>   r+   r+   r,   __str__J   s   

zCharsetMatch.__str__c                 C  s   d| j  d| j dS )Nz<CharsetMatch 'z' fp(z)>)r2   r3   r>   r+   r+   r,   __repr__Z   s   zCharsetMatch.__repr__Nonec                 C  s8   t |tr	|| krtd|jd |_| j| d S )Nz;Unable to add instance <{}> as a submatch of a CharsetMatch)r1   r   r6   format	__class__r(   r%   appendr4   r+   r+   r,   add_submatch]   s   zCharsetMatch.add_submatchc                 C     | j S N)r    r>   r+   r+   r,   r2   h      zCharsetMatch.encoding	list[str]c                 C  sD   g }t  D ]\}}| j|kr|| q| j|kr|| q|S )z
        Encoding name are known by many name, using this could help when searching for IBM855 when it's listed as CP855.
        )r   itemsr2   rF   )r*   Zalso_known_asupr+   r+   r,   encoding_aliasesl   s   


zCharsetMatch.encoding_aliasesc                 C  rH   rI   r#   r>   r+   r+   r,   bomy   rJ   zCharsetMatch.bomc                 C  rH   rI   rP   r>   r+   r+   r,   byte_order_mark}   rJ   zCharsetMatch.byte_order_markc                 C  s   dd | j D S )z
        Return the complete list of possible languages found in decoded sequence.
        Usually not really useful. Returned list may be empty even if 'language' property return something != 'Unknown'.
        c                 S  s   g | ]}|d  qS )r   r+   ).0er+   r+   r,   
<listcomp>       z*CharsetMatch.languages.<locals>.<listcomp>r"   r>   r+   r+   r,   r      s   zCharsetMatch.languagesc                 C  sp   | j s1d| jv r
dS ddlm}m} t| jr|| jn|| j}t|dks+d|v r-dS |d S | j d d S )z
        Most probable language found in decoded sequence. If none were detected or inferred, the property will return
        "Unknown".
        asciiZEnglishr   )encoding_languagesmb_encoding_languageszLatin BasedUnknown)r"   could_be_from_charsetZcharset_normalizer.cdrY   rZ   r   r2   r:   )r*   rY   rZ   r   r+   r+   r,   language   s   
zCharsetMatch.languagec                 C  rH   rI   )r!   r>   r+   r+   r,   r8      rJ   zCharsetMatch.chaosc                 C  s   | j sdS | j d d S )Nr   r   r
   rW   r>   r+   r+   r,   r9      s   zCharsetMatch.coherencec                 C     t | jd ddS Nd      )ndigits)roundr8   r>   r+   r+   r,   percent_chaos      zCharsetMatch.percent_chaosc                 C  r^   r_   )rc   r9   r>   r+   r+   r,   percent_coherence   re   zCharsetMatch.percent_coherencec                 C  rH   )z+
        Original untouched bytes.
        )r   r>   r+   r+   r,   r=      s   zCharsetMatch.rawlist[CharsetMatch]c                 C  rH   rI   )r%   r>   r+   r+   r,   submatch   rJ   zCharsetMatch.submatchc                 C     t | jdkS Nr   )r:   r%   r>   r+   r+   r,   has_submatch   s   zCharsetMatch.has_submatchc                 C  s@   | j d ur| j S dd t| D }ttdd |D | _ | j S )Nc                 S  s   g | ]}t |qS r+   )r   )rS   charr+   r+   r,   rU      rV   z*CharsetMatch.alphabets.<locals>.<listcomp>c                 S  s   h | ]}|r|qS r+   r+   )rS   rr+   r+   r,   	<setcomp>   rV   z)CharsetMatch.alphabets.<locals>.<setcomp>)r$   r   sortedlist)r*   Zdetected_rangesr+   r+   r,   	alphabets   s
   
zCharsetMatch.alphabetsc                 C  s   | j gdd | jD  S )z
        The complete list of encoding that output the exact SAME str result and therefore could be the originating
        encoding.
        This list does include the encoding available in property 'encoding'.
        c                 S  s   g | ]}|j qS r+   )r2   )rS   mr+   r+   r,   rU      s    z6CharsetMatch.could_be_from_charset.<locals>.<listcomp>)r    r%   r>   r+   r+   r,   r\      s   z"CharsetMatch.could_be_from_charsetutf_8r2   bytesc                   s~    j du s
 j |kr<| _ t } jdur5 j dvr5tt fdd|dd dd}||dd  }||d _ jS )	z
        Method to get re-encoded bytes payload using given target encoding. Default to UTF-8.
        Any errors will be simply ignored by the encoder NOT replaced.
        N)zutf-8utf8rs   c                   s<   | j |  d |  d  |  d t jddS )Nr   r
   _-)stringspanreplacegroupsr   r'   )rr   r>   r+   r,   <lambda>   s    
z%CharsetMatch.output.<locals>.<lambda>i    r
   )countrz   )r'   r   r)   lowerr   r   encoder&   )r*   r2   decoded_stringZpatched_headerr+   r>   r,   output   s    



zCharsetMatch.outputintc                 C  s   t t| S )z]
        Retrieve a hash fingerprint of the decoded payload, used for deduplication.
        )hashr   r>   r+   r+   r,   r3      s   zCharsetMatch.fingerprint)NN)r   r   r   r   r   r   r   r   r   r   r   r   r   r   )r.   r/   r0   r   )r0   r   r0   r   )r.   r   r0   rC   )r0   rK   r0   r   )r0   r   )r0   rg   )rs   )r2   r   r0   rt   r0   r   )__name__
__module____qualname__r-   r5   r<   propertyr;   rA   rB   rG   r2   rO   rQ   rR   r   r]   r8   r9   rd   rf   r=   rh   rk   rq   r\   r   r3   r+   r+   r+   r,   r      sX    




	r   c                   @  sb   e Zd ZdZddddZd d	d
Zd!ddZd"ddZd#ddZd$ddZ	d%ddZ
d%ddZdS )&CharsetMatchesz
    Container with every CharsetMatch items ordered by default from most probable to the less one.
    Act like a list(iterable) but does not implements all related methods.
    Nresultslist[CharsetMatch] | Nonec                 C  s   |r	t || _d S g | _d S rI   )ro   _results)r*   r   r+   r+   r,   r-     s   zCharsetMatches.__init__r0   Iterator[CharsetMatch]c                 c  s    | j E d H  d S rI   r   r>   r+   r+   r,   __iter__  s   zCharsetMatches.__iter__item	int | strr   c                 C  sJ   t |tr
| j| S t |tr#t|d}| jD ]}||jv r"|  S qt)z
        Retrieve a single item either by its position or encoding name (alias may be used here).
        Raise KeyError upon invalid index or encoding not present in results.
        F)r1   r   r   r   r   r\   KeyError)r*   r   resultr+   r+   r,   __getitem__
  s   





zCharsetMatches.__getitem__r   c                 C  s
   t | jS rI   r:   r   r>   r+   r+   r,   __len__  s   
zCharsetMatches.__len__r   c                 C  ri   rj   r   r>   r+   r+   r,   __bool__  s   zCharsetMatches.__bool__rC   c                 C  s|   t |tstdt|jt|jtk r0| j	D ]}|j
|j
kr/|j|jkr/||  dS q| j	| t| j	| _	dS )z~
        Insert a single match. Will be inserted accordingly to preserve sort.
        Can be inserted as a submatch.
        z-Cannot append instance '{}' to CharsetMatchesN)r1   r   r6   rD   r   rE   r:   r=   r   r   r3   r8   rG   rF   ro   )r*   r   matchr+   r+   r,   rF     s   


zCharsetMatches.appendCharsetMatch | Nonec                 C  s   | j sdS | j d S )zQ
        Simply return the first match. Strict equivalent to matches[0].
        Nr   r   r>   r+   r+   r,   best2  s   
zCharsetMatches.bestc                 C  s   |   S )zP
        Redundant method, call the method best(). Kept for BC reasons.
        )r   r>   r+   r+   r,   first:  s   zCharsetMatches.firstrI   )r   r   )r0   r   )r   r   r0   r   r   r   )r   r   r0   rC   )r0   r   )r   r   r   __doc__r-   r   r   r   r   rF   r   r   r+   r+   r+   r,   r      s    





r   c                   @  s.   e Zd ZdddZedddZdddZdS )CliDetectionResultpathr   r2   r   rO   rK   alternative_encodingsr]   rq   r   r   r8   r   r9   unicode_pathis_preferredc                 C  sF   || _ |
| _|| _|| _|| _|| _|| _|| _|| _|	| _	|| _
d S rI   )r   r   r2   rO   r   r]   rq   r   r8   r9   r   )r*   r   r2   rO   r   r]   rq   r   r8   r9   r   r   r+   r+   r,   r-   F  s   
zCliDetectionResult.__init__r0   dict[str, Any]c                 C  s2   | j | j| j| j| j| j| j| j| j| j	| j
dS )Nr   r2   rO   r   r]   rq   r   r8   r9   r   r   r   r>   r+   r+   r,   __dict__`  s   zCliDetectionResult.__dict__c                 C  s   t | jdddS )NT   )ensure_asciiindent)r   r   r>   r+   r+   r,   to_jsonp  s   zCliDetectionResult.to_jsonN)r   r   r2   r   rO   rK   r   rK   r]   r   rq   rK   r   r   r8   r   r9   r   r   r   r   r   )r0   r   r   )r   r   r   r-   r   r   r   r+   r+   r+   r,   r   E  s
    
r   N)
__future__r   Zencodings.aliasesr   jsonr   rer   typingr   r   r   r	   Zconstantr   r   utilsr   r   r   r   r   r   r   ZCoherenceMatchr   r   r+   r+   r+   r,   <module>   s     sC