o
    #jm                     @  s   d Z ddlmZ ddlZddlmZmZ ddlmZ dZ	e
dejZe
dejZe
d	ejZe
d
ejZdddZdddZdddZdS )z<Stage 1b: charset declaration extraction (HTML/XML/PEP 263).    )annotationsN)DETERMINISTIC_CONFIDENCEDetectionResult)lookup_encodingi   s*   <\?xml[^>]+encoding\s*=\s*['"]([^'"]+)['"]s,   <meta[^>]+charset\s*=\s*['"]?\s*([^\s'">;]+)s6   <meta[^>]+content\s*=\s*['"][^'"]*charset=([^\s'">;]+)s&   ^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)databytesreturnDetectionResult | Nonec              	   C  s   d| dd vr
dS d | dddd }t|}|rLz|dd }W n ttfy6   Y dS w t	|}|durLt
| |rLt|tddd	S dS )
aB  Check the first two lines of *data* for a PEP 263 encoding declaration.

    PEP 263 declarations (e.g. ``# -*- coding: utf-8 -*-``) are only valid
    on line 1 or line 2 of a Python source file.

    :param data: The raw byte data to scan.
    :returns: A :class:`DetectionResult` with confidence 0.95, or ``None``.
       #N      
      asciiztext/x-pythonencodingZ
confidencelanguage	mime_type)joinsplit
_PEP263_REsearchgroupdecodestripUnicodeDecodeError
ValueErrorr   _validate_bytesr   r   )r   Zfirst_two_linesmatchraw_namer    r    X/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/chardet/pipeline/markup.py_detect_pep263   s&   

r"   c              
   C  s   | sdS | dt  }tttfD ]@}||}|rOz|dd }W n tt	fy/   Y qw t
|}|durOt| |rO|tu rCdnd}t|td|d  S qt| S )a  Scan the first bytes of *data* for a charset declaration.

    Checks for:

    1. ``<?xml ... encoding="..."?>``
    2. ``<meta charset="...">``
    3. ``<meta http-equiv="Content-Type" content="...; charset=...">``
    4. PEP 263 ``# -*- coding: ... -*-`` (first two lines only)

    :param data: The raw byte data to scan.
    :returns: A :class:`DetectionResult` with confidence 0.95, or ``None``.
    Nr   r   ztext/xmlz	text/htmlr   )_SCAN_LIMIT_XML_ENCODING_RE_HTML5_CHARSET_RE_HTML4_CONTENT_TYPE_REr   r   r   r   r   r   r   r   r   r   r"   )r   headpatternr   r   r   r   r    r    r!   detect_markup_charset:   s,   

r)   r   strboolc              
   C  s4   z| dt  | W dS  tttfy   Y dS w )zCheck that *data* can be decoded under *encoding* without errors.

    Only validates the first ``_SCAN_LIMIT`` bytes to avoid decoding a
    full 200 kB input just to verify a charset declaration found in the
    header.
    NFT)r#   r   r   LookupErrorr   )r   r   r    r    r!   r   `   s   r   )r   r   r   r	   )r   r   r   r*   r   r+   )__doc__
__future__r   reZchardet.pipeliner   r   Zchardet.registryr   r#   compile
IGNORECASEr$   r%   r&   	MULTILINEr   r"   r)   r   r    r    r    r!   <module>   s&    

&