o
    j%D                     @  s"  d Z ddlmZ dZdgZddlmZ ddlmZm	Z	m
Z
mZmZmZmZmZmZmZmZ ddlmZmZmZmZmZmZ ddlmZmZ dd	lmZmZm Z m!Z! dd
l"m#Z# eroddl$m%Z% ddlm&Z& ddl'm(Z(m)Z)m*Z* dZ+e	ee,e,f e,e,gdf Z-G dd deeZ.G dd de Z/dS )zCUse the HTMLParser library to parse HTML files that aren't too bad.    )annotationsMITHTMLParserTreeBuilder)
HTMLParser)AnyCallablecastDictIterableListOptionalTYPE_CHECKINGTupleTypeUnion)AttributeDictCDataCommentDeclarationDoctypeProcessingInstruction)EntitySubstitutionUnicodeDammit)DetectsXMLParsedAsHTMLHTMLHTMLTreeBuilderSTRICTParserRejectedMarkup)BeautifulSoup)NavigableString)	_Encoding
_Encodings
_RawMarkupzhtml.parserNc                   @  s   e Zd ZU dZded< dZded< 	 edd6ddZded< ded< ded< d7ddZd8ddZ	d9d:dd Z	d9d;d"d#Z
d<d%d&Zd=d(d)Zd=d*d+Zd<d,d-Zd>d/d0Zd<d1d2Zd<d3d4Zd5S )?BeautifulSoupHTMLParserreplacestrREPLACEignoreIGNOREon_duplicate_attributesoupr   argsr   r+   &Union[str, _DuplicateAttributeHandler]kwargsc                O  s@   || _ || _|jj| _tj| g|R i | g | _|   d S N)r,   r+   builderattribute_dict_classr   __init__already_closed_empty_elementZ_initialize_xml_detector)selfr,   r+   r-   r/    r6   X/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/bs4/builder/_htmlparser.pyr3   T   s   
	z BeautifulSoupHTMLParser.__init__z	List[str]r4   messagereturnNonec                 C  s   t |r0   r   )r5   r8   r6   r6   r7   erroro   s   zBeautifulSoupHTMLParser.errortagattrsList[Tuple[str, Optional[str]]]c                 C  s   | j ||dd | | dS )zHandle an incoming empty-element tag.

        html.parser only calls this method when the markup looks like
        <tag/>.
        F)handle_empty_elementN)handle_starttaghandle_endtag)r5   r<   r=   r6   r6   r7   handle_startendtag   s   z*BeautifulSoupHTMLParser.handle_startendtagTr?   boolc                 C  s   |   }|D ]3\}}|du rd}||v r5| j}|| jkrq|d| jfv r)|||< qtt|}|||| q|||< q| jjjrF| 	 \}}	nd }}	| jj
|dd|||	d}
|
durl|
jrl|rl| j|dd | j| | jdu rx| | dS dS )zHandle an opening tag, e.g. '<tag>'

        :param handle_empty_element: True if this tag is known to be
            an empty-element tag (i.e. there is not expected to be any
            closing tag).
        N )
sourceline	sourceposF)check_already_closed)r2   r+   r)   r'   r   _DuplicateAttributeHandlerr,   r1   Zstore_line_numbersgetposr@   Zis_empty_elementrA   r4   appendZ_root_tag_nameZ_root_tag_encountered)r5   r<   r=   r?   Z	attr_dictkeyvalueZon_duperE   rF   ZtagObjr6   r6   r7   r@      s2   






z'BeautifulSoupHTMLParser.handle_starttagrG   c                 C  s.   |r|| j v r| j | dS | j| dS )zHandle a closing tag, e.g. '</tag>'

        :param tag: A tag name.
        :param check_already_closed: True if this tag is expected to
           be the closing portion of an empty-element tag,
           e.g. '<tag></tag>'.
        N)r4   remover,   rA   )r5   r<   rG   r6   r6   r7   rA      s   	z%BeautifulSoupHTMLParser.handle_endtagdatac                 C  s   | j | dS )z4Handle some textual data that shows up between tags.N)r,   handle_datar5   rN   r6   r6   r7   rO      s   z#BeautifulSoupHTMLParser.handle_datanamec                 C  sh   | drt|dd}n| drt|dd}nt|}t|\}}|r-d| j_| | dS )zHandle a numeric character reference by converting it to the
        corresponding Unicode character and treating it as textual
        data.

        :param name: Character number, possibly in hexadecimal.
        x   XTN)
startswithintlstripr   Znumeric_character_referencer,   contains_replacement_charactersrO   )r5   rQ   Z	real_namerN   Zreplacement_addedr6   r6   r7   handle_charref   s   

z&BeautifulSoupHTMLParser.handle_charrefc                 C  s0   t j|}|dur|}nd| }| | dS )zHandle a named entity reference by converting it to the
        corresponding Unicode character(s) and treating it as textual
        data.

        :param name: Name of the entity reference.
        Nz&%s)r   ZHTML_ENTITY_TO_CHARACTERgetrO   )r5   rQ   	characterrN   r6   r6   r7   handle_entityref   s
   z(BeautifulSoupHTMLParser.handle_entityrefc                 C  s&   | j   | j | | j t dS )zOHandle an HTML comment.

        :param data: The text of the comment.
        N)r,   endDatarO   r   rP   r6   r6   r7   handle_comment  s   
z&BeautifulSoupHTMLParser.handle_commentdeclc                 C  s6   | j   |tdd }| j | | j t dS )zYHandle a DOCTYPE declaration.

        :param data: The text of the declaration.
        zDOCTYPE N)r,   r]   lenrO   r   )r5   r_   r6   r6   r7   handle_decl  s   
z#BeautifulSoupHTMLParser.handle_declc                 C  sN   |  drt}|tdd }nt}| j  | j| | j| dS )z{Handle a declaration of unknown type -- probably a CDATA block.

        :param data: The text of the declaration.
        zCDATA[N)upperrU   r   r`   r   r,   r]   rO   )r5   rN   clsr6   r6   r7   unknown_decl   s   
z$BeautifulSoupHTMLParser.unknown_declc                 C  s0   | j   | j | | | | j t dS )z\Handle a processing instruction.

        :param data: The text of the instruction.
        N)r,   r]   rO   Z_document_might_be_xmlr   rP   r6   r6   r7   	handle_pi/  s   

z!BeautifulSoupHTMLParser.handle_piN)r,   r   r-   r   r+   r.   r/   r   )r8   r&   r9   r:   )r<   r&   r=   r>   r9   r:   )T)r<   r&   r=   r>   r?   rC   r9   r:   )r<   r&   rG   rC   r9   r:   )rN   r&   r9   r:   )rQ   r&   r9   r:   )r_   r&   r9   r:   )__name__
__module____qualname__r'   __annotations__r)   r3   r;   rB   r@   rA   rO   rY   r\   r^   ra   rd   re   r6   r6   r6   r7   r$   =   s*   
 

>




	

r$   c                      s   e Zd ZU dZdZded< dZded< eZded< ee	e
gZd	ed
< ded< dZded< 		d&d' fddZ			d(d)dd Zefd*d$d%Z  ZS )+r   zA Beautiful soup `bs4.builder.TreeBuilder` that uses the
    :py:class:`html.parser.HTMLParser` parser, found in the Python
    standard library.

    FrC   is_xmlT	picklabler&   NAMEzIterable[str]featuresz$Tuple[Iterable[Any], Dict[str, Any]]parser_argsTRACKS_LINE_NUMBERSNOptional[Iterable[Any]]parser_kwargsOptional[Dict[str, Any]]r/   r   c                   sp   t  }dD ]}||v r||}|||< qtt| jdi | |p#g }|p'i }|| d|d< ||f| _dS )a  Constructor.

        :param parser_args: Positional arguments to pass into
            the BeautifulSoupHTMLParser constructor, once it's
            invoked.
        :param parser_kwargs: Keyword arguments to pass into
            the BeautifulSoupHTMLParser constructor, once it's
            invoked.
        :param kwargs: Keyword arguments for the superclass constructor.
        r*   Fconvert_charrefsNr6   )dictpopsuperr   r3   updatern   )r5   rn   rq   r/   Zextra_parser_kwargsargrL   	__class__r6   r7   r3   K  s   

zHTMLParserTreeBuilder.__init__markupr#   user_specified_encodingOptional[_Encoding]document_declared_encodingexclude_encodingsOptional[_Encodings]r9   DIterable[Tuple[str, Optional[_Encoding], Optional[_Encoding], bool]]c                 c  s    t |tr|dddfV  dS g }|r|| g }|r!|| t|||d|d}|jdu r3td|j|j|j|jfV  dS )a2  Run any preliminary steps necessary to make incoming markup
        acceptable to the parser.

        :param markup: Some markup -- probably a bytestring.
        :param user_specified_encoding: The user asked to try this encoding.
        :param document_declared_encoding: The markup itself claims to be
            in this encoding.
        :param exclude_encodings: The user asked _not_ to try any of
            these encodings.

        :yield: A series of 4-tuples: (markup, encoding, declared encoding,
             has undergone character replacement)

            Each 4-tuple represents a strategy for parsing the document.
            This TreeBuilder uses Unicode, Dammit to convert the markup
            into Unicode, so the ``markup`` element of the tuple will
            always be a string.
        NFT)known_definite_encodingsuser_encodingsZis_htmlr   zPCould not convert input to Unicode, and html.parser will not accept bytestrings.)	
isinstancer&   rJ   r   Zunicode_markupr   Zoriginal_encodingZdeclared_html_encodingrX   )r5   r{   r|   r~   r   r   r   Zdammitr6   r6   r7   prepare_markupi  s4   




z$HTMLParserTreeBuilder.prepare_markup_parser_classtype[BeautifulSoupHTMLParser]r:   c              
   C  s   | j \}}t|tsJ | jdusJ || jg|R i |}z|| |  W n ty: } zt|d}~ww g |_dS )z
        :param markup: The markup to feed into the parser.
        :param _parser_class: An HTMLParser subclass to use. This is only intended for use in unit tests.
        N)	rn   r   r&   r,   feedcloseAssertionErrorr   r4   )r5   r{   r   r-   r/   parserer6   r6   r7   r     s   


zHTMLParserTreeBuilder.feed)NN)rn   rp   rq   rr   r/   r   )NNN)
r{   r#   r|   r}   r~   r}   r   r   r9   r   )r{   r#   r   r   r9   r:   )rf   rg   rh   __doc__rj   ri   rk   
HTMLPARSERrl   r   r   rm   ro   r3   r   r$   r   __classcell__r6   r6   ry   r7   r   :  s    
 !H)0r   
__future__r   __license____all__html.parserr   typingr   r   r   r	   r
   r   r   r   r   r   r   Zbs4.elementr   r   r   r   r   r   Z
bs4.dammitr   r   Zbs4.builderr   r   r   r   Zbs4.exceptionsr   Zbs4r   r    Zbs4._typingr!   r"   r#   r   r&   rH   r$   r   r6   r6   r6   r7   <module>   s(   4  ~