o
    j]4                     @  sv  d Z ddlmZ ddlmZ ddlmZ ddlmZm	Z	m
Z
 ddlmZ ddlmZmZmZmZmZmZmZmZmZmZmZmZmZmZ erLdd	lmZ d
ZG dd deZeddd9ddZ eddd:ddZ!eddd:ddZ"eddd:ddZ#eddd:ddZ$G dd de
Z%eddd;d"d#Z&d<d(d)Z'		*d=d>d0d1Z(d?d3d4Z)d@d5d6Z*		*d=d>d7d8Z+d*S )Az
Grapheme cluster segmentation following Unicode Standard Annex #29.

This module provides pure-Python implementation of the grapheme cluster boundary algorithm as
defined in UAX #29: Unicode Text Segmentation.

https://www.unicode.org/reports/tr29/
    )annotations)IntEnum)	lru_cache)TYPE_CHECKINGOptional
NamedTuple   )bisearch)
GRAPHEME_L
GRAPHEME_T
GRAPHEME_VGRAPHEME_LVINCB_EXTENDINCB_LINKERGRAPHEME_LVTINCB_CONSONANTGRAPHEME_EXTENDGRAPHEME_CONTROLGRAPHEME_PREPENDGRAPHEME_SPACINGMARKEXTENDED_PICTOGRAPHICGRAPHEME_REGIONAL_INDICATOR)Iterator    c                   @  sH   e Zd ZdZdZdZdZdZdZdZ	dZ
d	Zd
ZdZdZdZdZdZdS )GCBz'Grapheme Cluster Break property values.r   r                        	   
            N)__name__
__module____qualname____doc__OTHERCRLFCONTROLEXTENDZWJREGIONAL_INDICATORPREPENDSPACING_MARKLVTLVLVT r9   r9   Q/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/wcwidth/grapheme.pyr   ,   s     r   i   )maxsizeucsintreturnc                 C  s   | dkrt jS | dkrt jS | dkrt jS t| trt jS t| tr%t jS t| t	r-t j
S t| tr5t jS t| tr=t jS t| trEt jS t| trMt jS t| trUt jS t| tr]t jS t| tret jS t jS )z;Return the Grapheme_Cluster_Break property for a codepoint.r&   r#   i   )r   r,   r-   r0   	_bisearchr   r.   r   r/   r   r1   r   r2   r   r3   r
   r4   r   r5   r   r6   r   r7   r   r8   r+   r<   r9   r9   r:   _grapheme_cluster_breakB   s6   









rA   boolc                 C     t t| tS )z6Check if codepoint has Extended_Pictographic property.)rB   r?   r   r@   r9   r9   r:   _is_extended_pictographice      rD   c                 C  rC   )z,Check if codepoint has InCB=Linker property.)rB   r?   r   r@   r9   r9   r:   _is_incb_linkerk   rE   rF   c                 C  rC   )z/Check if codepoint has InCB=Consonant property.)rB   r?   r   r@   r9   r9   r:   _is_incb_consonantq   rE   rG   c                 C  rC   )z,Check if codepoint has InCB=Extend property.)rB   r?   r   r@   r9   r9   r:   _is_incb_extendw   rE   rH   c                   @  s"   e Zd ZU dZded< ded< dS )BreakResultz*Result of grapheme cluster break decision.rB   should_breakr=   ri_countN)r'   r(   r)   r*   __annotations__r9   r9   r9   r:   rI   }   s   
 rI   prev_gcbcurr_gcbOptional[BreakResult]c                 C  s&  | t jkr|t jkrtdddS | t jt jt jfv r tdddS |t jt jt jfv r0tdddS | t jkrG|t jt jt jt jfv rGtdddS | t jt jfv r]|t jt j	fv r]tdddS | t jt j	fv rp|t j	krptdddS |t j
kr{tdddS |t jkrtdddS | t jkrtdddS dS )z
    Check simple GCB-pair-based break rules (cacheable).

    Returns BreakResult for rules that can be determined from GCB properties alone, or None if
    complex lookback rules (GB9c, GB11) need to be checked.
    Fr   rJ   rK   TN)r   r,   r-   rI   r.   r4   r5   r7   r8   r6   r/   r3   r2   )rM   rN   r9   r9   r:   _simple_break_check   s&   	" 


rQ   textstrcurr_idxrK   c                 C  sn  t | |}|dur|S |tjkrtdddS t|| }t|rVd}|d }|dkrVt|| }	t|	r;d}|d8 }nt|	rD|d8 }nt|	rQ|rPtdddS nn|dks*| tjkrt|r|d }|dkrt|| }	t	|	}
|
tj
kr{|d8 }nt|	rtdddS n|dksg| tjkr|tjkr|d dkrtd|d dS tdddS |tjkrdnd}td|dS )z
    Determine if there should be a grapheme cluster break between prev and curr.

    Implements UAX #29 grapheme cluster boundary rules.
    NFr   rP   r   Tr   )rQ   r   r0   rI   ordrG   rF   rH   rD   rA   r/   r1   )rM   rN   rR   rT   rK   resultZcurr_ucsZ
has_linkeriZprev_ucsZ	prev_propr9   r9   r:   _should_break   sN   





rX   NunistrstartendOptional[int]Iterator[str]c           
      c  s    | sdS t | }|du r|}||ks||krdS t||}|}d}tt| | }|tjkr1d}t|d |D ]#}tt| | }t||| ||}	|	j}|	j	rY| || V  |}|}q8| || V  dS )aT  
    Iterate over grapheme clusters in a Unicode string.

    Grapheme clusters are "user-perceived characters" - what a user would
    consider a single character, which may consist of multiple Unicode
    codepoints (e.g., a base character with combining marks, emoji sequences).

    :param unistr: The Unicode string to segment.
    :param start: Starting index (default 0).
    :param end: Ending index (default len(unistr)).
    :yields: Grapheme cluster substrings.

    Example::

        >>> list(iter_graphemes('cafe\u0301'))
        ['c', 'a', 'f', 'e\u0301']
        >>> list(iter_graphemes('ok\U0001F468\u200D\U0001F469\u200D\U0001F467'))
        ['o', 'k', '\U0001F468\u200D\U0001F469\u200D\U0001F467']
        >>> list(iter_graphemes('ok\U0001F1FA\U0001F1F8'))
        ['o', 'k', '\U0001F1FA\U0001F1F8']

    .. versionadded:: 0.3.0
    Nr   r   )
lenminrA   rU   r   r1   rangerX   rK   rJ   )
rY   rZ   r[   lengthcluster_startrK   rM   idxrN   rV   r9   r9   r:   iter_graphemes   s.   

rd   posc                 C  sl  t | |d  }|dkr|dkr| |d  dkr|d S |dk rF|dkrB|dkrBt | |d  }|dkrBt|tjkrBt| |d S |d S |d }|dkr}|| tk r}t | | }d|  krddk rgn nnt|tjkron|d8 }|dkr}|| tk sT|}tt | | }|tjkrdnd}t|d |D ]}	tt | |	 }
t	||
| |	|}|j
}|jr|	}|
}q|S )a  
    Find the start of the grapheme cluster containing the character before pos.

    Scans backwards from pos to find a safe starting point, then iterates forward using standard
    break rules to find the actual cluster boundary.

    :param text: The Unicode string.
    :param pos: Position to search before (exclusive).
    :returns: Start position of the grapheme cluster.
    r   r#   r      r   r   )rU   rA   r   r2   _find_cluster_startMAX_GRAPHEME_SCANr.   r1   r`   rX   rK   rJ   )rR   re   Z	target_cpZprev_cpZ
safe_startcprb   Zleft_gcbrK   rW   Z	right_gcbrV   r9   r9   r:   rh   <  s:    	rh   c                 C  s    |dkrdS t | t|t| S )a  
    Find the grapheme cluster boundary immediately before a position.

    :param unistr: The Unicode string to search.
    :param pos: Position in the string (0 < pos <= len(unistr)).
    :returns: Start index of the grapheme cluster containing the character at pos-1.

    Example::

        >>> grapheme_boundary_before('Hello \U0001F44B\U0001F3FB', 8)
        6
        >>> grapheme_boundary_before('a\r\nb', 3)
        1

    .. versionadded:: 0.3.6
    r   )rh   r_   r^   )rY   re   r9   r9   r:   grapheme_boundary_beforep  s   rk   c                 c  s    | sdS t | }|du r|nt||}t|d}||ks!||kr#dS |}||krCt| |}||k r4dS | || V  |}||ks)dS dS )a  
    Iterate over grapheme clusters in reverse order (last to first).

    :param unistr: The Unicode string to segment.
    :param start: Starting index (default 0).
    :param end: Ending index (default len(unistr)).
    :yields: Grapheme cluster substrings in reverse order.

    Example::

        >>> list(iter_graphemes_reverse('cafe\u0301'))
        ['e\u0301', 'f', 'a', 'c']

    .. versionadded:: 0.3.6
    Nr   )r^   r_   maxrh   )rY   rZ   r[   ra   re   rb   r9   r9   r:   iter_graphemes_reverse  s    

rm   )r<   r=   r>   r   )r<   r=   r>   rB   )rM   r   rN   r   r>   rO   )rM   r   rN   r   rR   rS   rT   r=   rK   r=   r>   rI   )r   N)rY   rS   rZ   r=   r[   r\   r>   r]   )rR   rS   re   r=   r>   r=   )rY   rS   re   r=   r>   r=   ),r*   
__future__r   enumr   	functoolsr   typingr   r   r   r	   r?   Ztable_graphemer
   r   r   r   r   r   r   r   r   r   r   r   r   r   collections.abcr   ri   r   rA   rD   rF   rG   rH   rI   rQ   rX   rd   rh   rk   rm   r9   r9   r9   r:   <module>   sB    	@"
0E
D
4