o
    *jS                     @   sN   d dl mZ G dd deZG dd deZG dd deZG dd	 d	Zd
S )    )normalize_chinese_numberc                   @   s   e Zd Zdd ZdS )TrieNodec                 C   s   i | _ d| _dS )6
        Initialize your data structure here.
        FN)datais_wordself r	   n/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/modelscope/preprocessors/ofa/utils/text2phone.py__init__   s   
zTrieNode.__init__N)__name__
__module____qualname__r   r	   r	   r	   r
   r      s    r   c                   @   s8   e Zd ZdZdd Zdd Zdd Zdd	 Zd
d ZdS )Triez
    trie-tree
    c                 C   s   t  | _dS )r   N)r   rootr   r	   r	   r
   r      s   zTrie.__init__c                 C   s@   | j }|D ]}|j|}|st |j|< |j| }qd|_dS )z\
        Inserts a word into the trie.
        :type word: str
        :rtype: void
        TN)r   r   getr   r   )r   wordnodecharschildr	   r	   r
   insert   s   
zTrie.insertc                 C   s,   | j }|D ]}|j|}|s dS q|jS )zb
        Returns if the word is in the trie.
        :type word: str
        :rtype: bool
        F)r   r   r   r   )r   r   r   r   r	   r	   r
   search)   s   zTrie.searchc                 C   s*   | j }|D ]}|j|}|s dS qdS )z
        Returns if there is any word in the trie that starts with the given prefix.
        :type prefix: str
        :rtype: bool
        FT)r   r   r   )r   prefixr   r   r	   r	   r
   
startsWith6   s   zTrie.startsWithc                    s\    fdd g }|  |s|S | |r|| |S | j}|D ]}|j|}q  ||S )zn
          Returns words started with prefix
          :param prefix:
          :return: words (list)
        c                    sH   g }|j r
||  |j D ]}| | t| |j| q|S N)r   appendr   keysextendstrr   )preZpre_nodeZ	word_listxget_keyr	   r
   r"   J   s   
"zTrie.get_start.<locals>.get_key)r   r   r   r   r   r   )r   r   wordsr   r   r	   r!   r
   	get_startC   s   



zTrie.get_startN)	r   r   r   __doc__r   r   r   r   r$   r	   r	   r	   r
   r      s    r   c                       sH   e Zd ZdZ fddZdd Zdd Zdd	 Zd
d Zdd Z	  Z
S )TrieTokenizerz'
    word_split based on trie-tree
    c                    s    t t|   || _|   d S r   )superr&   r   	dict_pathcreate_trie_tree)r   r(   	__class__r	   r
   r   c   s   zTrieTokenizer.__init__c                 C   sh   g }t | jddd }|D ]}|| dd dd qW d    |S 1 s-w   Y  |S )Nrzutf-8)modeencoding	r   z	utf-8-sig)openr(   r   stripsplitencodedecode)r   r#   fileliner	   r	   r
   	load_dicth   s   
zTrieTokenizer.load_dictc                 C   s    |   }|D ]}| | qd S r   )r7   r   )r   r#   r   r	   r	   r
   r)   p   s   zTrieTokenizer.create_trie_treec                 C   sF   |t |d kr!|| |jv r!|d }| |j||d   ||}|S )N   )lenr   	mine_tree)r   treesentencetrace_indexr	   r	   r
   r:   u   s   zTrieTokenizer.mine_treec                 C   s   g }t |}|dkrGd}| | j||}|dkr.||dd  |dt | }t |}n||d|  ||t | }t |}|dks
|S )Nr   r8   )r9   r:   r   r   )r   r<   tokensZsentence_lenr=   r	   r	   r
   tokenize~   s   
zTrieTokenizer.tokenizec                 C   s   d}g }g }|D ]=}t |dkr4|dkr||d d   q|d| ||d d   g }d}q|dkr@|| d}q|| q|S )Nr   r8    )r9   r   join)r   Z
token_listflagoutputtempir	   r	   r
   combine   s    
zTrieTokenizer.combine)r   r   r   r%   r   r7   r)   r:   r?   rF   __classcell__r	   r	   r*   r
   r&   ^   s    	r&   c                   @   s$   e Zd Zdd Zdd Zdd ZdS )
Text2Phonec                 C   s   t || _| || _d S r   )r&   trie_cwsget_phone_map	phone_map)r   phone_dict_pathr	   r	   r
   r      s   
zText2Phone.__init__c                 C   sb   t  }t|d}|D ]}| d\}}||vr|||< qW d    |S 1 s*w   Y  |S )Nr,   r/   )dictr0   r1   r2   )r   rL   rK   Zphone_map_file_readerr6   keyZphone_seriesr	   r	   r
   rJ      s   
zText2Phone.get_phone_mapc                 C   sx   t |}| j|}g }|D ](}|| jv r|| j|  qt|dkr6|D ]}|| jv r5|| j|  q&qd|S )Nr8    )r   rI   r?   rK   r   r9   rA   )r   textr>   Zphonesr   charr	   r	   r
   trans   s   


zText2Phone.transN)r   r   r   r   rJ   rR   r	   r	   r	   r
   rH      s    	rH   N)Zmodelscope.utils.chinese_utilsr   objectr   r   r&   rH   r	   r	   r	   r
   <module>   s
   
NH