o
    0j                     @   s   d dl Z d dlmZmZmZmZmZ d dlZddl	m
Z
mZ ddlmZ ddlmZ ddlmZ ddlmZ d	d
lmZ ejG dd deZdS )    N)AnyDictListOptionalUnion   )	HPIConfigPaddlePredictorOption)TextToPinyinResult)Fastspeech2Result)PwganResult)	benchmark   )BasePipelinec                       s   e Zd ZdZdZddddddddedee dee d	eeeef  d
ee	 de
deeeeef ef  ddf fddZdeeee ejeej f defddZdeeee f defddZdeeee f defddZ  ZS )TextToSpeechPipelinez Text to Speech Pipeline PipelineZtext_to_speechNFdeviceengineengine_config	pp_optionuse_hpip
hpi_configconfigr   r   r   r   r   r   returnc             	      sl   t  jd||||||d| |d d }	| |	| _|d d }
| |
| _|d d }| || _dS )a  Initializes the text-to-speech pipeline.

        Args:
            config (Dict): Configuration dictionary containing model and other parameters.
            device (Optional[str], optional): The device to use for prediction. Defaults to `None`.
            engine (Optional[str], optional): Inference engine. Defaults to `None`.
            engine_config (Optional[Dict[str, Any]], optional): Engine-specific config. Defaults to `None`.
            pp_option (Optional[PaddlePredictorOption], optional): Paddle predictor options.
                Defaults to `None`.
            use_hpip (bool, optional): Whether to use HPIP. Defaults to `False`.
            hpi_config (Optional[Union[Dict[str, Any], HPIConfig]], optional):
                HPIP configuration. Defaults to `None`.
        r   Z
SubModulesZTextToPinyinZTextToSpeechAcousticZTextToSpeechVocoderN )super__init__Zcreate_modeltext_to_pinyin_modeltext_to_speech_acoustic_modeltext_to_speech_vocoder_model)selfr   r   r   r   r   r   r   kwargsZtext_to_pinyin_model_configZ$text_to_speech_acoustic_model_configZ#text_to_speech_vocoder_model_config	__class__r   t/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/paddlex/inference/pipelines/text_to_speech/pipeline.pyr   "   s0   

zTextToSpeechPipeline.__init__inputc           
      k   s   g }t |trW|drStj|std| z"t|ddd}dd | D }W d   n1 s6w   Y  W n t	yR } z
t	d	| d
| d}~ww |g}nmt |t
r|D ]Z}t |tr|drtj|swtd| z%t|ddd}|dd | D  W d   n1 sw   Y  W q^ t	y } z
t	d	| d
| d}~ww || q^n
tdt| d|std|D ]}| |d d g}| |d g}	| |	E dH  qdS )an  Predicts speech recognition results for the given input.

        Args:
            input (Union[str, list[str], np.ndarray, list[np.ndarray]]): The input audio or path.
            **kwargs: Additional keyword arguments that can be passed to the function.

        Returns:
            PwganResult: The predicted pwgan results, support str and json output.
        z.txtz(The specified text file does not exist: rzutf-8)encodingc                 S      g | ]}|  qS r   strip.0liner   r   r$   
<listcomp>i       z0TextToSpeechPipeline.predict.<locals>.<listcomp>Nz)An error occurred while reading the file z: z4The specified text file in the list does not exist: c                 S   r(   r   r)   r+   r   r   r$   r.   {   r/   zUnsupported input type: z$. Expected str, list, or np.ndarray.z<The input resulted in an empty list of sentences to process.resultZ	phone_ids)
isinstancestrendswithospathexistsFileNotFoundErroropen	readlinesIOErrorlistextendappend	TypeErrortype
ValueErrorget_text_to_pinyin_result"get_text_to_speech_acoustic_resultr   )
r    r%   r!   Z	sentencesfeitemZsentenceZtext_to_pinyin_resZtext_to_speech_acoustic_resr   r   r$   predictT   st   





zTextToSpeechPipeline.predictc                 C      t | |S )zGet the result of text to pinyin conversion.

        Args:
            input (Union[str, list[str]]): The input text or list of texts.

        Returns:
            TextToPinyinResult: The result of text to pinyin conversion.
        )nextr   r    r%   r   r   r$   rA         z.TextToSpeechPipeline.get_text_to_pinyin_resultc                 C   rG   )zGet the result of text to speech acoustic conversion.

        Args:
            input (Union[str, list[str]]): The input text or list of texts.

        Returns:
            Fastspeech2Result: The result of text to speech acoustic conversion.
        )rH   r   rI   r   r   r$   rB      rJ   z7TextToSpeechPipeline.get_text_to_speech_acoustic_result)__name__
__module____qualname____doc__entitiesr   r   r2   r   r	   boolr   r   r   r   npZndarrayr   rF   r
   rA   r   rB   __classcell__r   r   r"   r$   r      sR    	2
A
r   )r4   typingr   r   r   r   r   numpyrQ   modelsr   r	   Zmodels.text_to_pinyin.resultr
   Z%models.text_to_speech_acoustic.resultr   Z$models.text_to_speech_vocoder.resultr   Zutils.benchmarkr   baser   Ztime_methodsr   r   r   r   r$   <module>   s   