o
    *j                     @   sR   d dl mZmZmZ d dlmZ d dlmZ d dlm	Z	 e Z
G dd deZdS )    )ListOptionalUnion)
get_logger)InferFramework)is_vllm_availablec                       sx   e Zd Z				ddededededee f
 fd	d
Zdee	e e	e	e  f de	e fddZ
defddZ  ZS )VllmautoN   model_id_or_dirdtypequantizationtensor_parallel_sizetrust_remote_codec                    sV   t  | t stdddlm} tds|dv rd}|| j||||d| _	dS )	a  
        Args:
            dtype: The dtype to use, support `auto`, `float16`, `bfloat16`, `float32`
            quantization: The quantization bit, default None means do not do any quantization.
            tensor_parallel_size: The tensor parallel size.
        zLInstall vllm by `pip install vllm` before using vllm to accelerate inferencer   )LLM   )Zbfloat16r	   Zfloat16)r   r   r   r   N)
super__init__r   ImportErrorvllmr   r   Zcheck_gpu_compatibilityZ	model_dirmodel)selfr   r   r   r   r   r   	__class__ e/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/modelscope/pipelines/accelerate/vllm.pyr      s"   zVllm.__init__promptsreturnc           	      K   s   | dd}| dd}| dd}| dd}|s"|dkr"d|d< |r.|t|d	  |d
< |r4||d
< d	dlm} |di |}t|d	 trUdd | jj||dD S dd | jj||dD S )zGenerate tokens.
        Args:
            prompts(`Union[List[str], List[List[int]]]`):
                The string batch or the token list batch to input to the model.
            kwargs: Sampling parameters.
        	do_sampleNnum_beamr
   
max_lengthmax_new_tokensTZuse_beam_searchr   Z
max_tokens)SamplingParamsc                 S      g | ]}|j d  jqS r   Zoutputstext.0outputr   r   r   
<listcomp>F       z!Vllm.__call__.<locals>.<listcomp>)sampling_paramsc                 S   r#   r$   r%   r'   r   r   r   r*   K   r+   )Zprompt_token_idsr,   r   )poplenr   r"   
isinstancestrr   generate)	r   r   kwargsr   r   r    r!   r"   r,   r   r   r   __call__)   s.   
zVllm.__call__
model_typec                    s   t  fdddD S )Nc                    s   g | ]}|   v qS r   )lower)r(   r   r4   r   r   r*   Q   r+   z-Vllm.model_type_supported.<locals>.<listcomp>)llamaZbaichuanZinternlmZmistralaquilaZbloomZfalconZgptZmptoptZqwenr8   )any)r   r4   r   r6   r   model_type_supportedP   s   zVllm.model_type_supported)r	   Nr
   N)__name__
__module____qualname__r0   intr   boolr   r   r   r3   r;   __classcell__r   r   r   r   r   
   s(    
'r   N)typingr   r   r   Z
modelscoper   Z$modelscope.pipelines.accelerate.baser   Zmodelscope.utils.import_utilsr   loggerr   r   r   r   r   <module>   s    