o
    *j=                     @   s   d dl Z d dlmZmZ d dlZd dlmZmZ d dlm	Z	 d dl
mZ d dlmZ d dlmZ dd	lmZ dd
lmZ de_de_de_G dd deZdS )    N)AnyDict)Image	ImageFile)create_transform)
transforms)
load_image)ModeKeys   )OfaBasePreprocessor)RandomAugmentTc                       s   e Zd ZdZejf fdd	Zdeee	f deee	f fddZ
deee	f deee	f fdd	Zdeee	f deee	f fd
dZ  ZS )"OfaImageClassificationPreprocessorz9
    OFA preprocessor for image classification task.
    c                    s   t t| j|||g|R i | | jtjkr9tdd tj| j	| j	ftj
jdt tj| j| jdg| _dS t| j	ddddd	d
d| j| jd
| _ttdd dd g| jjdd | jjd gtdddg ddg| jjdd g| _dS )zpreprocess the data

        Args:
            cfg(modelscope.utils.config.ConfigDict) : model config
            model_dir (str): model path,
            mode: preprocessor mode (model mode)
        c                 S   
   |  dS NRGBconvertimage r   r/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/modelscope/preprocessors/ofa/image_classification.py<lambda>+      
 z=OfaImageClassificationPreprocessor.__init__.<locals>.<lambda>)interpolation)meanstdTg?zrand-m9-mstd0.5-inc1Zbicubicg      ?Zpixelr
   )
Z
input_sizeZis_trainingZcolor_jitterZauto_augmentr   Zre_probZre_modeZre_countr   r   c                 S   s   | | S Nr   )xyr   r   r   r   ?   s    c                 S   r   r   r   r   r   r   r   r   A   r   N      )
ZIdentityZAutoContrastZEqualizeZ
BrightnessZ	SharpnessZShearXZShearYZ
TranslateXZ
TranslateYZRotate)ZisPILZaugs   )superr   __init__moder	   TRAINr   ZComposeZResizeZpatch_image_sizeZInterpolationModeZBICUBICZToTensorZ	Normalizer   r   patch_resize_transformr   	functoolsreducer   )selfcfgZ	model_dirr$   argskwargs	__class__r   r   r#      s^   
	

z+OfaImageClassificationPreprocessor.__init__datareturnc                 C   s    | j tjkr| |S | |S r   )r$   r	   r%   _build_train_sample_build_infer_sample)r)   r/   r   r   r   __call__S   s   

z+OfaImageClassificationPreprocessor.__call__c                 C   s   |  |}d|d }|d di|d< | j|dd|d< t| j|d dd	 g|d
< | jduritt|d
 t| j	f
 }tt|d
 D ]}|d
 d|d   }| j|}d|| |< qJ||d< |S )a  
        Building training samples.

        step 1. Preprocess the data using the logic of `_build_infer_sample`
            and make sure the label data in the result.
        step 2. Preprocess the label data. Contains:
            - add ` ` before the label value and add `ref_dict` value
            - tokenize the label as `target` value without `bos` token.
            - add `bos` token and remove `eos` token of `target` as `prev_output_tokens`.
            - add constraints mask.

        Args:
            data (`Dict[str, Any]`): Input data, should contains the key of `image`,
                `prompt` and `label`, `image` refers the image input data, `prompt`
                refers the text input data the `label` is the supervised data for training.
        Return:
            A dict object, contains source, image, mask, label, target tokens,
            and previous output tokens data.
        z {}labelg      ?Zref_dictF)Zadd_bostargetNZprev_output_tokensr
   Tconstraint_mask)r2   formattokenize_texttorchcatbos_itemZconstraint_trieZzeroslenZtgt_dictboolrangetolistZget_next_layer)r)   r/   sampler5   r7   iZconstraint_prefix_tokenZconstraint_nodesr   r   r   r1   Y   s6   


z6OfaImageClassificationPreprocessor._build_train_samplec                 C   s~   |  || jd  }| |}| jjdd}| |}||tdg| j	d}d| jv r=| jd |v r=|| jd  |d< |S )az  
        Building inference samples.

        step 1. Get the pillow image.
        step 2. Do some transforms for the pillow image as the image input,
            such as resize, normalize, to tensor etc.
        step 3. Tokenize the prompt as text input.
        step 4. Determine Whether or not to add labels to the sample.

        Args:
            data (`Dict[str, Any]`): Input data, should contains the key of `image` and `prompt`,
                the former refers the image input data, and the later refers the text input data.
        Return:
            A dict object, contains source, image, mask and label data.
        r   promptz what does the image describe?T)sourcepatch_imageZ
patch_maskZdecoder_prompttextr4   )
Zget_img_pilZ
column_mapr&   r*   modelgetr9   r:   Ztensorr<   )r)   r/   r   rE   rC   ZinputsrA   r   r   r   r2      s   


z6OfaImageClassificationPreprocessor._build_infer_sample)__name__
__module____qualname____doc__r	   Z	INFERENCEr#   r   strr   r3   r1   r2   __classcell__r   r   r-   r   r      s    ":"*(r   )r'   typingr   r   r:   ZPILr   r   Z	timm.datar   Ztorchvisionr   Zmodelscope.preprocessors.imager   Zmodelscope.utils.constantr	   baser   Zutils.vision_helperr   ZLOAD_TRUNCATED_IMAGESZMAX_IMAGE_PIXELSr   r   r   r   r   <module>   s   