o
    0jV                     @   s   d dl mZmZmZmZmZmZmZ d dlZ	d dl
mZ ddlmZ ddlmZ dd	lmZmZ d
dlmZmZmZmZmZmZmZmZmZ d
dlmZ d
dlm Z  g dZ!e!Z"G dd deZ#G dd deZ$dS )    )AnyDictListOptionalSequenceTupleUnionN)Image   )FuncRegister   ImageBatchSampler   )RunnerPredictorTransformersPredictor   )	DetPadDetPostProcess	Normalize	PadStride	ReadImageResizeToBatch
ToCHWImage
WarpAffine	DetResult)STATIC_SHAPE_MODEL_LIST)z	RT-DETR-LzRT-DETR-L_wired_table_cell_detz!RT-DETR-L_wireless_table_cell_detPP-DocLayout_plus-LPP-DocBlockLayoutc                       s  e Zd ZdZi ZeeZdddddddeee	e
e	e	f f  deeeef  dee deeee
eef ef  deeeef  f
 fd	d
Zdd Zdd Zde
fddZdee dee fddZ				d5dee deeeef  dedeeee
eef ef  deeeef  f
ddZedd6ddZeddg dg dd fd!d"Zed#d$d% Zed&d7d'd(Zed)d8d+d,Zed-d9d/d0Zd1d2 Zd3d4 Z  Z S ):DetRunnerPredictorz2Object detection predictor using inference runner.N)img_size	threshold
layout_nmslayout_unclip_ratiolayout_merge_bboxes_moder"   r#   r$   r%   r&   c                   s<  t  j|i | |durA| jtvsJ d| j dt|tr%||f}nt|ttfr7t|dks6J dn
t	dt
| d|durqt|trO||f}n"t|ttfrat|dks`J dnt|trgn
t	d	t
| d|durt|tr|d
v sJ d| || _|| _|| _|| _|| _|  \| _| _dS )aY  Initializes DetPredictor.
        Args:
            *args: Arbitrary positional arguments passed to the superclass.
            img_size (Optional[Union[int, Tuple[int, int]]], optional): The input image size (w, h). Defaults to None.
            threshold (Optional[float], optional): The threshold for filtering out low-confidence predictions.
                Defaults to None.
            layout_nms (bool, optional): Whether to use layout-aware NMS. Defaults to False.
            layout_unclip_ratio (Optional[Union[float, Tuple[float, float]]], optional): The ratio of unclipping the bounding box.
                Defaults to None.
                If it's a single number, then both width and height are used.
                If it's a tuple of two numbers, then they are used separately for width and height respectively.
                If it's None, then no unclipping will be performed.
            layout_merge_bboxes_mode (Optional[Union[str, dict]], optional): The mode for merging bounding boxes. Defaults to None.
            **kwargs: Arbitrary keyword arguments passed to the superclass.
        Nz
The model z! is not supported set input shaper   z%The length of `img_size` should be 2.z?The type of `img_size` must be int or Tuple[int, int], but got .z0The length of `layout_unclip_ratio` should be 2.zVThe type of `layout_unclip_ratio` must be float, Tuple[float, float] or Dict, but got )unionZlargeZsmallzfThe value of `layout_merge_bboxes_mode` must be one of ['union', 'large', 'small'] or a dict, but got )super__init__
model_namer   
isinstanceinttuplelistlen
ValueErrortypefloatdictstrr"   r#   r$   r%   r&   _buildpre_opspost_op)selfr"   r#   r$   r%   r&   argskwargs	__class__ t/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/paddlex/inference/models/object_detection/predictor.pyr*   5   sF   






zDetRunnerPredictor.__init__c                 C      t  S Nr   r9   r>   r>   r?   _build_batch_samplerz      z'DetRunnerPredictor._build_batch_samplerc                 C      t S rA   r   rB   r>   r>   r?   _get_result_class}      z$DetRunnerPredictor._get_result_classreturnc                 C   s   t ddg}| jd D ]'}|d }| j| }|d |}|r'|| fi |n|| }|r2|| q||   | jdurWt|d trK|d |	d| 
| jdd |  }||fS )	zBuild the preprocessors and postprocessors based on the configuration.

        Returns:
            tuple: A tuple containing the preprocessors and postprocessors.
        RGBformatZ
Preprocessr2   Nr   Fr   )r   config	_FUNC_MAPpopappendbuild_to_batchr"   r,   r   insertbuild_resizebuild_postprocess)r9   r7   cfgZtf_keyfuncr:   opr8   r>   r>   r?   r6      s"   




zDetRunnerPredictor._buildpredc                    s  d}g  t |dkr.g g |d |d g |d  fddtt D S t |dkr6g tt |d D ].}|d | }|| }|d || } | t |dkrj|d || }| |}q>t |dkr fddtt  D S d	d  D S )
a  
        Transform batch outputs into a list of single image output.

        Args:
            pred (Sequence[Any]): The input predictions, which can be either a list of 3 or 4 elements.
                - When len(pred) == 4, it is expected to be in the format [boxes, class_ids, scores, masks],
                  compatible with SOLOv2 output.
                - When len(pred) == 3, it is expected to be in the format [boxes, box_nums, masks],
                  compatible with Instance Segmentation output.

        Returns:
            List[dict]: A list of dictionaries, each containing either 'class_id' and 'masks' (for SOLOv2),
                or 'boxes' and 'masks' (for Instance Segmentation), or just 'boxes' if no masks are provided.
        r   r
   r   r   r   c                    *   g | ]}t  | t | d qS ))Zclass_idmasksnparray.0i)pred_class_id	pred_maskr>   r?   
<listcomp>   s    z5DetRunnerPredictor._format_output.<locals>.<listcomp>c                    rX   ))boxesrY   )r[   asarrayr]   )pred_boxra   r>   r?   rb      s    c                 S   s   g | ]	}d t |iqS )rc   rZ   )r^   resr>   r>   r?   rb          )r0   rO   range)r9   rW   Zbox_idx_startidxZnp_boxes_numZbox_idx_endZnp_boxesZnp_masksr>   )re   r`   ra   r?   _format_output   s4   



z!DetRunnerPredictor._format_outputF
batch_datac                 C   s   |j }| jdd D ]}||}q
| jd |}| |}	| |	}
| j|
||dur,|n| j|p2| j|p6| j|p:| jd}|j	|j
dd |D |dS )a  
        Process a batch of data through the preprocessing, inference, and postprocessing.

        Args:
            batch_data (List[Union[str, np.ndarray], ...]): A batch of input data (e.g., image file paths).
            threshold (Optional[float, dict], optional): The threshold for filtering out low-confidence predictions.
            layout_nms (bool, optional): Whether to use layout-aware NMS. Defaults to None.
            layout_unclip_ratio (Optional[Union[float, Tuple[float, float]]], optional): The ratio of unclipping the bounding box.
            layout_merge_bboxes_mode (Optional[Union[str, dict]], optional): The mode for merging bounding boxes. Defaults to None.

        Returns:
            dict: A dictionary containing the input path, raw image, class IDs, scores, and label names
                for every instance of the batch. Keys include 'input_path', 'input_img', 'class_ids', 'scores', and 'label_names'.
        Nr#   r$   r%   r&   c                 S      g | ]}|d  qS Zori_imgr>   r^   datar>   r>   r?   rb         z.DetRunnerPredictor.process.<locals>.<listcomp>Z
input_pathZ
page_indexZ	input_imgrc   )	instancesr7   runnerrj   r8   r#   r$   r%   r&   input_pathspage_indexes)r9   rk   r#   r$   r%   r&   datasZpre_opZbatch_inputsZbatch_predsZ
preds_listrc   r>   r>   r?   process   s(   


zDetRunnerPredictor.processr   r   c                 C   sB   |sJ t |trdddddd| }t|d d d ||d}|S )	NZNEARESTZLINEARZBICUBICZAREAZLANCZOS4)r   r   r   r   r
   rl   )target_size
keep_ratiointerp)r,   r-   r   )r9   rz   r{   r|   rV   r>   r>   r?   rR     s   
zDetRunnerPredictor.build_resizeZNormalizeImage)g
ףp=
?gv/?gCl?)gZd;O?gy&1?g?Tc                 C   s<   |rd}nd}|r|dkrd}|dkrd}d}t |||dS )Ngp?r   noneZmean_stdr   )scalemeanstd)r   )r9   Z	norm_typer   r   Zis_scaler~   r>   r>   r?   build_normalize  s   z"DetRunnerPredictor.build_normalizeZPermutec                 C   r@   rA   )r   rB   r>   r>   r?   build_to_chw&  s   zDetRunnerPredictor.build_to_chwZPadc                 C   s,   |d u rg d}|d u rg d}t ||dS )N)     _@r   r   )r     r   )size
fill_value)r   )r9   r   r   r>   r>   r?   	build_pad*  s
   zDetRunnerPredictor.build_padr       c                 C   s
   t |dS )N)stride)r   )r9   r   r>   r>   r?   build_pad_stride2  s   
z#DetRunnerPredictor.build_pad_strider      c                 C   s   t |||dS )N)input_hinput_wkeep_res)r   )r9   r   r   r   r>   r>   r?   build_warp_affine6  s   z$DetRunnerPredictor.build_warp_affinec                    s2   g d}t  fdd|D rd}nd}t|dS )N)ZDETRZDINOZRCNNZYOLOv3Z	CenterNetZ	BlazeFacezBlazeFace-FPN-SSHzPP-DocLayout-Lr   r    zPP-DocLayoutV2c                 3   s    | ]}| j v V  qd S rA   )r+   )r^   namerB   r>   r?   	<genexpr>H  s    z4DetRunnerPredictor.build_to_batch.<locals>.<genexpr>)r"   imgscale_factors)r   r   )ordered_required_keys)anyr   )r9   Zmodels_required_imgsizer   r>   rB   r?   rP   :  s
   
z!DetRunnerPredictor.build_to_batchc                 C   st   | j d u r| jdd| _ | js| jdd | _| jd u r%| jdd | _| jd u r2| jdd | _t| jd dS )Ndraw_threshold      ?r$   r%   r&   
label_listlabels)r#   rL   getr$   r%   r&   r   rB   r>   r>   r?   rS   S  s   


z$DetRunnerPredictor.build_postprocessNFNN)Fr   )NN)r   )r   r   T)!__name__
__module____qualname____doc__rM   r   registerr   r   r-   r   r3   r4   boolr5   r*   rC   rF   r6   r   r   r   rj   ry   rR   r   r   r   r   r   rP   rS   __classcell__r>   r>   r<   r?   r!   /   sr    E8
5
r!   c                       s8  e Zd ZdZddddddeeeef  dee deeee	eef ef  deee
ef  f fdd	Zd
d Zdd Zdd Zdee
ef dejfddZdee fddZdejdeeeef  dejfddZdejde	eef dee fddZdeeeef  de	eeef ef fddZdedeeee	eef ef  deee
ef  dee
ef fdd Zd!ee
ef defd"d#Zdee
ef d$ee
ef d%eeef d!ee
ef dee f
d&d'Z		(		d0d)ee deeeef  dedeeee	eef ef  deee
ef  f
d*d+Zd,d- Zd.d/ Z  Z S )1DetTransformersPredictorz>Object detection predictor backed by HuggingFace transformers.Nrm   r#   r$   r%   r&   c                   s\   t  j|i | || _|| _|| _|| _tdd| _|  \| _	| _
| _t| jd| _d S )NrI   rJ   r   )r)   r*   r#   r$   r%   r&   r   read_opr6   image_processorinferr   r   layout_postprocess)r9   r#   r$   r%   r&   r:   r;   r<   r>   r?   r*   d  s   	z!DetTransformersPredictor.__init__c                 C   r@   rA   r   rB   r>   r>   r?   rC   v  rD   z-DetTransformersPredictor._build_batch_samplerc                 C   rE   rA   r   rB   r>   r>   r?   rF   y  rG   z*DetTransformersPredictor._get_result_classc                 C   s8   ddl m}m} | |}| |}|| _|||  fS )Nr   )AutoImageProcessorAutoModelForObjectDetection)Ztransformersr   r   Z_load_pretrained_processorZ_load_pretrained_model_label_source_model_resolve_labels)r9   r   r   r   modelr>   r>   r?   r6   |  s
   

zDetTransformersPredictor._build
predictionrH   c                 C   s   |d     }|d     }|d     }t|dkr,tjdtjdS tj|d d d f jtjdd|d d d f jtjdd|jtjddgd	d
S )Nrc   scoresr   r   r      ZdtypeF)copyr   )Zaxis)	detachcpunumpyr0   r[   emptyfloat32ZconcatenateZastype)r9   r   rc   r   r   r>   r>   r?   _format_transformers_output  s   z4DetTransformersPredictor._format_transformers_outputrx   c                 C   s"   dd l }|jdd |D |jdS )Nr   c                 S   s   g | ]}|d  ddd qS )ori_img_sizeNrl   r>   rp   r>   r>   r?   rb     s    z>DetTransformersPredictor._get_target_sizes.<locals>.<listcomp>r   )torchZtensorZint64)r9   rx   r   r>   r>   r?   _get_target_sizes  s   z*DetTransformersPredictor._get_target_sizesrc   c                 C   st   |j dks
t|ts|S g }|D ]}t|d }|d ||dkr'|| q|s2tjdtjdS tj	|tjdS )Nr   r   r   r   r   )
r   r,   r4   r-   r   rO   r[   r   r   rd   )r9   rc   r#   selectedboxZcat_idr>   r>   r?   _apply_category_threshold  s   
z2DetTransformersPredictor._apply_category_thresholdr"   c              
   C   s  |j dkrg S |\}}g }|D ]u}t|d }|dd  \}}	}
}tdtt|t|}tdtt|	t|}	tdtt|
t|}
tdtt|t|}|
|ksY||	krZqd|  krgt| jk rnn n| j| nt|}|||t|d ||	|
|gd q|S )Nr   r           r   )cls_idlabelZscoreZ
coordinate)	r   r-   maxminr3   r0   r   r5   rO   )r9   rc   r"   widthheightresultsr   r   ZxminZyminZxmaxZymaxr   r>   r>   r?   _to_paddlex_boxes  s.   
.

z*DetTransformersPredictor._to_paddlex_boxesc                 C   s<   |d ur|n| j }|d u rd}t|tr|dfS |t|fS )Nr   r   )r#   r,   r4   r3   )r9   r#   effective_thresholdr>   r>   r?   _get_hf_threshold  s   
z*DetTransformersPredictor._get_hf_thresholdc                 C   s   |p| j |p| j|p| jdS )Nr$   r%   r&   r   )r9   r$   r%   r&   r>   r>   r?   _get_layout_postprocess_kwargs  s
   z7DetTransformersPredictor._get_layout_postprocess_kwargslayout_postprocess_kwargsc                 C   s   t | S rA   )r   values)r9   r   r>   r>   r?   _requires_layout_postprocess  s   z5DetTransformersPredictor._requires_layout_postprocessrq   r   c                 C   sL   |  |}| ||}| |r| jj||d dfi |S | ||d S )Nr   r   )r   r   r   r   applyr   )r9   r   rq   r   r   	formattedr>   r>   r?   _postprocess_prediction  s   

z0DetTransformersPredictor._postprocess_predictionFrk   c                    s   t jdsttjj d|j}dd |D }|\ }j|d}		|	}
j
|
||d}j|||d fddt||D }|j|jd	d |D |d
S )Npost_process_object_detectionz2 does not support `post_process_object_detection`.c                 S   s   g | ]	}t |d  qS )r   )r	   Z	fromarrayrp   r>   r>   r?   rb     rg   z4DetTransformersPredictor.process.<locals>.<listcomp>)images)rx   r#   r   c                    s"   g | ]\}}j || d qS ))r   rq   r   r   )r   )r^   rq   r   r   r   r9   r>   r?   rb     s    c                 S   rn   ro   r>   rp   r>   r>   r?   rb     rr   rs   )hasattrr   RuntimeErrorr2   r   r   rt   r   Zpreprocess_imagesforwardpostprocessr   ziprv   rw   )r9   rk   r#   r$   r%   r&   rx   r   Zhf_thresholdZmodel_inputsoutputspredictionsrc   r>   r   r?   ry     s.   
z DetTransformersPredictor.processc                K   s   | j j||| |d}|S )N)r#   Ztarget_sizes)r   r   r   )r9   r   rx   r#   r;   r   r>   r>   r?   r     s   z$DetTransformersPredictor.postprocessc                    s   | j d u r| jdd| _ | jd u r| jdd | _| jd u r'| jdd | _| jd u r4| jdd | _| jd}|s_t| dd pGt| dd }tt|d	d d
d   r_ fddt D }|setd|S )Nr   r   r$   r%   r&   r   r   r   rL   id2labelc                    s   g | ]} | qS r>   r>   )r^   ri   r   r>   r?   rb   =  rr   z<DetTransformersPredictor._resolve_labels.<locals>.<listcomp>z9Unable to resolve label names for object detection model.)	r#   Zmodel_configr   r$   r%   r&   getattrsortedr1   )r9   r   Zlabel_sourcer>   r   r?   r   (  s2   



z(DetTransformersPredictor._resolve_labelsr   )!r   r   r   r   r   r   r3   r4   r   r   r5   r*   rC   rF   r6   r   r   r[   Zndarrayr   r   r   r   r-   r   r   r   r   r   ry   r   r   r   r>   r>   r<   r?   r   a  s    














,	r   )%typingr   r   r   r   r   r   r   r   r[   ZPILr	   Zutils.func_registerr   Zcommon.batch_samplerr   Z
predictorsr   r   Z
processorsr   r   r   r   r   r   r   r   r   resultr   utilsr   ZRTDETR_L_MODELSZDET_TRANSFORMERS_MODELSr!   r   r>   r>   r>   r?   <module>   s   $,  4