o
    0j<                     @   s   d dl mZ d dlZddlmZ ddlmZ ejedG dd	 d	Z	ejG d
d dZ
ejG dd dZdd Zdd Zddg dddfddZdd Zd ddZejG dd dZdS )!    )ListN   )class_requires_deps   )	benchmarkzopencv-contrib-pythonc                       sP   e Zd ZdZddeddf fddZdedefd	d
ZdedefddZ  Z	S )ResizeVideoaR  Resizes frames of a video to a specified target size.

    This class provides functionality to resize each frame of a video to
    a specified square dimension (height and width are equal).

    Attributes:
        target_size (int): The desired size (in pixels) for both the height
            and width of each frame in the video.
       target_sizereturnNc                       t    || _dS )zInitializes the ResizeVideo with a target size.

        Args:
            target_size (int): The desired size in pixels for the output
                frames. Defaults to 224.
        N)super__init__r	   )selfr	   	__class__ t/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/paddlex/inference/models/video_detection/processors.pyr   %      

zResizeVideo.__init__videoc                 C   s   ddl }t|}t|d }t|D ]1}t|D ]*}|| | }t|tjr-|j\}}	}
ntd|j|| j	| j	f|j
d|| |< qq|S )a  Resizes all frames of a single video.

        Args:
            video (list): A list of segments, where each segment is a list
                of frames represented as numpy arrays.

        Returns:
            list: The input video with each frame resized to the target size.

        Raises:
            NotImplementedError: If a frame is not an instance of numpy.ndarray.
        r   Nz3Currently, only numpy.ndarray frames are supported.)interpolation)cv2lenrange
isinstancenpndarrayshapeNotImplementedErrorresizer	   ZINTER_LINEAR)r   r   r   num_segZseg_lenijimghw_r   r   r   r   /   s$   
zResizeVideo.resizevideosc                        fdd|D S )a7  Resizes frames of multiple videos.

        Args:
            videos (list): A list containing multiple videos, where each video
                is a list of segments, and each segment is a list of frames.

        Returns:
            list: A list of videos with each frame resized to the target size.
        c                       g | ]}  |qS r   )r   .0r   r   r   r   
<listcomp>[       z(ResizeVideo.__call__.<locals>.<listcomp>r   r   r&   r   r+   r   __call__Q   s   
zResizeVideo.__call__)r   )
__name__
__module____qualname____doc__intr   r   r   r/   __classcell__r   r   r   r   r      s
    

"r   c                       s`   e Zd ZdZddeddf fddZdedefd	d
Zdeeej	  deej	 fddZ
  ZS )Image2ArrayzJConvert a sequence of images to a numpy array with optional transposition.tchwdata_formatr
   Nc                    s*   t    |dv sJ d| || _dS )z
        Initializes the Image2Array class.

        Args:
            data_format (str): The format to transpose to, either 'tchw' or 'cthw'.

        Raises:
            AssertionError: If data_format is not one of the allowed values.
        )r7   Zcthwz3Target format must be in ['tchw', 'cthw'], but got N)r   r   r8   )r   r8   r   r   r   r   b   s
   



zImage2Array.__init__r   c                 C   sP   t |}t|D ]}|| }dd |D }tjdd |D dd}|||< q|S )a  
        Converts a list of video frames to a numpy array, with frames transposed.

        Args:
            video (List): A list of frames represented as numpy arrays.

        Returns:
            List: A numpy array with the video frames transposed and concatenated.
        c                 S   s   g | ]	}| g d qS ))   r      )	transposer*   r"   r   r   r   r,          z)Image2Array.img2array.<locals>.<listcomp>c                 S   s   g | ]	}t j|d dqS )r:   axis)r   expand_dimsr<   r   r   r   r,      r=   r:   r>   )r   r   r   Zconcatenate)r   r   r   r    Z	video_oner   r   r   	img2arrays   s   
zImage2Array.img2arrayr&   c                    r'   )aY  
        Process videos by converting each video to a transposed numpy array.

        Args:
            videos (List[List[np.ndarray]]): A list of videos, where each video is a list
                of frames represented as numpy arrays.

        Returns:
            List[np.ndarray]: A list of processed videos with transposed frames.
        c                    r(   r   )rA   r)   r+   r   r   r,      r-   z(Image2Array.__call__.<locals>.<listcomp>r   r.   r   r+   r   r/         zImage2Array.__call__)r7   )r0   r1   r2   r3   strr   r   rA   r   r   r/   r5   r   r   r   r   r6   ^   s
    *r6   c                       sp   e Zd ZdZddeddf fddZdeej deej fd	d
Z	deeej  deeej  fddZ
  ZS )NormalizeVideozH
    A class to normalize video frames by scaling the pixel values.
         o@scaler
   Nc                    r   )z
        Initializes the NormalizeVideo class.

        Args:
            scale (float): The scale factor to normalize the frames, usually the max pixel value.
        N)r   r   rF   )r   rF   r   r   r   r      r   zNormalizeVideo.__init__r   c                 C   sJ   t |}t|D ]}|| tj| j ||< tj|| dd||< q|S )a0  
        Normalizes a sequence of images by scaling the pixel values.

        Args:
            video (List[np.ndarray]): A list of frames, where each frame is a numpy array to be normalized.

        Returns:
            List[np.ndarray]: The normalized video frames as a list of numpy arrays.
        r   r>   )r   r   astyper   float32rF   r@   )r   r   r   r    r   r   r   normalize_video   s
   
zNormalizeVideo.normalize_videor&   c                    r'   )a^  
        Apply normalization to a list of videos.

        Args:
            videos (List[List[np.ndarray]]): A list of videos, where each video is a list of frames
                represented as numpy arrays.

        Returns:
            List[List[np.ndarray]]: A list of normalized videos, each represented as a list of normalized frames.
        c                    r(   r   )rI   r)   r+   r   r   r,      r-   z+NormalizeVideo.__call__.<locals>.<listcomp>r   r.   r   r+   r   r/      rB   zNormalizeVideo.__call__)rE   )r0   r1   r2   r3   floatr   r   r   r   rI   r/   r5   r   r   r   r   rD      s
    
.rD   c                 C      |  d}| S )NrH   rG   cpu)
gpu_matrixZ
float_32_gr   r   r   convert2cpu      
rO   c                 C   rK   )NZint64rL   )rN   Zint_64_gr   r   r   convert2cpu_long   rP   rQ   g{Gzt?   )
g@M-[?gʾ+?g6qrC?gFh@g_{fI?g̒ 5U@gq@g}8g@gje/@gYnݭ@   r:   c           +      C   sl  ddl }t|| }|  dkr| d} | jd }| jd d| | ks'J | jd }	| jd }
g }|| || d| |	|
 g} || d} || d| || |	 |
 g} |d|
d |
}|||	dg}|||| ddg}|||| |	 |
 g}|d|	d |	}|||
dg	 }|||| ddg}|||| |	 |
 g}|j
 }|| d | }|| d | }||}||||g}|j||tdgddd	}||}||||g}|j||tdgddd	}|||dg}||dd|	|
 g}|||| |	 |
 g}|||dg}||dd|	|
 g}|||| |	 |
 g}|| d | }|| d | }|| d
 }|j| dd|  dd}||ddg}|j
 }|||}|j|dd}|j|dd}||dg}||dg}|	|
 }|| }t|}t|}t|}t|}t|}t|}t|}t|D ]w}g }t|	D ]h}t|
D ]`}t|D ]X} || | |  ||
  | }!||! }"|r||! }#n||! ||!  }#|#|kr'||! }$||! }%||! }&||! }'||! }(||! })|$|
 |%|	 |&|
 |'|	 |"|(|)g}*||* qАqʐq|| q|S )a$  
    Processes the output of a neural network to extract bounding box predictions.

    Args:
        output (Tensor): The output tensor from the neural network.
        conf_thresh (float): The confidence threshold for filtering predictions. Default is 0.005.
        num_classes (int): The number of classes for classification. Default is 24.
        anchors (List[float]): A list of anchor box dimensions used in the model. Default is a list
            of 10 predefined anchor values.
        num_anchors (int): The number of anchor boxes used in the model. Default is 5.
        only_objectness (int): If set to 1, only objectness scores are considered for filtering. Default is 1.
    Returns:
        all_box(List[List[float]]): A list of predicted bounding boxes for each image in the batch.
    r   Nr   r:   rS   r9   )r:   r   r9   Zint32)indexr?   r   T)Zstop_gradientr>   )paddler   dimZ	unsqueezer   Zreshaper;   ZlinspaceZtiletnnZSigmoid	to_tensorZindex_selectr   arrayrG   expZSoftmaxmaxZargmaxrO   rQ   r   append)+outputZconf_threshnum_classesanchorsZnum_anchorsZonly_objectnessrV   Zanchor_stepbatchr#   r$   	all_boxesZgrid_xZgrid_yZsigmoidZxsZysZanchor_wZanchor_hwshs	det_confsZ	cls_confssZcls_max_confsZcls_max_idsZsz_hwZsz_hwabboxescyZcxr    inddet_confZconfZbcxZbcybwZbhZcls_max_confZ
cls_max_idboxr   r   r   get_region_boxes   s   !



 






	ro   c           
      C   s   ddl }t| dkr| S |t| g}tt| D ]}d| | d  ||< q||}g }tt| D ]2}| ||  }|d dkrf|| t|d t| D ]}| ||  }	t||	dd|kred|	d< qPq4|S )zR
    Performs non-maximum suppression on the input boxes based on their IoUs.
    r   Nr:   r   F)x1y1x2y2)rV   r   Zzerosr   Zargsortr^   bbox_iou)
ri   
nms_threshrV   rf   r    ZsortIdsZ	out_boxesZbox_ir!   Zbox_jr   r   r   nms]  s$   

rs   Tc                 C   s  ddl }|rKt| d |d }t| d |d }t| d |d }t| d |d }| d | d  }| d | d  }	|d |d  }
|d |d  }nttt| d | d d  t|d |d d  }tt| d | d d  t|d |d d  }tt| d | d d  t|d |d d  }tt| d | d d  t|d |d d  }| d }| d }	|d }
|d }|| }|| }||
 | }|	| | }d}|dks|dkr|dS ||	 }|
| }|| }|| | }|| S )zJ
    Returns the Intersection over Union (IoU) of two bounding boxes.
    r   Nr9   r:   r          @g        )rV   minr]   rJ   rZ   )Zbox1Zbox2rp   rV   ZmxZMxZmyZMyZw1Zh1Zw2h2ZuwZuhZcwchZcareaZarea1Zarea2Zuarear   r   r   rq   v  s<   2222
rq   c                       s^   e Zd ZdZg fdee ddf fddZdeded	edefd
dZdedefddZ	  Z
S )DetVideoPostProcesszQ
    A class used to perform post-processing on detection results in videos.
    
label_listr
   Nc                    r   )z
        Args:
            labels : List[str]
                A list of labels or class names associated with the detection results.
        N)r   r   labels)r   ry   r   r   r   r     s   
	
zDetVideoPostProcess.__init__predrr   score_threshc                 C   s  dd l }t|}g }t|D ]}|| }|D ]}	g }
||	}	t|	t| jd}t|	jd D ]}|| }t||}|D ]|}tt	|d |d d  d }tt	|d |d d  d }tt	|d |d d  d }tt	|d |d d  d }t	|d	 }tt|d
 d D ]}t	|d
d|   
 }|| }q||kr|
||||g|| jt|d  g q<q/q||
 q|S )Nr   )r`   r9   rt   g      t@r:   r   g      n@r   rS      )rV   r   r   rZ   ro   rz   r   rs   roundrJ   itemr^   r4   )r   r{   rr   r|   rV   r   Zpred_allr    Zoutputsoutpredsrc   ri   rn   x1y1Zx2y2rl   r!   Zcls_confZprobr   r   r   postprocess  s<   

    
zDetVideoPostProcess.postprocessr   c                    s    fdd|D S )Nc                    s   g | ]	} | qS r   )r   )r*   r{   rr   r|   r   r   r   r,     r=   z0DetVideoPostProcess.__call__.<locals>.<listcomp>r   )r   r   rr   r|   r   r   r   r/     s   zDetVideoPostProcess.__call__)r0   r1   r2   r3   r   rC   r   rJ   r   r/   r5   r   r   r   r   rx     s    rx   )T)typingr   numpyr   Z
utils.depsr   Zutils.benchmarkr   Ztimeitr   r6   rD   rO   rQ   ro   rs   rq   rx   r   r   r   r   <module>   s0   D70
 
'