o
    "j                     @   s^   d dl Z ddlmZ G dd dZdd Zdd	 ZG d
d de jjZG dd de j	j
ZdS )    N   )	CUDAGraphc                   @   s   e Zd Zdd ZdS )CUDAGraphContextc                 C   s&   d| _ || _t | _t | _|| _d S )Nr   )steplayerr   forward_graphbackward_graphnum_warmup_stepsselfr   r	    r   f/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/paddle/device/cuda/cuda_graphed_layer.py__init__   s
   
zCUDAGraphContext.__init__N)__name__
__module____qualname__r   r   r   r   r   r      s    r   c                 C   s$   t | tjr|  }| j|_|S | S N)
isinstancepaddleTensordetachZstop_gradient)xZ
x_detachedr   r   r   r      s
   r   c                 C      t | tjr	| jS | S r   r   r   r   Zgradr   r   r   r   get_grad&   s   r   c                   @   s$   e Zd Zedd Zedd ZdS )_CUDAGraphedLayerc                 G   s:  dd |D }|j |jk r1t  |j| }W d    n1 s!w   Y  | ||| | S |j |jkrr||_|j	  t  |j|j }W d    n1 sTw   Y  |j
  |j  ||_| ||j| | S t|j|D ]\}}t|tjr||d qx|j  |j}| ||j| | S )Nc                 S   s   g | ]}t |qS r   )r   .0r   r   r   r   
<listcomp>0   s    z-_CUDAGraphedLayer.forward.<locals>.<listcomp>T)r   r	   r   Zenable_gradr   Zsave_for_backwardr   Zargs_staticr   capture_begincapture_endreplayy_staticzipr   r   copy_)ctxcontextargsyZx_staicr   r   r   r   forward.   s4   





z_CUDAGraphedLayer.forwardc                    s   |   \}}}|j|jk r|| n,|j|jkr3||_|j  |j|j |j  |j	  n|j
|d |j	  dd  t fdd|D }| jd7  _|S )NTc                 S   r   r   r   r   r   r   r   r   _   s   z,_CUDAGraphedLayer.backward.<locals>.get_gradc                 3   s    | ]} |V  qd S r   r   r   r   r   r   	<genexpr>b   s    z-_CUDAGraphedLayer.backward.<locals>.<genexpr>r   )Zsaved_tensorr   r	   backwardZ	dy_staticr   r    r#   r!   r"   r%   tuple)r&   Zdyr'   r(   r)   Z	args_gradr   r+   r   r-   O   s   


z_CUDAGraphedLayer.backwardN)r   r   r   staticmethodr*   r-   r   r   r   r   r   -   s
    
 r   c                       s4   e Zd ZdZddejjf fddZdd Z  Z	S )	CUDAGraphedLayera  
    CUDAGraphedLayer: A PaddlePaddle Layer to convert an eager mode model to utilize CUDA Graphs.

    CUDA Graphs provide a way to capture kernel-level operations of a model and play
    them back efficiently, allowing for potential speedups in repetitive computations,
    such as those during training iterations. This layer is a wrapper that enables
    the usage of CUDA Graphs with PaddlePaddle models.

    Overview:
    - The layer encapsulates another layer (the model to be converted).
    - During the first few (num_warmup_steps) iterations, the layer operates in
      eager mode without any CUDA Graphs.
    - After the warmup steps, the layer captures the forward and backward computations
      and replays them using CUDA Graphs in subsequent iterations.

    Usage:
        model = Model()
        graphed_model = CUDAGraphedLayer(model)

    Parameters:
    - layer (paddle.nn.Layer): The PaddlePaddle model/layer to be converted.
    - num_warmup_steps (int): The number of iterations before the CUDA Graph
      capture begins. Default is 3.

    Notes:
    - Restrictions:
        * CPU-GPU Synchronization: Operations that synchronize the CPU with the GPU, like device to host transfers, are not allowed.
        * CPU Work: Any operations on the CPU within the captured graph are not recorded.
        * Memory Address (Pointer) Consistency: Replays consistently read from and write to identical virtual memory addresses.
        * Dynamic Operations:
            - Control Flow: Dynamic control flows, especially those based on CPU data like if/else statements, are prohibited.
            - Tensor Shapes: Dynamic tensor shapes are not supported.

    - Allowed Operations:
        * CUDA RNG Operations: CUDA-based Random Number Generation operations are allowed.
       r   c                    s   t    t||| _d S r   )superr   r   r'   r
   	__class__r   r   r      s   
zCUDAGraphedLayer.__init__c                 G   s   t j| jg|R  S r   )r   applyr'   )r   r(   r   r   r   r*      s   zCUDAGraphedLayer.forward)r1   )
r   r   r   __doc__r   nnLayerr   r*   __classcell__r   r   r3   r   r0   g   s    %r0   )r   Zgraphsr   r   r   r   ZautogradZPyLayerr   r7   r8   r0   r   r   r   r   <module>   s   		: