o
    "jVH                     @   s  d dl Z d dlZd dlmZ d dlmZ ddlmZ ddlm	Z	m
Z
mZ ddlmZmZmZmZ dd	lmZ ejjjejjjejjjejjjejjjgZd
ZdZdZee jZedG dd deZ edG dd deZ!edG dd deZ"i fddZ#dS )    N)core)calc_time_by_cost_model   )
get_logger   )PassContextnew_passregister_pass)AutoParallelStreamType_add_event_dependency_program_for_fthenb_and_1f1bsplit_program)PipelinePassBaseforwardZbackwardZ	optimizerZpipeline_scheduler_FThenBc                       ,   e Zd Z fddZdd Zdd Z  ZS )PipelineFThenBPassc                       t    d S Nsuper__init__self	__class__ r/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/paddle/distributed/passes/pipeline_scheduler_pass.pyr   0      zPipelineFThenBPass.__init__c                 C   s   |  d}g }t|D ]}tt}|| || qt|D ]}tt}|| || q!tt}|d || |S )Nnum_micro_batchesr   	get_attrranger   JobFORWARDset_micro_batch_idappendBACKWARDOPT)r   r   job_listiforward_jobbackward_jobopt_jobr   r   r   _create_job_list3   s   







z#PipelineFThenBPass._create_job_listc                 C   &   |  d}tttg}t||}||fS Nenable_send_recv_overlapr    r#   r&   r'   r   r   programr0   typesZsub_program_listr   r   r   _partial_programsG   s   

z$PipelineFThenBPass._partial_programs__name__
__module____qualname__r   r-   r5   __classcell__r   r   r   r   r   .   s    r   Zpipeline_scheduler_1F1Bc                       sT   e Zd Z fddZdd Zdd Zdd Zd	d
 Zdd Zdd Z	dd Z
  ZS )Pipeline1F1BPassc                    s$   t    ttg| _| dd d S )Nenable_backward_forward_overlapr   )r   r   r&   r#   jobs_in_stable_phaseZset_attrr   r   r   r   r   S   s   

zPipeline1F1BPass.__init__c                 C   s  t d | j| j}}t|t|}}g g }}d\}	}
|	|k r|
|k r|	|k rD| ||	 sD|	d7 }	|	|k rD| ||	 r5|	|krInX||	 }d}|	d7 }	||
 }| |}	 |
|k r||kr|
d7 }
||
 }|| |7 }	 | ||
d  rn|
|k r||kse|r|
|d kr||	 ||
 |	|k r|
|k s*| t	||\}}| t
||\}}| |t	 | |t
 | j  t|t|}}tt||D ]}||k r| j||  ||k r| j||  q||||fS )N)Backward forward overlap enabled in 1F1B.)r   r   r   i  )loggerinfoglobal_blockopslenis_comm_op_valid_to_overlap_op_costr%   _split_program_for_overlappingr&   r#   _multistreaming_for_overlappingr=   clearr!   max)r   backward_programforward_programZbackward_opsZforward_opsZnum_backward_opsZnum_forward_opsZbackward_split_pointsZforward_split_pointsZbackward_op_idZforward_op_idZbackward_op_to_overlapZbackward_cost_to_overlapZforward_op_to_overlapZforward_cost_to_overlapsplitted_backward_job_typessplitted_backward_programssplitted_forward_job_typessplitted_forward_programsZnum_splitted_backward_jobsZnum_splitted_forward_jobsidxr   r   r   _backward_forward_overlapf   s   





;
z*Pipeline1F1BPass._backward_forward_overlapc                 C   s.  |  d}|  d}|  d}g }||ksJ d|| }|| }d}t|D ]}tt}	|	| ||	 |d7 }q'd}
t|D ](}| jD ]}t|}|trV|n|
}|| || qH|d7 }|
d7 }
qCt|D ]}tt	}||
 || |
d7 }
qptt
}|d || |S )Nr   pp_stage	pp_degreez>Num of micro batches should larger than or equal to pp degree.r   r   )r    r!   r   r"   r#   r$   r%   r=   
startswithr&   r'   )r   r   rS   rT   r(   micro_batch_in_warmupmicro_batch_in_1f1bforward_micro_batch_idr)   r*   backward_micro_batch_idjob_typeZjobZmicro_batch_idr+   r,   r   r   r   r-      sH   


















z!Pipeline1F1BPass._create_job_listc                 C   s   t |}d}t|D ]c\}}| jd }| |rmtjj|j_	||j_
|j}|j}	t|d |D ];}
||
  j}t |}t|D ])}|| }|j}|j}t|t|@ sft|	t|@ sft|	t|@ rkt|| qBq1q
d S )Nr?   r   )rD   	enumeraterB   rC   rE   r
   Z	MP_STREAMvalue	dist_attrexecution_streamZstream_priorityZinput_arg_namesoutput_arg_namesr!   setr   )r   ZprogramsrZ   Znum_programsZhigher_stream_priorityZ
program_idr3   Zlast_opZprior_op_input_arg_namesZprior_op_output_arg_namesr)   Zposterior_opsZnum_posterior_opsZop_idZposterior_opZposterior_op_input_arg_namesZposterior_op_output_arg_namesr   r   r   rH     sH   

z0Pipeline1F1BPass._multistreaming_for_overlappingc                 C   s   ddddddddddd
}|j }|| v r|| S |d	krB|jd }|j|j}|d
kr0dS |dkr6dS |dkr<dS |dkrBdS |dkr^|jd }|j|j}|dkrXdS |dkr^dS zt|}|j dkrl|d9 }|W S  ty } zt	d| dt
| d W Y d }~dS d }~ww )Nr   (   L   ^   7         )
c_allreduce_sumZelementwise_addsplitZ
transpose2Z!fused_softmax_mask_upper_triangleZ
layer_normZgeluZdropoutZ
c_identityZrecv_v2Z	matmul_v2)r      i   i  )r      ri   ri   p   )r   rj   ri      _   )r   ri   i      scale   Z   rg      zThe cost of z is unknown since .g        )typekeysr_   blockZ_var_recursiveshaper   	Exceptionr@   rA   repr)r   opZhandwritten_cost_mapZop_typevar_namerw   timeer   r   r   rF   ,  sT   


zPipeline1F1BPass._op_costc                 C   s   |  d}tttg}t||}|  d}|r:td |d |d }}| ||\}}	}
}||
| 7 }|||	 7 }tt	|D ]}t
d||  d||  d q@t
d	| j  ||fS )
Nr0   r<   r>   r   r   ztype = z, sub_programs = 
zjobs_in_stable_phase = )r    r#   r&   r'   r   r@   rA   rR   r!   rD   debugr=   )r   r3   r0   r4   Zsub_programsr<   rL   rK   rM   rN   rO   rP   r)   r   r   r   r5   [  s:   


z"Pipeline1F1BPass._partial_programsc           	      C   sf   |t tfv sJ dt tg t||\}}}g }t|}t|D ]}|| d| d q!||fS )Nzjob_type should be one of z(chunk))r#   r&   r   rD   r!   r%   )	r   rZ   r3   Zsplit_pointsZsplitted_programs__Zsplitted_job_typesZnum_splitted_programsrQ   r   r   r   rG     s   z/Pipeline1F1BPass._split_program_for_overlappingc                 C   s   |j dko|jjtjjkS )Nrg   )rt   r]   r^   r
   ZCALC_STREAMr\   )r   rz   r   r   r   rE     s
   
z,Pipeline1F1BPass.is_comm_op_valid_to_overlap)r7   r8   r9   r   rR   r-   rH   rF   r5   rG   rE   r:   r   r   r   r   r;   Q   s    o-*/$r;   Zpipeline_scheduler_Eager1F1Bc                       r   )PipelineEager1F1BPassc                    r   r   r   r   r   r   r   r     r   zPipelineEager1F1BPass.__init__c                 C   s8  |  d}|  d}|  d}g }d||  d |ksJ dd||  d }|| }d}t|D ]}tt}	|	| ||	 |d7 }q1d}
t|D ](}tt}||
 || |
d7 }
tt}	|	| ||	 |d7 }qMt|D ]}tt}||
 || |
d7 }
qztt}|| |S )Nr   rS   rT   r   r   zGNum of micro batches should larger than 2 * (pp_degree - pp_stage) - 1.r   r   )r   r   rS   rT   r(   rV   rW   rX   _r*   rY   r+   r,   r   r   r   r-     sB   



















z&PipelineEager1F1BPass._create_job_listc                 C   r.   r/   r1   r2   r   r   r   r5     s   

z'PipelineEager1F1BPass._partial_programsr6   r   r   r   r   r     s    )r   c                 C   sh   |dv sJ d| |dkrt tjdd|d< td| |}t }|| g|g| |d}|S )	N)ZFThenB1F1BZ	Eager1F1BzHpipeline scheduler only support FThenB, 1F1B and Eager1F1B, but recieve r   Z#FLAGS_1f1b_backward_forward_overlapr   r<   Zpipeline_scheduler_plan)intosenvirongetr   r   applyr    )Zmain_programZstartup_programZ	pass_nameZ	pass_attrZpipeline_passZpass_contextr   r   r   r   
apply_pass  s   

r   )$loggingr   Zpaddle.baser   Z,paddle.distributed.auto_parallel.static.costr   Zutils.log_utilsr   Z	pass_baser   r   r	   Z
pass_utilsr
   r   r   r   Zpipeline_pass_baser   ZVarDescZVarTypeZREADERZSTEP_SCOPESZLOD_TENSOR_ARRAYZFEED_MINIBATCHZ
FETCH_LISTZ__not_shape_var_type__r#   r&   r'   INFOr@   r   r;   r   r   r   r   r   r   <module>   s6   
"  F8