o
    "jN\                     @   sL  d dl Z d dlmZ d dlmZ d dlmZ d dlmZm	Z	m
Z
mZ d dlmZmZmZmZmZ d dlmZ ejjjejjjejjjejjjejjjgZee jZG dd	 d	eZd-d
dZdd Z dd Z!dd Z"dd Z#G dd dZ$dd Z%dd Z&dd Z'dd Z(dd Z)d.d!d"Z*d.d#d$Z+d%d& Z,d'd( Z-d.d)d*Z.d+d, Z/dS )/    NOrderedDict)Enum)core)Operator	ParameterProgram	get_flags)
get_loggeris_backward_opis_forward_opis_optimize_opuse_new_executor)OpRolec                   @   s   e Zd ZdZdZdZdS )AutoParallelStreamTypedefaultZauto_parallel_mpZauto_parallel_shardingN)__name__
__module____qualname__ZCALC_STREAMZ	MP_STREAMZSHARDING_STREAM r   r   e/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/paddle/distributed/passes/pass_utils.pyr   +   s    r   c                 C   s<   |d u rt  }nt|t sJ | D ]
}||vrd||< q|S )NT)r   
isinstance)list_objZordered_dictobjr   r   r   list_to_ordered_dict1   s   r   c                 C   s\   t  }g }|  jD ]!}|jD ]}||vr|| || q|jD ]}|| q#q
|S N)setglobal_blockopsinput_arg_namesappendaddoutput_arg_names)programZvisited_vars
input_varsopin_var_nameout_var_namer   r   r   get_inputs_of_program>   s   



r(   c                 C   s.   t  }|  jD ]}t|j| qt| S r   )r   r   r   r   r"   listkeys)r#   output_varsr%   r   r   r   get_outputs_of_programL   s   r,   c                 C   sZ  t |  j}|dk r||7 }|dkr||k sJ |dk r!||7 }|dkr)||ks-J |||k s3J |  } t|d |d dD ]}|  j|dd qAt|d ddD ]}|  j|dd qU|   t }|  jD ]}|jD ]}|	| qr|j
D ]}|	| q}qmg }	|  jD ]}
|
|vr|	|
 q|	D ]}
|  j|
dd q|   | S )Nr      Fsync)lenr   r   clonerange
_remove_op_sync_with_cppr   r   r!   r"   varsr    Z_remove_var)r#   Zstart_op_idxZ
end_op_idxop_numidxZ
valid_varsr%   r&   r'   Zvars_to_removevarr   r   r   prune_programS   s<   


r:   c                    s|  |sJ dt |  j  dksJ d fdd|D }|d dkr)dg| }|d  kr4|  tt |d D ]}|| ||d  k sLJ dq<g }tt |d D ]}t| || ||d  }|| qWt |}d	d |D }d
d |D }dd t|D }|d |d< td|D ] }	||	 D ]}
tt|	D ]}|
|| v rd|| |
<  nqqqdd |D }|||fS )a  
    Split the program by op_indices.

    For examples, a program has 100 ops, and op_indices = [25, 60].
    Then the program is splitted into 3 parts, containing 25, 35 and 40
    ops respectively.

    The return values are a tuple with 3 elements: the splitted program
    list, the input var names of each splitted program, and the output
    var names of each splitted program.
    zop_indices cannot be emptyr   zprogram cannot be emptyc                    s    g | ]}|d kr
|n|  qS )r   r   ).0r8   r7   r   r   
<listcomp>   s     z!split_program.<locals>.<listcomp>r.   r-   z"op_indices must be strictly sortedc                 S   s   g | ]}t |qS r   )r(   r;   pr   r   r   r=      s    c                 S   s   g | ]}t t|qS r   )r   r,   r>   r   r   r   r=      s    
c                 S      g | ]}t  qS r   r   )r;   _r   r   r   r=          Tc                 S   s   g | ]}t | qS r   )r)   r*   )r;   itemr   r   r   r=      s    )r1   r   r   r    r3   r:   reversed)r#   Z
op_indicesr8   Zsplitted_programsZ	new_splitZ	num_splitr$   r+   Zvalid_output_varsir&   jr   r<   r   split_programv   sD   


rG   c                   @   s<   e Zd ZdZdd Zedd Zdd Zdd	 Zd
d Z	dS )OpInOutInfozc
    Record unused buffer input_vars of op and other var_names except unused buffer input_vars
    c                 C   s   d| _ t | _t | _d S )NF)	_is_buildr   _no_need_buffer_slots_other_arg_names_setselfr   r   r   __init__   s   zOpInOutInfo.__init__c                 C   s   | j S r   )rI   rL   r   r   r   is_build   s   zOpInOutInfo.is_buildc                 C   sd   i }|j D ]	}||||< qi }|jD ]	}||||< qi }|jD ]	}||||< q#|||fS r   )input_namesinputoutput_namesoutputZ
attr_namesattr)rM   r%   inputsZ
input_nameoutputsZoutput_nameattrs	attr_namer   r   r   _get_op_attrs   s   



zOpInOutInfo._get_op_attrsc                 C   s   |  |\}}}t|j|||| _t| jdkrd S |jD ]}|| jvr3||D ]}| j	| q*q|j
D ]}|| jvrL||D ]}| j	| qCq7d| _d S )Nr   T)rY   r   Zinfer_no_need_buffer_slotstyperJ   r1   rP   rQ   rK   r!   rR   rS   rI   )rM   r%   rU   rV   rW   Z	slot_namein_nameZout_namer   r   r   
build_info   s"   





zOpInOutInfo.build_infoc                 C   s   t | jdkp|| jv S )Nr   )r1   rJ   rK   )rM   arg_namer   r   r   	is_needed   s   zOpInOutInfo.is_neededN)
r   r   r   __doc__rN   propertyrO   rY   r\   r^   r   r   r   r   rH      s    
rH   c                 C   s   | | }|d uo|j S r   )_find_var_recursivepersistable)var_nameblockr9   r   r   r   var_can_be_deleted   s   
re   c                 C   sr  t  }|  jD ]}|jD ]}t||  r|| qqt  }| jD ]}|jD ]}t|| r8|| q*q%t||@ }|D ]l}|  j	 }|
d |d|g |ddg |d| t|  |}	|  j|	 | j }
|

d |
dg  |
dd	 |
d
d |
d| |
d|g t| |
}| jd	| qB|   |  d S )NZshadow_outputxoutz@EMPTY@namedatashapedtyper   Zplace   )r   r   r   r"   re   r!   r   sorteddesc	append_opset_typeZ	set_inputZ
set_output	_set_attrr   r    Z_prepend_opinsertr5   )Zcur_progZ	next_progZset_output_namesr%   r]   Zset_input_namesZshadow_var_namesrc   Zshadow_op_descZ	shadow_opZdata_op_descZdata_opr   r   r   prepare_ir_program   sD   





rs   c              	   C   s  | dksJ dt t||}i }| D ]=\}}t ||< |jD ]0}|jD ]*}	|	jdv r.q&t }
|
|	 |	j	|	j
 D ]}t||rO|
|rO|| | q<q&q!qdd t| D }t|}tt|D ]E}|| }| }|| }| }||| @ }td| d| d|  |d	krt|d
ksJ d| d|| ||  |O  < qftdd rt|D ]\}}|t|d kr |S ||d  }t|| ||  q|S )a#  
    Set `skip_gc_vars` for every job in jobs.

    A whole_program is split up into sub_programs according to the schedule mode,
    thus a sub_program's vars might be used as the op's input of the later sub_program,
    and these vars cannot be gc after executing current sub_program.
    r-   z"num_micro_batches needs to be >= 1)c_sync_comm_streamZconditional_blocknopwhilec                 S   r@   r   )r   )r;   rE   r   r   r   r=   +  rB   z$set_skip_gc_vars.<locals>.<listcomp>zSkip gc vars for z-(z): Zbackwardr   znWhen enabling pipeline parallelism stategy, the skip_gc_vars for backward subprogram must be empty, but it is .ZFLAGS_enable_pir_in_executor)dictzipitemsr   blocksr   rZ   rH   r\   r   r"   re   r^   r!   r3   r1   rD   micro_batch_idloggerdebugset_skip_gc_varsr	   	enumeraters   )Znum_micro_batchesZ	job_typesZsub_programsjobsZtype_to_programZtype_to_required_varsrZ   r#   rd   r%   Zop_infor]   Zsuffixed_required_varsZnum_jobsZjob_idZjobZjob_typeZrequired_varsr|   Zskip_gc_varsrE   Z	next_typer   r   r   r   	  sf   






r   c                 C   sp   i }|j |d< |j|d< |j|d< |j|d< |j|d< td| |j|j|j|j	|j
|j|j|j|jd
| d S )N	trainableoptimize_attrregularizerdo_model_average	need_clip)
rd   rZ   rh   rj   rk   	lod_level
error_clipstop_gradientis_databelong_to_optimizerr   )r   r   r   r   r   r   rZ   rh   rj   rk   r   r   r   r   r   )	dst_blocksrc_varZcopied_kwargsr   r   r   _create_paramM  s(   





r   c                 C   s6   | j |j|j|j|j|j|j|j|j|j	|j
d
 d S )N)
rZ   rh   rj   rk   r   rb   r   r   r   r   )
create_varrZ   rh   rj   rk   r   rb   r   r   r   r   )r   r   r   r   r   _create_interd  s   
r   Fc              	   C   s~   |s|  |}n| |}|jtv r,t|dd}|j|j|j||j|j|j	|j
d d S t|tr8t|| d S t|| d S )Nrb   F)rZ   rh   rb   r   r   r   r   )r9   Z_var_recursiverZ   __not_shape_var_type__getattrr   rh   r   r   r   r   r   r   r   r   )	src_blockr   Zsrc_varnameforce_creater   Zpersistr   r   r   _create_vars  s"   




r   c                 C   s~   |j  }||j  |jD ]}| |s|r#| |r#t| ||| q|jD ]}| |s5|r<| |r<t| ||| q'd S r   )rn   ro   Z	copy_fromr   Zhas_varra   r   r"   )r   r   Zsrc_opr   Zdst_op_descZinput_varnameZoutput_varnamer   r   r   _create_program  s&   




r   c              
   C   sF  | j D ]}d}d}tt|jD ]\}}t|r|} nqtt|jD ]\}}|jdv r4|dd |jdkr|dd |d}|d	}|jd }|	|}	|j
|| d
d|	gid|	gid|id |d7 }t|ttjkr||| }tj}
n	|| d }tj}
|j
|dd|	gid|	gi|
|dd}t|ttjkr|dd |d7 }q%|  d}d}t|jD ]\}}|jdkrt|r|} nq|du rqtt|jD ]D\}}||kr n;|jdkr|dr|jd }|	|}	|j|| dd |d8 }t s|j
|dd|	gid|	gidtjid q|  qdS )z
    This implementation refers to lots of Paddle/python/paddle/base/optimizer.py.
    The difference between this function with 'PipelineOptimizer' is that
    'send_v2' op and 'recv_v2' op have been inserted in program by 'reshard'.
    r   N)send_v2recv_v2dynamic_shapeFr   use_calc_streamop_rolering_idZc_sync_calc_streamXZOut)indexrZ   rU   rV   rW   r-   rt   )r   r   Zpipeline_flag r   r/   ru   )r{   r   r)   r   r   rZ   rq   rT   r   r9   Z_insert_op_without_syncintr   ZBackwardZOptimizeZForwardr5   r   Zhas_attrr"   r4   r   )r#   rd   offsetZfirst_optimize_indexr   r%   r   r   rc   r9   Znew_op_roleZsync_comm_opZbackward_recv_indexr   r   r   _insert_sync_for_fthenb_1f1b  s   








r   c                 C   s   | j D ]F}|jD ]@}|jdkr-|dd |dd |d}dt| |j_d|j_q|jd	krG|dd |dd d
|j_d|j_q	 qqdS )a  
    This function is used to replace the function '_insert_sync_for_fthenb_1f1b'.
    The finally target of this function is as follows:
        1. no need to insert the 'c_sync_calc' and 'c_sync_calc' operators
        2. 'send_v2' operator uses 'dist_attr.execution_stream' to set stream of its own.
        3. 'recv_v2' opeator uses 'dist_attr.execution_stream' to set stream of its own.
    r   r   Fr   Tr   Zsend_stream_r   r   Zrecv_streamN)	r{   r   rZ   rq   rT   str	dist_attrZexecution_streamZstream_priority)r#   rd   r%   r   r   r   r   _overlap_send_recv  s    






r   c                    s  |rt |  nt|  t }t }t }dd   fdd}dd }t| jD ]\}|\}}	}
|dkrY|d}||| |d}|||	 |d}|||
 nEt|rp|jjd}|	j
 ||| t|	r|jjd}|	j
 |||	 t|
r|jjd}|	j
 |||
 jD ]'}|jd	v r|jd }d
}|||fD ]}||r|} nq|rt|| qq(|  |  |  |  |  |  |||gS )zi
    This implementation is for fthenb and 1f1b programs and is called in partial_programs function.
    c                 S   s
   | j dv S )NfetchZfetch_v2)rZ   )r%   r   r   r   _is_fetch_op  s   
z2_program_for_fthenb_and_1f1b.<locals>._is_fetch_opc                    s   g }g }g }j D ]1} |rq	t|r|| q	t|r$|| q	t|r.|| q	tdt|d d |||fS )NzThe op role: r   z- isn't one of Forward, Backward or Optimizer.)r   r   r    r   r   
ValueErrorr   rT   )rd   fwd_opsbwd_opsopt_opsr%   r   r   r   r   
_split_ops   s(   

z0_program_for_fthenb_and_1f1b.<locals>._split_opsc                 S   s   |D ]}t | || qd S r   )r   )r   r   r   r%   r   r   r   _add_ops_into_block5  s   z9_program_for_fthenb_and_1f1b.<locals>._add_ops_into_blockr   )
parent_idxr   N)r   r   r   r   r{   rd   r1   Z_create_blockr   Z_set_forward_block_idxZforward_block_idxr   rZ   r   ra   r   r5   Z_roll_to_global_block)r#   Zenable_send_recv_overlapZfwd_progZbwd_progZopt_progr   r   r8   r   r   r   Z	fwd_blockZ	bwd_blockZ	opt_blockZfetch_opr[   r   rd   r   r   r   _program_for_fthenb_and_1f1b  sp   








r   c                 C   sB   | j jsd| j _|j j}| j j|vr|| j j ||j _dS dS )z
    Add the extra event dependcy of the two operators.
    This function mainly aims for the cross-programs in pipeline parallelism,
    especial for the 'send_v2' 'recv_v2' etc.
    TN)r   Zforce_record_eventZevents_to_waitZevent_to_recordr    )Zrecorder_opZ	waiter_opZwaiter_wait_listr   r   r   _add_event_dependencyq  s   r   r   )F)0loggingcollectionsr   enumr   Zpaddle.baser   Zpaddle.base.frameworkr   r   r   r	   Z-paddle.distributed.auto_parallel.static.utilsr
   r   r   r   r   Z/paddle.distributed.fleet.meta_optimizers.commonr   ZVarDescZVarTypeZREADERZSTEP_SCOPESZLOD_TENSOR_ARRAYZFEED_MINIBATCHZ
FETCH_LISTr   INFOr}   r   r   r(   r,   r:   rG   rH   re   rs   r   r   r   r   r   r   r   r   r   r   r   r   r   <module>   s@   

#36%D

]
b