o
    "j&z                     @   s   d dl Z d dlZd dlmZ d dlZd dlZd dlmZ d dl	m
Z
 d ZdZG dd deZG dd	 d	ZG d
d deZG dd dZG dd deZG dd deZG dd deZG dd deZG dd dZG dd dZdd ZdS )    N)Enum)OpRole)core   c                   @   s$   e Zd ZdZdZdZdZdZdZdS )CostNodeTyper   r               N)	__name__
__module____qualname__DEFAULTCOMPUTATIONCOMMUNICATIONVARIABLEMERGEDNOP r   r   s/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/paddle/distributed/auto_parallel/static/cost_model.pyr      s    r   c                   @   s   e Zd Zdd ZdS )Costc                 C   s   d | _ d | _d | _d S N)runtime
static_mempeak_memselfr   r   r   __init__'   s   
zCost.__init__Nr   r   r   r   r   r   r   r   r   &   s    r   c                   @   s   e Zd ZdZdZdZdZdS )CostModelModer   r   r   r   N)r   r   r   r   BENCHMARKINGZANALYSISZMIXEDr   r   r   r   r   -   s
    r   c                   @   s0   e Zd ZdddZedd Zejdd ZdS )CostNodeNc                 C   s(   || _ || _|| _d| _d| _d| _d S )Nr   F)idnodetype_costis_optimis_bwd)r   r#   	node_typer"   r   r   r   r   5   s   
zCostNode.__init__c                 C   s   | j S r   )r%   r   r   r   r   cost=   s   zCostNode.costc                 C   s   |dk rt d|| _d S )Nr   zCost must be above 0.)
ValueErrorr%   )r   r)   r   r   r   r)   A   s   
r   )r   r   r   r   propertyr)   setterr   r   r   r   r!   4   s    

r!   c                       s   e Zd Zd fdd	Z  ZS )MergedOpsCostNodeNFc                    s    t  d || || _|| _d S r   )superr   	node_listr'   )r   r(   r"   base_node_listr'   	__class__r   r   r   I      
zMergedOpsCostNode.__init__NNF)r   r   r   r   __classcell__r   r   r1   r   r-   H   s    r-   c                       s:   e Zd Z	d fdd	Zdd Zdd Zdd	d
Z  ZS )CommOpCostNodeNFc                    s.   t  ||| || _g | _|j| _|| _d S r   )r.   r   r/   ranksr$   	comm_typer'   )r   r#   r(   r"   Zcomm_node_listr'   r1   r   r   r   P   s
   
zCommOpCostNode.__init__c                 C   s
   || _ d S r   )r7   )r   r7   r   r   r   	set_ranksY   s   
zCommOpCostNode.set_ranksc                 C   s   || _ || _d S r   )input_shapeoutput_shape)r   r:   r;   r   r   r   
set_shapes\   s   
zCommOpCostNode.set_shapesc                 C   s   d}t | j}t| jd }d| jv r#||| d|d    | _d S d| jv r5||| |d   | _d S d| jv rA|| | _d S d| jv sKd	| jv rR|| | _d S d
| _d S )NgMb@@r	   Z	allreducer   r   Zgather	broadcastsendrecvr   )lenr7   npprodr:   r8   r%   )r   cluster	BANDWIDTHZ	num_ranksZcomm_volumnr   r   r   init_comm_cost`   s   





zCommOpCostNode.init_comm_costr4   r   )r   r   r   r   r9   r<   rE   r5   r   r   r1   r   r6   O   s    	r6   c                       s.   e Zd Z				d fdd	Zdd Z  ZS )TensorCostNodeNc                    s   t  ||| |jdks|jdkrddg| _tj| _n|j| _|j| _d| _d | _|| _	| jtjks9|jtj
krA|  jd9  _n|jtjkrO|  jd9  _n|jtjkrYd| _nd| _d | _|d urh|| _d S d S )NZcreate_py_reader_0Zdouble_buffer_0r   r   r	      )r.   r   nameshapepaddlefloat32dtypeZdtype_factorpersistableshared_node_idZint32Zint64Zuint8
batch_size)r   r#   r(   r"   r0   rO   rN   r1   r   r   r   v   s(   	


zTensorCostNode.__init__c                 C   s>   d}| j jD ]}|dkr| jd usJ d| j}||9 }q|S )Nr   zBatch size not decided.)r#   rI   rO   )r   pir   r   r   get_size   s   
zTensorCostNode.get_size)NNNN)r   r   r   r   rS   r5   r   r   r1   r   rF   u   s     rF   c                       s&   e Zd Zd fdd	Zdd Z  ZS )CompOpCostNodeNFc                    s    t  ||| || _|| _d S r   )r.   r   r'   r&   )r   r#   r(   r"   r'   r&   r1   r   r   r      r3   zCompOpCostNode.__init__c                 C   s0   | j j }|| v r|| | _d S d| _d S )Ng        )r#   descr"   keysr)   )r   	cost_dataop_idr   r   r   init_comp_cost   s   
zCompOpCostNode.init_comp_cost)NFF)r   r   r   r   rY   r5   r   r   r1   r   rT      s    rT   c                   @   s   e Zd ZdddZdS )	PipeEventrP   c                 C   s"   || _ || _|| _|| _d| _d S )NrP   )stage_idrH   durations_timee_time)r   r[   Z
event_namer\   
start_timer   r   r   r      s
   
zPipeEvent.__init__N)rP   r   r   r   r   r   rZ      s    rZ   c                   @   s   e Zd ZejddddddfddZdd Zdd	 Zd.d
dZdd Z	dd Z
d/ddZdd Zd0ddZdd Zdd Zd1ddZd1ddZd d! Zd"d# Zd$d% Zd&d' Zd(d) Zd*d+ Zd,d- ZdS )2	CostModelNr   r   c                 C   s   || _ || _|| _|| _i | _i | _i | _i | _|| _|| _	|| _
| j
d ur>i | _t| j
D ]\}}	|	D ]}
|| j|
< q4q.nd | _i | _g | _g | _g | _d S r   )modeopcall_overheadrO   microbatch_numnodesorigin_graphop_graphruntime_graphrC   rW   pp2rankrank2pp	enumerate	ring2rankfwd_timebwd_time
optim_time)r   ra   rC   rO   rc   rb   standalone_cost_datapipeline_configZ	stage_idxr7   rankr   r   r   r      s.   


zCostModel.__init__c                 C   s  t |jdksJ d|jd }d}| j|tjtjjj	d}t
|tjd||< |j D ]}	|	j}t
|	tj|||< g g g||< q.|jD ]*}
|
jd t|
j }|
jdse|
jdse|
jd	rd
}|
jdr|
jdkr|
jds|
d}|| jvrt | j|< | j| | d|
dd v }n|
jd	rd|
dd v }n|
jdrd|
dd v }t|
tj||}n$t|
dttjkpd|
j v }d|
j!v }t"|
tj#|||}|$| |||< g g g||< dg}dg}t%t |
j!D ].}z'|
|
j!| d }|| }|| t& '|j( || t) '|j( |j*}W q   Y qt%t |
j+D ]0}z(|
|
j+| d }|| }|| t) '|j( || t& '|j( |j*}W q3   Y q3|jtjkrq|,|| qFi }|- D ]t\}}|jtjkr|j.j/rd}|| t& D ]Z}|| }|jtj#kr||| t) v r|| t) 0| || t& 0| |d7 }|d|  }t
|j.tj||d}g g g||< || t) '| || t& '| |||< qqx|- D ]	\}}|||< q|S )Nr   z(Program more than 1 block not supported.r   Zlod_tensor_blocking_queue_0)rH   rL   r$   _c_r>   r?   Fc_sync_calc_streamc_embeddingring_idz@GRADZOutXZop_roleZLearningRateZ_write_)rN   )1r@   blocksZglobal_blockZ
create_varrJ   rK   r   ZVarDescZVarTypeZ
LOD_TENSORrF   r   r   varsvaluesrH   opsr$   stridx
startswithattrrk   setaddoutputinputr6   r   intr   ZBackwardZinput_arg_namesZinput_namesrT   r   rY   rangePREDappendr"   SUCCrI   Zoutput_namesr<   itemsr#   rM   remove)r   programrd   graphrW   sub_idxblockZvar_idZnew_varvaroprX   r'   rv   Zop_noder&   Zcomm_input_shapeZcomm_output_shaperR   Zvar_nodeZnew_var_dictnode_idr#   Zwrite_op_cntpred_idpredZ
new_var_idkvr   r   r   _parse_sub_program   s   












zCostModel._parse_sub_programc              	   C   s   || _ t| j | _t|}g | | _g | | _g | | _g | | _t|D ]8\}}| ji  | ji  | ji  | ji  | 	|| j| | j| | j
| jd u rUdn| j|  | q%| jS Nr   )distributed_programr@   
total_rankrd   re   rf   rg   rj   r   r   rW   ri   )r   r   Zsub_prog_cntr   Zsub_progr   r   r   parse_programT  s,   



	zCostModel.parse_programc                 C   s~   g }| j | | t D ]1}| j| | }|jtjks |jtjkr&|| q|jtjkr5|| 	|| }qt
d|j |S )Nz$This type of node not supported yet:)re   r   rd   r$   r   r   r   r   r   _find_succ_opNotImplementedError)r   r   r   Zsucc_ops_idsucc_idsuccr   r   r   r   m  s   
zCostModel._find_succ_opc                 C   s   t | jD ]J}g }| j|  D ]\}}|jtjkrqg g g| j| |< || q|D ]"}| 	||}|| j| | t
< |D ]}| j| | t | q?q,qd S r   )r   r   rd   r   r$   r   r   rf   r   r   r   r   )r   r   Zop_nodes_idr   r#   rX   succ_nodes_idr   r   r   r   build_op_graph~  s   zCostModel.build_op_graphc                 C   s   t | j| _d S r   )copydeepcopyrf   rg   r   r   r   r   build_runtime_graph  s   zCostModel.build_runtime_graphc                 C   sF   |  D ]\}}tt|t || t< tt|t || t< qd S r   )r   listr   r   r   )r   r   r   edgesr   r   r   eliminate_multi_edges  s   zCostModel.eliminate_multi_edgesc                 C   s   t | jD ]\}| j|  D ]R\}}| j| | }|drA|jdsA|jdsA|jd}|	t
| j|  || j q|dsK|dr_|jd}|	||g || j q	 qqd S )Nrs   rt   ru   rv   r>   r?   Zpeer)r   r   rf   r   rd   r~   r"   r#   r   r9   r   rk   rE   rC   )r   r   r   r   r#   rv   Z	peer_rankr   r   r   
merge_comm  s&   

zCostModel.merge_commlinearc           
      C   s   g }d}|D ].}t |tr||j7 }n||j |dkr#||j7 }q|dkr.t||j}qtd| dtt	| }|d j
}ttj|||d}	||	_||	fS )Nr   r   branchz&This type of merging is not supported:Zmerged_)r"   r0   r'   )
isinstancer-   r/   r   r"   r)   maxr   r|   r@   r'   r   r   )
r   to_merge_node_list
merge_typerd   Z
nodes_list	node_costr#   merged_node_idr'   merged_noder   r   r   _merge_node  s.   

zCostModel._merge_nodec                 C   X   d}t | jD ]"}|| j| j| | j| dd7 }|| j| j| | j| dd7 }q|S )a3  
        This method does the following:
        If X depends on Y only, they must be run sequentially.
            [ e.g. A ->- C ->- D   D and E depends on C only.]
            [      B ->-/ \->- E   C depends on A and B.     ]
        We merge X and Y into a new node and sum up their cost time.
        r   Fr'   T)r   r   _merge_linearrd   rg   r   cntr   r   r   r   merge_linear  s   
zCostModel.merge_linearc                 C   r   )a  
        This method does the following:
        If a node has more than one successor, there is *branch*.
            [ e.g. A ->- B ->- D                                       ]
            [       \->- C ->- / , B and C can be run at the same time ]
            case 1: if B or C is null (or D is directly dependent on A),
                    it's equivalent to A->C->D or A->B->D, fall back to self.merge_linear
            case 2: if both B and C are some op,
                    merged_cost = max(cost(B), cost(C))
        r   Fr   T)r   r   _merge_branchrd   rg   r   r   r   r   merge_branch  s   
zCostModel.merge_branchFc                 C   s6  d}t | }|D ]}|| vrq
|| }||jkr |jr!q
|| }t|t }	|	dkr|t d }
||
 }| j||gd|d\}}|||< g g g||< d }z6t|t	 || t	< t||
 t	 dkr}||
 t	 }|
| || t	  |7  < ||
 t || t< W n   Y z%||
 t D ]}z|| t	 
|
 W n   Y q|| t	 | qW n   Y z|t	 D ]}|| t 
| || t | qW n   Y |d ur|D ]}z|| t 
|
 W n   Y q|| t | q|| z||
 W n   Y q
|d7 }| |  |S q
|S )Nr   r   r   r   rd   )r   rV   r'   r&   r@   r   r   r   r   r   r   r   popr   )r   rd   rg   r'   
reduct_cntrt_nodes_idr   r#   r   indr   r   r   r   r   rR   r   r   r   r     s   






;zCostModel._merge_linearc              	      sD  d}t | }|D ]} | }||jkr|jrq
|| }t|t }	|	dkr|t }
g }|
D ] }|
D ]}z|| t }W n   Y q5||v rP||  nq5q1|D ]}|t | || t | |d7 }qTd}zt|t dk st||t d  t dk rW q
W n   Y q
||t d  t d }|
D ]"}zt|| t dks|| t d |krd}W  nW q   Y q|rt|
dkr fdd|
D }| j	|d d\}}| |< g g g||< |g|| t< |t || t< |g|| t< |g|| t< z|
D ]}|
| q|t|d 7 }W  |S    Y q
q
|S )	Nr   r   TFc                    s   g | ]} | qS r   r   ).0rR   rd   r   r   
<listcomp>b  s    z+CostModel._merge_branch.<locals>.<listcomp>r   r   )r   rV   r'   r&   r@   r   r   r   r   r   r   )r   rd   rg   r'   r   r   r   r#   r   Zoutdr   Zsucc_to_elimr   Z	succ_2_idtmpr"   to_mergeZend_node_idrR   r   r   r   r   r   r   r   2  s   




zCostModel._merge_branchc                    s    fdd}t  jD ]E}d}d}d} j|  D ]#} j| | }|jr-|||7 }q|jr7|||7 }q|||7 }q j|  j	|  j
| q j j	 j
fS )Nc                    s0   | j  j }t| tr| jD ]}| j7 }q|S r   )r)   rb   r   r-   r/   )r#   r   itr   r   r   get_node_costz  s
   

z1CostModel.get_runtime_cost.<locals>.get_node_costr   )r   r   rg   rV   rd   r&   r'   rl   r   rm   rn   )r   r   r   Zfwd_costZbwd_costZ
optim_costr   r#   r   r   r   get_runtime_costy  s    zCostModel.get_runtime_costc                 C   sR   g }g }t | jD ]}| | j| | j| \}}}|| || q	||fS r   )r   r   _simulate_memrd   re   r   )r   Zstatic_listZtop_listr   r   cur_memtop_memr   r   r   get_mem  s   

zCostModel.get_memc                 C   s  t d}t|}| D ]\}}t|| t dkr!|| q|d d}d}d}	| s|	 }d }d}
|dkrMt
||}| rGn}|| q-|| }|jtjkrg| }
|jjrc|	|
7 }	||
7 }|| }|jtjkru|jjs|t D ]}|| t | t|| t dkr|| qy|t D ]'}|}|jtjkr|| t | t|| t dkr|jjs|| 8 }q| r1|	||fS )N   r   ZnoprP   )queueQueuer   r   r   r@   r   putemptygetr   r$   r   r   rS   r#   rM   r   r   )r   rd   re   qZ	sim_graphr   r#   r   r   r   sizer   r   r   r   r   r   r   r     sX   







#zCostModel._simulate_memc                 C   s0   | j d u r| jd | jd  | jd  S |  S r   )rh   rl   rm   rn   _simulate_pipeliner   r   r   r   get_pipeline_time  s   
zCostModel.get_pipeline_timec                 C   sh  t | j}g }dg| }d}tt|dd}| jg| }td}t| jD ]}|tdd| j	d  q&|
 s'| }	|	j}
|	jdkr||
 dkrt||
 |	j|	_|	j|	j |	_||	 |
|d kr{|t|
d d| j	|
d  |	jd n|t|
d| j|
 |	jd ||
  d8  < |	j||
< n||	 n|	jdkrt||
 |	j|	_|	j|	j |	_||	 |
dkr|t|
d d| j|
d  |	jd ||
  d7  < ||
  d8  < ||
 dkr|t|
d| j|
 |	jd |	j||
< n)|	jdkrt||
 |	j|	_|	j|	j |	_||	 |	j||
< ntd	|	j |
 r:|D ]}t||}q)|S )
Nr   rP   r   Zfwdr   )r_   ZbwdZoptimz-This type of pipe event is not supported yet.)r@   rh   r   r   rc   r   r   r   rZ   rl   r   r   r[   rH   r   r]   r\   r^   r   rm   rn   r   )r   Z	stage_numZ
event_listZglobal_time
total_timeZfwd_cntZbwd_cntr   rR   eZstidtr   r   r   r     s   





	



AzCostModel._simulate_pipelinec                 C   sf   t  }|  \}}||_||_|   	 d}||  7 }||  7 }|dkr'nq|   |  |_	|S )NTr   )
r   r   r   r   r   r   r   r   r   r   )r   r)   r   r   r   r   r   r   get_cost$  s   
zCostModel.get_costc                 C   s>   |  | |   t| jD ]
}| | j|  q|   d S r   )r   r   r   r   r   rf   r   )r   r   r   r   r   r   init4  s
   
zCostModel.init)r   r   )r   N)F)r   r   r   r   r    r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r`      s8    
(s




HG0Qr`   c                 C   s6   |du sJ dt ||||d}||  | }|S )a  
    Estimated cost from distributed program, cluster model and distributed settings.

    Args:
        distributed_program(list): list of paddle programs
        cluster(Cluster): cluster model
        standalone_cost_data(CostData): cost data given by paddle.core
        batch_size(int): batch size of the training workload
        pipeline_config(list): configuration of pipeline stage allocation
    NzFor now, cluster remains None)rC   rO   ro   rp   )r`   r   r   )r   rC   rp   ro   rO   Zcm_ctxr)   r   r   r   estimate_cost<  s   
r   )r   r   enumr   numpyrA   rJ   Z/paddle.distributed.fleet.meta_optimizers.commonr   Zpaddle.frameworkr   r   r   r   r   r   r!   r-   r6   rF   rT   rZ   r`   r   r   r   r   r   <module>   s2   	&+	     	