o
    "j                     @   s0  d dl Z d dlZd dlZd dlmZ d dlmZ d dlmZm	Z	 d dl
Zd dlZd dlmZ ddlmZ ddlmZmZ dd	lmZmZ dd
lmZ ddlmZmZ ddlmZ ddlmZm Z m!Z! e"d e"d ej"d G dd dZ#G dd dZ$G dd dZ%G dd de%Z&G dd dZ'dS )    N)OrderedDict)reduce)chainproduct)auto   )estimate_cost)OperatorDistAttrTensorDistAttr)DistributedContextDistributedOperatorContext)DistributedOperator)'get_distributed_operator_impl_containeris_elementwise_op)get_process_group) get_all_distributed_main_program+update_op_dims_mapping_by_default_dist_impl4update_op_dims_mapping_by_elementwise_like_dist_impl{   c                   @   s0   e Zd Zedd Zedd Zedd ZdS )
PlanFilterc                 C   st   d}t |t |ksJ t|D ]'\}}|dkr+|| | |  dks)||dkr+d}|dkr7| d dkr7d}q|S )NTr   r   F)len	enumeratecount)process_mesh_topologytensor_shapedims_mappingvalididxZdim_mapping r   p/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/paddle/distributed/auto_parallel/static/planner.pycheck_dims_mapping_for_tensor0   s   z(PlanFilter.check_dims_mapping_for_tensorc                 C   s   |j }|d usJ d| jD ]1}||}t|j|| j|s# dS || jr?t|dkr?|dd  D ]
}|dkr>  dS q4q| jD ]}|	|}t|j|| j|sX dS qCdS )Nz$The process mesh should not be None.Fr   r   T)
process_meshinput_arg_namesget_input_dims_mappingr   r!   shapeis_datar   output_arg_namesget_output_dims_mapping)opop_dist_attrvarsr"   var_namer   dimr   r   r    check_dims_mapping_for_opC   s,   



z$PlanFilter.check_dims_mapping_for_opc                 C   s   | j dks| j dks| j dkr;| jD ]}||D ]
}|dkr#  dS qq| jD ]}||D ]
}|dkr9  dS q/q(| j dkrZ| jD ]}|dkrY||D ]
}|dkrX  dS qNqCdS )	NZelementwise_addZ
layer_normsoftmax_with_cross_entropyr   FZlookup_table_v2Zpos_embeddingsT)typer#   r$   r'   r(   )r)   r*   r+   nameitemr   r   r    !check_dims_mapping_for_special_op[   s0   






z,PlanFilter.check_dims_mapping_for_special_opN)__name__
__module____qualname__staticmethodr!   r.   r3   r   r   r   r    r   /   s    

r   c                   @   sP   e Zd Zg dZg dZedd Zedd Zedd Ze		dd
dZ	dS )	PlanSpace)Zcreate_py_readerZcreate_double_buffer_readerread)lod_tensor_blocking_queue_0Zcreate_py_reader_0Zdouble_buffer_0c              	   C   s   t tdt| }|t|kr=d}t|D ]\}}	|	dkr0|| | |	  dks.||	dkr0d}q|r;|t| dS tt|D ])}
||
 sl|
dkrQd||
< |||
  t	| |||d || d||
< |
  qCdS )zCEnumerate dims mapping of tensor by the given process_mesh_topologyr   Tr   r   FN)listranger   r   r   appendcopydeepcopyr8   _enum_dims_mappingpop)r   visitedpathdepthresr   Znumsr   r   r2   ir   r   r    r@   |   s:   zPlanSpace._enum_dims_mappingc                 C   s  | dksJ dg }t d| d D ]}| | dkr|| qg }t t|d ddD ]}g }|||  |t|d krG|t| q+d}|t|k rt|dkr]|||  n`t|dkr| |d |d   dkr| |d |d   dkr|t| n?|| |d |d    |t| |d |d |d7 }n|d |d  | k r|d |d7 }nn|t|k sOq+|S )z6Enumerate all process meshes with the given processes.r   z0The processes must be number and greater than 0.r   r      )r<   r=   r   r>   r?   rA   )	processesZdivisorsrF   resultsresultjr   r   r    enum_process_mesh_topology   sH   






z$PlanSpace.enum_process_mesh_topologyc                    sV  |   j}t  g }t|j}t|j|jD ]0}dd tt	t
tdt	|jD }d}g }	g }
t|j||	||
|| j t|
 |< qt
t fdd  D  }|D ]}t }||_t
  }t|D ],\}}|| |jv r||| | qk|| |jv r||| | qktdjddd	t||}|d
u r0t|jrd}d}zt|}W n ty } zd}W Y d
}~nd
}~ww |r|st||j|rt ||j|rd|j_!d|j_"|#|j qXd}d}zt$|}W n ty } zd}W Y d
}~nd
}~ww |r/|s/t||j|r/t ||j|r/d|j_!d|j_"|#|j qX|j%}t|D ]$\}}|&|rZt||j|rZ|j'j|j_!||j_"|#|j q7qX|st }||_|jD ]}||| j(dd || jD  qi|jD ]}||| j(dd || jD  qt||}d|j_!d|j_"|#|j |S )zQEnumerate the valid distributed attribute for op based on the given process mesh.c                 S      g | ]}d qS Fr   ).0_r   r   r    
<listcomp>   s    z:PlanSpace._enum_valid_dist_attr_for_op.<locals>.<listcomp>r   r   c                    s   g | ]} | qS r   r   )rO   keyZdims_mapping_dictr   r    rQ      s    z0The {varname} is not input or output of op {op}.zvar_names[idx]r)   )varnamer)   NTFZelementwisedefaultc                 S   rM   r   r   rO   rF   r   r   r    rQ   <      c                 S   rM   rV   r   rW   r   r   r    rQ   @  rX   ))global_blockr+   r   r   r0   r   r#   r'   r<   r   r;   r%   r8   r@   r>   r?   r   keysr	   r"   r   set_input_dims_mappingset_output_dims_mapping
ValueErrorformatr   r   r   	Exceptionr   r.   Z	dist_attrr3   Z	impl_typeZimpl_idxr=   r   implsZis_auto_compatibleZ	serial_opr1   )programr)   r"   r+   op_valid_dist_attrsZdist_op_impl_containerr,   rB   rD   rC   Zdims_mapping_listZcomposed_dims_mapping_listZcomposed_dims_mappingr*   Z	var_namesr   r   dist_opchangedr   er`   implr   rS   r    _enum_valid_dist_attr_for_op   s   








z&PlanSpace._enum_valid_dist_attr_for_opFc                    sR  t  }|  j}|  j}tdd |d}tt| d}d}|rY|d }	t||	 }
t|dkrI|dd ||	  fddt|	D }n+t|dkrXdd t|	D }nt|dkrntj	t
 | d	}ntj	 d	}t|D ]\}}d}|}d}|dur||
 t|k r||
 n||
 d }|t|krt|d }|| }|jtjv rt }||_|jD ]}|tjv r||g  qd
d || jD }||| q|jD ]}|tjv r||g  qdd || jD }||| q|g}|dkrdn|}nt| ||}|dusJ d| d||g||j < qx|||fS )z>Enumerate valid distributed attributes for all ops in program.c                 S   s   | | S Nr   )xyr   r   r    <lambda>S  s    z<PlanSpace.enum_valid_dist_attr_for_program.<locals>.<lambda>r   Nr   c              	      s<   g | ]}t jt | |d     dqS )r   Zmesh)r   ProcessMeshnparrayreshapetolistrW   Zglobal_groupZper_process_mesh_groupZprocess_mesh_shaper   r    rQ   _  s$    z>PlanSpace.enum_valid_dist_attr_for_program.<locals>.<listcomp>c                 S   s   g | ]	}t j|gd qS )rl   )r   rm   rW   r   r   r    rQ   n  s    rl   c                 S   rM   rV   r   rW   r   r   r    rQ     rX   c                 S   rM   rV   r   rW   r   r   r    rQ     rX   r   z
Enumerate z$ valid distributed attribute failed.)r   rY   opsr+   r   r;   r<   r   r   rm   rn   ro   rp   rq   r   r0   r8   not_enum_opsr	   r"   r#   special_varsr[   r%   r'   r\   rg   descid)ra   r   Zis_pipelinevalid_dist_attr_dictrs   r+   rH   global_process_meshpipeline_process_meshesZpipeline_stagesZop_count_per_stager   r)   rb   Zop_process_meshpipeline_stager*   r,   r   r   rr   r     enum_valid_dist_attr_for_programJ  s   







z*PlanSpace.enum_valid_dist_attr_for_programNrN   )
r4   r5   r6   rt   ru   r7   r@   rL   rg   r|   r   r   r   r    r8   t   s    
"
+
~r8   c                   @   s(   e Zd Zdd Zedd Zdd ZdS )SearchAlgorithmc                 C   s
   || _ d S rh   )_name)selfr1   r   r   r    __init__     
zSearchAlgorithm.__init__c                 C   s   | j | _d S rh   )r~   r1   r   r   r   r    r1     s   zSearchAlgorithm.namec                 C   s   t d)Nz)Please Implement this method in subclass.)NotImplementedErrorr   r   r   r    search  s   zSearchAlgorithm.searchN)r4   r5   r6   r   propertyr1   r   r   r   r   r    r}     s
    
r}   c                       s   e Zd Zd fdd	Zedd Zedd Zedd	 Zd
d Zdd Z		dddZ
dd Zdd Z	dddZ	dddZdd Z  ZS )MCMC   c                    s"   t  d || _|| _|| _d S )Nmcmc)superr   _serial_program_info_max_search_times_parallelizer)r   serial_program_infoparallelizermax_search_times	__class__r   r    r     s   
zMCMC.__init__c                 C      | j S rh   r   r   r   r   r    r        zMCMC.serial_program_infoc                 C   r   rh   r   r   r   r   r    r     r   zMCMC.parallelizerc                 C   r   rh   )r   r   r   r   r    r     r   zMCMC.max_search_timesc                 C   s   |j dkrm|jD ]f}|||}|||| jkrld}|D ]E}	||	jv ra||	j  d }
|
D ].}|	||kr`|
|	| |	jD ]}t }|j|_|	||_||| | qDd} nq2|re nq |sltdqd S d S )Nr/   Fr   Tz2Change softmax_with_cross_entropy dist attr failed)r0   r#   get_op_dist_attr_for_programr$    get_tensor_dist_attr_for_programr   r'   rv   rw   r(   set_op_dist_attr_for_programr
   r"    set_tensor_dist_attr_for_programr]   )r   r)   rs   r+   dist_contextrx   r,   r   Zhas_changedZ	search_opZop_dist_attr_listr*   r1   tensor_dist_attrr   r   r    make_special_op_unshard  sj   



zMCMC.make_special_op_unshardc                 C   s*  |  j}|  j}t }|D ]j}||j  d }	tjt	|	}
|	|
 }|
|| |jD ]%}|dkr6q/||| d u rTt }|j|_|||_||| | q/|jD ]}t }|j|_|||_||| | qX| ||||| q|d ur|| |S |d ur|D ]}|| q|S )Nr   r:   )rY   rs   r+   r   rv   rw   rn   randomrandintr   r   r#   r   r
   r"   r$   r   r   r'   r(   r   Zadd_process_mesh)r   rx   ra   rz   ry   rs   r+   new_dist_contextr)   op_valid_dist_attr_listZrandom_op_dist_attrZinit_op_dist_attrr,   r   r"   r   r   r    init_program  sZ   





zMCMC.init_programNc                 C   s   d }t | j|| j}|d urdd |D nd }d}|D ] }d}| D ]}	|	jr6d|	jv r6|	jd }d} nq#|r; nqddlm}
 |
|}t	|d |||d	}|S )
Nc                 S   s   g | ]}|j qS r   )process_ids)rO   r"   r   r   r    rQ   L  s    z8MCMC.estimate_searched_strategy_cost.<locals>.<listcomp>r   Fz@RESHARDr   T)get_standalone_cost_data)clusterpipeline_configstandalone_cost_dataZ
batch_size)
r   r   r   Z	list_varsr&   r1   r%   utilsr   r   )r   r   rz   costZall_dist_main_programr   Zmicrobatch_sizera   Zsearched_batch_sizevarr   r   r   r   r    estimate_searched_strategy_costC  s@   

z$MCMC.estimate_searched_strategy_costc                 C   s   |j D ]}|j}t }||_|||_||| | q|jD ]#}|| js,|| jrC|j}t }||_|	||_||| | q d S rh   )
r'   r"   r
   r(   r   r   r#   is_parameterr&   r$   )r   r)   r*   r+   r   r,   r"   r   r   r   r    set_tensor_dist_attrm  s*   

zMCMC.set_tensor_dist_attrc                 C   s\   || |_|jD ]
}|||| _q	|jD ]}|| js#|| jr+|||| _qd S rh   )r   r"   r'   r   r#   r   r&   )r   r)   Zchanged_process_meshr+   r   r,   r   r   r    change_process_mesh  s(   

zMCMC.change_process_meshc                 C   sd  |  j}g }|D ]}|jtjvr|| q	|sJ d|  j}t|}	t	 |	_
d }
tjt|}|| }||j  d }||j  d }tjt|}t|| }|d j }|dkrt|}
tjd}|dkr|	|| | ||||	 n	|dkr|d }|dks|t|d ks|d t|d kr|
||d  j  d |d kr|	|| | ||||	 n|| }||d  j }|
| d |d kr|d t|d kr||
| d< |
| d D ]}||_q| ||d  |||	 ||
|j  d< || }||_|
|j  d D ]}||_q|	|| | ||||	 t|d ddD ]K}|
|| j  d }|
|| j  d }|| }||d kr||
|| j  d< |D ]}||_ql||	|| _| || |||	 q< n|d }|t|ks|dks|
||d  j  d |d kr|dkr|	|| | ||||	 n|| }||d  j }|
| d |d kr|dkr||
| d< |
| d D ]}||_q| ||d  |||	 ||
|j  d< || }||_|
|j  d D ]}||_q|	|| | ||||	 t|d t|D ]K}|
|| j  d }|
|| j  d }|| }||d kr~||
|| j  d< |D ]}||_qc||	|| _| || |||	 q3 n|	|| | ||||	 |D ]}|jdkr| ||||	|  nq|
d u r||	fS |
|	fS )Nz2The ops of program have no distributed attributes.r   r   r      r/   )rY   rs   r0   r8   rt   r=   r+   r>   r?   r   _dist_op_contextrn   r   r   r   rv   rw   r   r   r"   r   r<   r   r   )r   ra   rx   r   rz   Zraw_opsrs   r)   r+   r   Znew_valid_dist_attr_dictZrandom_selected_op_idxZselected_opr   r{   Zrandom_selected_dist_attr_idxZselected_op_dist_attrZ	start_idxZchanged_modeZchanged_stageZselected_op_process_meshZ
next_op_idr*   Znew_process_meshr   ZstageZvalid_dist_attr_listZ	pre_op_idr   r   r    search_once  s  






















zMCMC.search_oncec           
      C   s   d}|}|  ||j}|}|| jk r=|d7 }| | jj|||d }|  ||j}	||	 dkr8t|}|	}d}|| jk s||fS )Nr   r   )r   Zruntimer   r   r   train_programr>   r?   )
r   rx   init_dist_contextrz   timesbest_dist_contextr   min_costr   Zcur_costr   r   r    _search_core  s:   


zMCMC._search_corec                 C   s  t d t }| jj}| jj}|d u rtj nt|	d}|dks(J dt
|}d }d }d }d }	|D ]H}
t d|
 t
||
d\}}}| ||||}| |||\}}t d||
 t |_|	d u rm|n|	}	|d u ru|n|}|	|kr|}|}	q7d }d }|D ]O}
t|
dkrqt d	|
 t
||
d
\}}}| ||||}| |||\}}t d||
 t |_|d u r|n|}|d u r|n|}||kr|}|}q||	kr|}|	}t d n|}|}td}|jD ]}||j qt }t d|||  ||fS )NzStart MCMC searching.ZGPUr   zGet process failed.z7MCMC search: search process mesh {} with pipeline mode.TzJMCMC search: the min cost is {} in the process mesh {} with pipeline mode.r   z:MCMC search: search process mesh {} without pipeline mode.FzMMCMC search: the min cost is {} in the process mesh {} without pipeline mode.zHBetter set FLAGS_benchmark=1 to avoid hang problem in the pipeline mode.zBEnd MCMC searching: the min cost is {} and the search time is {}s.)printtimer   r   r   paddledistributedZget_world_sizer   Zget_all_devicesr8   rL   r^   r|   r   r   r   r   r   Z_process_meshesZ	add_ranksr   )r   
start_timer   r   rH   Zprocess_mesh_topology_listZsearched_dist_contextr   Zsearched_pipeline_dist_contextZpipeline_min_costr   rx   rz   ry   r   r   r   Z"searched_non_pipeline_dist_contextZnon_pipeline_min_costZpg0r"   Zend_timer   r   r    r     s   
		
zMCMC.search)r   rh   )r4   r5   r6   r   r   r   r   r   r   r   r   r   r   r   r   r   __classcell__r   r   r   r    r     s*    


2?
*
  
r   c                   @   sX   e Zd Z	dddZedd Zedd Zedd	 Zed
d Zdd Z	dd Z
dS )PlannerNc                 C   s"   || _ || _|| _| || _d S rh   )r   r   _algorithm_configcreate_algorithm_searcher_algorithm_searcher)r   r   r   algorithm_configr   r   r    r   0  s   
zPlanner.__init__c                 C   r   rh   r   r   r   r   r    r   :  r   zPlanner.serial_program_infoc                 C   r   rh   )r   r   r   r   r    r   >  r   zPlanner.algorithm_configc                 C   r   rh   )r   r   r   r   r    algorithm_searcherB  r   zPlanner.algorithm_searcherc                 C   r   rh   r   r   r   r   r    r   F  r   zPlanner.parallelizerc                 C   sj   | dd }|d usJ dd }|dkr1| dd }|d ur(t| j| j|}|S t| j| j}|S td)Nr1   zInvalid algorithm config.r   r   z4Other search algorithms have not been supported now.)getr   r   r   r   )r   r   r1   r   r   r   r   r    r   J  s&   z!Planner.create_algorithm_searcherc                 C   s
   | j  S rh   )r   r   r   r   r   r    r   b  r   zPlanner.searchrh   )r4   r5   r6   r   r   r   r   r   r   r   r   r   r   r   r    r   /  s    





r   )(r>   r   r   collectionsr   	functoolsr   	itertoolsr   r   numpyrn   r   Zpaddle.distributed.fleetr   Z
cost_modelr   Zdist_attributer	   r
   r   r   r   rc   r   Zoperators.commonr   r   Zprocess_groupr   r   r   r   r   seedr   r8   r}   r   r   r   r   r   r    <module>   s<   

E  B    s