o
    *jO                     @   s   d dl Z d dlmZ d dlZd dlmZmZ d dlZd dl	Z	d dl
Z
d dlZd dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ e Zejej dG dd deZ!dS )    N)DictOptional)LambdaLR)
DataLoader)Trainers)Model)TableQuestionAnswering)BaseTrainer)TRAINERS)	ModelFile)
get_logger)module_namec                   @   s   e Zd Zd*dedefddZ	d+ddZd	d
 Zdd Zdd Zdd Z	dd Z
dd Zdd Zdd Zdd Zdd Zd,ddZd*d d!Z	"	#	$	%	&	&	'd-d(d)ZdS ).TableQuestionAnsweringTrainerNmodelcfg_filec                 O   s$   t || _|d | _|d | _d S )Ntrain_dataseteval_dataset)r   Zfrom_pretrainedr   r   r   )selfr   r   argskwargs r   y/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/modelscope/trainers/nlp/table_question_answering_trainer.py__init__   s   
z&TableQuestionAnsweringTrainer.__init__c                    s    dt f fdd}t|||S )z 
        set scheduler.
        current_stepc                    sB   | k rt | t td S tdt  |  t td   S )N           )floatmax)r   num_training_stepsnum_warmup_stepsr   r   	lr_lambda,   s   
zPTableQuestionAnsweringTrainer.get_linear_schedule_with_warmup.<locals>.lr_lambda)intr   )r   Z	optimizerr!   r    Z
last_epochr"   r   r   r   get_linear_schedule_with_warmup#   s   	z=TableQuestionAnsweringTrainer.get_linear_schedule_with_warmupc                 C   $   g }|D ]}| t|d  q|S )E
        [ [wc, wo, wv],
        [wc, wo, wv], ...
        ]
        r   appendr#   )r   condsZwc1condr   r   r   get_wc16      z%TableQuestionAnsweringTrainer.get_wc1c                 C   r%   )r&   r   r'   )r   r)   Zwo1r*   r   r   r   get_wo1A   r,   z%TableQuestionAnsweringTrainer.get_wo1c                 C   r%   )r&      )r(   str)r   r)   Zwv1r*   r   r   r   get_wv1L   r,   z%TableQuestionAnsweringTrainer.get_wv1c                 C   s    t ||d D ]}|||< q|S )Nr   )range)r   datastartendvalueir   r   r   set_from_toW   s   
z)TableQuestionAnsweringTrainer.set_from_toc              
   C   s  g }g }g }g }g }g }	g }
g }g }g }t |D ]a\}}t|d |d< t|d }t|d }t|d | d }tt|d | }t |D ]\}}||| krdd||< || dkrd|d8 }qL|dd t| jjt| D 7 }|dd t| jjt| D 7 }|	| |	| |
	|dkrdn| t
|d |d< t
|d |d< t|d t|d ksJ |	|| d  |	|d  t|d	 }d
d |dddf D }t|}|	| || |d	< t|d dk syt|}tt| t|| d }tt| t|| }t |D ]\}}||| kr2d||< |d8 }q|dd t| jjt| D 7 }|dd t| jjt| D 7 }|	| |	|dkrddn| |	| |		| t||  qt||||||	||
||f
S )zB
        for backward compatibility, separated with get_g
        selr   aggr   c                 S      g | ]}d qS r   r   .0_r   r   r   
<listcomp>x       z7TableQuestionAnsweringTrainer.get_g.<locals>.<listcomp>c                 S   r:   r;   r   r<   r   r   r   r?   |   r@   cond_conn_opr)   c                 S   s   g | ]}t |qS r   )r#   )r=   xr   r   r   r?      s    Nc                 S   r:   r;   r   r<   r   r   r   r?      r@   c                 S   r:   r;   r   r<   r   r   r   r?      r@   )	enumeratenumpyasarrayZargsortlenlistr1   r   Zmax_select_numr(   sortarrayr+   r-   max_where_numr0   EnvironmentError)r   sql_il_hsactiong_scg_sag_wng_wcg_wog_wvg_sleng_actiong_cond_conn_opidxsbZpsql_i1idxslenZsid_listZ	said_listr6   Zsidr)   Z	conds_numZwlenZwcd_listZwod_listZwcdr   r   r   get_g\   s   





z#TableQuestionAnsweringTrainer.get_gc              	   C   s  d}|D ]}||kr|}q|d7 }g }g }g }t |D ]a\}	}
dg| }dg| jj }dg| jj }t tt|
d ||	  D ]+\}}|\}}|dkrR|dkrRqA| ||d |d |d  |d ||< |d ||< qA|| || || q|||ffS )z
        Generate SQuAD style start and end index of wv in nlu. Index is for of after WordPiece tokenization.

        Assumption: where_str always presents in the nlu.
        r   r.   Zwvi_corenlpir   )rC   r   rJ   rG   rD   rE   r7   r(   )r   Zg_wvi_corenlpl_nrX   Zmax_lelemg_wviZg_wv_psZg_wv_perY   Zt_objZg_wvi1Zg_wvss1Zg_wvse1Zi_wnZg_wvi_corenlp11Zst_idxZed_idxr   r   r   !get_g_wvi_bert_from_g_wvi_corenlp   s4   


z?TableQuestionAnsweringTrainer.get_g_wvi_bert_from_g_wvi_corenlpc                 C   s$   t jj|t || jj}|S N)torchnn
functionalcross_entropytensortor   device)r   s_ccorW   lossr   r   r   	loss_scco   s
   z'TableQuestionAnsweringTrainer.loss_sccoc                 C   s  d}|t jj|d|t |d| jj7 }|t jj|d| jj	t |	d| jj7 }|\}}|| 
||7 }|| 
||7 }|| 
||
7 }|| 
||7 }|t jj|d|t |d| jj7 }|t jj|d| jjt |d| jj7 }|\}}|t jj|d|jd t |d d| jj7 }|t jj|d|jd t |d d| jj7 }|S )Nr   r   r   )rb   rc   rd   re   Zreshaperf   rg   r   rh   Z	n_agg_opsrk   Z
n_cond_opsshape)r   s_actions_scs_sari   s_wcs_wos_wvsrO   rP   rQ   rR   rS   r_   rW   rU   g_wvp	max_h_lens_lenrV   rj   Zs_slenZs_wlenZs_wvs_sZs_wvs_er   r   r   
loss_sw_se   s@   







z(TableQuestionAnsweringTrainer.loss_sw_sec           
      C   s   t |t |kr||fS i }t|D ]
\}}|| ||< qt| dd d}g }g }|D ]}	||	d  ||	d  q-||fS )Nc                 S   s   | d S )Nr   r   )dr   r   r   <lambda>   s    z<TableQuestionAnsweringTrainer.sort_agg_sel.<locals>.<lambda>)keyr   r   )rF   rC   sorteditemsr(   )
r   ZaggsZselsZseldicr6   r8   ZapsZnew_aggsZnew_selsZapr   r   r   sort_agg_sel   s   z*TableQuestionAnsweringTrainer.sort_agg_selc                 C   s   g }|D ]c}t |dkr|| qt |}t|D ]:\}}|d |d k r*|} n+|d |d krT|d }|d }	||}
||	}|
dkrT|dkrT|
|k rT|} nq|t |kra|| q||| q|S )Nr   r.   r   )rF   r(   rC   findinsert)r   nlur)   Znewcondsr*   rZ   r6   ZnewcondvalZnewvalZvalidxZ	newvalidxr   r   r   
sort_conds  s.   


z(TableQuestionAnsweringTrainer.sort_condsr   c           6      C   s  t |t |kst |dkrd S d\}}}}}d\}	}
}}d\}}}}ttj|ddD ]\}}|d }|d }|| }|d }|d }|d }i i }}||krSq,|d	7 }d
}t |d t |d krzt |d t |d krz|d	7 }d |d< nd}d |d< |d d |d kr|	d	7 }	d |d< nd}d |d< |d |d kr|d	7 }d |d< nd}d |d< t |d t |d kr|d	7 }d |d< nd}d |d< |tt |d t |d 7 }| |d |d \}}| |d |d \}}t|D ]G\} }!| t |k r||  |!kr|
d	7 }
d |d< nd}d |d< | t |k r7| t |k r7||  ||  kr7|d	7 }d |d< qd}d |d< q|tt |d t |d 7 }| ||d }"| ||d }#t|"D ]\} }$| t |#krn n|#|  }%|$d |%d kr|d	7 }d |d< nd}d |d< |$d	 |%d	 kr|d	7 }d |d< nd}d |d< d}&zt|%d |%d d	 d	D ]}'|&||'  7 }&qW n ty   d}&Y nw |& 	 }(|$d  	 })|%d  	 }*|)|*v s|)|(v s|(|)v s|*|)v r|d	7 }d |d< qad}d |d< qa|r|d	7 }q,||d  }+|	|d  },||d  }-||d  }.||d  }/|
|d  }0||d  }1||d  }2||d  }3||d  }4t
d||+|,|-|.|/|0|1|2|3|4f  |+|,|-|.|/|0|1|2|3|4d
}5|5S ) Nr   )r   r   r   r   r   )r   r   r   r   evaluatedescquestionsqlZquestion_tokr   Tr8   r9   zselect numberFrN   rA   zcondition operatorr)   zwhere numberzselect aggregationzselect columnzwhere columnzwhere operator ZstartIdZendIdr.   zwhere value{Gz?z{STATIS} [epoch=%d] all_ratio: %.3f, act_ratio: %.3f, sc_len_ratio: %.3f, cco_ratio: %.3f, wc_len_ratio: %.3f, s_agg_ratio: %.3f, s_col_ratio: %.3f, w_col_ratio: %.3f, w_op_ratio: %.3f, w_val_ratio: %.3f)
accuracyZaction_accuracyZselect_length_accuracyZconnector_accuracyZwhere_length_accuracyZselect_aggregation_accuracyZselect_column_accuracyZwhere_column_accuracyZwhere_operator_accuracyZwhere_value_accuracy)rF   rC   tqdmr   r|   r   r1   strip	Exceptionlowerloggerinfo)6r   ZanswersresultsepochZall_sumZ	all_rightZsc_lenZccoZwc_lenZactZs_aggZall_colZs_colZall_wZw_colZw_opZw_valrZ   itemr   ZqaSQLresultr   r   ZquestionTokenZrightserrorsrightZaaggsZaselsZraggsZrselsjr9   ZacondsZrcondsr*   Zpcondr5   kZvaluelownormalZvaluenormal	all_ratioZ	act_ratioZsc_len_ratioZ	cco_ratioZwc_len_ratioZs_agg_ratioZs_col_ratioZw_col_ratioZ
w_op_ratioZw_val_ratiometricsr   r   r   calculate_scores  s   
$




.





	z.TableQuestionAnsweringTrainer.calculate_scoresc                 C   s   ddi}|durEt j|dd}| jj|d  | jjj|d dd	 g }tj| jd
dD ]}| j|gd }|	| q-| 
| j|}|S )z#
        Evaluate testsets
        r   r   NT)Zweights_onlybackbone_model
head_modelF)strictpredictr   r   )rb   loadr   r   Zload_state_dictr   r   r   r   r(   r   )r   Zcheckpoint_pathr   
state_dictr   r2   r   r   r   r   r     s   z&TableQuestionAnsweringTrainer.evaluate      h㈵>Mb@?r   皙?c           E      C   s  t || jdddd d}t|| }	t||	 }
tjjtdd | jj	
 ||d}tjjtdd | jj
 ||d}| ||
|	}| ||
|	}d	}td
|d
 D ]}| jj	  | jj  t|D ]\}}| jj|ddd\}}}}}}}}}}}| j| jj| jj||||||||
\}}} }!}"}#}$}%}&}'}(| ||%|\
})}*}+},}-}.}/}0}1}2| ||#|2\}3}4| j	||#|%|&|'| |(\}5}6}7}8}9}:};}<t|%}=| |5|6|7|8|9|:|;|)|*|+|,|-|3|/|0|4|=|<|1}>td|||t||> f  |  |  |>  |  |  |  |  qfg }?tj| jddD ]}@| j|@gd }A|? |A q| j!| j|?|d}B|Bd |krj|Bd }t"j#$| jj%d}C| jj	& | jj& d}Dt'|D|C td||Bd |Cf  qSdS )z'
        Fine-tuning trainsets
        T   c                 S   s   | S ra   r   )rB   r   r   r   rx     s    z5TableQuestionAnsweringTrainer.train.<locals>.<lambda>)
batch_sizeZdatasetshuffleZnum_workersZ
collate_fnc                 S      | j S ra   Zrequires_gradpr   r   r   rx         )lrZweight_decayc                 S   r   ra   r   r   r   r   r   rx     r   r   r   N)trainz.{train} [epoch=%d/%d] [batch=%d/%d] loss: %.4fr   r   r   )r   r   zfinetuned_model.bin)r   r   z3epoch %d obtain max score: %.4f, saving model to %s)(r   r   rF   r#   rb   ZoptimZAdamWfilterr   r   
parametersr   r$   r1   r   rC   Zget_fields_infoZget_bert_output	tokenizerr\   r`   r   rv   r   r   r   Z	zero_gradZbackwardstepr   r   r   r(   r   ospathjoinZ	model_dirr   save)Er   r   Ztotal_epochesZbackbone_learning_rateZhead_learning_rateZbackbone_weight_decayZhead_weight_decayZwarmup_ratioZtrain_loaderZtotal_train_stepsZwarmup_stepsoptZopt_bertZlr_schedulerZlr_scheduler_bertZmax_accuracyr   ZiBr   r   Znlu_trL   Zq_knowZt_knowrN   Zhs_ttypesZunitsZhis_sqlZschema_linkZall_encoder_layerr>   tokensZi_nluZi_hdsr]   Zl_hpurM   start_indexcolumn_indexZidsrO   rP   rQ   rR   rS   rT   rW   rU   rV   rX   r_   rs   rm   rn   ro   ri   rp   rq   rr   ru   rt   Zloss_allr   r2   r   r   Z
model_pathr   r   r   r   r     s   





z#TableQuestionAnsweringTrainer.trainra   )r   r;   )r   r   r   r   r   r   r   )__name__
__module____qualname__r/   r   r$   r+   r-   r0   r7   r\   r`   rk   rv   r|   r   r   r   r   r   r   r   r   r      s2    	
L&$
 
r   )"r   Zos.pathr   Zosptimetypingr   r   jsonrD   rb   r   Ztorch.optim.lr_schedulerr   Ztorch.utils.datar   Zmodelscope.metainfor   Zmodelscope.modelsr   Z9modelscope.models.nlp.space_T_cn.table_question_answeringr   Zmodelscope.trainers.baser	   Zmodelscope.trainers.builderr
   Zmodelscope.utils.constantr   Zmodelscope.utils.loggerr   r   Zregister_moduleZ table_question_answering_trainerr   r   r   r   r   <module>   s(   