o
    *j                     @   s  U d dl Z d dlmZmZ d dlmZ d dlmZmZm	Z	 d dl
Zd dlZd dlmZ d dlmZmZmZmZ d dlmZmZmZmZ d dlmZ d d	lmZmZ eeej f Z!eeej f Z"e"e#d
< g dZ$dd Z%dd Z&dd Z'dd Z(dd Z)dd Z*e(dd Z+dd Z,dd Z-dd Z.d d! Z/dd"d#Z0e(	$	$dd%d&Z1e(d'd( Z2e(d)d* Z3e(d+d, Z4d-d. Z5e(d/d0 Z6e(dd2d3Z7d4d5 Z8d6d7 Z9e(dd8d9Z:d:d; Z;d<d= Z<e(dd?d@Z=e(dAdB Z>ddDdEZ?dFdG Z@dHdI ZAdJdK ZBdLdM ZCe(	$	$ddNdOZDe(	 	 ddPdQZEdRdS ZFdTdU ZGdVdW ZHe(dXdY ZIe(dZd[ ZJd\d] ZKd^d_ ZLd`da ZMddcddZNe(	>ddedfZOdgdh ZPdidj ZQe(	$ddkdlZRe(dmdn ZSe(dodp ZTdqe!dreUdse	eU dtejVfdudvZW		$ddwejVdreUdse	eU dxeXdtejVf
dydzZY		$dd
e!dreUdse	eU dxeXdtejVf
d{d|ZZ	}dd
e!dreUdseUd~e[de[dteeU fddZ\dejVdtejVfddZ]dejVdejVdejVdtejVfddZ^dd Z_dS )    N)reducewraps)add)ListMutableMappingOptional)
data_utils)batched_gatherone_hottensor_tree_maptree_map)N_EXTRA_MSAN_MSAN_RESN_TPL)residue_constants)FrameRotationprotein)msadeletion_matrixmsa_maskmsa_row_mask	bert_masktrue_msa
msa_chainsc                 C   sX   |   D ]%\}}|dr|tj| |< q|jtjtjtjfv r)|tj	| |< q| S )NZ_mask)
itemsendswithtypetorchfloat32dtypeint32Zuint8int8int64)r   kv r'   p/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/modelscope/models/science/unifold/data/data_ops.pycast_to_64bit_ints#   s   
r)   c                 C   s   t j| d jt jd| d< | S )Naatyper!   Zseq_maskr   onesshaper    r   r'   r'   r(   make_seq_mask.   s   
r0   c                 C   s"   t j| d jd t jd| d< | S )Ntemplate_aatyper   r+   template_maskr,   r/   r'   r'   r(   make_template_mask4   s   
r3   c                    s   t   fdd}|S )z#Supply all arguments but the first.c                     s    fddS )Nc                    s   | g R i S Nr'   x)argsfkwargsr'   r(   <lambda>?   s    z$curry1.<locals>.fc.<locals>.<lambda>r'   r7   r9   r8   r;   r(   fc=   s   zcurry1.<locals>.fc)r   )r8   r=   r'   r<   r(   curry1:   s   r>   c                 C   s\   | d   | d< tj}tj|tjddd| d jd }t	|d| d   | d< | S )z1Correct MSA restype to have the same order as rc.r   r+      r   )
longrcZ MAP_HHBLITS_AATYPE_TO_OUR_AATYPEr   tensorr#   	unsqueezeexpandr.   Zgather)r   Znew_order_listZ	new_orderr'   r'   r(   correct_msa_restypesD   s   rF   c                 C   s  t | d jdkrtj| d dd| d< d| v r)t | d jdkr)| d d | d< dD ];}|| v rft | | jrf| | jd }t|trf|dkrft| | r[tj| | dd| |< q+tj| | dd	| |< q+d
D ]}|| v r~t | | jr~| | d | |< qi| S )z=Remove singleton and repeated dimensions in protein features.r*      r?   dim
resolutionr@   r   )
Zdomain_namer   num_alignments
seq_lengthsequenceZsuperfamilyr   between_segment_residuesZresidue_indextemplate_all_atom_mask)Zaxis)rL   rK   )	lenr.   r   argmax
isinstanceintZ	is_tensorZsqueezenp)r   r%   Z	final_dimr'   r'   r(   squeeze_featuresP   s"   rU   c                 C   s   |dkrLt j| d j|k }d}d}t|| d |k}t|t| d | | d | d< t j| d j|k }t|t| d | | d | d< | S )z&Replace a portion of the MSA with 'X'.        r         r*   )rT   randomrandr.   r   logical_andwhereZ	ones_like)r   Zreplace_proportionr   Zx_idxZgap_idxZaatype_maskr'   r'   r(   !randomly_replace_msa_with_unknownr   s(   r]   c                 C   s:   d}t tjdd| }t t ||  |  }|S )zGenerate Gumbel Noise of given Shape.
    This generates samples from Gumbel(0, 1).
    Args:
        shape: Shape of noise to return.
    Returns:
        Gumbel noise of given shape.
    ư>r   r@   )r   
from_numpyrT   rY   uniformlog)r.   epsilonZuniform_noiseZgumbelr'   r'   r(   gumbel_noise   s   rc   c                 C   s   t | j}tj| | ddS )a;  Samples from a probability distribution given by 'logits'.
    This uses Gumbel-max trick to implement the sampling in an efficient manner.
    Args:
        logits: Logarithm of probabilities to sample from, probabilities can be
        unnormalized.
    Returns:
        Sample from logprobs in one-hot form.
    r?   rH   )rc   r.   r   rQ   logitszr'   r'   r(   gumbel_max_sample   s   
	rg   c                 C   s   t | j}tj| | dddS )a   Samples with replacement from a distribution given by 'logits'.
    This uses Gumbel trick to implement the sampling an efficient manner. For a
    distribution over k items this samples k times without replacement, so this
    is effectively sampling a random permutation with probabilities over the
    permutations derived from the logprobs.
    Args:
        logits: Logarithm of probabilities to sample from, probabilities can be
        unnormalized.
    Returns:
        Sample from logprobs in index
    r?   T)rI   Z
descending)rc   r.   r   argsortrd   r'   r'   r(   gumbel_argsort_sample_idx   s   
ri   c                 C   s4   t tj| d d }t jt dg|fddS )Nr@   r   rH   )r   r_   rT   rY   permutationcatrC   )num_seqshuffledr'   r'   r(   uniform_permutation   s   rn   c                 C   sb  t j|  dddk}t j|t jd}d|| < t|jdks!J |dd  }|dd  }|jd dkr:t dgS |d ur|dd  d}d|| < t	j
|dd\}}| }|dk }|| }|dk }	d	|d
  ||< d|| < |D ]}
|
dkr||
k}| }|dkr||  ||	|  9  < qxt |d
 }t|d }t jt dg|fddS )Nr?   rH   r   r+   g    .r@   TZreturn_counts      ?r^   )r   sumrA   
zeros_liker    rP   r.   rC   reshaperT   uniquera   ri   rk   )r   r   Zhas_msare   keyscountsZnum_has_msaZnum_pairZ
num_unpairZ
num_chainsr%   Zcur_maskZcur_cntrm   r'   r'   r(   gumbel_permutation   s8   


rw   Fc                 C   s   | d j d }t||}|st|}n|rd| v r| d nd}t| d |}t||}t|||| g\}	}
tD ] }|| v rY|rNt| | d|
| d| < t| | d|	| |< q9| S )zLSample MSA randomly, remaining sequences are stored are stored as `extra_*`.r   r   r   Nr   extra_)r.   minrn   rw   r   splitMSA_FEATURE_NAMESindex_select)r   max_seq
keep_extragumbel_sampleZbiased_msa_by_chainrl   num_selZindex_orderr   Zsel_seqZnot_sel_seqr%   r'   r'   r(   
sample_msa   s0   




r   c                 C   s(   d| v r| d dkrt |dd| } | S )Nis_distillationr@   F)r~   )r   )r   r}   r'   r'   r(   sample_msa_distillation   s   r   c                 C   s   | d j d }| d j d }|j| }||krSttjj|d |d dd d }t|d }tj	t
dg|fdd}tD ]}|| v rRt| | d|| |< qA| S )Nr   r   r@   FreplacerH   )r.   Zmax_msa_entryr   r_   rT   rY   choicerA   sortrk   rC   r{   r|   )r   configrl   Zseq_lenr}   Z
keep_indexr%   r'   r'   r(   random_delete_msa   s&   
r   c                 C   sj   | d j d }t||}ttj|d | }tD ]}d| | v r2t| d|  d|| d| < q| S )N	extra_msar   rx   )	r.   ry   r   r_   rT   rY   rj   r{   r|   )r   Zmax_extra_msarl   r   Zselect_indicesr%   r'   r'   r(   crop_extra_msa  s   
r   c                 C   s$   t D ]}d| | v r| d| = q| S )Nrx   )r{   )r   r%   r'   r'   r(   delete_extra_msa  s
   
r   c                 C   s  d| v r| d dkr| S | d j d }||jkr| S ttj|tjd|j tj}|j	r:t
jd|jd }n|j}tt
jd||g}|d d d f td| }t|d|d }t|d}ttd|d  |d  tdd  f }|jdd	\}	}
|	|
dk }|d}ttd d  |d  gd}t|d dksJ tD ]}|| v rtj| | d|d
| |< q| S )Nr   r@   r   r   r+   r?   rG   Tro   )index)r.   Zmin_num_msar   floorrC   r    Zmsa_fraction_per_blocktor"   Zrandomize_num_blocksrT   rY   randintZ
num_blocksr_   arangecliprt   viewZhstackzerosrA   rS   r{   r|   )r   r   rl   Zblock_num_seqnbZdel_block_startsZ
del_blocksZdel_indicescombinedZuniquesrv   
differenceZkeep_indicesr%   r'   r'   r(   block_delete_msa&  sL   

r   rV   c                 C   s   t t d|t d t dgd}t| d d}| d d d d d d f | }t| d d}| d d d d d d f | }|j\}}}	|j\}
}	}	||
|d }|| ||d dd}|| }t j|dd		 | d
< | S )NrX   r@   r   r      r   r   extra_msa_maskrH   extra_cluster_assignment)
r   rk   r-   r   r
   r.   r   Z	transposerQ   rA   )r   gap_agreement_weightweightsmsa_one_hotZsample_one_hotZextra_msa_one_hotextra_one_hotrl   num_res_Zextra_num_seqab	agreementr'   r'   r(   nearest_neighbor_clustersJ  s"   r   c                 C   s   t |jdkr|jd | jd ksJ |j|jd gdt | jdd   R  }|| j}|gt| jdd   }tj| d||  }|	| j
}|S )Nr@   r   r@   )rP   r.   r   rE   listr   r   Zscatter_add_floatr   r!   )dataZsegment_idsZnum_segmentsr.   rC   r'   r'   r(   unsorted_segment_sume  s   r   c                    s   d j d   fdd}d }dd  || }||dddddf td	 d
 }|td d
7 }||dddddf  d< ~||d  }|d 7 }|| d< ~S )z=Produce profile and deletion_matrix_mean within each cluster.r   r   c                    s   t | d  S )Nr   )r   r5   rl   r   r'   r(   csumu  s   
z summarize_clusters.<locals>.csumr   r^   r   Nr   r   cluster_profileextra_deletion_matrixr   cluster_deletion_mean)r.   r
   )r   r   maskZmask_countsmsa_sumdel_sumr'   r   r(   summarize_clustersq  s   (r   c                 C   s  t jdgd |g dg t jd}| d }| d }t| d d}t| d	 d}|d
d
d
d
d
f | }|d
d
d
d
d
f | }|| }	|	|	jd |	jd |	jd  }	||jd |jd |jd  }
|	|
j }t jjj	d| dd}|t 
d||9 }t j|dd}|d7 }t 
d||}||7 }||d
d
d
d
f  }| d }| d }t 
d||| }||7 }||d
d
d
f  }|| d< || d< | S )zFAssign each extra MSA sequence to its nearest neighbor in sampled MSA.rp   rX   rV   r+   r   r   r   r   r   Nr   r@   rG   g     @@rH   z
mr, nr->mnr?   znm, mrc->nrcr   r   z
nm, mc->ncr   r   )r   rC   r    r
   r   r.   Tnn
functionalZsoftmaxeinsumrq   )batchr   r   r   Z
extra_maskr   r   Zmsa_one_hot_maskedZextra_one_hot_maskedt1t2r   Zcluster_assignmentZcluster_countr   r   r   r   r   r   r'   r'   r(   nearest_neighbor_clusters_v2  sF   	"
r   c                 C   sD   d| vrt j| d jt jd| d< t j| d jd t jd| d< | S )z:Mask features are all ones, but will later be zero-padded.r   r   r+   r   r   r,   r/   r'   r'   r(   make_msa_mask  s   

r   c              	   C   s   | j d dkr?t| tjd }tjd }tjd }tt|d dgt|j  dg |d|d	d	f |d|d	d	f }n|j	g | j dR  }|d	urs| j d dkrht||d|f |d|f }||fS t
|  }||fS |S )
zCreate pseudo beta features.r   GCAZCB.Nr@      .N)r.   r   eqrB   restype_order
atom_orderr\   tilerP   	new_zerosrr   r   )r*   all_atom_positionsall_atom_maskZis_glyca_idxZcb_idxpseudo_betapseudo_beta_maskr'   r'   r(   pseudo_beta_fn  s&   

 
r    c                 C   sP   |dv sJ t | |rdnd | |d  | |rdnd \| |d < | |d < | S )	z9Create pseudo-beta (alpha for glycine) position and mask.)r   Z	template_r1   r*   r   rO   r   r   r   )r   )r   prefixr'   r'   r(   make_pseudo_beta  s   


r   c                 C   s   t || |< | S r4   r   rC   )r   keyvaluer'   r'   r(   add_constant_field  s   r   绽|=c                 C   s`   | j }|d }t| | d|g} t }|tjd tj| d|d}t||d d S )Nr?   i  r@   )	generator)	r.   r   rs   	GeneratorZmanual_seedrT   rY   r   multinomial)Zprobsrb   ZdsZnum_classesgenrv   r'   r'   r(   shaped_categorical  s   r   c                 C   s0   d| v r| S t | d d}tj|dd| d< | S )7Compute the HHblits MSA profile if not already present.hhblits_profiler      r   rH   )r
   r   mean)r   r   r'   r'   r(   make_hhblits_profile  s
   r   c                 C   sL   t | d d}| d dddddf }||9 }|jdd|jddd  S )zCompute the MSA profile.r   r   r   Nr   rH   r   )r
   rq   )r   Zohr   r'   r'   r(   make_msa_profile  s   r   c                 C   s   d| v r| S t | | d< | S )r   r   )r   r/   r'   r'   r(   make_hhblits_profile_v2  s   r   c                 C   s   d|vr| S |d }|d }|d }|  }|dk}|D ]*}||k}t|| d }	|	dkrF||@ }
| d d |
f }|d|	| d d |f< q| S )Nnum_sym	entity_idsym_idr@   r   )rt   rS   repeat)mask_positionr   r   r   r   Zunique_entity_idsZfirst_sym_maskZcur_entity_idZcur_entity_maskZcur_num_symZcur_sym_maskZcur_sym_bert_maskr'   r'   r(   share_mask_by_entity  s$   r   c                 C   sn  t jdgd ddg t jd}|j| |j| d   |jt| d d  }ttt	dd	 t
t|jD }d
|d
< d|j |j |j }|dksKJ t jjj|||d}| d j}	t tjj|	 |k }
|
| d  M }
d| v ry|
| d  M }
|rt|
| }
|rt |d }t|}nt|}t |
|| d }|| d  9 }|
t j| d< | d | d< || d< | S )z Create data for BERT on raw MSA.g?rW   rV   r+   r   r   r   c                 S      g | ]}d qS )r   r   r'   .0r   r'   r'   r(   
<listcomp>@      z#make_masked_msa.<locals>.<listcomp>r@   rp   )r   r   r   r^   r   )r   rC   r    Zuniform_probZprofile_probZ	same_probr
   r   r   r   rangerP   r.   r   r   padr_   rT   rY   rZ   boolr   ra   rg   r   r\   rA   r   )r   r   Zreplace_fractionr   Z
share_maskZ	random_aaZcategorical_probsZ
pad_shapesZ	mask_probshr   re   Zbert_msar'   r'   r(   make_masked_msa/  sB   


r   c              	      sX  ddd}|dur$d| v r| d j d n| d j d }||kr$||d}d	| v r8| d	 j d }||kr8||d
}t|t|t|t|i |  D ]c\}	|	dkrOqFtj }
||	 }d}t|
t|ksqJ | d|	 d|
 d|  fddt|
|D }fddt	|D }|
  ttj| }|rtjj|| |	< t| |	 || |	< qF| S )z;Guess at the MSA and sequence dimension to make fixed size.   c                 S   s   t || | d | | S )Nr@   max)Zcur_sizeZ
multiplierr'   r'   r(   get_pad_sizej  s   z%make_fixed_size.<locals>.get_pad_sizeNr*   r   r   r@   r      r   z0Rank mismatch between shape and shape schema for z: z vs c                    s    g | ]\}}  |d p|qS r4   )get)r   s1s2)pad_size_mapr'   r(   r     s    z#make_fixed_size.<locals>.<listcomp>c                    s"   g | ]\}}d | j |  fqS r   r.   )r   ip)r&   r'   r(   r     s   " )r   )r.   r   r   r   r   r   r   rP   zip	enumeratereverse	itertoolschainr   r   r   r   rs   )r   shape_schemaZmsa_cluster_sizeZextra_msa_sizer   num_templatesr   Zinput_num_resZinput_extra_msa_sizer%   r.   ZschemamsgZpad_sizepaddingr'   )r   r&   r(   make_fixed_size_  sF   



.
r  c                 C   s   | d   | d< d| v rt| d tjdd}n%tj| d tjd}d| v r;| d }tj|ddd	d }d
||< | }t| d d}tj	|dd|g}tj
|dd| d< | S )$Create and concatenate MSA features.r*   rN   r   r@   r+   asym_lenr?   rH   Nrp   rX   target_feat)rA   r   r   r   r    rr   Zcumsumr   r
   rD   rk   )r   Z	has_breakr
  Zentity_endsZaatype_1hotr  r'   r'   r(   make_target_feat  s"   r  c                 C   s   t | d d}t| d dd}t| d d dtj  }|tj|dd	tj|dd	g}d
| v rMt| d d dtj  }|| d
 tj|dd	g d| v rlt| d dd| d< t| d d dtj  | d< tj|dd	| d< | S )r	  r   r   r   rV   rp         @       @r?   rH   r   r   r   extra_msa_has_deletionextra_msa_deletion_valuemsa_feat)	r
   r   r   atanrT   pirD   extendrk   )r   msa_1hothas_deletiondeletion_valuer  deletion_mean_valuer'   r'   r(   make_msa_feat  s@   



r  c                 C   s   t | d d}| d }t|ddd }t|d dtj  d }t| d	 d dtj  d }|||| d
 |g}tj|dd| d< | S )r	  r   r   r   rV   rp   r   r  r  r   r   r?   rH   r  )r
   r   r   r  rT   r  Zarctanrk   )r   r  r   r  r  r  r  r'   r'   r(   make_msa_feat_v2  s"   r  c                 C   sz   | d d | }| d d | }t |dd}t |d dtj  }| d d | }|| d< || d< || d< || d	< | S )
Nr   r   rV   rp   r  r  r   r  r  )r   r   r  rT   r  )r   Znum_extra_msar   r   r  r  r   r'   r'   r(   make_extra_msa_feat  s   r  c                    s    fdd|   D S )Nc                    s   i | ]\}}| v r||qS r'   r'   r   r%   r&   feature_listr'   r(   
<dictcomp>  s    zselect_feat.<locals>.<dictcomp>)r   )r   r  r'   r  r(   select_feat  s   r   c                 C   s   d| v r| S t jtjt j| d jd}t jtjt j| d jd}t jtjt j| d jd}t jtj	t j| d jd}| d 
 }|| 
 | d< || 
 | d< || | d< || | d< | S )z>Construct denser atom positions (14 dimensions instead of 37).atom14_atom_existsr*   r!   deviceresidx_atom14_to_atom37Zresidx_atom37_to_atom14Zatom37_atom_exists)r   rC   rB   restype_atom14_to_atom37r$   r#  restype_atom37_to_atom14restype_atom14_maskr    restype_atom37_maskrA   )r   r%  r&  r'  r(  Zprotein_aatyper'   r'   r(   make_atom14_masks  sH   r)  c                 C   s,   t dd | tj} t| }tdd |}|S )Nc                 S   
   t | S r4   r   )nr'   r'   r(   r:        
 z&make_atom14_masks_np.<locals>.<lambda>c                 S   r*  r4   )rT   array)tr'   r'   r(   r:     r,  )r   rT   ndarrayr)  r   )r   outr'   r'   r(   make_atom14_masks_np  s   r1  c           
   	   C   s8  | d   | d< | d  | d< | d  | d< | d }| d }|t| d |dt| d jdd d }|d	 t| d |d
t| d jdd
 d }|| d< || d< || d< tjtj| d j	| d j
d}|| d  }td||}|| d< td||}|| d< tjtj| d j	| d j
d}	|	| d  | d< | S )z?Constructs denser atom positions (14 dimensions instead of 37).r*   r   r   r!  r$  r?   NrI   Znum_batch_dimsr   Zatom14_gt_existsZatom14_gt_positionsr"  z...rac,...rab->...rbcZatom14_alt_gt_positionsz...ra,...rab->...rbZatom14_alt_gt_existsZatom14_atom_is_ambiguous)rA   r   r	   rP   r.   r   rC   rB   renaming_matricesr!   r#  r   restype_atom14_is_ambiguous)
r   Zresidx_atom14_maskr$  Zresidx_atom14_gt_maskZresidx_atom14_gt_positionsr4  Zrenaming_transformZalternative_gt_positionsZalternative_gt_maskr5  r'   r'   r(   make_atom14_positions!  s^   r6  :0yE>c                    s  | d }| d }| d }t |jd d }tjg ddtd}g d|d d d	d d f< g d
|d d dd d f< ttjD ]-\}}tj| }	t	dD ]}
tj
| |
 rjtj|	 |
 }|dd  |||
d d d f< qKq>|g |jd d ddR }d|d< d|d< |tj
|dd ddd f< tj  d	 d< t fdd}||}||}|jg d| |jR  }t||d|d}t||dt |jd d d}tj|dd	d d f |ddd d f |ddd d f |d}t||d|d}t||dt |jd d d}tj|ddd	 | }tjd|j|jd}t|g d| dddR }d|d< d|d< t|d }|t|d }|jg d| ddR  }tjd|j|jd}t|g d| ddddR }tj D ]7\}	}tjtj |	  }t!t"tj
| d }
d|d||
d f< d|d||
d ddf< d|d||
d ddf< qst||d|d}t||d!|d}t|d }|t|d }|# }|# }|| d"< || d#< || d$< || d%< || d&< | S )'Nr*   r   r   r?   )rX   r   r   r   r+   )Cr   Nr   )r   r8  Or   r   r@   rX   r   .r   ).r   .rW   c                    s    |  S r4   r'   r5   Zlookuptabler'   r(   r:     s    z"atom37_to_frames.<locals>.<lambda>r   r2  r3  rG   )Zp_neg_x_axisoriginZ
p_xy_planeepsrH   r"  ).r   r   r   ).r   rG   rG   )Zmatrigidgroups_gt_framesrigidgroups_gt_existsZrigidgroups_group_existsZrigidgroups_group_is_ambiguousZrigidgroups_alt_gt_frames)$rP   r.   rT   fullobjectr   rB   ZrestypesZrestype_1to3r   chi_angles_maskZchi_angles_atomsr   
new_tensorr   copyZ	vectorizer   r	   r   from_3_pointsr   ry   eyer!   r#  r   r   ZcomposeZresidue_atom_renaming_swapsr   r   Zrestype_3to1rS   rq   Zto_tensor_4x4)r   r?  r*   r   r   Z
batch_dimsZ"restype_rigidgroup_base_atom_namesrestypeZrestype_letterZresnameZchi_idxnamesZrestype_rigidgroup_masklookupZ"restype_rigidgroup_base_atom37_idxZ!residx_rigidgroup_base_atom37_idxZbase_atom_posZ	gt_framesZgroup_existsZgt_atoms_existZ	gt_existsZrotsZrestype_rigidgroup_is_ambiguousZrestype_rigidgroup_rotsr   Zresidx_rigidgroup_is_ambiguousZresidx_rigidgroup_ambiguity_rotZalt_gt_framesZgt_frames_tensorZalt_gt_frames_tensorr'   r<  r(   atom37_to_framesb  s   




"

rM  c              	   C   s8  | |d  }| |d  }| |d  }|j d dkrL|j }|jg |ddR  | |d < |jg |ddR  | |d	 < |jg |dR  | |d
 < | S tj|dd}|g |j d d ddd}tj||dd dd d d d f gdd}|g |j d d dd}tj||dd dd d f gdd}tj|dddd d f |dd dd d f gdd}	tj|dddd d f |dd dd d f gdd}
tj|dd dd d f |dddd d f gdd}tj|dddf ddtj|dd df dd }|d tj|dd df d|jd }tj|dd df d|jd|d  }tjtj	|j
d}|d|d d d d f }t||dt|j d d }ttj}|g d ||}||d d f }t||dt|j d d d}tj|d|jd}|| }tj|	dd d d d d f |
dd d d d d f |dd d d d d f |gdd}tj|d |d |d |gdd}tj|ddd d f |ddd d f |ddd d f dd}| |ddd d f }tj|d |d gdd}ttjt|d|jd d!d }|| }||g d"d#t|j d d  td d f   }|tj|df }tj|jg |j dR  d$d%|  gdd}||d  }|d&krtjt|j d d t|j d d gdd}||d  |d|d    }||d  |d|d    }|| |d < || |d	 < || |d
 < | S )'Nr*   r   r   r?   r      rG   torsion_angles_sin_cosalt_torsion_angles_sin_costorsion_angles_maskrW   r   r=  r@   %   r   .rH   r3  r      ).rG   )rI   r!   ).r   r#  )rV   rV   rV   rV   r2  r   r7  )r?  ).r@   T)rI   r!   Zkeepdims)rp   rp   g      rp   rp   rp   rp   r4   rp   r  r   )r.   r   r   clamprk   prodr!   Z	as_tensorrB   chi_atom_indicesr#  r	   rP   r   rE  appendrF  r   rH  invertapplystacksqrtrq   ZsquaresliceZchi_pi_periodicZnew_onesr-   r   )r   r   r*   r   r   Z
base_shaper   Zprev_all_atom_positionsZprev_all_atom_maskZpre_omega_atom_posZphi_atom_posZpsi_atom_posZpre_omega_maskZphi_maskZpsi_maskrW  Zatom_indicesZchis_atom_posrE  Z	chis_maskZchi_angle_atoms_maskZtorsions_atom_posrQ  Ztorsion_framesZfourth_atom_rel_posrO  denomZchi_is_ambiguousZmirror_torsion_anglesrP  Zplaceholder_torsionsr'   r'   r(   atom37_to_torsion_angles  sP    $***







	
r_  c                 C   s4   | d ddd d d d f | d< | d d | d< | S )NrA  .r   Ztrue_frame_tensorrB  r;  Z
frame_maskr'   r/   r'   r'   r(   get_backbone_frames  s
   
r`  c                 C   sP   | d j }| d ddd d d f || d< | d ddd f || d< | S )Nr   rO  .r   Zchi_angles_sin_cosrQ  Zchi_mask)r!   r   )r   r!   r'   r'   r(   get_chi_angles  s
   
ra  c                 C   s  d| v r| d j d }nd}|dkr|r0t|tjd|d }tjtjj||ddtjd}ntj	t||tjd}| 
 D ]H\}}|drz|| }W n4 ty } z(t|j| td	| t||j  td
|  tddd | 
 D  W Y d }~nd }~ww || |< q?| S )Nr2   r?   r   r@   Fr   r+   templater  zprotein:zprotein_shape:c                 S   s$   i | ]\}}d t |v r||jqS r   )dirr.   r  r'   r'   r(   r    s    z"crop_templates.<locals>.<dictcomp>)r.   ry   rT   rY   r   r   rC   r   r$   r   r   
startswith	Exceptionprint	__class__)r   Zmax_templatesZsubsample_templatesr  Ztemplate_idxr%   r&   exr'   r'   r(   crop_templates  sD   



ri  c                 C   s@   d| v r| d j d n| d j d }t|||}t| ||} | S )crop to size.r*   r   r   r@   )r.   get_single_crop_idxapply_crop_idx)r   	crop_sizer  seedr   crop_idxr'   r'   r(   crop_to_size_single  s   rp  c           	      C   s   t j|dd tj |k }W d   n1 sw   Y  d| v o'| d dk}|r3t|||d| S |r=t| |||}nt| ||}t| ||S )rj  Zmultimer_cropr   Nr   r@   )rm  r  rn  )	r   
numpy_seedrT   rY   rZ   rp  get_spatial_crop_idxget_contiguous_crop_idxrl  )	r   rm  r  rn  Zspatial_crop_probca_ca_thresholdZuse_spatial_cropr   ro  r'   r'   r(   crop_to_size_multimer  s(   
rv  r   rm  random_seedreturnc                 C   sj   | |k r	t | S t| ttjd| | d }t ||| W  d    S 1 s.w   Y  d S )Nr   r@   )r   r   r   rr  rS   rT   rY   r   )r   rm  rw  Z
crop_startr'   r'   r(   rk    s   
$rk  r
  use_multinomialc              	   C   sD  |st j|dd tjt| }W d   n1 sw   Y  |  }t|}dd | D }t	|D ]K\}}	| |	 }
||
8 }t
||
}t
|
td|| }t j||dd ttjjt|t|d d	}W d   n1 ssw   Y  ||8 }|||	< q5t|}|S | t|  }ttjj||d
}t
|| }|S )z"get crop sizes for contiguous cropZmultimer_contiguous_permrq  Nc                 S   r   r   r'   r   r'   r'   r(   r     r   z-get_crop_sizes_each_chain.<locals>.<listcomp>r   Zmultimer_contiguous_crop_sizer@   )lowhigh)Zpvals)r   rr  rT   rY   rj   rP   rq   r   rC   r   ry   r   rS   r   r_   r   )r
  rm  rw  ry  Zshuffle_idxZnum_leftZ
num_budget
crop_sizesjidxZthis_lenmax_sizeZmin_sizeZthis_crop_sizeZentity_probsr'   r'   r(   get_crop_sizes_each_chain  sB   



r  c              	   C   s   | d j d }||krt|S d| v sJ | d }t||||}g }tjdtjd}tj|dd2 t||D ]$\}	}
t	j
dt|	|
 d }|t|| || |
  ||	7 }q8W d    n1 sgw   Y  t|S )Nr*   r   r
  r+   Z"multimer_contiguous_crop_start_idxrq  r@   )r.   r   r   r  rC   r$   r   rr  r   rT   rY   r   rS   rX  rk   )r   rm  rw  ry  r   r
  r|  Z	crop_idxsZasym_offsetZllZcszZ
this_startr'   r'   r(   rt    s0   




	rt       L@ru  infc                 C   s4  t jd }| d d|d d f }| d d|f  }|jdddk r*t| ||S |d |dd d d f  }t|}	t|	| d	 ||}
t	|
rit
j|d
d ttj|
}W d    n1 scw   Y  nt| ||S |	| }||| < tjd|jd |jd d }||7 }t|d | }| jS )Nr   r   .r   r?   rH   r@   r   asym_idZmultimer_spatial_croprq  r   rT  gMbP?)rB   r   r   rq   allrt  get_pairwise_distancesget_interface_candidatesr   anyr   rr  rS   rT   rY   r   r   r.   r#  r   rh   r   values)r   rm  rw  ru  r  r   Z	ca_coordsZca_mask	pair_maskca_distancesinterface_candidatesZ
target_resZto_target_distancesZ	break_tieretr'   r'   r(   rs  .  s<   



rs  coordsc                 C   s,   |  d|  d }ttj|d ddS )Nr3  r=  rG   r?   rH   )rD   r   r\  rq   )r  Z
coord_diffr'   r'   r(   r  W  s   r  r  r  r  c                 C   s\   |d |dd d d f k}| d|    | } tj| dk| |k @ dd}|jddd }|S )	Nr   .rp   r   r?   rH   T)as_tuple)r   r   rq   Znonzero)r  r  r  ru  Zin_same_asymZcnt_interfacesr  r'   r'   r(   r  \  s   r  c                 C   sX   i }|   D ]#\}}||vrqt|| D ]\}}|tkr$t|||}q|||< q|S r4   )r   r   r   r   r|   )r   r  ro  Zcropped_proteinr%   r&   r   Zdim_sizer'   r'   r(   rl  l  s   
rl  r4   )FF)rV   )r   )r   r   )r7  )F)NF)r  )`r  	functoolsr   r   operatorr   typingr   r   r   numpyrT   r   Zunicore.datar   Zunicore.utilsr	   r
   r   r   Z(modelscope.models.science.unifold.configr   r   r   r   Z&modelscope.models.science.unifold.datar   rB   Z/modelscope.models.science.unifold.modules.framer   r   strr/  Z	NumpyDictZ	TorchDict__annotations__r{   r)   r0   r3   r>   rF   rU   r]   rc   rg   ri   rn   rw   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r  r  r   r)  r1  r6  rM  r_  r`  ra  ri  rp  rv  rS   ZTensorrk  r   r  rt  r   rs  r  r  rl  r'   r'   r'   r(   <module>   s&  

"

"



#3



	/6

%
A   	-





)
#
)
