o
    *j,                  	   @   s   d Z ddlZddlZddlmZmZmZ ddlZddl	m
Z
 ddlmZ eeejf Zeeef ZdZeeZejddG d	d
 d
Zddedee defddZdefddZdedefddZdedejfddZ	ddededeej defddZ	ddedeej defddZdS )zProtein data type.    N)AnyMappingOptional)	PDBParser)residue_constantsZ>ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789T)frozenc                   @   sV   e Zd ZU dZejed< ejed< ejed< ejed< ejed< ejed< dd	 Zd
S )Proteinz!Protein structure representation.atom_positionsaatype	atom_maskresidue_indexchain_index	b_factorsc                 C   s(   t t| jtkrtdt dd S )Nz(Cannot build an instance with more than z6 chains because these cannot be written to PDB format.)lennpuniquer   PDB_MAX_CHAINS
ValueError)self r   o/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/modelscope/models/science/unifold/data/protein.py__post_init__<   s
   
zProtein.__post_init__N)__name__
__module____qualname____doc__r   ndarray__annotations__r   r   r   r   r   r       s   
 





r   pdb_strchain_idreturnc              	      s  t | }tdd}|d|}t| }t|dkr&tdt| d|d }g }g }g }	g }
g }g }|D ]}|durD|j|krDq8|D ]}|jd	 d
kr^td|j d|jd  dt	j
|jd}t	j|t	j}tt	jdf}tt	jf}tt	jf}|D ]#}|jt	jvrq|j|t	j|j < d|t	j|j < |j|t	j|j < qt|dk rqF|| || |	| |
|jd  ||j || qFq8t|}dd t|D  t fdd|D }tt|t|	t|t|
|t|dS )a  Takes a PDB string and constructs a Protein object.

    WARNING: All non-standard residue types will be converted into UNK. All
      non-standard atoms will be ignored.

    Args:
      pdb_str: The contents of the pdb file
      chain_id: If chain_id is specified (e.g. A), then only that chain
        is parsed. Otherwise all chains are parsed.

    Returns:
      A new `Protein` parsed from the pdb contents.
    T)ZQUIETnone   z,Only single model PDBs are supported. Found z models.r   N    z(PDB contains an insertion code at chain z and residue index z. These are not supported.X         ?      ?c                 S   s   i | ]\}}||qS r   r   ).0ncidr   r   r   
<dictcomp>       z#from_pdb_string.<locals>.<dictcomp>c                    s   g | ]} | qS r   r   )r)   r+   Zchain_id_mappingr   r   
<listcomp>   s    z#from_pdb_string.<locals>.<listcomp>)r	   r   r
   r   r   r   )ioStringIOr   Zget_structurelistZ
get_modelsr   r   idr   Zrestype_3to1getZresnameZrestype_orderrestype_numr   ZzerosZatom_type_numname
atom_typesZcoordZ
atom_orderZbfactorsumappendr   	enumeratearrayr   )r   r   Zpdb_fhparserZ	structuremodelsmodelr	   r
   r   r   	chain_idsr   chainresZres_shortnameZrestype_idxposmaskZres_b_factorsZatomZunique_chain_idsr   r   r.   r   from_pdb_stringC   s~   






rD   c                 C   s*   d}|d| dd|dd|d|dS )	NZTER<6>5z      >3r$   >1>4r   )
atom_indexZend_resnameZ
chain_namer   Z	chain_endr   r   r   
_chain_end   s   rK   protc                    s|  t jdg   fdd}t j}g }| j}| j}| j}| jtj	}| j
tj	}| j}	t|t jkr7tdi }
t|D ]}|tkrLtdt dt| |
|< q>|d d}|d	 }t|jd	 D ]}||| kr|t||||d  |
||d   ||d   || }|d7 }||| }t||| || |	| D ]n\}}}}|d
k rqd}t|dkr|nd| }d}d}d}|d	 }d}|d|dd|d|d|dd|
||  d|| d|dd|d	 d|d d|d d|d|dd|d|d}|| |d7 }qqe|t|||d |
|d  |d  |d |d dd  |D }d!|d! S )"zConverts a `Protein` instance to a PDB string.

    Args:
      prot: The protein to convert to PDB.

    Returns:
      PDB string.
    r%   c                    s   t j |  dS )NZUNK)r   Zrestype_1to3r4   )rrestypesr   r   res_1to3   s   zto_pdb.<locals>.res_1to3zInvalid aatypes.z The PDB format supports at most z chains.zMODEL     1r"   r   r(   ATOM   r$    r'   rE   rF   z<4rH   rG   rI   z   z>8.3fr#   z>6.2fz
          z>2ZENDMDLENDc                 S   s   g | ]}| d qS )P   )ljust)r)   liner   r   r   r/      r-   zto_pdb.<locals>.<listcomp>
)r   rO   r7   r   r
   r	   r   Zastyper   Zint32r   r   anyr5   r   r   r   PDB_CHAIN_IDSr9   rangeshaperK   zipr   join)rL   rP   r7   Z	pdb_linesr   r
   r	   r   r   r   r?   irJ   Zlast_chain_indexZ
res_name_3Z	atom_namerB   rC   Zb_factorZrecord_typer6   Zalt_locZinsertion_codeZ	occupancyelementZchargeZ	atom_liner   rN   r   to_pdb   s   	








rb   c                 C   s   t j| j S )as  Computes an ideal atom mask.

    `Protein.atom_mask` typically is defined according to the atoms that are
    reported in the PDB. This function computes a mask according to heavy atoms
    that should be present in the given sequence of amino acids.

    Args:
      prot: `Protein` whose fields are `numpy.ndarray` objects.

    Returns:
      An ideal atom mask.
    )r   ZSTANDARD_ATOM_MASKr
   )rL   r   r   r   ideal_atom_mask   s   rc   featuresresultr   c                 C   sb   d| v r| d d }nt | d }|du rt |d }t| d |d |d | d d ||dS )	a  Assembles a protein from a prediction.

    Args:
      features: Dictionary holding model inputs.
      fold_output: Dictionary holding model outputs.
      b_factors: (Optional) B-factors to use for the protein.

    Returns:
      A protein instance.
    asym_idr"   r
   NZfinal_atom_maskZfinal_atom_positionsr   r
   r	   r   r   r   r   r   Z
zeros_liker   )rd   re   r   r   r   r   r   from_prediction  s   
ri   c                 C   sb   d| v r| d d }nt | d }|du rt | d }t| d | d | d | d d ||dS )	zAssembles a standard pdb from input atom positions & mask.

    Args:
      features: Dictionary holding model inputs.
      b_factors: (Optional) B-factors to use for the protein.

    Returns:
      A protein instance.
    rf   r"   r
   NZall_atom_maskZall_atom_positionsr   rg   rh   )rd   r   r   r   r   r   from_feature'  s   
rj   )N)r   dataclassesr0   typingr   r   r   numpyr   ZBio.PDBr   Z&modelscope.models.science.unifold.datar   strr   ZFeatureDictZModelOutputr[   r   r   	dataclassr   rD   rK   rb   rc   ri   rj   r   r   r   r   <module>   s>   
"N`
!