Source code for pdb_superimposer.superimposition

"""

A Module to superimpose two protein chains.

"""

import numpy as np
from Bio import PDB
from scipy.spatial import distance


[docs]class ChainSuperimposer: """ A class to organise the superimposition of two chains. First the atoms involved in the operation are selected. Then :class:`Bio.PDB.Superimposer.Superimposer` is employed to do the superimposition. """
[docs] def __init__(self, reference, other, other_struc): """ Initialise the class. :param reference: The chain used as a template in the superimposition. :type reference: :class:`Bio.PDB.Chain.Chain` :param other: The chain to be transformed. :type other: :class:`Bio.PDB.Chain.Chain` :param other_struc: The structure object of the other chain. This enables the transformation matrix to be applied across the whole structure. :type other_struc: :class:`Bio.PDB.Structure.Structure` """ # Input data self.reference = reference self.other = other self.other_struc = other_struc # Needed to translate the whole structure # ensure only single chains are supplied assert (reference.get_level() == 'C') assert (other.get_level() == 'C') # Setup a dictionary matching uniprot res index with res. Discard ligands and water. # res.get_id()[0] ---> hetid flag # res.get_id()[1] ---> res index self.reference_seq = {res.get_id()[1]: res for res in self.reference if res.get_id()[0] == " "} self.other_seq = {res.get_id()[1]: res for res in self.other if res.get_id()[0] == " "} self.selected_index = self._residue_selection()
[docs] def _residue_selection(self): """ A class method to do the essential residue filtering using the UniProt sequence positions. - Find intersecting sequence indices between the reference and other. - Remove index if at a given UniProt index the residue type in reference and other is not the same - Remove index if at a given UniProt index either the reference or other residue is incomplete :return: A set of sequence indices :rtype: set """ sele = set(self.reference_seq.keys()).intersection(set(self.other_seq.keys())) for i in sele: # print(self.reference_seq[i].resname != self.other_seq[i].resname) # print(self.is_residue_incomplete(self.reference_seq[i])) print(self.is_residue_incomplete(self.other_seq[i])) rm = {i for i in sele if any((self.reference_seq[i].resname != self.other_seq[i].resname, self.is_residue_incomplete(self.reference_seq[i]), self.is_residue_incomplete(self.other_seq[i]) )) } for r in rm: sele.remove(r) return sele
[docs] def superimpose(self, hetid=None, within=8.0): """ The superimposition method. If optional `hetid` is supplied, only binding site residues will used for the superimposition. :param hetid: Ligand identifier :type hetid: str :param binding_site: flag :type binding_site: bool :param within: binding site cutoff distance :type within: float .. code-block:: python # Example useage from pdb_superimposer import ChainSuperimposer, Helper ref = Helper.protein_from_file("2VTA", "2VTA.pdb") ref_chain = [c for c in ref[0]][0] other = Helper.protein_from_file("6YLK", "6YLK.pdb") ref_chain = [c for c in other[0]][0] cs = ChainSuperimposer(reference=ref_chain, other=other_chain, other_struc=other) cs.superimpose() """ if hetid: # detect binding site bs = self.binding_site(chain=self.other, hetid=hetid, within=within) self.selected_index = self.selected_index.intersection(bs) super_imposer = PDB.Superimposer() # set the active atoms reference_atms = [atm for resi in self.reference_seq.values() for atm in resi if resi.get_id()[1] in self.selected_index and atm.element != "H"] other_atms = [atm for resi in self.other_seq.values() for atm in resi if resi.get_id()[1] in self.selected_index and atm.element != "H"] super_imposer.set_atoms(reference_atms, other_atms) # apply the transformation matrix to the whole chain super_imposer.apply(self.other_struc.get_atoms()) self.rms = super_imposer.rms
[docs] @staticmethod def is_residue_incomplete(res): """ Determine whether the residue is incomplete (i.e. whether there are missing residue atoms) :param res: A PDB residue :type: `Bio.PDB.Residue.Residue` :return: A boolean expressing whether the residue is incomplete :rtype: bool >>> from pdb_superimposer import ChainSuperimposer, Helper >>> ref = Helper.protein_from_file("2VTA", "2VTA.pdb") >>> ref_chain = [c for c in ref[0]][0] >>> first_res = [r for r in ref_chain][0] >>> bs = ChainSuperimposer.is_residue_incomplete(first_res) False """ aa_atm_dic = {'MET': 8, 'GLU': 9, 'ASN': 8, 'PHE': 11, 'GLN': 9, 'LYS': 9, 'VAL': 7, 'ILE': 8, 'GLY': 4, 'THR': 7, 'TYR': 12, 'ALA': 5, 'ARG': 11, 'LEU': 8, 'SER': 6, 'HIS': 10, 'PRO': 7, 'ASP': 8, 'CYS': 6, 'TRP': 14} return len([a for a in res if a.element != "H"]) != aa_atm_dic[res.resname]
[docs] @staticmethod def binding_site(chain, hetid, within=8.0): """ Detect binding site residues for a supplied hetid :param chain: A protein chain :param hetid: A ligand identifier :param within: A cutoff distance :type chain: `Bio.PDB.Chain` :type hetid: str :type within: float :return: A boolean list denoting inclusion in the superimposition operation :rtype: list >>> from pdb_superimposer import ChainSuperimposer, Helper >>> ref = Helper.protein_from_file("2VTA", "2VTA.pdb") >>> ref_chain = [c for c in ref[0]][0] >>> ans = ChainSuperimposer.binding_site(ref_chain, "LZ1", 10.0) {1, 2, 3, ...} """ # array of ligand coords ligand = [resi for resi in chain if hetid in resi.get_id()[0]][0] ligand_atms = np.array([atm.get_coord() for atm in ligand]) # list of array's containing residue coords resi_atms = [np.array([atm.get_coord() for atm in resi]) for resi in chain if resi.get_id()[0] == " "] # loop through the residues, # calculate and all-by-all atom distances # if any atom is within the cutoff distance, return True return {r.get_id()[1] for r in resi_atms if np.amin(distance.cdist(r, ligand_atms)) <= within}
class Helper: @staticmethod def protein_from_file(name, fpath): """ Reads a PDB file. :param name: name of object :param fpath: path to pdb file :type name: str :type fpath: str :return: a protein structure :rtype: `Bio.PDB.Structure.Structure` """ return PDB.PDBParser().get_structure(name, fpath) @staticmethod def protein_to_file(structure, fpath): """ Writes a PDB to file. :param structure: a protein structure :param fpath: path for output pdb file :type structure: `Bio.PDB.Structure.Structure` :type fpath: str """ io = PDB.PDBIO() io.set_structure(structure) io.save(fpath)