from Bio.PDB import PDBParser
from Bio.PDB import Selection
# Parse the PDB file into a Structure object labelled '4GBX'.
parser = PDBParser()
structure = parser.get_structure('4GBX', '4GBX.pdb')

# Pick chain 'E' of the first model and flatten it to atom level ('A' = atoms).
chain_e = structure[0]['E']
atom_list = Selection.unfold_entities(chain_e, 'A')
当我使用上面的代码展开 PDB 条目 4GBX 的 E 链时,`atom_list` 中最后 2 个氧原子属于同一条链中的水分子(杂原子,HETATM)。如何得到只包含蛋白质残基原子的列表,并把其他配体或水分子排除在选择之外?
问题代码:
from Bio.PDB import PDBParser
from Bio.PDB import Selection
# Parse the structure; QUIET=True silences the parser's construction warnings.
structure = PDBParser(QUIET=True).get_structure('4GBX', '4gbx.pdb')  # load your molecule

# Flatten chain 'E' of the first model down to atom level ('A' = atoms).
atom_list = Selection.unfold_entities(structure[0]['E'], 'A')

# Print every atom with its element and the id of its parent residue.
# This intentionally lists ALL atoms of the chain, including the water
# hetero-atoms (residue ids starting with 'W') that the question is about.
for atom in atom_list:
    print(atom, atom.element, atom.parent.id)
输出:
...........
...........
...........
<Atom CA> C (' ', 93, ' ')
<Atom C> C (' ', 93, ' ')
<Atom O> O (' ', 93, ' ')
<Atom CB> C (' ', 93, ' ')
<Atom OG1> O (' ', 93, ' ')
<Atom CG2> C (' ', 93, ' ')
<Atom O> O ('W', 101, ' ')
<Atom O> O ('W', 102, ' ')
根据评论回答:
from Bio.PDB import PDBParser
from Bio.PDB import Selection
# Parse the structure; QUIET=True silences the parser's construction warnings.
structure = PDBParser(QUIET=True).get_structure('4GBX', '4gbx.pdb')  # load your molecule

# Flatten chain 'E' of the first model down to atom level ('A' = atoms).
atom_list = Selection.unfold_entities(structure[0]['E'], 'A')

# A residue id is a tuple (hetero_flag, sequence_number, insertion_code).
# The hetero flag is ' ' for ordinary residues and non-blank for hetero
# residues (e.g. 'W' for water), so keeping only atoms whose parent residue
# has a blank flag drops waters and ligands.
protein_atoms = [atom for atom in atom_list if atom.parent.id[0] == " "]

for atom in protein_atoms:
    print(atom, atom.element, atom.parent.id)
输出:
...........
...........
...........
<Atom CA> C (' ', 93, ' ')
<Atom C> C (' ', 93, ' ')
<Atom O> O (' ', 93, ' ')
<Atom CB> C (' ', 93, ' ')
<Atom OG1> O (' ', 93, ' ')
<Atom CG2> C (' ', 93, ' ')