Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions src/openfe_analysis/reader.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import pathlib
from typing import Literal, Optional

import MDAnalysis as mda
import netCDF4 as nc
import numpy as np
import yaml
Expand All @@ -11,6 +12,16 @@
from openfe_analysis.utils.multistate import _determine_position_indices


def _create_universe_single_state(top, trj, state):
return mda.Universe(
top,
trj,
index=state,
index_method="state",
format=FEReader,
)


def _determine_iteration_dt(dataset) -> float:
"""
Determine the time increment between successive iterations
Expand Down
96 changes: 5 additions & 91 deletions src/openfe_analysis/rmsd.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,96 +7,9 @@
import numpy as np
from MDAnalysis.analysis import diffusionmap, rms
from MDAnalysis.analysis.base import AnalysisBase
from MDAnalysis.transformations import unwrap

from .reader import FEReader
from .transformations import Aligner, ClosestImageShift, NoJump


def make_Universe(top: pathlib.Path, trj: nc.Dataset, state: int) -> mda.Universe:
"""
Construct an MDAnalysis Universe from a MultiState NetCDF trajectory
and apply standard analysis transformations.

The Universe is created using the custom ``FEReader`` to extract a
single state from a multistate simulation.

Parameters
----------
top : pathlib.Path or Topology
Path to a topology file (e.g. PDB) or an already-loaded MDAnalysis
topology object.
trj : nc.Dataset
Open NetCDF dataset produced by
``openmmtools.multistate.MultiStateReporter``.
state : int
Thermodynamic state index to extract from the multistate trajectory.

Returns
-------
MDAnalysis.Universe
A Universe with trajectory transformations applied.

Notes
-----
Identifies two AtomGroups:

- Protein, defined as having standard amino acid names, then filtered down to CA
- Ligand, defined as "resname UNK"

Depending on whether a protein is present, a sequence of trajectory
transformations is applied:

If a protein is present:

- Unwraps protein and ligand atom to be made whole
- Shifts protein chains and the ligand to the image closest to the first
protein chain (:class:`ClosestImageShift`)
- Aligns the entire system to minimise the protein RMSD (:class:`Aligner`)

If only a ligand is present:

- Prevents the ligand from jumping between periodic images
- Aligns the ligand to minimize its RMSD
"""
u = mda.Universe(
top,
trj,
index=state,
index_method="state",
format=FEReader,
)
prot = u.select_atoms("protein and name CA")
ligand = u.select_atoms("resname UNK")

if prot:
# Unwrap all atoms
unwrap_tr = unwrap(prot + ligand)

# Shift chains + ligand
chains = [seg.atoms for seg in prot.segments]
shift = ClosestImageShift(chains[0], [*chains[1:], ligand])

align = Aligner(prot)

u.trajectory.add_transformations(
unwrap_tr,
shift,
align,
)
else:
# if there's no protein
# - make the ligand not jump periodic images between frames
# - align the ligand to minimise its RMSD
nope = NoJump(ligand)
align = Aligner(ligand)

u.trajectory.add_transformations(
nope,
align,
)

return u
from .reader import _create_universe_single_state
from .utils.apply_transformations import apply_transformations


class Protein2DRMSD(AnalysisBase):
Expand Down Expand Up @@ -289,11 +202,12 @@ def gather_rms_data(
for i in range(n_lambda):
# cheeky, but we can read the PDB topology once and reuse per universe
# this then only hits the PDB file once for all replicas
u = make_Universe(u_top._topology, ds, state=i)

u = _create_universe_single_state(u_top._topology, ds, i)
prot = u.select_atoms("protein and name CA")
ligand = u.select_atoms("resname UNK")

apply_transformations(u, prot, ligand)

if prot:
prot_rmsd = RMSDAnalysis(prot).run(step=skip)
output["protein_RMSD"].append(prot_rmsd.results.rmsd)
Expand Down
37 changes: 18 additions & 19 deletions src/openfe_analysis/tests/test_rmsd.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,10 @@
from MDAnalysis.transformations import unwrap
from numpy.testing import assert_allclose

from openfe_analysis.reader import FEReader
from openfe_analysis.rmsd import gather_rms_data, make_Universe
from openfe_analysis.reader import _create_universe_single_state
from openfe_analysis.rmsd import gather_rms_data
from openfe_analysis.transformations import Aligner
from openfe_analysis.utils import apply_transformations


@pytest.fixture
Expand All @@ -22,11 +23,10 @@ def mda_universe(hybrid_system_skipped_pdb, simulation_skipped_nc):
Guarantees:
- NetCDF file is opened exactly once
"""
u = make_Universe(
hybrid_system_skipped_pdb,
simulation_skipped_nc,
state=0,
)
u = _create_universe_single_state(hybrid_system_skipped_pdb, simulation_skipped_nc, 0)
protein = u.select_atoms("protein and name CA")
ligand = u.select_atoms("resname UNK")
apply_transformations.apply_transformations(u, protein, ligand)
yield u
u.trajectory.close()

Expand Down Expand Up @@ -108,13 +108,8 @@ def test_gather_rms_data_regression_skippednc(simulation_skipped_nc, hybrid_syst


def test_multichain_rmsd_shifting(simulation_skipped_nc, hybrid_system_skipped_pdb):
u = mda.Universe(
hybrid_system_skipped_pdb,
simulation_skipped_nc,
index=0,
format=FEReader,
)
prot = u.select_atoms("protein")
u = _create_universe_single_state(hybrid_system_skipped_pdb, simulation_skipped_nc, 0)
prot = u.select_atoms("protein and name CA")
# Do other transformations, but no shifting
unwrap_tr = unwrap(prot)
for frag in prot.fragments:
Expand All @@ -132,8 +127,11 @@ def test_multichain_rmsd_shifting(simulation_skipped_nc, hybrid_system_skipped_p
u.trajectory.close()

# RMSD with shifting
u2 = make_Universe(hybrid_system_skipped_pdb, simulation_skipped_nc, state=0)
prot2 = u2.select_atoms("protein")
u2 = _create_universe_single_state(hybrid_system_skipped_pdb, simulation_skipped_nc, 0)
prot2 = u2.select_atoms("protein and name CA")

apply_transformations.apply_transformations(u2, protein=prot2)

R2 = rms.RMSD(prot2)
R2.run()
rmsd_shift = R2.rmsd[:, 2]
Expand All @@ -142,9 +140,10 @@ def test_multichain_rmsd_shifting(simulation_skipped_nc, hybrid_system_skipped_p


def test_chain_radius_of_gyration_stable(simulation_skipped_nc, hybrid_system_skipped_pdb):
u = make_Universe(hybrid_system_skipped_pdb, simulation_skipped_nc, state=0)
u = _create_universe_single_state(hybrid_system_skipped_pdb, simulation_skipped_nc, 0)
protein = u.select_atoms("protein and name CA")
apply_transformations.apply_transformations(u, protein)

protein = u.select_atoms("protein")
chain = protein.segments[0].atoms

rgs = []
Expand All @@ -158,7 +157,7 @@ def test_chain_radius_of_gyration_stable(simulation_skipped_nc, hybrid_system_sk

def test_rmsd_reference_is_first_frame(mda_universe):
u = mda_universe
prot = u.select_atoms("protein")
prot = u.select_atoms("protein and name CA")

_ = next(iter(u.trajectory)) # SAFE
ref = prot.positions.copy()
Expand Down
89 changes: 89 additions & 0 deletions src/openfe_analysis/utils/apply_transformations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
from typing import Optional

import MDAnalysis as mda
from MDAnalysis.transformations import unwrap

from ..transformations import Aligner, ClosestImageShift, NoJump


def apply_transformations(
Comment thread
hannahbaumann marked this conversation as resolved.
u: mda.Universe,
protein: Optional[mda.AtomGroup] = None,
ligand: Optional[mda.AtomGroup] = None,
):
"""
Apply a collection of transformations to a Universe.

Parameters
----------
u: Universe
The Universe the transformations are applied to
protein: Optional[AtomGroup]
The AtomGroup of the protein
ligand: Optional[AtomGroup]
The AtomGroup of the ligand

Notes
-----
Depending on whether a protein is present, a sequence of trajectory
transformations is applied:

If a protein is present:

- Unwraps protein and ligand atom to be made whole
- Shifts protein chains and the ligand to the image closest to the first
protein chain (:class:`ClosestImageShift`)
- Aligns the entire system to minimise the protein RMSD (:class:`Aligner`)

If only a ligand is present:

- Prevents the ligand from jumping between periodic images
- Aligns the ligand to minimize its RMSD
"""
has_protein = protein is not None and protein.n_atoms > 0
has_ligand = ligand is not None and ligand.n_atoms > 0

if has_protein:
lig = ligand if has_ligand else None
transforms = _apply_transformations_complex(protein, lig)
elif has_ligand:
transforms = _apply_transformations_ligand_only(ligand)
else:
return

u.trajectory.add_transformations(*transforms)


def _apply_transformations_complex(protein, ligand=None):
"""
Build transformations for systems containing a protein
and optionally a ligand.
"""
transforms = []
# 1. Make molecules whole (protein + optional ligand)
group = protein if ligand is None else protein + ligand
transforms.append(unwrap(group))

# 2. Closest image shift for protein chains + ligand (if present)
chains = [seg.atoms for seg in protein.segments]
shift_targets = chains[1:]
if ligand is not None:
shift_targets.append(ligand)
transforms.append(ClosestImageShift(chains[0], shift_targets))

# 3. Align on protein backbone/atoms
transforms.append(Aligner(protein))

return transforms


def _apply_transformations_ligand_only(ligand):
"""
Build transformations for ligand-only systems.
- make the ligand not jump periodic images between frames
- align the ligand to minimize its RMSD
"""
return [
NoJump(ligand),
Aligner(ligand),
]