reference #

Utilities for automatically selecting 'reference' atoms for alignment.

Functions:

queries_to_idxs –

Find the indices of those atoms matched by a set of atom queries.
select_ligand_idxs –

Returns the indices of the reference atoms that may be used to align ligands.
select_receptor_idxs –

Select possible protein atoms for Boresch-style restraints using the method
check_receptor_idxs –

Check if the specified receptor atoms meet the criteria for use in Boresch-style
select_protein_cavity_atoms –

Select the alpha carbon atoms that define the binding cavity of the protein based

queries_to_idxs #

queries_to_idxs(
    topology: Topology, queries: Iterable[str]
) -> tuple[int, ...]

Find the indices of those atoms matched by a set of atom queries.

Parameters:

topology (Topology) –

The ligand to query.
queries (Iterable[str]) –

The atom selection queries.

Returns:

tuple[int, ...] –

The indices of the matched atoms.

Source code in femto/fe/reference.py

def queries_to_idxs(
    topology: mdtop.Topology, queries: typing.Iterable[str]
) -> tuple[int, ...]:
    """Find the indices of those atoms matched by a set of atom queries.

    Args:
        topology: The ligand to query.
        queries: The atom selection queries.

    Returns:
        The indices of the matched atoms.
    """
    ref_idxs = []

    for query in queries:
        mask_idxs = topology.select(query)

        if len(mask_idxs) != 1:
            raise ValueError(
                f"{query} matched {len(mask_idxs)} atoms. exactly 1 atom was expected."
            )

        ref_idxs.extend(mask_idxs)

    return tuple(ref_idxs)

select_ligand_idxs #

select_ligand_idxs(
    ligand_1: Topology,
    ligand_2: Topology | None,
    method: LigandReferenceMethod,
    ligand_1_queries: tuple[str, str, str] | None = None,
    ligand_2_queries: tuple[str, str, str] | None = None,
) -> tuple[
    tuple[int, int, int], tuple[int, int, int] | None
]

Returns the indices of the reference atoms that may be used to align ligands.

Notes

Some methods, e.g. chen, select reference atoms based on the two ligands meaning they can only be used in RBFE calculations, whereas others, e.g. baumann, select reference atoms based on a single ligand.

Parameters:

ligand_1 (Topology) –

The first ligand.
ligand_2 (Topology | None) –

The second ligand.
method (LigandReferenceMethod) –

The method to use to select the reference atoms if none are specified.
ligand_1_queries (tuple[str, str, str] | None, default: None ) –

Three (optional) selection queries to use to manually select atoms from the first ligand.
ligand_2_queries (tuple[str, str, str] | None, default: None ) –

Three (optional) selection queries to use to manually select atoms from the second ligand

Returns:

tuple[tuple[int, int, int], tuple[int, int, int] | None] –

The indices of the first and second ligand respectively. No offset is applied to the second ligand indices so a query of "idx. 1" would yield 0 rather than n_ligand_1_atoms.

Source code in femto/fe/reference.py

def select_ligand_idxs(
    ligand_1: mdtop.Topology,
    ligand_2: mdtop.Topology | None,
    method: femto.fe.config.LigandReferenceMethod,
    ligand_1_queries: tuple[str, str, str] | None = None,
    ligand_2_queries: tuple[str, str, str] | None = None,
) -> tuple[tuple[int, int, int], tuple[int, int, int] | None]:
    """Returns the indices of the reference atoms that may be used to align ligands.

    Notes:
        * Some methods, e.g. ``chen``, select reference atoms based on the two ligands
          meaning they can only be used in RBFE calculations, whereas others, e.g.
          ``baumann``, select reference atoms based on a single ligand.

    Args:
        ligand_1: The first ligand.
        ligand_2: The second ligand.
        method: The method to use to select the reference atoms if none are specified.
        ligand_1_queries: Three (optional) selection queries to use to manually
            select atoms from the first ligand.
        ligand_2_queries: Three (optional) selection queries to use to manually
            select atoms from the second ligand

    Returns:
        The indices of the first and second ligand respectively. No offset is applied
        to the second ligand indices so a query of ``"idx. 1"`` would yield ``0``
        rather than ``n_ligand_1_atoms``.
    """
    if ligand_1_queries is None or (ligand_2 is not None and ligand_2_queries is None):
        _LOGGER.info("selecting ligand reference atoms")

        ligand_queries = _create_ligand_queries(ligand_1, ligand_2, method)

        if ligand_1_queries is None:
            ligand_1_queries = ligand_queries[0]
        if ligand_2_queries is None and ligand_2 is not None:
            ligand_2_queries = ligand_queries[1]

    _LOGGER.info(f"ligand 1 ref queries={ligand_1_queries}")
    _LOGGER.info(f"ligand 2 ref queries={ligand_2_queries}")

    ligand_1_idxs = queries_to_idxs(ligand_1, ligand_1_queries)

    if ligand_2 is not None:
        ligand_2_idxs = queries_to_idxs(ligand_2, ligand_2_queries)
    else:
        ligand_2_idxs = None

    return ligand_1_idxs, ligand_2_idxs

select_receptor_idxs #

select_receptor_idxs(
    topology: Topology | Trajectory,
    ligand_ref_idxs: tuple[int, int, int],
) -> tuple[int, int, int]

Select possible protein atoms for Boresch-style restraints using the method outlined by Baumann et al [1].

References

[1] Baumann, Hannah M., et al. "Broadening the scope of binding free energy calculations using a Separated Topologies approach." (2023).

Parameters:

topology (Topology | Trajectory) –

The topology containing the receptor and ligands.
ligand_ref_idxs (tuple[int, int, int]) –

The indices of the three ligands atoms that will be restrained.

Returns:

tuple[int, int, int] –

The indices of the three atoms to use for the restraint

Source code in femto/fe/reference.py

def select_receptor_idxs(
    topology: mdtop.Topology | mdtraj.Trajectory,
    ligand_ref_idxs: tuple[int, int, int],
) -> tuple[int, int, int]:
    """Select possible protein atoms for Boresch-style restraints using the method
    outlined by Baumann et al [1].

    References:
        [1] Baumann, Hannah M., et al. "Broadening the scope of binding free energy
            calculations using a Separated Topologies approach." (2023).

    Args:
        topology: The topology containing the receptor and ligands.
        ligand_ref_idxs: The indices of the three ligands atoms that will be restrained.

    Returns:
        The indices of the three atoms to use for the restraint
    """

    if isinstance(topology, mdtop.Topology):
        topology = _topology_to_mdtraj(topology)

    receptor_idxs = _filter_receptor_atoms(topology, ligand_ref_idxs[0])

    l1, l2, l3 = ligand_ref_idxs

    valid_r1_idxs = [
        r1 for r1 in receptor_idxs if _is_valid_r1(topology, r1, l1, l2, l3)
    ]

    found_r1, found_r2 = next(
        (
            (r1, r2)
            for r1 in valid_r1_idxs
            for r2 in receptor_idxs
            if _is_valid_r2(topology, r1, r2, l1, l2)
        ),
        None,
    )

    if found_r1 is None or found_r2 is None:
        raise ValueError("could not find valid R1 / R2 atoms")

    valid_r3_idxs = [
        r3 for r3 in receptor_idxs if _is_valid_r3(topology, found_r1, found_r2, r3, l1)
    ]

    if len(valid_r3_idxs) == 0:
        raise ValueError("could not find a valid R3 atom")

    r3_distances_per_frame = []

    for frame in topology.xyz:
        r3_r_distances = scipy.spatial.distance.cdist(
            frame[valid_r3_idxs, :], frame[[found_r1, found_r2], :]
        )
        r3_l_distances = scipy.spatial.distance.cdist(
            frame[valid_r3_idxs, :], frame[[ligand_ref_idxs[0]], :]
        )

        r3_distances_per_frame.append(numpy.hstack([r3_r_distances, r3_l_distances]))

    # chosen to match the SepTop reference implementation at commit 3705ba5
    max_distance = 0.8 * (topology.unitcell_lengths.mean(axis=0).min(axis=-1) / 2)

    r3_distances_avg = numpy.stack(r3_distances_per_frame).mean(axis=0)

    max_distance_mask = r3_distances_avg.max(axis=-1) < max_distance
    r3_distances_avg = r3_distances_avg[max_distance_mask]

    valid_r3_idxs = numpy.array(valid_r3_idxs)[max_distance_mask].tolist()

    r3_distances_prod = r3_distances_avg[:, 0] * r3_distances_avg[:, 1]
    found_r3 = valid_r3_idxs[r3_distances_prod.argmax()]

    return found_r1, found_r2, found_r3

check_receptor_idxs #

check_receptor_idxs(
    topology: Topology | Trajectory,
    receptor_idxs: tuple[int, int, int],
    ligand_ref_idxs: tuple[int, int, int],
) -> bool

Check if the specified receptor atoms meet the criteria for use in Boresch-style restraints as defined by Baumann et al [1].

References

[1] Baumann, Hannah M., et al. "Broadening the scope of binding free energy calculations using a Separated Topologies approach." (2023).

Parameters:

topology (Topology | Trajectory) –

The system topology.
receptor_idxs (tuple[int, int, int]) –

The indices of the three receptor atoms that will be restrained.
ligand_ref_idxs (tuple[int, int, int]) –

The indices of the three ligand atoms that will be restrained.

Returns:

bool –

True if the atoms meet the criteria, False otherwise.

Source code in femto/fe/reference.py

def check_receptor_idxs(
    topology: mdtop.Topology | mdtraj.Trajectory,
    receptor_idxs: tuple[int, int, int],
    ligand_ref_idxs: tuple[int, int, int],
) -> bool:
    """Check if the specified receptor atoms meet the criteria for use in Boresch-style
    restraints as defined by Baumann et al [1].

    References:
        [1] Baumann, Hannah M., et al. "Broadening the scope of binding free energy
            calculations using a Separated Topologies approach." (2023).

    Args:
        topology: The system topology.
        receptor_idxs: The indices of the three receptor atoms that will be restrained.
        ligand_ref_idxs: The indices of the three ligand atoms that will be restrained.

    Returns:
        True if the atoms meet the criteria, False otherwise.
    """

    if isinstance(topology, mdtop.Topology):
        topology = _topology_to_mdtraj(topology)

    r1, r2, r3 = receptor_idxs
    l1, l2, l3 = ligand_ref_idxs

    is_valid_r1 = _is_valid_r1(topology, r1, l1, l2, l3)
    is_valid_r2 = _is_valid_r2(topology, r1, r2, l1, l2)
    is_valid_r3 = _is_valid_r3(topology, r1, r2, r3, l1)

    r3_distances_per_frame = [
        scipy.spatial.distance.cdist(frame[[r3], :], frame[[r1, r2], :])
        for frame in topology.xyz
    ]
    r3_distance_avg = numpy.stack(r3_distances_per_frame).mean(axis=0)

    max_distance = 0.8 * (topology.unitcell_lengths[-1][0] / 2)
    is_valid_distance = r3_distance_avg.max(axis=-1) < max_distance

    return is_valid_r1 and is_valid_r2 and is_valid_r3 and is_valid_distance

select_protein_cavity_atoms #

select_protein_cavity_atoms(
    protein: Topology,
    ligands: list[Topology],
    cutoff: Quantity,
) -> str

Select the alpha carbon atoms that define the binding cavity of the protein based on their distance to ligand atoms.

Parameters:

protein (Topology) –

The protein.
ligands (list[Topology]) –

The ligands to consider.
cutoff (Quantity) –

Residues further than this distance from a ligand will be excluded.

Returns:

str –

The AMBER style query that will select the reference atoms of the protein.

Source code in femto/fe/reference.py

def select_protein_cavity_atoms(
    protein: mdtop.Topology,
    ligands: list[mdtop.Topology],
    cutoff: openmm.unit.Quantity,
) -> str:
    """Select the alpha carbon atoms that define the binding cavity of the protein based
    on their distance to ligand atoms.

    Args:
        protein: The protein.
        ligands: The ligands to consider.
        cutoff: Residues further than this distance from a ligand will be excluded.

    Returns:
        The AMBER style query that will select the reference atoms of the protein.
    """

    ref_atoms = []

    for residue in protein.residues:
        if (
            sorted(a.atomic_num for a in residue.atoms) == [1, 1, 8]
            # a bit of a hack to check for ions.
            or len(residue.atoms) == 1
        ):
            continue

        c_alpha_idxs = [a.index for a in residue.atoms if a.name == "CA"]

        if len(c_alpha_idxs) < 1:
            continue

        ref_atoms.extend(c_alpha_idxs)

    protein_coords = protein.xyz.value_in_unit(openmm.unit.angstrom)[ref_atoms, :]

    cutoff = cutoff.value_in_unit(openmm.unit.angstrom)

    is_reference = numpy.array([False] * len(ref_atoms))

    for ligand in ligands:
        ligand_coords = ligand.xyz.value_in_unit(openmm.unit.angstrom)

        distances = scipy.spatial.distance_matrix(protein_coords, ligand_coords)
        is_reference = is_reference | (distances < cutoff).any(axis=1)

    n_atoms = is_reference.sum()

    if n_atoms < 1:
        raise RuntimeError("Could not find the protein binding site reference atoms.")

    ref_mask = "index " + "+".join(
        str(i + 1) for i, keep in zip(ref_atoms, is_reference, strict=True) if keep
    )
    return ref_mask