RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
AlignMolecules.h
Go to the documentation of this file.
1//
2// Copyright (C) 2001-2022 Greg Landrum and other RDKit contributors
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10#include <RDGeneral/export.h>
11#ifndef _RD_ALIGNMOLECULES_H_
12#define _RD_ALIGNMOLECULES_H_
13
15#include <Numerics/Vector.h>
16#include <vector>
17
18namespace RDKit {
19typedef std::vector<std::pair<int, int>> MatchVectType;
20
21class Conformer;
22class ROMol;
23class RWMol;
24namespace MolAlign {
//! Exception class of the MolAlign namespace; carries a textual error message
class RDKIT_MOLALIGN_EXPORT MolAlignException : public std::exception {
 public:
  //! construct with an error message
  /*!
    \param msg   NUL-terminated message text. A null pointer is stored as an
                 empty message: building a std::string from a null
                 `const char *` is undefined behavior, so it is guarded here.
  */
  MolAlignException(const char *msg) : _msg(msg ? msg : "") {}
  //! construct with an error message
  /*!
    \param msg   message text; a copy is stored in the exception
  */
  MolAlignException(const std::string msg) : _msg(msg) {}
  //! get the error message
  /*!
    <b>Returns</b>
     pointer to the stored message; it is owned by this exception object
  */
  const char *what() const noexcept override { return _msg.c_str(); }
  ~MolAlignException() noexcept override = default;

 private:
  std::string _msg;  //!< the message handed back by what()
};
38
39//! Alignment functions
40
41//! Compute the transformation required to align a molecule
42/*!
43 The 3D transformation required to align the specified conformation in the
44 probe molecule to a specified conformation in the reference molecule is
45 computed so that the root mean squared distance between a specified set of
46 atoms is minimized. Both molecules are taken by const reference.
47
48 \param prbMol molecule that is to be aligned
49 \param refMol molecule used as the reference for the alignment
50 \param trans storage for the computed transform
51 \param prbCid ID of the conformation in the probe to be used
52 for the alignment (default -1: first conformation)
53 \param refCid ID of the conformation in the ref molecule to which
54 the alignment is computed (default -1: first conformation)
55 \param atomMap a vector of pairs of atom IDs (probe AtomId, ref AtomId)
56 used to compute the alignments. If this mapping is
57 not specified (nullptr) an attempt is made to generate one by
58 substructure matching
59 \param weights optional weights for each of the atom pairs (default nullptr)
60 \param reflect if true reflect the conformation of the probe molecule
61 \param maxIters maximum number of iterations used in minimizing the RMSD
62
63 <b>Returns</b>
64 RMSD value
65*/
66RDKIT_MOLALIGN_EXPORT double getAlignmentTransform(
67 const ROMol &prbMol, const ROMol &refMol, RDGeom::Transform3D &trans,
68 int prbCid = -1, int refCid = -1, const MatchVectType *atomMap = nullptr,
69 const RDNumeric::DoubleVector *weights = nullptr, bool reflect = false,
70 unsigned int maxIters = 50);
71
72//! Optimally (minimum RMSD) align a molecule to another molecule
73/*!
74 The 3D transformation required to align the specified conformation in the
75 probe molecule to a specified conformation in the reference molecule is
76 computed so that the root mean squared distance between a specified set of
77 atoms is minimized. This transform is then applied to the specified
78 conformation in the probe molecule.
79
80 \param prbMol molecule that is to be aligned (non-const: its conformation is transformed)
81 \param refMol molecule used as the reference for the alignment
82 \param prbCid ID of the conformation in the probe to be used
83 for the alignment (default -1: first conformation)
84 \param refCid ID of the conformation in the ref molecule to which
85 the alignment is computed (default -1: first conformation)
86 \param atomMap a vector of pairs of atom IDs (probe AtomId, ref AtomId)
87 used to compute the alignments. If this mapping is
88 not specified (nullptr) an attempt is made to generate one by
89 substructure matching
90 \param weights optional weights for each of the atom pairs (default nullptr)
91 \param reflect if true reflect the conformation of the probe molecule
92 \param maxIters maximum number of iterations used in minimizing the RMSD
93
94 <b>Returns</b>
95 RMSD value
96*/
97RDKIT_MOLALIGN_EXPORT double alignMol(
98 ROMol &prbMol, const ROMol &refMol, int prbCid = -1, int refCid = -1,
99 const MatchVectType *atomMap = nullptr,
100 const RDNumeric::DoubleVector *weights = nullptr, bool reflect = false,
101 unsigned int maxIters = 50);
102
103//! Compute the optimal RMS, transformation and atom map for aligning
104//! two molecules, taking symmetry into account. Molecule coordinates
105//! are left unaltered.
106/*!
107 This function will attempt to align all permutations of matching atom
108 orders in both molecules; for some molecules this leads to a 'combinatorial
109 explosion', especially if hydrogens are present.
110 Use 'RDKit::MolAlign::getAlignmentTransform' to align molecules
111 without changing the atom order.
112
113 \param prbMol the molecule to be aligned to the reference
114 \param refMol the reference molecule
115 \param bestTrans storage for the best computed transform
116 \param bestMatch storage for the MatchVectType corresponding to
117 the best match found.
118 \param prbCid (optional) probe conformation to use (default -1)
119 \param refCid (optional) reference conformation to use (default -1)
120 \param map (optional) a vector of vectors of pairs of atom IDs
121 (probe AtomId, ref AtomId) used to compute the alignments.
122 If not provided, these will be generated using a
123 substructure search.
124 \param maxMatches (optional) if map is empty, this will be the max number of
125 matches found in a SubstructMatch() (default 1e6).
126 \param symmetrizeConjugatedTerminalGroups (optional) if set, conjugated
127 terminal functional groups (like nitro or carboxylate)
128 will be considered symmetrically (default true)
129 \param weights (optional) weights for each pair of atoms.
130 \param reflect if true reflect the conformation of the probe molecule
131 \param maxIters maximum number of iterations used in minimizing the RMSD
132 \param numThreads (optional) number of threads to use during the calculation
133
134 <b>Returns</b>
135 Best RMSD value found
136*/
137RDKIT_MOLALIGN_EXPORT double getBestAlignmentTransform(
138 const ROMol &prbMol, const ROMol &refMol, RDGeom::Transform3D &bestTrans,
139 MatchVectType &bestMatch, int prbCid = -1, int refCid = -1,
140 const std::vector<MatchVectType> &map = std::vector<MatchVectType>(),
141 int maxMatches = 1e6, bool symmetrizeConjugatedTerminalGroups = true,
142 const RDNumeric::DoubleVector *weights = nullptr, bool reflect = false,
143 unsigned int maxIters = 50, int numThreads = 1);
144
145//! Returns the optimal RMS for aligning two molecules, taking
146/// symmetry into account. As a side-effect, the probe molecule is
147/// left in the aligned state.
148/*!
149 This function will attempt to align all permutations of matching atom
150 orders in both molecules; for some molecules this leads to a 'combinatorial
151 explosion', especially if hydrogens are present.
152 Use 'RDKit::MolAlign::alignMol' to align molecules without changing the
153 atom order.
154
155 \param prbMol the molecule to be aligned to the reference
156 \param refMol the reference molecule
158 \param prbCid (optional) probe conformation to use (default -1)
159 \param refCid (optional) reference conformation to use (default -1)
160 \param map (optional) a vector of vectors of pairs of atom IDs
161 (probe AtomId, ref AtomId) used to compute the alignments.
162 If not provided, these will be generated using a
163 substructure search.
164 \param maxMatches (optional) if map is empty, this will be the max number of
165 matches found in a SubstructMatch() (default 1e6).
166 \param symmetrizeConjugatedTerminalGroups (optional) if set, conjugated
167 terminal functional groups (like nitro or carboxylate)
168 will be considered symmetrically (default true)
169 \param weights (optional) weights for each pair of atoms.
170 \param numThreads (optional) number of threads to use during the calculation
171
172 <b>Returns</b>
173 Best RMSD value found
174*/
175RDKIT_MOLALIGN_EXPORT double getBestRMS(
176 ROMol &prbMol, const ROMol &refMol, int prbCid = -1, int refCid = -1,
177 const std::vector<MatchVectType> &map = std::vector<MatchVectType>(),
178 int maxMatches = 1e6, bool symmetrizeConjugatedTerminalGroups = true,
179 const RDNumeric::DoubleVector *weights = nullptr, int numThreads = 1);
180
181//! Returns the symmetric distance matrix between the conformers of a
182//! molecule.
183/// getBestRMS() is used to calculate the inter-conformer distances
184/*!
185 This function will attempt to align all permutations of matching atom
186 orders in both molecules; for some molecules this leads to a 'combinatorial
187 explosion', especially if hydrogens are present.
188
189 \param mol the molecule to be considered (taken by const reference)
190 \param numThreads (optional) number of threads to use during the calculation
191 \param map (optional) a vector of vectors of pairs of atom IDs
192 (probe AtomId, ref AtomId) used to compute the alignments.
193 If not provided, these will be generated using a
194 substructure search.
195 \param maxMatches (optional) if map is empty, this will be the max number of
196 matches found in a SubstructMatch() (default 1e6).
197 \param symmetrizeConjugatedTerminalGroups (optional) if set, conjugated
198 terminal functional groups (like nitro or carboxylate)
199 will be considered symmetrically (default true)
200 \param weights (optional) weights for each pair of atoms.
201
202 <b>Returns</b>
203 a vector with the RMSD values stored in the order:
204 [(1,0), (2,0), (2,1), (3,0), (3, 2), (3,1), ...]
 NOTE(review): "(3, 2), (3,1)" breaks the pattern of the preceding entries
 and may be a typo for "(3,1), (3,2)" - confirm against the implementation.
205*/
206RDKIT_MOLALIGN_EXPORT std::vector<double> getAllConformerBestRMS(
207 const ROMol &mol, int numThreads = 1,
208 const std::vector<MatchVectType> &map = std::vector<MatchVectType>(),
209 int maxMatches = 1e6, bool symmetrizeConjugatedTerminalGroups = true,
210 const RDNumeric::DoubleVector *weights = nullptr);
211
212//! Returns the RMS between two molecules, taking symmetry into account.
213//! In contrast to getBestRMS, the RMS is computed "in place", i.e.
214//! probe molecules are not aligned to the reference ahead of the
215//! RMS calculation. This is useful, for example, to compute
216//! the RMSD between docking poses and the co-crystallized ligand.
217/*!
218 This function will attempt to match all permutations of matching atom
219 orders in both molecules, for some molecules it will lead to 'combinatorial
220 explosion' especially if hydrogens are present.
221
222 \param prbMol the molecule to be aligned to the reference
223 \param refMol the reference molecule
224 \param prbCid (optional) probe conformation to use
225 \param refCid (optional) reference conformation to use
226 \param map (optional) a vector of vectors of pairs of atom IDs
227 (probe AtomId, ref AtomId) used to compute the alignments.
228 If not provided, these will be generated using a
229 substructure search.
230 \param maxMatches (optional) if map is empty, this will be the max number of
231 matches found in a SubstructMatch().
232 \param symmetrizeConjugatedTerminalGroups (optional) if set, conjugated
233 terminal functional groups (like nitro or carboxylate)
234 will be considered symmetrically \param weights (optional) weights for
235 each pair of atoms.
236
237 <b>Returns</b>
238 Best RMSD value found
239*/
241 ROMol &prbMol, const ROMol &refMol, int prbCid = -1, int refCid = -1,
242 const std::vector<MatchVectType> &map = std::vector<MatchVectType>(),
243 int maxMatches = 1e6, bool symmetrizeConjugatedTerminalGroups = true,
244 const RDNumeric::DoubleVector *weights = nullptr);
245
246//! Returns the RMS between two molecules, taking symmetry into account.
247//! In contrast to getBestRMS, the RMS is computed "in place", i.e.
248//! probe molecules are not aligned to the reference ahead of the
249//! RMS calculation. This is useful, for example, to compute
250//! the RMSD between docking poses and the co-crystallized ligand.
251/*!
252 This function will attempt to match all permutations of matching atom
253 orders in both molecules; for some molecules this leads to a 'combinatorial
254 explosion', especially if hydrogens are present.
255 This overload declares no default arguments: every parameter must be passed.
256 \param prbMol the probe molecule
257 \param refMol the reference molecule
258 \param prbCid probe conformation to use
259 \param refCid reference conformation to use
260 \param map a vector of vectors of pairs of atom IDs
261 (probe AtomId, ref AtomId) used to compute the alignments.
262 If empty, these will be generated using a
263 substructure search.
264 \param maxMatches if map is empty, this will be the max number of
265 matches found in a SubstructMatch().
266 \param weights weights for each pair of atoms.
267
268 <b>Returns</b>
269 Best RMSD value found
270*/
271RDKIT_MOLALIGN_EXPORT double CalcRMS(ROMol &prbMol, const ROMol &refMol,
272 int prbCid, int refCid,
273 const std::vector<MatchVectType> &map,
274 int maxMatches,
275 const RDNumeric::DoubleVector *weights);
276
277//! Align the conformations of a molecule using a common set of atoms. If
278/// the molecule contains queries, then the queries must also match exactly.
279
280/*!
281 \param mol The molecule of interest.
282 \param atomIds vector of atoms to be used to generate the alignment.
283 All atoms will be used if not specified
284 \param confIds vector of conformations to align - defaults to all
285 \param weights (optional) weights for each pair of atoms.
286 \param reflect toggles reflecting (about the origin) the alignment
287 \param maxIters the maximum number of iterations to attempt
288 \param RMSlist if non-null, this will be used to return the RMS values
289 between the reference conformation and the other aligned
290 conformations
291*/
292RDKIT_MOLALIGN_EXPORT void alignMolConformers(
293 ROMol &mol, const std::vector<unsigned int> *atomIds = nullptr,
294 const std::vector<unsigned int> *confIds = nullptr,
295 const RDNumeric::DoubleVector *weights = nullptr, bool reflect = false,
296 unsigned int maxIters = 50, std::vector<double> *RMSlist = nullptr);
297
298namespace details {
299//! Converts terminal atoms in groups like nitro or carboxylate to be symmetry
300/// equivalent
/// \param mol the molecule to process; taken by non-const reference, so it
///            is presumably modified in place - confirm against the
///            implementation
301RDKIT_MOLALIGN_EXPORT void symmetrizeTerminalAtoms(RWMol &mol);
302} // namespace details
303} // namespace MolAlign
304} // namespace RDKit
305#endif
~MolAlignException() noexcept override=default
MolAlignException(const char *msg)
construct with an error message
MolAlignException(const std::string msg)
construct with an error message
const char * what() const noexcept override
get the error message
RWMol is a molecule class that is intended to be edited.
Definition RWMol.h:32
#define RDKIT_MOLALIGN_EXPORT
Definition export.h:273
Std stuff.
std::vector< std::pair< int, int > > MatchVectType
used to return matches from substructure searching. The format is (queryAtomIdx, molAtomIdx)