RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
Embedder.h
Go to the documentation of this file.
1//
2// Copyright (C) 2004-2017 Greg Landrum and Rational Discovery LLC
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10
11#include <RDGeneral/export.h>
12#ifndef RD_EMBEDDER_H_GUARD
13#define RD_EMBEDDER_H_GUARD
14
15#include <map>
16#include <utility>
17#include <Geometry/point.h>
18#include <GraphMol/ROMol.h>
19#include <boost/shared_ptr.hpp>
21
22namespace RDKit {
23namespace DGeomHelpers {
24
39
40//! Parameter object for controlling embedding
41/*!
42 numConfs Number of conformations to be generated
43 numThreads Sets the number of threads to use (more than one thread
44 will only be used if the RDKit was built with multithread
45 support). If set to zero, the max supported by the system will
46 be used.
47 maxIterations Max. number of times the embedding will be tried if
48 coordinates are not obtained successfully. The default
49 value is 10x the number of atoms.
50 randomSeed provides a seed for the random number generator (so that
51 the same coordinates can be obtained for a
52 molecule on multiple runs). If -1, the
53 RNG will not be seeded.
54 clearConfs Clear all existing conformations on the molecule
55 useRandomCoords Start the embedding from random coordinates instead of
56 using eigenvalues of the distance matrix.
57 boxSizeMult Determines the size of the box that is used for
58 random coordinates. If this is a positive number, the
59 side length will equal the largest element of the distance
60 matrix times \c boxSizeMult. If this is a negative number,
61 the side length will equal \c -boxSizeMult (i.e. independent
62 of the elements of the distance matrix).
63 randNegEig Picks coordinates at random when an embedding process produces
64 negative eigenvalues
65 numZeroFail Fail embedding if we find this many or more zero eigenvalues
66 (within a tolerance)
67 pruneRmsThresh Retain only the conformations out of 'numConfs' after
68 embedding that are at least this far apart from each other.
69 RMSD is computed on the heavy atoms.
70 Pruning is greedy; i.e. the first embedded conformation is
71 retained and from then on only those that are at least
72 \c pruneRmsThresh away from already
73 retained conformations are kept. The pruning is done
74 after embedding and bounds violation minimization.
75 No pruning by default.
76 coordMap a map of int to Point3D, between atom IDs and
77 their locations. If this container is provided, the
78 coordinates are used to set distance constraints on the
79 embedding. The resulting conformer(s) should have distances
80 between the specified atoms that reproduce those between the
81 points in \c coordMap. Because the embedding produces a
82 molecule in an arbitrary reference frame, an alignment step
83 is required to actually reproduce the provided coordinates.
84 optimizerForceTol set the tolerance on forces in the DGeom optimizer
85 (this shouldn't normally be altered in client code).
86 ignoreSmoothingFailures try to embed the molecule even if triangle bounds
87 smoothing fails
88 enforceChirality enforce the correct chirality if chiral centers are present
89 useExpTorsionAnglePrefs impose experimental torsion-angle preferences
90 useBasicKnowledge impose "basic knowledge" terms such as flat
91 aromatic rings, ketones, etc.
92 ETversion version of the experimental torsion-angle preferences
93 verbose print output of experimental torsion-angle preferences
94 basinThresh set the basin threshold for the DGeom force field,
95 (this shouldn't normally be altered in client code).
96 onlyHeavyAtomsForRMS only use the heavy atoms when doing RMS filtering
97 boundsMat custom bound matrix to specify upper and lower bounds of atom
98 pairs
99 embedFragmentsSeparately embed each fragment of molecule in turn
100 useSmallRingTorsions optional torsions to improve small ring conformer
101 sampling
102 useMacrocycleTorsions optional torsions to improve macrocycle conformer
103 sampling
104 useMacrocycle14config whether the 1-4 distance bound heuristics for
105 macrocycles are used
106 CPCI custom coulombic interactions between atom pairs
107 callback pointer to a function (returning void) for reporting progress,
108 will be called with the current iteration number.
109 forceTransAmides constrain amide bonds to be trans.
110 useSymmetryForPruning use molecule symmetry when doing the RMSD pruning.
111 NOTE that for reasons of computational efficiency,
112 setting this will also set onlyHeavyAtomsForRMS to
113 true.
114 trackFailures keep track of which checks during the embedding process fail
115 failures if trackFailures is true, this is used to track the number
116 of times each embedding check fails
117 enableSequentialRandomSeeds handle the random number seeds so that
118 conformer generation can be restarted
119*/
121 unsigned int maxIterations{0};
122 int numThreads{1};
123 int randomSeed{-1};
124 bool clearConfs{true};
125 bool useRandomCoords{false};
126 double boxSizeMult{2.0};
127 bool randNegEig{true};
128 unsigned int numZeroFail{1};
129 const std::map<int, RDGeom::Point3D> *coordMap{nullptr};
130 double optimizerForceTol{1e-3};
131 bool ignoreSmoothingFailures{false};
132 bool enforceChirality{true};
133 bool useExpTorsionAnglePrefs{false};
134 bool useBasicKnowledge{false};
135 bool verbose{false};
136 double basinThresh{5.0};
137 double pruneRmsThresh{-1.0};
138 bool onlyHeavyAtomsForRMS{true};
139 unsigned int ETversion{1};
140 boost::shared_ptr<const DistGeom::BoundsMatrix> boundsMat;
141 bool embedFragmentsSeparately{true};
142 bool useSmallRingTorsions{false};
143 bool useMacrocycleTorsions{false};
144 bool useMacrocycle14config{false};
145 std::shared_ptr<std::map<std::pair<unsigned int, unsigned int>, double>> CPCI;
146 void (*callback)(unsigned int);
147 bool forceTransAmides{true};
148 bool useSymmetryForPruning{true};
149 double boundsMatForceScaling{1.0};
150 bool trackFailures{false};
151 std::vector<unsigned int> failures;
152 bool enableSequentialRandomSeeds{false};
153 bool symmetrizeConjugatedTerminalGroupsForPruning{true};
154
155 EmbedParameters() : boundsMat(nullptr), CPCI(nullptr), callback(nullptr) {}
157 unsigned int maxIterations, int numThreads, int randomSeed,
158 bool clearConfs, bool useRandomCoords, double boxSizeMult,
159 bool randNegEig, unsigned int numZeroFail,
160 const std::map<int, RDGeom::Point3D> *coordMap, double optimizerForceTol,
161 bool ignoreSmoothingFailures, bool enforceChirality,
162 bool useExpTorsionAnglePrefs, bool useBasicKnowledge, bool verbose,
163 double basinThresh, double pruneRmsThresh, bool onlyHeavyAtomsForRMS,
164 unsigned int ETversion = 2,
165 const DistGeom::BoundsMatrix *boundsMat = nullptr,
166 bool embedFragmentsSeparately = true, bool useSmallRingTorsions = false,
167 bool useMacrocycleTorsions = false, bool useMacrocycle14config = false,
168 std::shared_ptr<std::map<std::pair<unsigned int, unsigned int>, double>>
169 CPCI = nullptr,
170 void (*callback)(unsigned int) = nullptr)
171 : maxIterations(maxIterations),
172 numThreads(numThreads),
173 randomSeed(randomSeed),
174 clearConfs(clearConfs),
175 useRandomCoords(useRandomCoords),
176 boxSizeMult(boxSizeMult),
177 randNegEig(randNegEig),
178 numZeroFail(numZeroFail),
179 coordMap(coordMap),
180 optimizerForceTol(optimizerForceTol),
181 ignoreSmoothingFailures(ignoreSmoothingFailures),
182 enforceChirality(enforceChirality),
183 useExpTorsionAnglePrefs(useExpTorsionAnglePrefs),
184 useBasicKnowledge(useBasicKnowledge),
185 verbose(verbose),
186 basinThresh(basinThresh),
187 pruneRmsThresh(pruneRmsThresh),
188 onlyHeavyAtomsForRMS(onlyHeavyAtomsForRMS),
189 ETversion(ETversion),
190 boundsMat(boundsMat),
191 embedFragmentsSeparately(embedFragmentsSeparately),
192 useSmallRingTorsions(useSmallRingTorsions),
193 useMacrocycleTorsions(useMacrocycleTorsions),
194 useMacrocycle14config(useMacrocycle14config),
195 CPCI(std::move(CPCI)),
196 callback(callback) {}
197};
198
199//! update parameters from a JSON string
201 EmbedParameters &params, const std::string &json);
202
203//! Embed multiple conformations for a molecule
205 unsigned int numConfs,
206 EmbedParameters &params);
207inline INT_VECT EmbedMultipleConfs(ROMol &mol, unsigned int numConfs,
208 EmbedParameters &params) {
210 EmbedMultipleConfs(mol, res, numConfs, params);
211 return res;
212}
213
214//! Compute an embedding (in 3D) for the specified molecule using Distance
215/// Geometry
216inline int EmbedMolecule(ROMol &mol, EmbedParameters &params) {
218 EmbedMultipleConfs(mol, confIds, 1, params);
219
220 int res;
221 if (confIds.size()) {
222 res = confIds[0];
223 } else {
224 res = -1;
225 }
226 return res;
227}
228
229//! Compute an embedding (in 3D) for the specified molecule using Distance
230/// Geometry
231/*!
232 The following operations are performed (in order) here:
233 -# Build a distance bounds matrix based on the topology, including 1-5
234 distances but not VDW scaling
235 -# Triangle smooth this bounds matrix
236 -# If step 2 fails - repeat step 1, this time without 1-5 bounds and with vdW
237 scaling, and repeat step 2
238 -# Pick a distance matrix at random using the bounds matrix
239 -# Compute initial coordinates from the distance matrix
240 -# Repeat steps 4 and 5 until maxIterations is reached or embedding is
241 successful
242 -# Adjust initial coordinates by minimizing a Distance Violation error
243 function
244 **NOTE**: if the molecule has multiple fragments, they will be embedded
245 separately,
246 this means that they will likely occupy the same region of space.
247 \param mol Molecule of interest
248 \param maxIterations Max. number of times the embedding will be tried if
249 coordinates are not obtained successfully. The default
250 value is 10x the number of atoms.
251 \param seed provides a seed for the random number generator (so that
252 the same coordinates can be obtained for a molecule on
253 multiple runs). If negative, the RNG will not be seeded.
254 \param clearConfs Clear all existing conformations on the molecule
255 \param useRandomCoords Start the embedding from random coordinates instead of
256 using eigenvalues of the distance matrix.
257 \param boxSizeMult Determines the size of the box that is used for
258 random coordinates. If this is a positive number, the
259 side length will equal the largest element of the
260 distance matrix times \c boxSizeMult. If this is a
261 negative number, the side length will equal
262 \c -boxSizeMult (i.e. independent of the elements of the
263 distance matrix).
264 \param randNegEig Picks coordinates at random when an embedding process
265 produces negative eigenvalues
266 \param numZeroFail Fail embedding if we find this many or more zero
267 eigenvalues (within a tolerance)
268 \param coordMap a map of int to Point3D, between atom IDs and
269 their locations. If this container is provided, the
270 coordinates are used to set distance constraints on the
271 embedding. The resulting conformer(s) should have distances
272 between the specified atoms that reproduce those between the
273 points in \c coordMap. Because the embedding produces a
274 molecule in an arbitrary reference frame, an alignment step
275 is required to actually reproduce the provided coordinates.
276 \param optimizerForceTol set the tolerance on forces in the distgeom optimizer
277 (this shouldn't normally be altered in client code).
278 \param ignoreSmoothingFailures try to embed the molecule even if triangle
279 bounds smoothing fails
280 \param enforceChirality enforce the correct chirality if chiral centers are
281 present
282 \param useExpTorsionAnglePrefs impose experimental torsion-angle preferences
283 \param useBasicKnowledge impose "basic knowledge" terms such as flat
284 aromatic rings, ketones, etc.
285 \param verbose print output of experimental torsion-angle preferences
286 \param basinThresh set the basin threshold for the DGeom force field,
287 (this shouldn't normally be altered in client code).
288 \param onlyHeavyAtomsForRMS only use the heavy atoms when doing RMS filtering
289 \param ETversion version of torsion preferences to use
290 \param useSmallRingTorsions optional torsions to improve small ring
291 conformer sampling
292 \param useMacrocycleTorsions optional torsions to improve macrocycle
293 conformer sampling
294 \param useMacrocycle14config whether the 1-4 distance bound heuristics for
295 macrocycles are used
296
297 \return ID of the conformer added to the molecule, -1 if the embedding failed
298*/
299inline int EmbedMolecule(  // flat-argument convenience overload; embeds a single conformer
300 ROMol &mol, unsigned int maxIterations = 0, int seed = -1,
301 bool clearConfs = true, bool useRandomCoords = false,
302 double boxSizeMult = 2.0, bool randNegEig = true,
303 unsigned int numZeroFail = 1,
304 const std::map<int, RDGeom::Point3D> *coordMap = nullptr,
305 double optimizerForceTol = 1e-3, bool ignoreSmoothingFailures = false,
306 bool enforceChirality = true, bool useExpTorsionAnglePrefs = false,
307 bool useBasicKnowledge = false, bool verbose = false,
308 double basinThresh = 5.0, bool onlyHeavyAtomsForRMS = false,
309 unsigned int ETversion = 2, bool useSmallRingTorsions = false,
310 bool useMacrocycleTorsions = true, bool useMacrocycle14config = true) {
311 EmbedParameters params(  // pack the flat arguments into an EmbedParameters object
312 maxIterations, 1, seed, clearConfs, useRandomCoords, boxSizeMult,  // numThreads is fixed at 1
313 randNegEig, numZeroFail, coordMap, optimizerForceTol,
314 ignoreSmoothingFailures, enforceChirality, useExpTorsionAnglePrefs,
315 useBasicKnowledge, verbose, basinThresh, -1.0, onlyHeavyAtomsForRMS,  // pruneRmsThresh = -1.0
316 ETversion, nullptr, true, useSmallRingTorsions, useMacrocycleTorsions,  // boundsMat = nullptr, embedFragmentsSeparately = true
317 useMacrocycle14config);  // CPCI and callback are left at the constructor defaults (nullptr)
318 return EmbedMolecule(mol, params);  // delegate to the EmbedParameters overload; it returns -1 when no conformer id comes back
319};  // NOTE(review): the ';' after the function body is a redundant empty declaration
320
321//! Embed multiple conformations for a molecule
322/*!
323 This is kind of equivalent to calling EmbedMolecule multiple times - just that
324 the bounds
325 matrix is computed only once from the topology
326 **NOTE**: if the molecule has multiple fragments, they will be embedded
327 separately,
328 this means that they will likely occupy the same region of space.
329 \param mol Molecule of interest
330 \param res Used to return the resulting conformer ids
331 \param numConfs Number of conformations to be generated
332 \param numThreads Sets the number of threads to use (more than one thread
333 will only be used if the RDKit was built with
334 multithread
335 support). If set to zero, the max supported by the
336 system
337 will be used.
338 \param maxIterations Max. number of times the embedding will be tried if
339 coordinates are not obtained successfully. The default
340 value is 10x the number of atoms.
341 \param seed provides a seed for the random number generator (so that
342 the same coordinates can be obtained for a molecule on
343 multiple runs). If negative, the RNG will not be seeded.
344 \param clearConfs Clear all existing conformations on the molecule
345 \param useRandomCoords Start the embedding from random coordinates instead of
346 using eigenvalues of the distance matrix.
347 \param boxSizeMult Determines the size of the box that is used for
348 random coordinates. If this is a positive number, the
349 side length will equal the largest element of the
350 distance matrix times \c boxSizeMult. If this is a
351 negative number, the side length will equal
352 \c -boxSizeMult (i.e. independent of the elements of the
353 distance matrix).
354 \param randNegEig Picks coordinates at random when an embedding process
355 produces negative eigenvalues
356 \param numZeroFail Fail embedding if we find this many or more zero
357 eigenvalues (within a tolerance)
358 \param pruneRmsThresh Retain only the conformations out of 'numConfs' after
359 embedding that are at least this far apart from each
360 other. RMSD is computed on the heavy atoms.
361 Pruning is greedy; i.e. the first embedded conformation
362 is retained and from then on only those that are at
363 least
364 pruneRmsThresh away from already retained conformations
365 are kept. The pruning is done after embedding and
366 bounds violation minimization. No pruning by default.
367 \param coordMap a map of int to Point3D, between atom IDs and
368 their locations. If this container is provided, the
369 coordinates are used to set distance constraints on the
370 embedding. The resulting conformer(s) should have distances
371 between the specified atoms that reproduce those between the
372 points in \c coordMap. Because the embedding produces a
373 molecule in an arbitrary reference frame, an alignment step
374 is required to actually reproduce the provided coordinates.
375 \param optimizerForceTol set the tolerance on forces in the DGeom optimizer
376 (this shouldn't normally be altered in client code).
377 \param ignoreSmoothingFailures try to embed the molecule even if triangle
378 bounds smoothing fails
379 \param enforceChirality enforce the correct chirality if chiral centers are
380 present
381 \param useExpTorsionAnglePrefs impose experimental torsion-angle preferences
382 \param useBasicKnowledge impose "basic knowledge" terms such as flat
383 aromatic rings, ketones, etc.
384 \param verbose print output of experimental torsion-angle preferences
385 \param basinThresh set the basin threshold for the DGeom force field,
386 (this shouldn't normally be altered in client code).
387 \param onlyHeavyAtomsForRMS only use the heavy atoms when doing RMS filtering
388 \param ETversion version of torsion preferences to use
389 \param useSmallRingTorsions optional torsions to improve small ring
390 conformer sampling
391 \param useMacrocycleTorsions optional torsions to improve macrocycle
392 conformer sampling
393 \param useMacrocycle14config whether the 1-4 distance bound heuristics for
394 macrocycles are used
395
396*/
398 ROMol &mol, INT_VECT &res, unsigned int numConfs = 10, int numThreads = 1,
399 unsigned int maxIterations = 30, int seed = -1, bool clearConfs = true,
400 bool useRandomCoords = false, double boxSizeMult = 2.0,
401 bool randNegEig = true, unsigned int numZeroFail = 1,
402 double pruneRmsThresh = -1.0,
403 const std::map<int, RDGeom::Point3D> *coordMap = nullptr,
404 double optimizerForceTol = 1e-3, bool ignoreSmoothingFailures = false,
405 bool enforceChirality = true, bool useExpTorsionAnglePrefs = false,
406 bool useBasicKnowledge = false, bool verbose = false,
407 double basinThresh = 5.0, bool onlyHeavyAtomsForRMS = false,
408 unsigned int ETversion = 2, bool useSmallRingTorsions = false,
409 bool useMacrocycleTorsions = true, bool useMacrocycle14config = true) {
410 EmbedParameters params(
411 maxIterations, numThreads, seed, clearConfs, useRandomCoords, boxSizeMult,
412 randNegEig, numZeroFail, coordMap, optimizerForceTol,
413 ignoreSmoothingFailures, enforceChirality, useExpTorsionAnglePrefs,
414 useBasicKnowledge, verbose, basinThresh, pruneRmsThresh,
415 onlyHeavyAtomsForRMS, ETversion, nullptr, true, useSmallRingTorsions,
416 useMacrocycleTorsions, useMacrocycle14config);
417 EmbedMultipleConfs(mol, res, numConfs, params);
418};
419//! \overload
421 ROMol &mol, unsigned int numConfs = 10, unsigned int maxIterations = 30,
422 int seed = -1, bool clearConfs = true, bool useRandomCoords = false,
423 double boxSizeMult = 2.0, bool randNegEig = true,
424 unsigned int numZeroFail = 1, double pruneRmsThresh = -1.0,
425 const std::map<int, RDGeom::Point3D> *coordMap = nullptr,
426 double optimizerForceTol = 1e-3, bool ignoreSmoothingFailures = false,
427 bool enforceChirality = true, bool useExpTorsionAnglePrefs = false,
428 bool useBasicKnowledge = false, bool verbose = false,
429 double basinThresh = 5.0, bool onlyHeavyAtomsForRMS = false,
430 unsigned int ETversion = 2, bool useSmallRingTorsions = false,
431 bool useMacrocycleTorsions = false, bool useMacrocycle14config = false) {
432 EmbedParameters params(
433 maxIterations, 1, seed, clearConfs, useRandomCoords, boxSizeMult,
434 randNegEig, numZeroFail, coordMap, optimizerForceTol,
435 ignoreSmoothingFailures, enforceChirality, useExpTorsionAnglePrefs,
436 useBasicKnowledge, verbose, basinThresh, pruneRmsThresh,
437 onlyHeavyAtomsForRMS, ETversion, nullptr, true, useSmallRingTorsions,
438 useMacrocycleTorsions, useMacrocycle14config);
440 EmbedMultipleConfs(mol, res, numConfs, params);
441 return res;
442};
443
444//! Parameters corresponding to Sereina Riniker's KDG approach
445RDKIT_DISTGEOMHELPERS_EXPORT extern const EmbedParameters KDG;
446//! Parameters corresponding to Sereina Riniker's ETDG approach
447RDKIT_DISTGEOMHELPERS_EXPORT extern const EmbedParameters ETDG;
448//! Parameters corresponding to Sereina Riniker's ETKDG approach
449RDKIT_DISTGEOMHELPERS_EXPORT extern const EmbedParameters ETKDG;
450//! Parameters corresponding to Sereina Riniker's ETKDG approach - version 2
451RDKIT_DISTGEOMHELPERS_EXPORT extern const EmbedParameters ETKDGv2;
452//! Parameters corresponding to the improved ETKDG by Wang, Witek, Landrum and Riniker
453//! (10.1021/acs.jcim.0c00025) - the macrocycle part
454RDKIT_DISTGEOMHELPERS_EXPORT extern const EmbedParameters ETKDGv3;
455//! Parameters corresponding to the improved ETKDG by Wang, Witek, Landrum and Riniker
456//! (10.1021/acs.jcim.0c00025) - the small ring part
457RDKIT_DISTGEOMHELPERS_EXPORT extern const EmbedParameters srETKDGv3;
458} // namespace DGeomHelpers
459} // namespace RDKit
460
461#endif
Defines the primary molecule class ROMol as well as associated typedefs.
Class to store the distance bound.
#define RDKIT_DISTGEOMHELPERS_EXPORT
Definition export.h:121
RDKIT_DISTGEOMHELPERS_EXPORT const EmbedParameters ETKDGv2
Parameters corresponding to Sereina Riniker's ETKDG approach - version 2.
RDKIT_DISTGEOMHELPERS_EXPORT const EmbedParameters ETDG
Parameters corresponding to Sereina Riniker's ETDG approach.
RDKIT_DISTGEOMHELPERS_EXPORT const EmbedParameters ETKDGv3
RDKIT_DISTGEOMHELPERS_EXPORT void updateEmbedParametersFromJSON(EmbedParameters &params, const std::string &json)
update parameters from a JSON string
RDKIT_DISTGEOMHELPERS_EXPORT const EmbedParameters ETKDG
Parameters corresponding to Sereina Riniker's ETKDG approach.
RDKIT_DISTGEOMHELPERS_EXPORT void EmbedMultipleConfs(ROMol &mol, INT_VECT &res, unsigned int numConfs, EmbedParameters &params)
Embed multiple conformations for a molecule.
int EmbedMolecule(ROMol &mol, EmbedParameters &params)
Definition Embedder.h:216
RDKIT_DISTGEOMHELPERS_EXPORT const EmbedParameters srETKDGv3
RDKIT_DISTGEOMHELPERS_EXPORT const EmbedParameters KDG
Parameters corresponding to Sereina Riniker's KDG approach.
Std stuff.
std::vector< int > INT_VECT
Definition types.h:289
bool rdvalue_is(const RDValue_cast_t)
Parameter object for controlling embedding.
Definition Embedder.h:120
EmbedParameters(unsigned int maxIterations, int numThreads, int randomSeed, bool clearConfs, bool useRandomCoords, double boxSizeMult, bool randNegEig, unsigned int numZeroFail, const std::map< int, RDGeom::Point3D > *coordMap, double optimizerForceTol, bool ignoreSmoothingFailures, bool enforceChirality, bool useExpTorsionAnglePrefs, bool useBasicKnowledge, bool verbose, double basinThresh, double pruneRmsThresh, bool onlyHeavyAtomsForRMS, unsigned int ETversion=2, const DistGeom::BoundsMatrix *boundsMat=nullptr, bool embedFragmentsSeparately=true, bool useSmallRingTorsions=false, bool useMacrocycleTorsions=false, bool useMacrocycle14config=false, std::shared_ptr< std::map< std::pair< unsigned int, unsigned int >, double > > CPCI=nullptr, void(*callback)(unsigned int)=nullptr)
Definition Embedder.h:156
std::vector< unsigned int > failures
Definition Embedder.h:151
boost::shared_ptr< const DistGeom::BoundsMatrix > boundsMat
Definition Embedder.h:140
std::shared_ptr< std::map< std::pair< unsigned int, unsigned int >, double > > CPCI
Definition Embedder.h:145