RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
MolStandardize.h
Go to the documentation of this file.
1//
2// Copyright (C) 2018-2023 Susan H. Leung and other RDKit contributors
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10/*! \file MolStandardize.h
11
12 \brief Defines the CleanupParameters and some convenience functions.
13
14*/
15#include <RDGeneral/export.h>
16#ifndef RD_MOLSTANDARDIZE_H
17#define RD_MOLSTANDARDIZE_H
18
19#include <string>
20#include <GraphMol/RDKitBase.h>
22
23namespace RDKit {
24class RWMol;
25class ROMol;
26
27namespace MolStandardize {
28
29//! The CleanupParameters structure defines the default parameters for the
30/// cleanup process and also allows the user to customize the process by
31/// changing the parameters.
32/*!
33
34 <b>Notes:</b>
35 - To customize the parameters, the structure must be initialized first.
36 (Another on the TODO list)
37 - For this project, not all the parameters have been revealed.
38 (TODO)
39
40*/
42 // TODO reveal all parameters
43 private:
44 const char *rdbase_cstr = std::getenv("RDBASE");
45
46 public:
47 std::string rdbase = rdbase_cstr != nullptr ? rdbase_cstr : "";
48 std::string normalizations;
49 std::string acidbaseFile;
50 std::string fragmentFile;
51 std::string tautomerTransforms;
52 int maxRestarts{200}; //!< The maximum number of times to attempt to apply
53 //!< the series of normalizations (default 200).
54 bool preferOrganic{false}; //!< Whether to prioritize organic fragments when
55 //!< choosing fragment parent (default False).
56 bool doCanonical{true}; //!< Whether to apply normalizations in a
57 //!< canonical order
58 int maxTautomers{1000}; //!< The maximum number of tautomers to enumerate
59 //!< (default 1000).
60 int maxTransforms{1000}; //!< The maximum number of tautomer
61 //!< transformations to apply (default 1000).
62 bool tautomerRemoveSp3Stereo{
63 true}; //!< Whether to remove stereochemistry from sp3 centers involved
64 //!< in tautomerism (defaults to true)
65 bool tautomerRemoveBondStereo{
66 true}; //!< Whether to remove stereochemistry from double bonds involved
67 //!< in tautomerism (defaults to true)
68 bool tautomerRemoveIsotopicHs{
69 true}; //!< Whether to remove isotopic Hs from centers involved in
70 //!< tautomerism (defaults to true)
71 bool tautomerReassignStereo{
72 true}; //!< Whether enumerate() should call assignStereochemistry on all
73 //!< generated tautomers (defaults to true)
74 bool largestFragmentChooserUseAtomCount{
75 true}; //!< Whether LargestFragmentChooser should use atom count as main
76 //!< criterion before MW (defaults to true)
77 bool largestFragmentChooserCountHeavyAtomsOnly{
78 false}; //!< Whether LargestFragmentChooser should only count heavy atoms
79 //!< (defaults to false)
80 std::vector<std::pair<std::string, std::string>> normalizationData;
81 std::vector<std::pair<std::string, std::string>> fragmentData;
82 std::vector<std::tuple<std::string, std::string, std::string>> acidbaseData;
83 std::vector<std::tuple<std::string, std::string, std::string, std::string>>
86};
87
88RDKIT_MOLSTANDARDIZE_EXPORT extern const CleanupParameters
90
92 CleanupParameters &params, const std::string &json);
93
94//! The cleanup function is equivalent to the
95/// molvs.Standardizer().standardize(mol) function. It calls the same steps,
96/// namely: RemoveHs, RDKit SanitizeMol, MetalDisconnector, Normalizer,
97/// Reionizer, RDKit AssignStereochemistry.
99 const RWMol *mol,
101//! \overload
102inline RWMol *cleanup(const RWMol &mol, const CleanupParameters &params =
104 return cleanup(&mol, params);
105};
106//! Works the same as cleanup(mol)
108 RWMol &mol, const CleanupParameters &params = defaultCleanupParameters);
109//! Operates on multiple molecules
111 std::vector<RWMol *> &mols, int numThreads = 1,
113
114//! Works the same as Normalizer().normalize(mol)
116 const RWMol *mol,
118//! Works the same as Normalizer().normalizeInPlace(mol)
120 RWMol &mol, const CleanupParameters &params = defaultCleanupParameters);
121//! Operates on multiple molecules
123 std::vector<RWMol *> &mols, int numThreads = 1,
125
126//! Works the same as Reionizer().reionize(mol)
128 const RWMol *mol,
130//! Works the same as Reionizer().reionizeInPlace(mol)
132 RWMol &mol, const CleanupParameters &params = defaultCleanupParameters);
133//! Operates on multiple molecules
135 std::vector<RWMol *> &mols, int numThreads = 1,
137
138//! Works the same as FragmentRemover().remove(mol)
140 const RWMol *mol,
142//! Works the same as FragmentRemover().removeInPlace(mol)
144 RWMol &mol, const CleanupParameters &params = defaultCleanupParameters);
145//! Operates on multiple molecules
147 std::vector<RWMol *> &mols, int numThreads = 1,
149
150//! Works the same as TautomerEnumerator().canonicalize(mol)
152 const RWMol *mol,
155 RWMol &mol, const CleanupParameters &params = defaultCleanupParameters);
156
157//! Returns the tautomer parent of a given molecule. The tautomer parent is the
158/// standardized canonical tautomer of the molecule
160 const RWMol &mol,
162 bool skipStandardize = false);
165 bool skipStandardize = false);
167 std::vector<RWMol *> &mols, int numThreads = 1,
169 bool skipStandardize = false);
170
171//! Returns the fragment parent of a given molecule. The fragment parent is the
172/// largest organic covalent unit in the molecule.
174 const RWMol &mol,
176 bool skip_standardize = false);
179 bool skip_standardize = false);
181 std::vector<RWMol *> &mols, int numThreads = 1,
183 bool skip_standardize = false);
184
185//! calls removeStereochemistry() on the given molecule
187 const RWMol &mol,
189 bool skip_standardize = false);
192 bool skip_standardize = false);
194 std::vector<RWMol *> &mols, int numThreads = 1,
196 bool skip_standardize = false);
197
198//! removes all isotope specifications from the given molecule
200 const RWMol &mol,
202 bool skip_standardize = false);
205 bool skip_standardize = false);
207 std::vector<RWMol *> &mols, int numThreads = 1,
209 bool skip_standardize = false);
210
211//! Returns the charge parent of a given molecule. The charge parent is the
212//! uncharged version of the fragment parent.
214 const RWMol &mol,
216 bool skip_standardize = false);
219 bool skip_standardize = false);
220//! operates on multiple molecules
222 std::vector<RWMol *> &mols, int numThreads = 1,
224 bool skip_standardize = false);
225
226//! Returns the super parent. The super parent is the fragment, charge,
227//! isotope, stereo, and tautomer parent of the molecule.
229 const RWMol &mol,
231 bool skip_standardize = false);
234 bool skip_standardize = false);
236 std::vector<RWMol *> &mols, int numThreads = 1,
238 bool skip_standardize = false);
239
240//! Convenience function for quickly standardizing a single SMILES string.
241/// Returns a standardized canonical SMILES string given a SMILES string.
242/// This is the equivalent of calling cleanup() on the molecule parsed from it.
244 const std::string &smiles);
245
246//! Do a disconnection of an organometallic complex according to rules
247//! preferred by Syngenta. All bonds to metals are broken, including
248//! covalent bonds to Group I/II metals (so including Grignards, lithium
249//! complexes etc.). The ligands are left in the charge states they came
250//! in with. If there are haptic bonds defined by a dummy atom bonded to
251//! a metal by a bond that has a _MolFileBondEndPts (which will contain the
252//! indices of the atoms involved in the haptic bond) then the dummy atom
253//! is removed also.
254//! Do the disconnection in place.
255//! The options are splitGrignards, splitAromaticC, adjustCharges and
256//! removeHapticDummies. Roll on C++20 and designated initializers!
259 true, true, false, true});
260//! As above, but returns new disconnected molecule.
263 true, true, false, true});
264//! As above, included for API consistency.
267 true, true, false, true}) {
269};
270
271//! TODO
273 const std::string &smiles,
275}; // namespace MolStandardize
276} // namespace RDKit
277#endif
pulls in the core RDKit functionality
RWMol is a molecule class that is intended to be edited.
Definition RWMol.h:32
#define RDKIT_MOLSTANDARDIZE_EXPORT
Definition export.h:345
RDKIT_MOLSTANDARDIZE_EXPORT void fragmentParentInPlace(RWMol &mol, const CleanupParameters &params=defaultCleanupParameters, bool skip_standardize=false)
RDKIT_MOLSTANDARDIZE_EXPORT void tautomerParentInPlace(RWMol &mol, const CleanupParameters &params=defaultCleanupParameters, bool skipStandardize=false)
RDKIT_MOLSTANDARDIZE_EXPORT RWMol * reionize(const RWMol *mol, const CleanupParameters &params=defaultCleanupParameters)
Works the same as Reionizer().reionize(mol)
void disconnectOrganometallicsInPlace(RWMol &mol, RDKit::MolStandardize::MetalDisconnectorOptions mdo={ true, true, false, true})
As above, included for API consistency.
RDKIT_MOLSTANDARDIZE_EXPORT std::vector< std::string > enumerateTautomerSmiles(const std::string &smiles, const CleanupParameters &params=defaultCleanupParameters)
TODO.
RDKIT_MOLSTANDARDIZE_EXPORT RWMol * isotopeParent(const RWMol &mol, const CleanupParameters &params=defaultCleanupParameters, bool skip_standardize=false)
removes all isotopes specifications from the given molecule
RDKIT_MOLSTANDARDIZE_EXPORT RWMol * tautomerParent(const RWMol &mol, const CleanupParameters &params=defaultCleanupParameters, bool skipStandardize=false)
RDKIT_MOLSTANDARDIZE_EXPORT RWMol * fragmentParent(const RWMol &mol, const CleanupParameters &params=defaultCleanupParameters, bool skip_standardize=false)
RDKIT_MOLSTANDARDIZE_EXPORT RWMol * normalize(const RWMol *mol, const CleanupParameters &params=defaultCleanupParameters)
Works the same as Normalizer().normalize(mol)
RDKIT_MOLSTANDARDIZE_EXPORT RWMol * removeFragments(const RWMol *mol, const CleanupParameters &params=defaultCleanupParameters)
Works the same as FragmentRemover().remove(mol)
RDKIT_MOLSTANDARDIZE_EXPORT void superParentInPlace(RWMol &mol, const CleanupParameters &params=defaultCleanupParameters, bool skip_standardize=false)
RDKIT_MOLSTANDARDIZE_EXPORT RWMol * canonicalTautomer(const RWMol *mol, const CleanupParameters &params=defaultCleanupParameters)
Works the same as TautomerEnumerator().canonicalize(mol)
RDKIT_MOLSTANDARDIZE_EXPORT RWMol * stereoParent(const RWMol &mol, const CleanupParameters &params=defaultCleanupParameters, bool skip_standardize=false)
calls removeStereochemistry() on the given molecule
RDKIT_MOLSTANDARDIZE_EXPORT const CleanupParameters defaultCleanupParameters
Definition Fragment.h:25
RDKIT_MOLSTANDARDIZE_EXPORT void stereoParentInPlace(RWMol &mol, const CleanupParameters &params=defaultCleanupParameters, bool skip_standardize=false)
RDKIT_MOLSTANDARDIZE_EXPORT void normalizeInPlace(RWMol &mol, const CleanupParameters &params=defaultCleanupParameters)
Works the same as Normalizer().normalizeInPlace(mol)
RDKIT_MOLSTANDARDIZE_EXPORT void disconnectOrganometallics(RWMol &mol, RDKit::MolStandardize::MetalDisconnectorOptions mdo={ true, true, false, true})
RDKIT_MOLSTANDARDIZE_EXPORT RWMol * chargeParent(const RWMol &mol, const CleanupParameters &params=defaultCleanupParameters, bool skip_standardize=false)
RDKIT_MOLSTANDARDIZE_EXPORT void removeFragmentsInPlace(RWMol &mol, const CleanupParameters &params=defaultCleanupParameters)
Works the same as FragmentRemover().removeInPlace(mol)
RDKIT_MOLSTANDARDIZE_EXPORT void isotopeParentInPlace(RWMol &mol, const CleanupParameters &params=defaultCleanupParameters, bool skip_standardize=false)
RDKIT_MOLSTANDARDIZE_EXPORT std::string standardizeSmiles(const std::string &smiles)
RDKIT_MOLSTANDARDIZE_EXPORT RWMol * cleanup(const RWMol *mol, const CleanupParameters &params=defaultCleanupParameters)
RDKIT_MOLSTANDARDIZE_EXPORT void reionizeInPlace(RWMol &mol, const CleanupParameters &params=defaultCleanupParameters)
Works the same as Reionizer().reionizeInPlace(mol)
RDKIT_MOLSTANDARDIZE_EXPORT void updateCleanupParamsFromJSON(CleanupParameters &params, const std::string &json)
RDKIT_MOLSTANDARDIZE_EXPORT void canonicalTautomerInPlace(RWMol &mol, const CleanupParameters &params=defaultCleanupParameters)
RDKIT_MOLSTANDARDIZE_EXPORT void cleanupInPlace(RWMol &mol, const CleanupParameters &params=defaultCleanupParameters)
Works the same as cleanup(mol)
RDKIT_MOLSTANDARDIZE_EXPORT void chargeParentInPlace(RWMol &mol, const CleanupParameters &params=defaultCleanupParameters, bool skip_standardize=false)
RDKIT_MOLSTANDARDIZE_EXPORT RWMol * superParent(const RWMol &mol, const CleanupParameters &params=defaultCleanupParameters, bool skip_standardize=false)
Std stuff.
bool rdvalue_is(const RDValue_cast_t)
std::vector< std::tuple< std::string, std::string, std::string, std::string > > tautomerTransformData
std::vector< std::tuple< std::string, std::string, std::string > > acidbaseData
std::vector< std::pair< std::string, std::string > > fragmentData
std::vector< std::pair< std::string, std::string > > normalizationData