RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
MolEnumerator.h
Go to the documentation of this file.
1//
2// Copyright (C) 2020-2021 Greg Landrum and T5 Informatics GmbH
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10#ifndef RDKIT_MOLENUMERATOR_H
11#define RDKIT_MOLENUMERATOR_H
12
13#include <RDGeneral/export.h>
14#include <GraphMol/RDKitBase.h>
15#include <GraphMol/MolBundle.h>
16
17#include <vector>
18#include <map>
19#include <string>
20#include <memory>
21#include <limits>
22
23namespace RDKit {
24class ChemicalReaction;
25namespace MolEnumerator {
26
27namespace detail {
28extern const std::string idxPropName;
31} // namespace detail
32
33//! abstract base class for a molecule enumeration operation
35 public:
37 virtual ~MolEnumeratorOp() {}
38 //! returns a vector of the number of possible variations at each
39 //! variability point covered by this operation
40 virtual std::vector<size_t> getVariationCounts() const = 0;
41 //! returns the molecule corresponding to a particular variation
42 /*! which.size() should be equal to the number of variation counts.
43 */
44 virtual std::unique_ptr<ROMol> operator()(
45 const std::vector<size_t> &which) const = 0;
46 //! initializes this operation to work on a particular molecule
47 virtual void initFromMol(const ROMol &mol) = 0;
48 //! polymorphic copy
49 virtual std::unique_ptr<MolEnumeratorOp> copy() const = 0;
50};
51
52//! Molecule enumeration operation corresponding to position variation bonds
53/*! This uses ATTACH and ENDPTS properties on bonds and requires that the bond
54 * has one dummy atom (which will be discarded). The other atom of the bond will
55 * be connected to the atoms listed in the ENDPTS property
56 */
58 public:
60 PositionVariationOp(const std::shared_ptr<ROMol> mol) : dp_mol(mol) {
61 PRECONDITION(mol, "bad molecule");
62 initFromMol();
63 }
64 PositionVariationOp(const ROMol &mol) : dp_mol(new ROMol(mol)) {
65 initFromMol();
66 }
68 : dp_mol(other.dp_mol), d_variationPoints(other.d_variationPoints) {}
70 if (&other == this) {
71 return *this;
72 }
73 dp_mol = other.dp_mol;
74 d_variationPoints = other.d_variationPoints;
75 return *this;
76 }
77 //! \override
78 std::vector<size_t> getVariationCounts() const override;
79
80 //! \override
81 std::unique_ptr<ROMol> operator()(
82 const std::vector<size_t> &which) const override;
83
84 //! \override
85 void initFromMol(const ROMol &mol) override;
86
87 //! \override
88 std::unique_ptr<MolEnumeratorOp> copy() const override {
89 return std::unique_ptr<MolEnumeratorOp>(new PositionVariationOp(*this));
90 }
91
92 private:
93 std::shared_ptr<ROMol> dp_mol{nullptr};
94 std::vector<std::pair<unsigned int, std::vector<unsigned int>>>
95 d_variationPoints{};
96 std::vector<size_t> d_dummiesAtEachPoint{};
97 void initFromMol();
98};
99
100//! Molecule enumeration operation corresponding to LINKNODES
101/*!
102 */
104 public:
106 LinkNodeOp(const std::shared_ptr<ROMol> mol) : dp_mol(mol) {
107 PRECONDITION(mol, "bad molecule");
108 initFromMol();
109 }
110 LinkNodeOp(const ROMol &mol) : dp_mol(new ROMol(mol)) { initFromMol(); }
111 LinkNodeOp(const LinkNodeOp &other)
112 : dp_mol(other.dp_mol),
113 dp_frame(other.dp_frame),
114 d_countAtEachPoint(other.d_countAtEachPoint),
115 d_variations(other.d_variations),
116 d_pointRanges(other.d_pointRanges),
117 d_isotopeMap(other.d_isotopeMap),
118 d_atomMap(other.d_atomMap) {}
120 if (&other == this) {
121 return *this;
122 }
123 dp_mol = other.dp_mol;
124 dp_frame = other.dp_frame;
125 d_countAtEachPoint = other.d_countAtEachPoint;
126 d_variations = other.d_variations;
127 d_pointRanges = other.d_pointRanges;
128 d_isotopeMap = other.d_isotopeMap;
129 d_atomMap = other.d_atomMap;
130 return *this;
131 }
132 //! \override
133 std::vector<size_t> getVariationCounts() const override;
134
135 //! \override
136 std::unique_ptr<ROMol> operator()(
137 const std::vector<size_t> &which) const override;
138
139 //! \override
140 void initFromMol(const ROMol &mol) override;
141
142 //! \override
143 std::unique_ptr<MolEnumeratorOp> copy() const override {
144 return std::unique_ptr<MolEnumeratorOp>(new LinkNodeOp(*this));
145 }
146
147 private:
148 std::shared_ptr<ROMol> dp_mol{nullptr};
149 std::shared_ptr<RWMol> dp_frame{nullptr};
150 std::vector<size_t> d_countAtEachPoint{};
151 std::vector<std::tuple<unsigned, unsigned, unsigned>> d_variations;
152 std::vector<std::pair<unsigned, unsigned>> d_pointRanges;
153 std::map<unsigned, unsigned> d_isotopeMap;
154 std::map<unsigned, Atom *> d_atomMap;
155
156 void initFromMol();
157};
158
159//! Molecule enumeration operation corresponding to SRUs
160/*!
161 This should be considered a work in progress and is somewhat fragile.
162
163 NOTE: The SRU labels are parsed to infer the desired number of repetitions
164 allowed.
165
166 Known limitations:
167 - Overlapping SRUs, i.e. where one monomer is contained within another, are
168 not supported
169
170 */
172 public:
174 RepeatUnitOp(const std::shared_ptr<ROMol> mol) : dp_mol(mol) {
175 PRECONDITION(mol, "bad molecule");
176 initFromMol();
177 };
178 RepeatUnitOp(const ROMol &mol) : dp_mol(new ROMol(mol)) { initFromMol(); };
180 : d_maxNumRounds(other.d_maxNumRounds),
181 dp_mol(other.dp_mol),
182 dp_frame(other.dp_frame),
183 d_repeats(other.d_repeats),
184 d_countAtEachPoint(other.d_countAtEachPoint),
185 d_variations(other.d_variations),
186 d_pointRanges(other.d_pointRanges),
187 d_isotopeMap(other.d_isotopeMap),
188 d_atomMap(other.d_atomMap),
189 d_minRepeatCounts(other.d_minRepeatCounts) {};
191 if (&other == this) {
192 return *this;
193 }
194 dp_mol = other.dp_mol;
195 dp_frame = other.dp_frame;
196 d_repeats = other.d_repeats;
197 d_countAtEachPoint = other.d_countAtEachPoint;
198 d_variations = other.d_variations;
199 d_pointRanges = other.d_pointRanges;
200 d_isotopeMap = other.d_isotopeMap;
201 d_atomMap = other.d_atomMap;
202 d_maxNumRounds = other.d_maxNumRounds;
203 d_minRepeatCounts = other.d_minRepeatCounts;
204 return *this;
205 };
206 //! \override
207 std::vector<size_t> getVariationCounts() const override;
208
209 //! \override
210 std::unique_ptr<ROMol> operator()(
211 const std::vector<size_t> &which) const override;
212
213 //! \override
214 void initFromMol(const ROMol &mol) override;
215
216 //! \override
217 std::unique_ptr<MolEnumeratorOp> copy() const override {
218 return std::unique_ptr<MolEnumeratorOp>(new RepeatUnitOp(*this));
219 }
220
221 static const size_t DEFAULT_REPEAT_COUNT;
222 size_t d_maxNumRounds = std::numeric_limits<size_t>::max();
223
224 private:
225 std::shared_ptr<ROMol> dp_mol{nullptr};
226 std::shared_ptr<RWMol> dp_frame{nullptr};
227 std::vector<std::shared_ptr<RWMol>> d_repeats;
228 std::vector<RWMol> dp_repeatUnits{};
229 std::vector<size_t> d_countAtEachPoint{};
230 std::vector<unsigned> d_sruOrder{};
231 std::vector<std::tuple<unsigned, unsigned, unsigned>> d_variations;
232 std::vector<std::pair<unsigned, unsigned>> d_pointRanges;
233 std::map<unsigned, unsigned> d_isotopeMap;
234 std::map<unsigned, Atom *> d_atomMap;
235 std::vector<size_t> d_minRepeatCounts;
236
237 void initFromMol();
238};
239
240//! Parameters used to control the molecule enumeration
242 bool sanitize = false;
243 size_t maxToEnumerate = 1000;
244 bool doRandom = false; //!< not yet implemented
245 int randomSeed = -1; //!< not yet implemented
246 std::shared_ptr<MolEnumeratorOp> dp_operation;
247};
248
249//! Returns a MolBundle containing the molecules resulting from applying the
250//! operators contained in \c paramsList to \c mol.
251//! The operators are applied in order.
252/*!
253NOTE: the current implementation does not support molecules which include
254both LINKNODE and SRU features.
255
256*/
258enumerate(const ROMol &mol, const std::vector<MolEnumeratorParams> &paramsList);
259
260//! Returns a MolBundle containing the molecules resulting from applying the
261//! enumerable operators contained in \c mol.
262/*!
263\param maxPerOperation: the maximum number of molecules which an individual
264operation is allowed to generate
265
266NOTE: the current implementation does not support molecules which include
267both LINKNODE and SRU features.
268
269*/
271 size_t maxPerOperation = 0);
272
273//! Returns a MolBundle containing the molecules resulting from applying the
274//! operator contained in \c params to \c mol.
275inline MolBundle enumerate(const ROMol &mol,
276 const MolEnumeratorParams &params) {
277 std::vector<MolEnumeratorParams> v = {params};
278 return enumerate(mol, v);
279};
280} // namespace MolEnumerator
281} // namespace RDKit
282
283#endif
#define PRECONDITION(expr, mess)
Definition Invariant.h:109
Defines a class for managing bundles of molecules.
pulls in the core RDKit functionality
MolBundle contains a collection of related ROMols.
Definition MolBundle.h:59
Molecule enumeration operation corresponding to LINKNODES.
LinkNodeOp(const LinkNodeOp &other)
void initFromMol(const ROMol &mol) override
\override
LinkNodeOp(const std::shared_ptr< ROMol > mol)
std::vector< size_t > getVariationCounts() const override
\override
std::unique_ptr< ROMol > operator()(const std::vector< size_t > &which) const override
\override
LinkNodeOp & operator=(const LinkNodeOp &other)
std::unique_ptr< MolEnumeratorOp > copy() const override
\override
abstract base class for a molecule enumeration operation
virtual std::vector< size_t > getVariationCounts() const =0
virtual std::unique_ptr< MolEnumeratorOp > copy() const =0
polymorphic copy
virtual std::unique_ptr< ROMol > operator()(const std::vector< size_t > &which) const =0
returns the molecule corresponding to a particular variation
virtual void initFromMol(const ROMol &mol)=0
initializes this operation to work on a particular molecule
Molecule enumeration operation corresponding to position variation bonds.
PositionVariationOp & operator=(const PositionVariationOp &other)
std::unique_ptr< ROMol > operator()(const std::vector< size_t > &which) const override
\override
std::vector< size_t > getVariationCounts() const override
\override
void initFromMol(const ROMol &mol) override
\override
PositionVariationOp(const PositionVariationOp &other)
PositionVariationOp(const std::shared_ptr< ROMol > mol)
std::unique_ptr< MolEnumeratorOp > copy() const override
\override
Molecule enumeration operation corresponding to SRUs.
static const size_t DEFAULT_REPEAT_COUNT
void initFromMol(const ROMol &mol) override
\override
RepeatUnitOp(const RepeatUnitOp &other)
RepeatUnitOp & operator=(const RepeatUnitOp &other)
std::unique_ptr< ROMol > operator()(const std::vector< size_t > &which) const override
\override
std::vector< size_t > getVariationCounts() const override
\override
RepeatUnitOp(const std::shared_ptr< ROMol > mol)
std::unique_ptr< MolEnumeratorOp > copy() const override
\override
#define RDKIT_MOLENUMERATOR_EXPORT
Definition export.h:313
const std::string idxPropName
void removeOrigIndices(ROMol &mol)
void preserveOrigIndices(ROMol &mol)
RDKIT_MOLENUMERATOR_EXPORT MolBundle enumerate(const ROMol &mol, const std::vector< MolEnumeratorParams > &paramsList)
Std stuff.
bool rdvalue_is(const RDValue_cast_t)
Parameters used to control the molecule enumeration.
std::shared_ptr< MolEnumeratorOp > dp_operation