RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
MorganGenerator.h
Go to the documentation of this file.
1//
2// Copyright (C) 2018-2022 Boran Adas and other RDKit contributors
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10
11#include <RDGeneral/export.h>
12#ifndef RD_MORGANGEN_H_2018_07
13#define RD_MORGANGEN_H_2018_07
14
16#include <cstdint>
17
18namespace RDKit {
19
20namespace MorganFingerprint {
21
22/**
23 \brief Default atom invariants generator for Morgan fingerprint, generates
24 ECFP-type invariants
25
26 */
29 const bool df_includeRingMembership;
30
31 public:
32 /**
33 \brief Construct a new MorganAtomInvGenerator object
34
35 \param includeRingMembership : if set, whether or not the atom is in a ring
36 will be used in the invariant list.
37 */
38 MorganAtomInvGenerator(const bool includeRingMembership = true);
39
40 std::vector<std::uint32_t> *getAtomInvariants(
41 const ROMol &mol) const override;
42
43 std::string infoString() const override;
44 MorganAtomInvGenerator *clone() const override;
45};
46
47/**
48 \brief Alternative atom invariants generator for Morgan fingerprint, generates
49 FCFP-type invariants
50
51 */
54 std::vector<const ROMol *> *dp_patterns;
55
56 public:
57 /**
58 \brief Construct a new MorganFeatureAtomInvGenerator object
59
60 \param patterns : if provided should contain the queries used to assign
61 atom-types. If not provided, feature definitions adapted from reference:
62 Gobbi and Poppinger, Biotech. Bioeng. _61_ 47-54 (1998) will be used for
63 Donor, Acceptor, Aromatic, Halogen, Basic, Acidic.
64 */
65 MorganFeatureAtomInvGenerator(std::vector<const ROMol *> *patterns = nullptr);
66
67 std::vector<std::uint32_t> *getAtomInvariants(
68 const ROMol &mol) const override;
69
70 std::string infoString() const override;
72};
73
74/**
75 \brief Bond invariants generator for Morgan fingerprint
76
77 */
80 const bool df_useBondTypes;
81 const bool df_useChirality;
82
83 public:
84 /**
85 \brief Construct a new MorganBondInvGenerator object
86
87 \param useBondTypes : if set, bond types will be included as a part of the
88 bond invariants
89 \param useChirality : if set, chirality information will be included as a
90 part of the bond invariants
91 */
92 MorganBondInvGenerator(const bool useBondTypes = true,
93 const bool useChirality = false);
94
95 std::vector<std::uint32_t> *getBondInvariants(
96 const ROMol &mol) const override;
97
98 std::string infoString() const override;
99 MorganBondInvGenerator *clone() const override;
100 ~MorganBondInvGenerator() override = default;
101};
102
103/**
104 \brief Class for holding Morgan fingerprint specific arguments
105
106 */
108 public:
109 bool df_onlyNonzeroInvariants = false;
110 unsigned int d_radius = 3;
111 bool df_includeRedundantEnvironments = false;
112
113 std::string infoString() const override;
114
115 /**
116 \brief Construct a new MorganArguments object
117
118 \param radius the number of iterations to grow the fingerprint
119 \param countSimulation if set, use count simulation while generating the
120 fingerprint
121 \param includeChirality if set, chirality information will be added to the
122 generated bit id, independently from bond invariants
123 \param onlyNonzeroInvariants if set, bits will only be set from atoms that
124 have a nonzero invariant
125 \param countBounds boundaries for count simulation, corresponding bit will
126 be set if the count is higher than the number provided for that spot
127 \param fpSize size of the generated fingerprint, does not affect the sparse
128 versions
129 \param includeRedundantEnvironments if set redundant environments will be
130 included in the fingerprint
131 */
132 MorganArguments(unsigned int radius, bool countSimulation = false,
133 bool includeChirality = false,
134 bool onlyNonzeroInvariants = false,
135 std::vector<std::uint32_t> countBounds = {1, 2, 4, 8},
136 std::uint32_t fpSize = 2048,
137 bool includeRedundantEnvironments = false)
138 : FingerprintArguments(countSimulation, countBounds, fpSize, 1,
139 includeChirality),
140 df_onlyNonzeroInvariants(onlyNonzeroInvariants),
141 d_radius(radius),
142 df_includeRedundantEnvironments(includeRedundantEnvironments) {};
143};
144
145/**
146 \brief Class for holding the bit-id created from Morgan fingerprint
147 environments and the additional data necessary for extra outputs
148
149 */
150template <typename OutputType>
152 : public AtomEnvironment<OutputType> {
153 const OutputType d_code;
154 const unsigned int d_atomId;
155 const unsigned int d_layer;
156
157 public:
158 OutputType getBitId(
159 FingerprintArguments *arguments, // unused
160 const std::vector<std::uint32_t> *atomInvariants, // unused
161 const std::vector<std::uint32_t> *bondInvariants, // unused
162 AdditionalOutput *additionalOutput, // unused
163 const bool hashResults = false, // unused
164 const std::uint64_t fpSize = 0 // unused
165 ) const override;
167 size_t bitId) const override;
168
169 /**
170 \brief Construct a new MorganAtomEnv object
171
172 \param code bit id generated from this environment
173 \param atomId atom id of the atom at the center of this environment
174 \param layer radius of this environment
175 */
176 MorganAtomEnv(const std::uint32_t code, const unsigned int atomId,
177 const unsigned int layer);
178};
179
180/**
181 \brief Class that generates atom environments for Morgan fingerprint
182
183 */
184template <typename OutputType>
186 : public AtomEnvironmentGenerator<OutputType> {
187 public:
188 std::vector<AtomEnvironment<OutputType> *> getEnvironments(
189 const ROMol &mol, FingerprintArguments *arguments,
190 const std::vector<std::uint32_t> *fromAtoms,
191 const std::vector<std::uint32_t> *ignoreAtoms, const int confId,
192 const AdditionalOutput *additionalOutput,
193 const std::vector<std::uint32_t> *atomInvariants,
194 const std::vector<std::uint32_t> *bondInvariants,
195 const bool hashResults = false) const override;
196
197 std::string infoString() const override;
198 OutputType getResultSize() const override;
199};
200
201/**
202 \brief Get a fingerprint generator for Morgan fingerprint
203
204 \tparam OutputType determines the size of the bitIds and the result, can be 32
205 or 64 bit unsigned integer
206
207 \param radius the number of iterations to grow the fingerprint
208
209 \param countSimulation if set, use count simulation while generating the
210 fingerprint
211
212 \param includeChirality if set, chirality information will be added to the
213 generated bit id, independently from bond invariants
214
215 \param onlyNonzeroInvariants if set, bits will only be set from atoms that
216 have a nonzero invariant
217
218 \param countBounds boundaries for count simulation, corresponding bit will be
219 set if the count is higher than the number provided for that spot
220
221 \param fpSize size of the generated fingerprint, does not affect the sparse
222 versions
223 \param countSimulation if set, use count simulation while generating the
224 fingerprint
225 \param includeChirality sets includeChirality flag for both MorganArguments
226 and the default bond generator MorganBondInvGenerator
227 \param useBondTypes if set, bond types will be included as a part of the
228 default bond invariants
229 \param onlyNonzeroInvariants if set, bits will only be set from atoms that
230 have a nonzero invariant
231 \param includeRedundantEnvironments if set redundant environments will be
232 included in the fingerprint
233 \param atomInvariantsGenerator custom atom invariants generator to use
234 \param bondInvariantsGenerator custom bond invariants generator to use
235 \param ownsAtomInvGen if set atom invariants generator is destroyed with the
236 fingerprint generator
237 \param ownsBondInvGen if set bond invariants generator is destroyed with the
238 fingerprint generator
239
240 \return FingerprintGenerator<OutputType>* that generates Morgan fingerprints
241
242This generator supports the following \c AdditionalOutput types:
243 - \c atomToBits : which bits each atom is the central atom for
244 - \c atomCounts : how many bits each atom sets
245 - \c bitInfoMap : map from bitId to (atomId, radius) pairs
246
247 */
248template <typename OutputType>
250 unsigned int radius, bool countSimulation, bool includeChirality,
255 std::uint32_t fpSize = 2048,
256 std::vector<std::uint32_t> countBounds = {1, 2, 4, 8},
257 bool ownsAtomInvGen = false, bool ownsBondInvGen = false);
258
259/**
260 \brief Get a fingerprint generator for Morgan fingerprint
261
262 \tparam OutputType determines the size of the bitIds and the result, can be 32
263 or 64 bit unsigned integer
264
265 \param radius the number of iterations to grow the fingerprint
266
267 \param countSimulation if set, use count simulation while generating the
268 fingerprint
269
270 \param includeChirality if set, chirality information will be added to the
271 generated bit id, independently from bond invariants
272
273 \param onlyNonzeroInvariants if set, bits will only be set from atoms that
274 have a nonzero invariant
275
276 \param countBounds boundaries for count simulation, corresponding bit will be
277 set if the count is higher than the number provided for that spot
278
279 \param fpSize size of the generated fingerprint, does not affect the sparse
280 versions
281 \param countSimulation if set, use count simulation while generating the
282 fingerprint
283 \param includeChirality sets includeChirality flag for both MorganArguments
284 and the default bond generator MorganBondInvGenerator
285 \param useBondTypes if set, bond types will be included as a part of the
286 default bond invariants
287 \param onlyNonzeroInvariants if set, bits will only be set from atoms that
288 have a nonzero invariant
289 \param atomInvariantsGenerator custom atom invariants generator to use
290 \param bondInvariantsGenerator custom bond invariants generator to use
291 \param ownsAtomInvGen if set atom invariants generator is destroyed with the
292 fingerprint generator
293 \param ownsBondInvGen if set bond invariants generator is destroyed with the
294 fingerprint generator
295
296 \return FingerprintGenerator<OutputType>* that generates Morgan fingerprints
297
298This generator supports the following \c AdditionalOutput types:
299 - \c atomToBits : which bits each atom is the central atom for
300 - \c atomCounts : how many bits each atom sets
301 - \c bitInfoMap : map from bitId to (atomId, radius) pairs
302
303 */
304template <typename OutputType>
306 unsigned int radius, bool countSimulation = false,
307 bool includeChirality = false, bool useBondTypes = true,
308 bool onlyNonzeroInvariants = false,
311 std::uint32_t fpSize = 2048,
312 std::vector<std::uint32_t> countBounds = {1, 2, 4, 8},
313 bool ownsAtomInvGen = false, bool ownsBondInvGen = false) {
319};
320
321} // namespace MorganFingerprint
322} // namespace RDKit
323
324#endif
abstract base class that generates atom-environments from a molecule
abstract base class that holds atom-environments that will be hashed to generate the fingerprint
abstract base class for atom invariants generators
abstract base class for bond invariants generators
Abstract base class that holds molecule independent arguments that are common amongst all fingerprint...
class that generates same fingerprint style for different output formats
Class for holding Morgan fingerprint specific arguments.
MorganArguments(unsigned int radius, bool countSimulation=false, bool includeChirality=false, bool onlyNonzeroInvariants=false, std::vector< std::uint32_t > countBounds={1, 2, 4, 8}, std::uint32_t fpSize=2048, bool includeRedundantEnvironments=false)
Construct a new MorganArguments object.
std::string infoString() const override
method that returns information string about the fingerprint specific argument set and the arguments ...
Class for holding the bit-id created from Morgan fingerprint environments and the additional data nec...
void updateAdditionalOutput(AdditionalOutput *output, size_t bitId) const override
MorganAtomEnv(const std::uint32_t code, const unsigned int atomId, const unsigned int layer)
Construct a new MorganAtomEnv object.
OutputType getBitId(FingerprintArguments *arguments, const std::vector< std::uint32_t > *atomInvariants, const std::vector< std::uint32_t > *bondInvariants, AdditionalOutput *additionalOutput, const bool hashResults=false, const std::uint64_t fpSize=0) const override
calculates and returns the bit id to be set for this atom-environment
Default atom invariants generator for Morgan fingerprint, generates ECFP-type invariants.
MorganAtomInvGenerator(const bool includeRingMembership=true)
Construct a new MorganAtomInvGenerator object.
MorganAtomInvGenerator * clone() const override
std::string infoString() const override
method that returns information about this AtomInvariantsGenerator and its arguments
std::vector< std::uint32_t > * getAtomInvariants(const ROMol &mol) const override
get atom invariants from a molecule
Bond invariants generator for Morgan fingerprint.
std::string infoString() const override
method that returns information about this BondInvariantsGenerator and its arguments
MorganBondInvGenerator * clone() const override
MorganBondInvGenerator(const bool useBondTypes=true, const bool useChirality=false)
Construct a new MorganBondInvGenerator object.
std::vector< std::uint32_t > * getBondInvariants(const ROMol &mol) const override
get bond invariants from a molecule
Class that generates atom environments for Morgan fingerprint.
std::string infoString() const override
method that returns information about this AtomEnvironmentGenerator and its arguments if any
OutputType getResultSize() const override
Returns the size of the fingerprint based on arguments.
std::vector< AtomEnvironment< OutputType > * > getEnvironments(const ROMol &mol, FingerprintArguments *arguments, const std::vector< std::uint32_t > *fromAtoms, const std::vector< std::uint32_t > *ignoreAtoms, const int confId, const AdditionalOutput *additionalOutput, const std::vector< std::uint32_t > *atomInvariants, const std::vector< std::uint32_t > *bondInvariants, const bool hashResults=false) const override
generate and return all atom-environments from a molecule
Alternative atom invariants generator for Morgan fingerprint, generates FCFP-type invariants.
MorganFeatureAtomInvGenerator(std::vector< const ROMol * > *patterns=nullptr)
Construct a new MorganFeatureAtomInvGenerator object.
std::vector< std::uint32_t > * getAtomInvariants(const ROMol &mol) const override
get atom invariants from a molecule
MorganFeatureAtomInvGenerator * clone() const override
std::string infoString() const override
method that returns information about this AtomInvariantsGenerator and its arguments
#define RDKIT_FINGERPRINTS_EXPORT
Definition export.h:177
RDKIT_FINGERPRINTS_EXPORT FingerprintGenerator< OutputType > * getMorganGenerator(unsigned int radius, bool countSimulation, bool includeChirality, bool useBondTypes, bool onlyNonzeroInvariants, bool includeRedundantEnvironments, AtomInvariantsGenerator *atomInvariantsGenerator=nullptr, BondInvariantsGenerator *bondInvariantsGenerator=nullptr, std::uint32_t fpSize=2048, std::vector< std::uint32_t > countBounds={1, 2, 4, 8}, bool ownsAtomInvGen=false, bool ownsBondInvGen=false)
Get a fingerprint generator for Morgan fingerprint.
Std stuff.
bool rdvalue_is(const RDValue_cast_t)