RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
RDKitFPGenerator.h
Go to the documentation of this file.
1//
2// Copyright (C) 2018-2022 Boran Adas and other RDKit contributors
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10
11#include <RDGeneral/export.h>
12#ifndef RD_RDFINGERPRINTGEN_H_2018_07
13#define RD_RDFINGERPRINTGEN_H_2018_07
14
16
17namespace RDKit {
18namespace RDKitFP {
19
21 public:
22 unsigned int d_minPath;
23 unsigned int d_maxPath;
27
28 std::string infoString() const override;
29
30 /**
31 \brief Construct a new RDKitFPArguments object
32
33 \param minPath the minimum path length (in bonds) to be included
34 \param maxPath the maximum path length (in bonds) to be included
35 \param useHs toggles inclusion of Hs in paths (if the molecule has
36 explicit Hs)
37 \param branchedPaths toggles generation of branched subgraphs, not just
38 linear paths
39 \param useBondOrder toggles inclusion of bond orders in the path hashes
40 \param countSimulation if set, use count simulation while
41 generating the fingerprint
42 \param countBounds boundaries for count simulation, corresponding bit will
43 be set if the count is higher than the number provided for that spot
44 \param fpSize size of the generated fingerprint, does not affect the sparse
45 versions
46 \param numBitsPerFeature controls the number of bits that are set for each
47 path/subgraph found
48
49 */
50 RDKitFPArguments(unsigned int minPath, unsigned int maxPath, bool useHs,
51 bool branchedPaths, bool useBondOrder, bool countSimulation,
52 const std::vector<std::uint32_t> countBounds,
53 std::uint32_t fpSize, std::uint32_t numBitsPerFeature);
54};
55
58 public:
59 std::vector<std::uint32_t> *getAtomInvariants(
60 const ROMol &mol) const override;
61
62 std::string infoString() const override;
63 RDKitFPAtomInvGenerator *clone() const override;
64};
65
66template <typename OutputType>
68 : public AtomEnvironment<OutputType> {
69 const OutputType d_bitId;
70 const boost::dynamic_bitset<> d_atomsInPath;
71 const INT_VECT d_bondPath;
72
73 public:
74 OutputType getBitId(
75 FingerprintArguments *arguments, // unused
76 const std::vector<std::uint32_t> *atomInvariants, // unused
77 const std::vector<std::uint32_t> *bondInvariants, // unused
78 AdditionalOutput *additionalOutput, // unused
79 bool hashResults = false, // unused
80 const std::uint64_t fpSize = 0 // unused
81 ) const override;
83 size_t bitId) const override;
84
85 /**
86 \brief Construct a new RDKitFPAtomEnv object
87
88 \param bitId bitId generated for this environment
89 \param atomsInPath holds atoms in this environment to set additional output
90 \param bondPath the bond path defining the environment
91
92 */
93 RDKitFPAtomEnv(const OutputType bitId, boost::dynamic_bitset<> atomsInPath,
94 INT_VECT bondPath)
95 : d_bitId(bitId),
96 d_atomsInPath(std::move(atomsInPath)),
97 d_bondPath(std::move(bondPath)) {}
98};
99
100template <typename OutputType>
102 : public AtomEnvironmentGenerator<OutputType> {
103 public:
104 std::vector<AtomEnvironment<OutputType> *> getEnvironments(
105 const ROMol &mol, FingerprintArguments *arguments,
106 const std::vector<std::uint32_t> *fromAtoms,
107 const std::vector<std::uint32_t> *ignoreAtoms, int confId,
108 const AdditionalOutput *additionalOutput,
109 const std::vector<std::uint32_t> *atomInvariants,
110 const std::vector<std::uint32_t> *bondInvariants,
111 bool hashResults = false) const override;
112
113 std::string infoString() const override;
114 OutputType getResultSize() const override;
115
116}; // end of class (namespace RDKitFP is closed further down)
117
118/**
119 \brief Get a RDKit fingerprint generator with given parameters
120
121 \tparam OutputType determines the size of the bitIds and the result, can be 32
122 or 64 bit unsigned integer
123 \param minPath the minimum path length (in bonds) to be included
124 \param maxPath the maximum path length (in bonds) to be included
125 \param useHs toggles inclusion of Hs in paths (if the molecule has
126 explicit Hs)
127 \param branchedPaths toggles generation of branched subgraphs, not just
128 linear paths
129 \param useBondOrder toggles inclusion of bond orders in the path hashes
130 \param atomInvariantsGenerator custom atom invariants generator to use
131 \param countSimulation if set, use count simulation while
132 generating the fingerprint
133 \param countBounds boundaries for count simulation, corresponding bit will be
134 set if the count is higher than the number provided for that spot
135 \param fpSize size of the generated fingerprint, does not affect the sparse
136 versions
137 \param numBitsPerFeature controls the number of bits that are set for each
138 path/subgraph found
139 \param ownsAtomInvGen if set atom invariants generator is destroyed with the
140 fingerprint generator
141
142 \return FingerprintGenerator<OutputType>* that generates RDKit fingerprints
143
144 This generator supports the following \c AdditionalOutput types:
145 - \c atomToBits : which bits each atom is involved in
146 - \c atomCounts : how many bits each atom sets
147 - \c bitPaths : map from bitId to vectors of bond indices for the individual
148 subgraphs
149
150 */
151template <typename OutputType>
153 unsigned int minPath = 1, unsigned int maxPath = 7, bool useHs = true,
154 bool branchedPaths = true, bool useBondOrder = true,
156 bool countSimulation = false,
157 const std::vector<std::uint32_t> countBounds = {1, 2, 4, 8},
158 std::uint32_t fpSize = 2048, std::uint32_t numBitsPerFeature = 2,
159 bool ownsAtomInvGen = false);
160
161} // namespace RDKitFP
162} // namespace RDKit
163
164#endif
abstract base class that generates atom-environments from a molecule
abstract base class that holds atom-environments that will be hashed to generate the fingerprint
abstract base class for atom invariants generators
Abstract base class that holds molecule independent arguments that are common amongst all fingerprint...
class that generates same fingerprint style for different output formats
RDKitFPArguments(unsigned int minPath, unsigned int maxPath, bool useHs, bool branchedPaths, bool useBondOrder, bool countSimulation, const std::vector< std::uint32_t > countBounds, std::uint32_t fpSize, std::uint32_t numBitsPerFeature)
Construct a new RDKitFPArguments object.
std::string infoString() const override
method that returns information string about the fingerprint specific argument set and the arguments ...
RDKitFPAtomEnv(const OutputType bitId, boost::dynamic_bitset<> atomsInPath, INT_VECT bondPath)
Construct a new RDKitFPAtomEnv object.
OutputType getBitId(FingerprintArguments *arguments, const std::vector< std::uint32_t > *atomInvariants, const std::vector< std::uint32_t > *bondInvariants, AdditionalOutput *additionalOutput, bool hashResults=false, const std::uint64_t fpSize=0) const override
calculates and returns the bit id to be set for this atom-environment
void updateAdditionalOutput(AdditionalOutput *output, size_t bitId) const override
std::vector< std::uint32_t > * getAtomInvariants(const ROMol &mol) const override
get atom invariants from a molecule
std::string infoString() const override
method that returns information about this AtomInvariantsGenerator and its arguments
RDKitFPAtomInvGenerator * clone() const override
OutputType getResultSize() const override
Returns the size of the fingerprint based on arguments.
std::vector< AtomEnvironment< OutputType > * > getEnvironments(const ROMol &mol, FingerprintArguments *arguments, const std::vector< std::uint32_t > *fromAtoms, const std::vector< std::uint32_t > *ignoreAtoms, int confId, const AdditionalOutput *additionalOutput, const std::vector< std::uint32_t > *atomInvariants, const std::vector< std::uint32_t > *bondInvariants, bool hashResults=false) const override
generate and return all atom-environments from a molecule
std::string infoString() const override
method that returns information about this AtomEnvironmentGenerator and its arguments if any
#define RDKIT_FINGERPRINTS_EXPORT
Definition export.h:177
RDKIT_FINGERPRINTS_EXPORT FingerprintGenerator< OutputType > * getRDKitFPGenerator(unsigned int minPath=1, unsigned int maxPath=7, bool useHs=true, bool branchedPaths=true, bool useBondOrder=true, AtomInvariantsGenerator *atomInvariantsGenerator=nullptr, bool countSimulation=false, const std::vector< std::uint32_t > countBounds={1, 2, 4, 8}, std::uint32_t fpSize=2048, std::uint32_t numBitsPerFeature=2, bool ownsAtomInvGen=false)
Get a RDKit fingerprint generator with given parameters.
Std stuff.
std::vector< int > INT_VECT
Definition types.h:289
bool rdvalue_is(const RDValue_cast_t)