RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
SubstructMatch.h
Go to the documentation of this file.
1//
2// Copyright (C) 2001-2020 Greg Landrum and Rational Discovery LLC
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10#include <RDGeneral/export.h>
11#ifndef RD_SUBSTRUCTMATCH_H
12#define RD_SUBSTRUCTMATCH_H
13
14// std bits
15#include <vector>
16
17#include <unordered_set>
18#include <functional>
19#include <unordered_map>
20#include <cstdint>
21#include <string>
22
23#include <boost/dynamic_bitset.hpp>
24#if BOOST_VERSION >= 107100
25#define RDK_INTERNAL_BITSET_HAS_HASH
26#endif
27
29
30namespace RDKit {
31class ROMol;
32class Atom;
33class Bond;
34class ResonanceMolSupplier;
35class MolBundle;
36
//! \brief Container used to return a match from substructure searching.
//! Each entry is a (queryAtomIdx, molAtomIdx) pair.
using MatchVectType = std::vector<std::pair<int, int>>;
40
// NOTE(review): this listing jumps from line 40 to 42 and from 70 to 72, so
// the line that opens this aggregate and one line before its closing brace
// are not visible here. The members below are the visible part only.
42 bool useChirality = false; //!< Use chirality in determining whether or not
43 //!< atoms/bonds match
44 bool useEnhancedStereo = false; //!< Use enhanced stereochemistry in
45 //!< determining whether atoms/bonds match
46 bool aromaticMatchesConjugated = false; //!< Aromatic and conjugated bonds
47 //!< match each other
48 bool useQueryQueryMatches = false; //!< Consider query-query matches, not
49 //!< just simple matches
50 bool useGenericMatchers = false; //!< Looks for generic atoms in the query
51 //!< and uses them as part of the matching
52 bool recursionPossible = true; //!< Allow recursive queries
53 bool uniquify = true; //!< uniquify (by atom index) match results
54 unsigned int maxMatches = 1000; //!< maximum number of matches to return
55 int numThreads = 1; //!< number of threads to use when multi-threading
56 //!< is possible. 0 selects the number of
57 //!< concurrent threads supported by the hardware;
58 //!< negative values are added to the number of
59 //!< concurrent threads supported by the hardware
60 std::vector<std::string> atomProperties; //!< atom properties that must be
61 //!< equivalent in order to match
62 std::vector<std::string> bondProperties; //!< bond properties that must be
63 //!< equivalent in order to match
64 std::function<bool(const ROMol &mol,
65 const std::vector<unsigned int> &match)>
66 extraFinalCheck; //!< a function to be called at the end to validate a
67 //!< match
68 unsigned int maxRecursiveMatches =
69 1000; //!< maximum number of matches that the recursive substructure
70 //!< matching should return
72};
73
75 SubstructMatchParameters &params, const std::string &json);
77 const SubstructMatchParameters &params);
78
79//! Find substructure matches for a query in a molecule
80/*!
81 \param mol The ROMol to be searched
82 \param query The query ROMol
83 \param matchParams Parameters controlling the matching
84
85 \return The matches, if any
86
87*/
89 const ROMol &mol, const ROMol &query,
91
92//! Find all substructure matches for a query in a ResonanceMolSupplier object
93/*!
94 \param resMolSuppl The ResonanceMolSupplier object to be searched
95 \param query The query ROMol
96 \param matchParams Parameters controlling the matching
97
98 \return The matches, if any
99
100*/
104
106 const MolBundle &bundle, const ROMol &query,
109 const ROMol &mol, const MolBundle &query,
112 const MolBundle &bundle, const MolBundle &query,
114
115//! Find a substructure match for a query
116/*!
117 \param mol The object to be searched
118 \param query The query
119 \param matchVect Used to return the match
120 (pre-existing contents will be deleted)
121 \param recursionPossible flags whether or not recursive matches are allowed
122 \param useChirality use atomic CIP codes as part of the comparison
123 \param useQueryQueryMatches if set, the contents of atom and bond queries
124 will be used as part of the matching
125
126 \return whether or not a match was found
127
128*/
129template <typename T1, typename T2>
130bool SubstructMatch(T1 &mol, const T2 &query, MatchVectType &matchVect,
131 bool recursionPossible = true, bool useChirality = false,
132 bool useQueryQueryMatches = false) {
134 params.recursionPossible = recursionPossible;
135 params.useChirality = useChirality;
136 params.useQueryQueryMatches = useQueryQueryMatches;
137 params.maxMatches = 1;
138 std::vector<MatchVectType> matchVects = SubstructMatch(mol, query, params);
139 if (matchVects.size()) {
140 matchVect = matchVects.front();
141 } else {
142 matchVect.clear();
143 }
144 return matchVect.size() != 0;
145};
146
147//! Find all substructure matches for a query
148/*!
149 \param mol The object to be searched
150 \param query The query
151 \param matchVect Used to return the matches
152 (pre-existing contents will be deleted)
153 \param uniquify Toggles uniquification (by atom index) of the results
154 \param recursionPossible flags whether or not recursive matches are allowed
155 \param useChirality use atomic CIP codes as part of the comparison
156 \param useQueryQueryMatches if set, the contents of atom and bond queries
157 will be used as part of the matching
158 \param maxMatches The maximum number of matches that will be returned.
159 In high-symmetry cases with medium-sized molecules, it is
160 very
161 easy to end up with a combinatorial explosion in the
162 number of
163 possible matches. This argument prevents that from having
164 unintended consequences
165
166 \return the number of matches found
167
168*/
169template <typename T1, typename T2>
170unsigned int SubstructMatch(T1 &mol, const T2 &query,
171 std::vector<MatchVectType> &matchVect,
172 bool uniquify = true, bool recursionPossible = true,
173 bool useChirality = false,
174 bool useQueryQueryMatches = false,
175 unsigned int maxMatches = 1000,
176 int numThreads = 1) {
178 params.uniquify = uniquify;
179 params.recursionPossible = recursionPossible;
180 params.useChirality = useChirality;
181 params.useQueryQueryMatches = useQueryQueryMatches;
182 params.maxMatches = maxMatches;
183 params.numThreads = numThreads;
184 matchVect = SubstructMatch(mol, query, params);
185 return static_cast<unsigned int>(matchVect.size());
186};
187
188// ----------------------------------------------
189//
190// find one match in ResonanceMolSupplier object
191//
// Explicit specialization of the single-match SubstructMatch() convenience
// wrapper, used when the searched object is `resMolSupplier`.
// NOTE(review): this listing jumps from line 192 to 194 and from 196 to 198,
// so the first line of the signature and the declaration of `params` are not
// visible here. Restore both from the original header before compiling.
192template <>
194 const ROMol &query, MatchVectType &matchVect,
195 bool recursionPossible, bool useChirality,
196 bool useQueryQueryMatches) {
198 params.recursionPossible = recursionPossible;
199 params.useChirality = useChirality;
200 params.useQueryQueryMatches = useQueryQueryMatches;
// Only the first match is returned to the caller, so request at most one.
201 params.maxMatches = 1;
202 std::vector<MatchVectType> matchVects =
203 SubstructMatch(resMolSupplier, query, params);
// Hand back the first match if there is one; otherwise empty the output.
204 if (matchVects.size()) {
205 matchVect = matchVects.front();
206 } else {
207 matchVect.clear();
208 }
// Success is reported as "the output vector is non-empty".
209 return matchVect.size() != 0;
210}
211
// Explicit specialization of the all-matches SubstructMatch() convenience
// wrapper, used when the searched object is `resMolSupplier`.
// NOTE(review): this listing jumps from line 212 to 214 and from 218 to 220,
// so the first line of the signature and the declaration of `params` are not
// visible here. Restore both from the original header before compiling.
212template <>
214 const ROMol &query,
215 std::vector<MatchVectType> &matchVect,
216 bool uniquify, bool recursionPossible,
217 bool useChirality, bool useQueryQueryMatches,
218 unsigned int maxMatches, int numThreads) {
// Copy each argument into the matching field of the parameter object.
220 params.uniquify = uniquify;
221 params.recursionPossible = recursionPossible;
222 params.useChirality = useChirality;
223 params.useQueryQueryMatches = useQueryQueryMatches;
224 params.maxMatches = maxMatches;
225 params.numThreads = numThreads;
// The assignment replaces any pre-existing contents of matchVect.
226 matchVect = SubstructMatch(resMolSupplier, query, params);
// The return value is the number of matches now stored in matchVect.
227 return static_cast<unsigned int>(matchVect.size());
228};
229
230//! Class used as a final step to confirm whether or not a given atom->atom
231//! mapping is a valid substructure match.
// NOTE(review): this listing jumps from line 231 to 233, so the line that
// opens this class is not visible here.
233 public:
// Constructed from the query, the molecule being searched and the matching
// parameters. Presumably these are bound to the reference members below;
// confirm against the implementation file.
234 MolMatchFinalCheckFunctor(const ROMol &query, const ROMol &mol,
235 const SubstructMatchParameters &ps);
236
// Checks one candidate mapping. q_c and m_c look like parallel arrays of
// query-side and molecule-side indices; their length is not passed here.
// TODO confirm both points against the implementation.
237 bool operator()(const std::uint32_t q_c[], const std::uint32_t m_c[]);
238
239 private:
// These three members are references, so the functor does not own the
// objects they refer to.
240 const ROMol &d_query;
241 const ROMol &d_mol;
242 const SubstructMatchParameters &d_params;
// Maps an unsigned key to a non-owning StereoGroup pointer; what the key
// identifies is not visible in this header.
243 std::unordered_map<unsigned int, StereoGroup const *> d_molStereoGroups;
// HashedStorageType is the element type of matchesSeen; which type is
// chosen depends on RDK_INTERNAL_BITSET_HAS_HASH.
244#ifdef RDK_INTERNAL_BITSET_HAS_HASH
245 // Boost 1.71 added support for std::hash with dynamic_bitset.
246 using HashedStorageType = boost::dynamic_bitset<>;
247#else
248 // otherwise we use a less elegant solution
249 using HashedStorageType = std::string;
250#endif
// Presumably records the matches already encountered; verify in the
// implementation file.
251 std::unordered_set<HashedStorageType> matchesSeen;
252};
253
254} // namespace RDKit
255
256#endif
Defines the class StereoGroup which stores relationships between the absolute configurations of atoms...
MolBundle contains a collection of related ROMols.
Definition MolBundle.h:59
MolMatchFinalCheckFunctor(const ROMol &query, const ROMol &mol, const SubstructMatchParameters &ps)
bool operator()(const std::uint32_t q_c[], const std::uint32_t m_c[])
#define RDKIT_SUBSTRUCTMATCH_EXPORT
Definition export.h:529
Std stuff.
std::vector< std::pair< int, int > > MatchVectType
used to return matches from substructure searching, The format is (queryAtomIdx, molAtomIdx)
bool rdvalue_is(const RDValue_cast_t)
RDKIT_SUBSTRUCTMATCH_EXPORT std::vector< MatchVectType > SubstructMatch(const ROMol &mol, const ROMol &query, const SubstructMatchParameters &params=SubstructMatchParameters())
Find a substructure match for a query in a molecule.
RDKIT_SUBSTRUCTMATCH_EXPORT void updateSubstructMatchParamsFromJSON(SubstructMatchParameters &params, const std::string &json)
RDKIT_SUBSTRUCTMATCH_EXPORT std::string substructMatchParamsToJSON(const SubstructMatchParameters &params)
unsigned int maxMatches
maximum number of matches to return
bool uniquify
uniquify (by atom index) match results
std::vector< std::string > atomProperties
std::vector< std::string > bondProperties
bool recursionPossible
Allow recursive queries.
std::function< bool(const ROMol &mol, const std::vector< unsigned int > &match)> extraFinalCheck