RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
FilterCatalog.h
Go to the documentation of this file.
1// Copyright (c) 2015, Novartis Institutes for BioMedical Research Inc.
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are
6// met:
7//
8// * Redistributions of source code must retain the above copyright
9// notice, this list of conditions and the following disclaimer.
10// * Redistributions in binary form must reproduce the above
11// copyright notice, this list of conditions and the following
12// disclaimer in the documentation and/or other materials provided
13// with the distribution.
14// * Neither the name of Novartis Institutes for BioMedical Research Inc.
15// nor the names of its contributors may be used to endorse or promote
16// products derived from this software without specific prior written
17// permission.
18//
19// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30//
31
32#include <RDGeneral/export.h>
33#ifndef _RD_FILTER_CATALOG_PARAMS_
34#define _RD_FILTER_CATALOG_PARAMS_
35
36#include <Catalogs/Catalog.h>
38#include "FilterCatalogEntry.h"
39
40namespace RDKit {
41class FilterCatalog;
44 public:
46 PAINS_A = (1u << 1),
47 PAINS_B = (1u << 2),
48 PAINS_C = (1u << 3),
49 PAINS = PAINS_A | PAINS_B | PAINS_C,
50
51 BRENK = (1u << 4),
52 NIH = (1u << 5),
53 ZINC = (1u << 6),
54
55 CHEMBL_Glaxo = (1u << 7),
56 CHEMBL_Dundee = (1u << 8),
57 CHEMBL_BMS = (1u << 9),
58 CHEMBL_SureChEMBL = (1u << 10),
59 CHEMBL_MLSMR = (1u << 11),
60 CHEMBL_Inpharmatica = (1u << 12),
61 CHEMBL_LINT = (1u << 13),
62 CHEMBL = CHEMBL_Glaxo | CHEMBL_Dundee | CHEMBL_BMS | CHEMBL_SureChEMBL |
63 CHEMBL_MLSMR | CHEMBL_Inpharmatica | CHEMBL_LINT,
64
65 ALL = PAINS | BRENK | NIH | ZINC | CHEMBL
66 };
67
//! Default constructor: selects no catalogs.
//! Only the type string of the RDCatalog::CatalogParams base is set;
//! d_catalogs is left default-constructed (empty).
68 FilterCatalogParams() : RDCatalog::CatalogParams() {
69 setTypeStr("Filter Catalog Parameters");
70 }
71
//! Constructs a parameter object with an initial catalog selection.
/*!
  Sets the same type string as the default constructor and then registers
  \c catalogs through addCatalog().

  \param catalogs the FilterCatalogs value to register

  \note addCatalog() is virtual, but because it is called during
        construction the call resolves to FilterCatalogParams::addCatalog,
        never to an override in a derived class. Its bool result is
        ignored here.
  \note Not declared explicit, so a FilterCatalogs value converts
        implicitly to a FilterCatalogParams.
*/
72 FilterCatalogParams(FilterCatalogs catalogs) : RDCatalog::CatalogParams() {
73 setTypeStr("Filter Catalog Parameters");
74 addCatalog(catalogs);
75 }
76
78 : RDCatalog::CatalogParams(other), d_catalogs(other.d_catalogs) {}
79
81
82 //------------------------------------
83 //! Adds an existing FilterCatalog specification to be used in the
84 /// FilterCatalog
85 //
86 /*!
87 Specifies an existing filter catalog to be used.
88
89 \param catalogs One of the enumerated known FilterCatalogs
90 */
91 virtual bool addCatalog(FilterCatalogs catalogs);
92
93 //------------------------------------
94 //! Returns the list of FilterCatalogs to be used, as a const reference to the internal vector (valid only while this object is alive).
95 const std::vector<FilterCatalogs> &getCatalogs() const { return d_catalogs; }
96 //! Fill a catalog with the appropriate entries
97 virtual void fillCatalog(FilterCatalog &catalog) const;
98
99 //! serializes (pickles) to a stream
100 void toStream(std::ostream &ss) const override;
101 //! returns a string with a serialized (pickled) representation
102 std::string Serialize() const override;
103 //! initializes from a stream pickle
104 void initFromStream(std::istream &ss) override;
105 //! initializes from a string pickle
106 void initFromString(const std::string &text) override;
107
108 private:
109 std::vector<FilterCatalogs> d_catalogs;
110
111#ifdef RDK_USE_BOOST_SERIALIZATION
112 friend class boost::serialization::access;
//! Boost.Serialization hook, reachable through the friend declaration of
//! boost::serialization::access above; only compiled when
//! RDK_USE_BOOST_SERIALIZATION is defined.
/*!
  \param ar      the archive that d_catalogs is passed to via operator&
  \param version class version supplied by the archive; intentionally unused
*/
113 template <class Archive>
114 void serialize(Archive &ar, const unsigned int version) {
115 RDUNUSED_PARAM(version);
116 ar & d_catalogs;
117 }
118#endif
119};
120
123 public:
124 // syntactic sugar for getMatch(es) return values.
125 typedef boost::shared_ptr<FilterCatalogEntry> SENTRY;
126
127 // If boost::python can support shared_ptr of const objects
128 // we can enable support for this feature
129 typedef boost::shared_ptr<const entryType_t> CONST_SENTRY;
130
//! Default constructor: creates a catalog with no entries
//! (the FCatalog base and d_entries are both value-initialized).
131 FilterCatalog() : FCatalog(), d_entries() {}
132
134 : FCatalog(), d_entries() {
135 paramType_t temp_params(catalogs);
136 setCatalogParams(&temp_params);
137 }
138
//! Constructs a catalog from a parameter object.
/*!
  Starts from an empty entry list and hands the address of \c params to
  setCatalogParams().

  \param params the FilterCatalogParams describing the catalog

  \note setCatalogParams() is an override (virtual), but a call made during
        construction resolves to FilterCatalog::setCatalogParams.
  \note NOTE(review): whether the catalog copies \c params or retains the
        pointer is decided inside setCatalogParams(), which is not visible
        here - confirm before passing a temporary.
  \note Not declared explicit, so a FilterCatalogParams converts implicitly
        to a FilterCatalog.
*/
139 FilterCatalog(const FilterCatalogParams &params) : FCatalog(), d_entries() {
140 setCatalogParams(&params);
141 }
142
144 : FCatalog(rhs), d_entries(rhs.d_entries) {}
145
146 FilterCatalog(const std::string &binStr);
147
148 ~FilterCatalog() override;
149
150 std::string Serialize() const override;
151
152 // Adds a new FilterCatalogEntry to the catalog
153 /*!
154 Adds a new FilterCatalogEntry to the catalog. The catalog
155 owns the entry.
156
157 \param entry The FilterCatalogEntry to add.
158 \param updateFPLength unused in the FilterCatalog object.
159 */
160
161 unsigned int addEntry(FilterCatalogEntry *entry,
162 bool updateFPLength = true) override;
163
164 // Adds a new FilterCatalogEntry to the catalog
165 /*!
166 Adds a new FilterCatalogEntry to the catalog. The catalog
167 owns the entry.
168
169 \param entry The shared_ptr of the FilterCatalogEntry to add.
170 \param updateFPLength unused in the FilterCatalog object.
171 */
172
173 virtual unsigned int addEntry(SENTRY entry, bool updateFPLength = true);
174
175 // Removes a FilterCatalogEntry from the catalog by index
176 /*!
177 Removes a FilterCatalogEntry from the catalog.
178
179 \param idx The FilterCatalogEntry index for the entry to remove.
180 n.b. removing an entry may change the indices of other entries.
181 To safely remove entries, remove entries with the highest idx
182 first.
183 */
184 bool removeEntry(unsigned int idx);
186
187 //------------------------------------
188 //! returns a particular FilterCatalogEntry in the Catalog
189 //! required by Catalog.h API
190 const FilterCatalogEntry *getEntryWithIdx(unsigned int idx) const override;
191
192 //------------------------------------
193 //! returns a particular FilterCatalogEntry in the Catalog
194 //! memory safe version of getEntryWithIdx
195 CONST_SENTRY getEntry(unsigned int idx) const;
196
197 //------------------------------------
198 //! returns the idx of the given entry, UINT_MAX if not found.
199
200 unsigned int getIdxForEntry(const FilterCatalogEntry *entry) const;
201 unsigned int getIdxForEntry(CONST_SENTRY entry) const;
202
203 //------------------------------------
204 //! returns the number of entries in the catalog (d_entries.size(), explicitly narrowed to the unsigned int the Catalog API returns)
205 unsigned int getNumEntries() const override {
206 return static_cast<unsigned int>(d_entries.size());
207 }
208
209 //------------------------------------
210 //! Reset the current catalog to match the specified FilterCatalogParameters
211 /*!
212 \param params The new FilterCatalogParams specifying the new state of the
213 catalog
214 */
215 void setCatalogParams(const FilterCatalogParams *params) override;
216
217 //------------------------------------
218 //! Returns true if the molecule matches any entry in the catalog
219 /*!
220 \param mol ROMol to match against the catalog
221 */
222 bool hasMatch(const ROMol &mol) const;
223
224 //------------------------------------
225 //! Returns the first match against the catalog
226 /*!
227 \param mol ROMol to match against the catalog
228 */
230
231 //-------------------------------------------
232 //! Returns all entry matches to the molecule
233 /*!
234 \param mol ROMol to match against the catalog
235 */
236 const std::vector<CONST_SENTRY> getMatches(const ROMol &mol) const;
237
238 //--------------------------------------------
239 //! Returns all FilterMatches for the molecule
240 /*!
241 \param mol ROMol to match against the catalog
242 */
243 const std::vector<FilterMatch> getFilterMatches(const ROMol &mol) const;
244
245 private:
246 void Clear();
247 std::vector<SENTRY> d_entries;
248};
249
251
252//! Run a filter catalog on a set of smiles strings
253/*!
254 \param smiles vector of smiles strings to analyze
255 \param numThreads specify the number of threads to use or specify 0 to use all
256 processors [default 1]
257 \returns a vector of vectors. For each input smiles string, returns
258 a vector of boost::shared_ptr<const FilterCatalogEntry> objects.
259 If a molecule matches no filters, the vector will be empty.
260 If a smiles can't be parsed, a 'no valid RDKit molecule'
261 catalog entry is returned.
262
263*/
265std::vector<std::vector<boost::shared_ptr<const FilterCatalogEntry>>>
267 const std::vector<std::string> &smiles, int numThreads = 1);
268} // namespace RDKit
269
270#endif
#define RDUNUSED_PARAM(x)
Definition Invariant.h:196
abstract base class for the container used to create a catalog
abstract base class for a catalog object
Definition Catalog.h:40
paramType paramType_t
Definition Catalog.h:43
void toStream(std::ostream &ss) const override
serializes (pickles) to a stream
FilterCatalogParams(const FilterCatalogParams &other)
void initFromStream(std::istream &ss) override
initializes from a stream pickle
std::string Serialize() const override
returns a string with a serialized (pickled) representation
virtual void fillCatalog(FilterCatalog &catalog) const
Fill a catalog with the appropriate entries.
void initFromString(const std::string &text) override
initializes from a string pickle
const std::vector< FilterCatalogs > & getCatalogs() const
Returns the existing list of FilterCatalogs to be used.
virtual bool addCatalog(FilterCatalogs catalogs)
FilterCatalogParams(FilterCatalogs catalogs)
FilterCatalog(FilterCatalogParams::FilterCatalogs catalogs)
const std::vector< CONST_SENTRY > getMatches(const ROMol &mol) const
Returns all entry matches to the molecule.
unsigned int getNumEntries() const override
returns the number of entries in the catalog
bool removeEntry(CONST_SENTRY entry)
unsigned int addEntry(FilterCatalogEntry *entry, bool updateFPLength=true) override
boost::shared_ptr< const entryType_t > CONST_SENTRY
~FilterCatalog() override
CONST_SENTRY getEntry(unsigned int idx) const
FilterCatalog(const FilterCatalogParams &params)
const FilterCatalogEntry * getEntryWithIdx(unsigned int idx) const override
unsigned int getIdxForEntry(const FilterCatalogEntry *entry) const
returns the idx of the given entry, UINT_MAX if not found.
const std::vector< FilterMatch > getFilterMatches(const ROMol &mol) const
Returns all FilterMatches for the molecule.
bool removeEntry(unsigned int idx)
unsigned int getIdxForEntry(CONST_SENTRY entry) const
FilterCatalog(const FilterCatalog &rhs)
bool hasMatch(const ROMol &mol) const
Returns true if the molecule matches any entry in the catalog.
void setCatalogParams(const FilterCatalogParams *params) override
Reset the current catalog to match the specified FilterCatalogParameters.
virtual unsigned int addEntry(SENTRY entry, bool updateFPLength=true)
FilterCatalog(const std::string &binStr)
std::string Serialize() const override
return a serialized form of the Catalog as an std::string
boost::shared_ptr< FilterCatalogEntry > SENTRY
CONST_SENTRY getFirstMatch(const ROMol &mol) const
Returns the first match against the catalog.
#define RDKIT_FILTERCATALOG_EXPORT
Definition export.h:169
Std stuff.
RDCatalog::Catalog< FilterCatalogEntry, FilterCatalogParams > FCatalog
bool rdvalue_is(const RDValue_cast_t)
RDKIT_FILTERCATALOG_EXPORT std::vector< std::vector< boost::shared_ptr< const FilterCatalogEntry > > > RunFilterCatalog(const FilterCatalog &filterCatalog, const std::vector< std::string > &smiles, int numThreads=1)
Run a filter catalog on a set of smiles strings.
RDKIT_FILTERCATALOG_EXPORT bool FilterCatalogCanSerialize()