RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
Catalog.h
Go to the documentation of this file.
1//
2// Copyright (C) 2003-2006 Rational Discovery LLC
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10
11#include <RDGeneral/export.h>
12#ifndef RD_CATALOG_H
13#define RD_CATALOG_H
14
15// Boost graph stuff
17#include <boost/graph/graph_traits.hpp>
18#include <boost/graph/adjacency_list.hpp>
19#include <boost/version.hpp>
20#if BOOST_VERSION >= 104000
21#include <boost/property_map/property_map.hpp>
22#else
23#include <boost/property_map.hpp>
24#endif
26
27// for some typedefs
28#include <RDGeneral/types.h>
29#include <RDGeneral/StreamOps.h>
30
31namespace RDCatalog {
32const int versionMajor = 1;
33const int versionMinor = 0;
34const int versionPatch = 0;
35const int endianId = 0xDEADBEEF;
36
37//-----------------------------------------------------------------------------
38//! abstract base class for a catalog object
39template <class entryType, class paramType>
40class Catalog {
41 public:
42 typedef entryType entryType_t;
43 typedef paramType paramType_t;
44
45 //------------------------------------
46 Catalog() : dp_cParams(nullptr) {}
47
48 //------------------------------------
49 virtual ~Catalog() { delete dp_cParams; }
50
51 //------------------------------------
52 //! return a serialized form of the Catalog as an std::string
53 virtual std::string Serialize() const = 0;
54
55 //------------------------------------
56 //! adds an entry to the catalog
57 /*!
58
59 \param entry the entry to be added
60 \param updateFPLength (optional) if this is true, our internal
61 fingerprint length will also be updated.
62
63 */
64 virtual unsigned int addEntry(entryType *entry,
65 bool updateFPLength = true) = 0;
66
67 //------------------------------------
68 //! returns a particular entry in the Catalog
69 virtual const entryType *getEntryWithIdx(unsigned int idx) const = 0;
70
71 //------------------------------------
72 //! returns the number of entries
73 virtual unsigned int getNumEntries() const = 0;
74
75 //------------------------------------
76 //! returns the length of our fingerprint
77 unsigned int getFPLength() const { return d_fpLength; }
78
79 //------------------------------------
80 //! sets our fingerprint length
81 void setFPLength(unsigned int val) { d_fpLength = val; }
82
83 //------------------------------------
84 //! sets our parameters by copying the \c params argument
85 virtual void setCatalogParams(const paramType *params) {
86 PRECONDITION(params, "bad parameter object");
87 // if we already have a parameter object throw an exception
89 "A parameter object already exists on the catalog");
90 /*
91 if (dp_cParams) {
92 // we already have parameter object on the catalog
93 // can't overwrite it
94 PRECONDITION(0, "A parameter object already exist on the catalog");
95 }*/
96 dp_cParams = new paramType(*params);
97 }
98
99 //------------------------------------
100 //! returns a pointer to our parameters
101 const paramType *getCatalogParams() const { return dp_cParams; }
102
103 protected:
104 // this is the ID that will be assigned to the next entry
105 // added to the catalog - need not be same as the number of entries
106 // in the catalog and does not correspond with the
107 // id of the entry in the catalog.
108 // this is more along the lines of bitId
109 unsigned int d_fpLength{0}; //!< the length of our fingerprint
110 paramType *dp_cParams; //!< our params object
111};
112
113//-----------------------------------------------------------------------------
114//! A Catalog with a hierarchical structure
115/*!
116
117 The entries of a HierarchCatalog are arranged in a directed graph
118
119 <b>The difference between <i>Indices</i> and <i>Bit Ids</i></b>
120
121 A HierarchCatalog may contain more entries than the user is actually
122 interested in. For example a HierarchCatalog constructed to contain
123 orders 5 through 8 may well contain information about orders 1-5,
124 in order to facilitate some search optimizations.
125
126 - <i>Bit Ids</i> refer to the "interesting" bits.
127 So, in the above example, Bit Id \c 0 will be the first entry
128 with order 5.
129 - <i>Indices</i> refer to the underlying structure of the catalog.
130 So, in the above example, the entry with index \c 0 will be
131 the first entry with order 1.
132
133*/
134template <class entryType, class paramType, class orderType>
135class HierarchCatalog : public Catalog<entryType, paramType> {
136 // the entries in the catalog can be traversed using the edges
137 // in a desired order
138 public:
139 //! used by the BGL to set up the node properties in our graph
141 enum { num = 1003 };
142 typedef boost::vertex_property_tag kind;
143 };
144 typedef boost::property<vertex_entry_t, entryType *> EntryProperty;
145
146 //! the type of the graph itself:
147 typedef boost::adjacency_list<
148 boost::vecS,
149 boost::vecS, // FIX: should be using setS for edges so that parallel
150 // edges are never added (page 225 BGL book)
151 // but that seems result in compile errors
152 boost::bidirectionalS, EntryProperty>
154
155 typedef boost::graph_traits<CatalogGraph> CAT_GRAPH_TRAITS;
156 typedef typename CAT_GRAPH_TRAITS::vertex_iterator VER_ITER;
157 typedef std::pair<VER_ITER, VER_ITER> ENT_ITER_PAIR;
158 typedef typename CAT_GRAPH_TRAITS::adjacency_iterator DOWN_ENT_ITER;
159 typedef std::pair<DOWN_ENT_ITER, DOWN_ENT_ITER> DOWN_ENT_ITER_PAIR;
160
161 //------------------------------------
163
164 //------------------------------------
165 //! Construct by making a copy of the input \c params object
166 HierarchCatalog(const paramType *params) : Catalog<entryType, paramType>() {
167 this->setCatalogParams(params);
168 }
169
170 //------------------------------------
171 //! Construct from a \c pickle (a serialized form of the HierarchCatalog)
172 HierarchCatalog(const std::string &pickle) { this->initFromString(pickle); }
173
174 //------------------------------------
175 ~HierarchCatalog() override { destroy(); }
176
177 //------------------------------------
178 //! serializes this object to a stream
179 void toStream(std::ostream &ss) const {
180 PRECONDITION(this->getCatalogParams(), "NULL parameter object");
181
182 // the i/o header:
187
188 // information about the catalog itself:
189 int tmpUInt;
190 tmpUInt = this->getFPLength();
191 RDKit::streamWrite(ss, tmpUInt);
192 tmpUInt = this->getNumEntries();
193 RDKit::streamWrite(ss, tmpUInt);
194
195 // std::cout << ">>>>-------------------------------" << std::endl;
196 // std::cout << "\tlength: " << getFPLength() << " " << getNumEntries() <<
197 // std::endl;
198
199 // add the params object:
200 this->getCatalogParams()->toStream(ss);
201 // std::cout << "\tparams: " << getCatalogParams()->getLowerFragLength();
202 // std::cout << " " << getCatalogParams()->getUpperFragLength();
203 // std::cout << " " << getCatalogParams()->getNumFuncGroups();
204 // std::cout << std::endl;
205
206 // write the entries in order:
207 for (unsigned int i = 0; i < getNumEntries(); i++) {
208 this->getEntryWithIdx(i)->toStream(ss);
209 }
210
211 // finally the adjacency list:
212 for (unsigned int i = 0; i < getNumEntries(); i++) {
213 RDKit::INT_VECT children = this->getDownEntryList(i);
214 tmpUInt = static_cast<unsigned int>(children.size());
215 RDKit::streamWrite(ss, tmpUInt);
216 for (RDKit::INT_VECT::const_iterator ivci = children.begin();
217 ivci != children.end(); ivci++) {
218 RDKit::streamWrite(ss, *ivci);
219 }
220 }
221 }
222
223 //------------------------------------
224 //! serializes this object and returns the resulting \c pickle
225 std::string Serialize() const override {
226 std::stringstream ss(std::ios_base::binary | std::ios_base::out |
227 std::ios_base::in);
228 this->toStream(ss);
229 return ss.str();
230 }
231
232 //------------------------------------
233 //! fills the contents of this object from a stream containing a \c pickle
234 void initFromStream(std::istream &ss) {
235 int tmpInt;
236 // FIX: at the moment we ignore the header info:
237 RDKit::streamRead(ss, tmpInt);
238 RDKit::streamRead(ss, tmpInt);
239 RDKit::streamRead(ss, tmpInt);
240 RDKit::streamRead(ss, tmpInt);
241
242 unsigned int tmpUInt;
243 RDKit::streamRead(ss, tmpUInt); // fp length
244 this->setFPLength(tmpUInt);
245
246 unsigned int numEntries;
247 RDKit::streamRead(ss, numEntries);
248 // std::cout << "<<<-------------------------------" << std::endl;
249 // std::cout << "\tlength: " << getFPLength() << " " << numEntries <<
250 // std::endl;
251
252 // grab the params:
253 paramType *params = new paramType();
254 params->initFromStream(ss);
255 this->setCatalogParams(params);
256 delete params;
257
258 // std::cout << "\tparams: " << getCatalogParams()->getLowerFragLength();
259 // std::cout << " " << getCatalogParams()->getUpperFragLength();
260 // std::cout << " " << getCatalogParams()->getNumFuncGroups();
261 // std::cout << std::endl;
262
263 // now all of the entries:
264 for (unsigned int i = 0; i < numEntries; i++) {
265 entryType *entry = new entryType();
266 entry->initFromStream(ss);
267 this->addEntry(entry, false);
268 }
269
270 // and, finally, the adjacency list:
271 for (unsigned int i = 0; i < numEntries; i++) {
272 unsigned int nNeighbors;
273 RDKit::streamRead(ss, nNeighbors);
274 for (unsigned int j = 0; j < nNeighbors; j++) {
275 RDKit::streamRead(ss, tmpInt);
276 this->addEdge(i, tmpInt);
277 }
278 }
279 }
280
281 //------------------------------------
282 unsigned int getNumEntries() const override {
283 return static_cast<unsigned int>(boost::num_vertices(d_graph));
284 }
285
286 //------------------------------------
287 //! fills the contents of this object from a string containing a \c pickle
288 void initFromString(const std::string &text) {
289 std::stringstream ss(std::ios_base::binary | std::ios_base::out |
290 std::ios_base::in);
291 // initialize the stream:
292 ss.write(text.c_str(), text.length());
293 // now start reading out values:
294 this->initFromStream(ss);
295 }
296
297 //------------------------------------
298 //! add a new entry to the catalog
299 /*!
300
301 \param entry the entry to be added
302 \param updateFPLength (optional) if this is true, our internal
303 fingerprint length will also be updated.
304
305 */
306 unsigned int addEntry(entryType *entry, bool updateFPLength = true) override {
307 PRECONDITION(entry, "bad arguments");
308 if (updateFPLength) {
309 unsigned int fpl = this->getFPLength();
310 entry->setBitId(fpl);
311 fpl++;
312 this->setFPLength(fpl);
313 }
314 unsigned int eid = static_cast<unsigned int>(
315 boost::add_vertex(EntryProperty(entry), d_graph));
316 orderType etype = entry->getOrder();
317 // REVIEW: this initialization is not required: the STL map, in
318 // theory, will create a new object when operator[] is called
319 // for a new item
320 if (d_orderMap.find(etype) == d_orderMap.end()) {
321 RDKit::INT_VECT nets;
322 d_orderMap[etype] = nets;
323 }
324 d_orderMap[etype].push_back(eid);
325 return eid;
326 }
327
328 //------------------------------------
329 //! adds an edge between two entries in the catalog
330 /*!
331 Since we are using a bidirectional graph - the order in
332 which the ids are supplied here makes a difference
333
334 \param id1 index of the edge's beginning
335 \param id2 index of the edge's end
336
337 */
338 void addEdge(unsigned int id1, unsigned int id2) {
339 unsigned int nents = getNumEntries();
340 URANGE_CHECK(id1, nents);
341 URANGE_CHECK(id2, nents);
342 // FIX: if we boost::setS for the edgeList BGL will
343 // do the checking for duplicity (parallel edges)
344 // But for reasons unknown setS results in compile
345 // errors while using adjacent_vertices.
346 typename CAT_GRAPH_TRAITS::edge_descriptor edge;
347 bool found;
348 boost::tie(edge, found) = boost::edge(boost::vertex(id1, d_graph),
349 boost::vertex(id2, d_graph), d_graph);
350 if (!found) {
351 boost::add_edge(id1, id2, d_graph);
352 }
353 }
354
355 //------------------------------------
356 //! returns a pointer to our entry with a particular index
357 const entryType *getEntryWithIdx(unsigned int idx) const override {
359 int vd = static_cast<int>(boost::vertex(idx, d_graph));
360 typename boost::property_map<CatalogGraph, vertex_entry_t>::const_type
361 pMap = boost::get(vertex_entry_t(), d_graph);
362 return pMap[vd];
363 }
364
365 //------------------------------------
366 //! returns a pointer to our entry with a particular bit ID
367 const entryType *getEntryWithBitId(unsigned int idx) const {
368 URANGE_CHECK(idx, this->getFPLength());
369 typename boost::property_map<CatalogGraph, vertex_entry_t>::const_type
370 pMap = boost::get(vertex_entry_t(), d_graph);
371 const entryType *res = nullptr;
372 for (unsigned int i = idx; i < this->getNumEntries(); i++) {
373 const entryType *e = pMap[i];
374 if (e->getBitId() == static_cast<int>(idx)) {
375 res = e;
376 break;
377 }
378 }
379 return res;
380 }
381
382 //------------------------------------
383 //! returns the index of the entry with a particular bit ID
384 int getIdOfEntryWithBitId(unsigned int idx) const {
385 URANGE_CHECK(idx, this->getFPLength());
386 typename boost::property_map<CatalogGraph, vertex_entry_t>::const_type
387 pMap = boost::get(vertex_entry_t(), d_graph);
388 int res = -1;
389 for (unsigned int i = idx; i < this->getNumEntries(); i++) {
390 const entryType *e = pMap[i];
391 if (static_cast<unsigned int>(e->getBitId()) == idx) {
392 res = i;
393 break;
394 }
395 }
396 return res;
397 }
398
399 //------------------------------------
400 //! returns a list of the indices of entries below the one passed in
401 RDKit::INT_VECT getDownEntryList(unsigned int idx) const {
402 RDKit::INT_VECT res;
403 DOWN_ENT_ITER nbrIdx, endIdx;
404 boost::tie(nbrIdx, endIdx) = boost::adjacent_vertices(idx, d_graph);
405 while (nbrIdx != endIdx) {
406 res.push_back(static_cast<int>(*nbrIdx));
407 nbrIdx++;
408 }
409 // std::cout << res.size() << "\n";
410 return res;
411 }
412
413 //------------------------------------
414 //! returns a list of the indices that have a particular order
415 const RDKit::INT_VECT &getEntriesOfOrder(orderType ord) {
416 return d_orderMap[ord];
417 }
418
419 //------------------------------------
420 //! returns a list of the indices that have a particular order
421 /*!
422 \overload
423 */
424 const RDKit::INT_VECT &getEntriesOfOrder(orderType ord) const {
425 typename std::map<orderType, RDKit::INT_VECT>::const_iterator elem;
426 elem = d_orderMap.find(ord);
428 elem != d_orderMap.end(),
429 " catalog does not contain any entries of the order specified");
430 return elem->second;
431 }
432
433 private:
434 // graphs that store the entries in the catalog in a hierarchical manner
435 CatalogGraph d_graph;
436 // a map that maps the order type of entries in the catalog to
437 // a vector of vertex indices in the graphs above
438 // e.g. for a catalog with molecular fragments, the order of a fragment can
439 // simply be the number of bond in it. The list this oder maps to is all the
440 // vertex ids of these fragment in the catalog that have this many bonds in
441 // them
442 std::map<orderType, RDKit::INT_VECT> d_orderMap;
443
444 //------------------------------------
445 //! clear any memory that we've used
446 void destroy() {
447 ENT_ITER_PAIR entItP = boost::vertices(d_graph);
448 typename boost::property_map<CatalogGraph, vertex_entry_t>::type pMap =
449 boost::get(vertex_entry_t(), d_graph);
450 while (entItP.first != entItP.second) {
451 delete pMap[*(entItP.first++)];
452 }
453 }
454};
455
456} // namespace RDCatalog
457
458#endif
#define CHECK_INVARIANT(expr, mess)
Definition Invariant.h:101
#define URANGE_CHECK(x, hi)
Definition Invariant.h:142
#define PRECONDITION(expr, mess)
Definition Invariant.h:109
abstract base class for a catalog object
Definition Catalog.h:40
virtual std::string Serialize() const =0
return a serialized form of the Catalog as an std::string
virtual unsigned int addEntry(entryType *entry, bool updateFPLength=true)=0
adds an entry to the catalog
unsigned int getFPLength() const
returns the length of our fingerprint
Definition Catalog.h:77
virtual void setCatalogParams(const paramType *params)
sets our parameters by copying the params argument
Definition Catalog.h:85
entryType entryType_t
Definition Catalog.h:42
virtual unsigned int getNumEntries() const =0
returns the number of entries
const paramType * getCatalogParams() const
returns a pointer to our parameters
Definition Catalog.h:101
virtual ~Catalog()
Definition Catalog.h:49
paramType * dp_cParams
our params object
Definition Catalog.h:110
unsigned int d_fpLength
the length of our fingerprint
Definition Catalog.h:109
void setFPLength(unsigned int val)
sets our fingerprint length
Definition Catalog.h:81
paramType paramType_t
Definition Catalog.h:43
virtual const entryType * getEntryWithIdx(unsigned int idx) const =0
returns a particular entry in the Catalog
A Catalog with a hierarchical structure.
Definition Catalog.h:135
boost::graph_traits< CatalogGraph > CAT_GRAPH_TRAITS
Definition Catalog.h:155
CAT_GRAPH_TRAITS::vertex_iterator VER_ITER
Definition Catalog.h:156
CAT_GRAPH_TRAITS::adjacency_iterator DOWN_ENT_ITER
Definition Catalog.h:158
const entryType * getEntryWithBitId(unsigned int idx) const
returns a pointer to our entry with a particular bit ID
Definition Catalog.h:367
HierarchCatalog(const paramType *params)
Construct by making a copy of the input params object.
Definition Catalog.h:166
const RDKit::INT_VECT & getEntriesOfOrder(orderType ord) const
returns a list of the indices that have a particular order
Definition Catalog.h:424
RDKit::INT_VECT getDownEntryList(unsigned int idx) const
returns a list of the indices of entries below the one passed in
Definition Catalog.h:401
int getIdOfEntryWithBitId(unsigned int idx) const
returns the index of the entry with a particular bit ID
Definition Catalog.h:384
unsigned int getNumEntries() const override
returns the number of entries
Definition Catalog.h:282
const entryType * getEntryWithIdx(unsigned int idx) const override
returns a pointer to our entry with a particular index
Definition Catalog.h:357
std::string Serialize() const override
serializes this object and returns the resulting pickle
Definition Catalog.h:225
void initFromStream(std::istream &ss)
fills the contents of this object from a stream containing a pickle
Definition Catalog.h:234
HierarchCatalog(const std::string &pickle)
Construct from a pickle (a serialized form of the HierarchCatalog)
Definition Catalog.h:172
boost::adjacency_list< boost::vecS, boost::vecS, boost::bidirectionalS, EntryProperty > CatalogGraph
the type of the graph itself:
Definition Catalog.h:153
std::pair< VER_ITER, VER_ITER > ENT_ITER_PAIR
Definition Catalog.h:157
const RDKit::INT_VECT & getEntriesOfOrder(orderType ord)
returns a list of the indices that have a particular order
Definition Catalog.h:415
void initFromString(const std::string &text)
fills the contents of this object from a string containing a pickle
Definition Catalog.h:288
~HierarchCatalog() override
Definition Catalog.h:175
boost::property< vertex_entry_t, entryType * > EntryProperty
Definition Catalog.h:144
std::pair< DOWN_ENT_ITER, DOWN_ENT_ITER > DOWN_ENT_ITER_PAIR
Definition Catalog.h:159
unsigned int addEntry(entryType *entry, bool updateFPLength=true) override
add a new entry to the catalog
Definition Catalog.h:306
void addEdge(unsigned int id1, unsigned int id2)
adds an edge between two entries in the catalog
Definition Catalog.h:338
void toStream(std::ostream &ss) const
serializes this object to a stream
Definition Catalog.h:179
const int endianId
Definition Catalog.h:35
const int versionMinor
Definition Catalog.h:33
const int versionMajor
Definition Catalog.h:32
const int versionPatch
Definition Catalog.h:34
std::vector< int > INT_VECT
Definition types.h:289
void streamRead(std::istream &ss, T &loc)
does a binary read of an object from a stream
Definition StreamOps.h:283
void streamWrite(std::ostream &ss, const T &val)
does a binary write of an object to a stream
Definition StreamOps.h:261
used by the BGL to set up the node properties in our graph
Definition Catalog.h:140
boost::vertex_property_tag kind
Definition Catalog.h:142
@ num
Definition Catalog.h:141