RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
EvenSamplePairs.h
Go to the documentation of this file.
1//
2// Copyright (c) 2016, Novartis Institutes for BioMedical Research Inc.
3// All rights reserved.
4//
5// Redistribution and use in source and binary forms, with or without
6// modification, are permitted provided that the following conditions are
7// met:
8//
9// * Redistributions of source code must retain the above copyright
10// notice, this list of conditions and the following disclaimer.
11// * Redistributions in binary form must reproduce the above
12// copyright notice, this list of conditions and the following
13// disclaimer in the documentation and/or other materials provided
14// with the distribution.
15// * Neither the name of Novartis Institutes for BioMedical Research Inc.
16// nor the names of its contributors may be used to endorse or promote
17// products derived from this software without specific prior written
18// permission.
19//
20// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31//
32
33#include <RDGeneral/export.h>
34#ifndef RGROUP_EVEN_SAMPLE_H
35#define RGROUP_EVEN_SAMPLE_H
36
38#ifdef RDK_USE_BOOST_SERIALIZATION
39#include <boost/serialization/set.hpp>
40#endif
41#include <cstdint>
42
43namespace RDKit {
44//! EvenSamplePairsStrategy
45/*! Randomly sample Pairs evenly from a collection of building blocks
46 This is a good strategy for choosing a relatively small selection
47 of building blocks from a larger set. As the amount of work needed
48 to retrieve the next evenly sampled building block grows with the
49 number of samples, this method performs progressively worse as the
50 number of samples gets larger.
51
52 See EnumerationStrategyBase for more details.
53*/
54
57 boost::uint64_t m_numPermutationsProcessed{};  // value returned by getPermutationIdx()
58 // Sampling bookkeeping; populated by code not shown in this listing — NOTE(review): confirm semantics in the .cpp
59 std::vector<boost::int64_t> used_count;
60 std::vector<std::vector<boost::uint64_t>> var_used;
61 std::vector<std::vector<boost::uint64_t>> pair_used;
62 std::vector<std::vector<boost::uint64_t>> pair_counts;
63 std::set<boost::uint64_t> selected;  // NOTE(review): presumably the seeds already emitted — confirm
64
65 boost::uint64_t seed{}; // last seed for permutation (starts at 0)
66 boost::uint64_t M{}, a{}, b{}; // random number stuff
67 boost::uint64_t nslack{}, min_nslack{};
68 boost::uint64_t rejected_period{}, rejected_unique{};  // rejection tallies, all starting at 0
69 boost::uint64_t rejected_slack_condition{}, rejected_bb_sampling_condition{};
70
71 public:
74
75 used_count(),
76 var_used(),
77 pair_used(),
78 pair_counts(),
79 selected() {}
80
83 m_numPermutationsProcessed(rhs.m_numPermutationsProcessed),
84 used_count(rhs.used_count),
85 var_used(rhs.var_used),
86 pair_used(rhs.pair_used),
87 pair_counts(rhs.pair_counts),
88 selected(rhs.selected),
89 seed(rhs.seed),
90 M(rhs.M),
91 a(rhs.a),
92 b(rhs.b),
93 nslack(rhs.nslack),
94 min_nslack(rhs.min_nslack),
95 rejected_period(rhs.rejected_period),
96 rejected_unique(rhs.rejected_unique),
97 rejected_slack_condition(rhs.rejected_slack_condition),
98 rejected_bb_sampling_condition(rhs.rejected_bb_sampling_condition) {}
99
100 const char *type() const override { return "EvenSamplePairsStrategy"; }  // fixed string literal naming this strategy class
101
102 //! This is a class for enumerating RGroups by evenly sampling pairs of
103 //! reagents.
104 /*!
105 basic usage:
106
107 \verbatim
108 std::vector<MOL_SPTR_VECT> bbs;
109 bbs.push_back( bbs_for_reactants_1 );
110 bbs.push_back( bbs_for_reactants_2 );
111
112 EvenSamplePairsStrategy rgroups;
113 rgroups.initialize(rxn, bbs);
114 for(boost::uint64_t i=0; i<num_samples && rgroups; ++i) {
115 MOL_SPTR_VECT rvect = getReactantsFromRGroups(bbs, rgroups.next());
116 std::vector<MOL_SPTR_VECT> lprops = rxn.RunReactants(rvect);
117 ...
118 }
119 \endverbatim
120 */
121 using EnumerationStrategyBase::initialize;
122
124 const EnumerationTypes::BBS &) override;
125
126 //! The current permutation {r1, r2, ...}
128
129 boost::uint64_t getPermutationIdx() const override {  // how many permutations this strategy has processed
130 return m_numPermutationsProcessed;  // the counter is maintained outside this header
131 }
132
133 operator bool() const override { return true; }  // always true: this conversion never signals exhaustion, so callers must bound their own loop
134
135 EnumerationStrategyBase *copy() const override {  // clone via the copy constructor, returned through the base-class pointer
136 return new EvenSamplePairsStrategy(*this);  // NOTE(review): raw heap pointer — caller presumably owns and deletes it; confirm base-class contract
137 }
138
139 std::string stats() const;  // declaration only, defined outside this header. NOTE(review): presumably summarizes the rejected_* counters — confirm
140
141 private:
142 friend class boost::serialization::access;
143
144 // Unpack a packed mixed-radix index into m_permutation, filling the last position first
145 const EnumerationTypes::RGROUPS &decode(boost::uint64_t seed) {
146 for (auto pos = m_permutationSizes.size(); pos-- > 0;) {
147 m_permutation[pos] = seed % m_permutationSizes[pos];
148 seed /= m_permutationSizes[pos];
149 }
150 return m_permutation;
151 }
152
153 bool try_add(boost::uint64_t seed);  // declaration only, defined outside this header. NOTE(review): presumably true when seed is accepted as a new sample — confirm
154
155 public:
156#ifdef RDK_USE_BOOST_SERIALIZATION
157 template <class Archive>  // Boost.Serialization hook; the same body runs for any Archive type
158 void serialize(Archive &ar, const unsigned int /*version*/) {  // the version argument is accepted but not used
159 // Base-class subobject goes first, then this class's members in the order below
160 ar &boost::serialization::base_object<EnumerationStrategyBase>(*this);
161 ar & m_numPermutationsProcessed;
162 ar & used_count;
163 ar & var_used;
164 ar & pair_used;
165 ar & pair_counts;
166 ar & selected;
167 // last seed for permutation
168 ar & seed;
169 // random number parameters
170 ar & M;
171 ar & a;
172 ar & b;
173 // slack values and rejection counters
174 ar & nslack;
175 ar & min_nslack;
176 ar & rejected_period;
177 ar & rejected_unique;
178 ar & rejected_slack_condition;
179 ar & rejected_bb_sampling_condition;
180 }  // NOTE(review): the field order above presumably fixes the archive layout — keep it stable for existing archives
181#endif
182};
183} // namespace RDKit
184
185#ifdef RDK_USE_BOOST_SERIALIZATION
186BOOST_CLASS_VERSION(RDKit::EvenSamplePairsStrategy, 1)
187#endif
188
189#endif
This is a class for storing and applying general chemical reactions.
Definition Reaction.h:121
EvenSamplePairsStrategy.
EvenSamplePairsStrategy(const EvenSamplePairsStrategy &rhs)
void initializeStrategy(const ChemicalReaction &, const EnumerationTypes::BBS &) override
std::string stats() const
EnumerationStrategyBase * copy() const override
copy the enumeration strategy complete with current state
const EnumerationTypes::RGROUPS & next() override
The current permutation {r1, r2, ...}.
const char * type() const override
boost::uint64_t getPermutationIdx() const override
Returns how many permutations have been processed by this strategy.
#define RDKIT_CHEMREACTIONS_EXPORT
Definition export.h:49
std::vector< boost::uint64_t > RGROUPS
std::vector< MOL_SPTR_VECT > BBS
Std stuff.