33#ifndef RDK_SUBSTRUCT_LIBRARY
34#define RDK_SUBSTRUCT_LIBRARY
53#include <boost/lexical_cast.hpp>
57using GeneralizedSubstruct::ExtendedQueryMol;
76 virtual boost::shared_ptr<ROMol>
getMol(
unsigned int)
const = 0;
79 virtual unsigned int size()
const = 0;
88 std::vector<boost::shared_ptr<ROMol>> mols;
94 mols.push_back(boost::make_shared<ROMol>(m));
98 boost::shared_ptr<ROMol>
getMol(
unsigned int idx)
const override {
99 if (idx >= mols.size()) {
105 unsigned int size()
const override {
106 return rdcast<unsigned int>(mols.size());
109 std::vector<boost::shared_ptr<ROMol>> &
getMols() {
return mols; }
110 const std::vector<boost::shared_ptr<ROMol>> &
getMols()
const {
return mols; }
122 std::vector<std::string> mols;
129 MolPickler::pickleMol(m, mols.back());
136 mols.push_back(pickle);
140 boost::shared_ptr<ROMol>
getMol(
unsigned int idx)
const override {
141 if (idx >= mols.size()) {
144 boost::shared_ptr<ROMol> mol(
new ROMol);
145 MolPickler::molFromPickle(mols[idx], mol.get());
149 unsigned int size()
const override {
150 return rdcast<unsigned int>(mols.size());
153 std::vector<std::string> &
getMols() {
return mols; }
154 const std::vector<std::string> &
getMols()
const {
return mols; }
168 std::vector<std::string> mols;
174 bool doIsomericSmiles =
true;
175 mols.push_back(MolToSmiles(m, doIsomericSmiles));
182 mols.push_back(smiles);
186 boost::shared_ptr<ROMol>
getMol(
unsigned int idx)
const override {
187 if (idx >= mols.size()) {
191 boost::shared_ptr<ROMol> mol(
SmilesToMol(mols[idx]));
195 unsigned int size()
const override {
196 return rdcast<unsigned int>(mols.size());
199 std::vector<std::string> &
getMols() {
return mols; }
200 const std::vector<std::string> &
getMols()
const {
return mols; }
219 std::vector<std::string> mols;
225 bool doIsomericSmiles =
true;
226 mols.push_back(MolToSmiles(m, doIsomericSmiles));
233 mols.push_back(smiles);
237 boost::shared_ptr<ROMol>
getMol(
unsigned int idx)
const override {
238 if (idx >= mols.size()) {
244 m->updatePropertyCache();
246 return boost::shared_ptr<ROMol>(m);
249 unsigned int size()
const override {
250 return rdcast<unsigned int>(mols.size());
253 std::vector<std::string> &
getMols() {
return mols; }
254 const std::vector<std::string> &
getMols()
const {
return mols; }
259 std::vector<ExplicitBitVect *> fps;
263 for (
size_t i = 0; i < fps.size(); ++i) {
268 virtual unsigned int size()
const {
return rdcast<unsigned int>(fps.size()); }
272 fps.push_back(makeFingerprint(m));
273 return rdcast<unsigned int>(fps.size() - 1);
281 return rdcast<unsigned int>(fps.size() - 1);
293 if (idx >= fps.size()) {
303 if (idx >= fps.size()) {
320 unsigned int numBits;
332 static const unsigned int DEFAULT_NUM_BITS = 2048;
333 return DEFAULT_NUM_BITS;
343 std::vector<unsigned int> *atomCounts =
nullptr;
345 const bool tautomericFingerprint =
true;
347 tautomericFingerprint);
360 virtual unsigned int addKey(
const std::string &) = 0;
364 virtual const std::string &
getKey(
unsigned int)
const = 0;
368 const std::vector<unsigned int> &indices)
const = 0;
370 virtual unsigned int size()
const = 0;
374 std::string propname;
375 std::vector<std::string> keys;
376 const std::string empty_string = {};
380 : propname(propname) {}
385 std::vector<std::string> &
getKeys() {
return keys; }
386 const std::vector<std::string> &
getKeys()
const {
return keys; }
390 if (m.getPropIfPresent(propname, key)) {
391 keys.push_back(std::move(key));
396 const static std::string prefix(
"LIBIDX-");
397 keys.emplace_back(prefix + boost::lexical_cast<std::string>(keys.size()));
399 return keys.size() - 1u;
402 unsigned int addKey(
const std::string &key)
override {
404 return keys.size() - 1u;
407 const std::string &
getKey(
unsigned int idx)
const override {
408 if (idx >= keys.size()) {
415 const std::vector<unsigned int> &indices)
const override {
416 std::vector<std::string> res;
417 std::transform(indices.begin(), indices.end(), std::back_inserter(res),
418 [=](
unsigned idx) { return keys.at(idx); });
421 unsigned int size()
const override {
return keys.size(); }
520 boost::shared_ptr<MolHolderBase> molholder;
521 boost::shared_ptr<FPHolderBase> fpholder;
522 boost::shared_ptr<KeyHolderBase> keyholder;
526 bool is_tautomerquery =
false;
527 std::vector<unsigned int> searchOrder;
534 mols(molholder.get()) {}
537 : molholder(std::move(molecules)),
540 mols(molholder.get()),
544 boost::shared_ptr<FPHolderBase> fingerprints)
545 : molholder(std::move(molecules)),
546 fpholder(std::move(fingerprints)),
548 mols(molholder.get()),
549 fps(fpholder.get()) {
550 if (fpholder.get() &&
552 is_tautomerquery =
true;
557 boost::shared_ptr<KeyHolderBase> keys)
558 : molholder(std::move(molecules)),
560 keyholder(std::move(keys)),
561 mols(molholder.get()),
563 if (fpholder.get() &&
565 is_tautomerquery =
true;
570 boost::shared_ptr<FPHolderBase> fingerprints,
571 boost::shared_ptr<KeyHolderBase> keys)
572 : molholder(std::move(molecules)),
573 fpholder(std::move(fingerprints)),
574 keyholder(std::move(keys)),
575 mols(molholder.get()),
576 fps(fpholder.get()) {
577 if (fpholder.get() &&
579 is_tautomerquery =
true;
586 mols(molholder.get()),
588 initFromString(pickle);
589 if (fpholder.get() &&
591 is_tautomerquery =
true;
603 boost::shared_ptr<FPHolderBase> &
getFpHolder() {
return fpholder; }
619 PRECONDITION(mols,
"Molecule holder NULL in SubstructLibrary");
642 if (!keyholder.get()) {
645 return *keyholder.get();
651 if (!keyholder.get()) {
654 return *keyholder.get();
679 template <
class Query>
681 bool recursionPossible =
true,
682 bool useChirality =
true,
683 bool useQueryQueryMatches =
false,
685 int maxResults = -1)
const {
690 return getMatches(query, 0, size(), params, numThreads, maxResults);
693 template <
class Query>
697 int maxResults = -1)
const {
698 return getMatches(query, 0, size(), params, numThreads, maxResults);
716 template <
class Query>
718 const Query &query,
unsigned int startIdx,
unsigned int endIdx,
719 bool recursionPossible =
true,
bool useChirality =
true,
720 bool useQueryQueryMatches =
false,
int numThreads = -1,
721 int maxResults = -1)
const {
726 return getMatches(query, startIdx, endIdx, params, numThreads, maxResults);
730 unsigned int startIdx,
734 int maxResults = -1)
const;
737 unsigned int startIdx,
741 int maxResults = -1)
const;
744 unsigned int startIdx,
748 int maxResults = -1)
const;
751 unsigned int startIdx,
755 int maxResults = -1)
const;
769 template <
class Query>
770 unsigned int countMatches(
const Query &query,
bool recursionPossible =
true,
771 bool useChirality =
true,
772 bool useQueryQueryMatches =
false,
773 int numThreads = -1)
const {
778 return countMatches(query, 0, size(), params, numThreads);
781 template <
class Query>
784 int numThreads = -1)
const {
785 return countMatches(query, 0, size(), params, numThreads);
804 template <
class Query>
806 unsigned int endIdx,
bool recursionPossible =
true,
807 bool useChirality =
true,
808 bool useQueryQueryMatches =
false,
809 int numThreads = -1)
const {
814 return countMatches(query, startIdx, endIdx, params, numThreads);
821 int numThreads = -1)
const;
826 int numThreads = -1)
const;
831 int numThreads = -1)
const;
834 unsigned int startIdx,
unsigned int endIdx,
836 int numThreads = -1)
const;
850 template <
class Query>
851 bool hasMatch(
const Query &query,
bool recursionPossible =
true,
852 bool useChirality =
true,
bool useQueryQueryMatches =
false,
853 int numThreads = -1)
const {
858 return hasMatch(query, 0, size(), params, numThreads);
861 template <
class Query>
863 int numThreads = -1)
const {
864 return hasMatch(query, 0, size(), params, numThreads);
879 template <
class Query>
880 bool hasMatch(
const Query &query,
unsigned int startIdx,
unsigned int endIdx,
881 bool recursionPossible =
true,
bool useChirality =
true,
882 bool useQueryQueryMatches =
false,
int numThreads = -1)
const {
887 return hasMatch(query, startIdx, endIdx, params, numThreads);
890 bool hasMatch(
const ROMol &query,
unsigned int startIdx,
unsigned int endIdx,
892 int numThreads = -1)
const;
896 int numThreads = -1)
const;
900 int numThreads = -1)
const;
904 int numThreads = -1)
const;
910 boost::shared_ptr<ROMol>
getMol(
unsigned int idx)
const {
912 PRECONDITION(mols,
"molholder is null in SubstructLibrary");
923 PRECONDITION(mols,
"molholder is null in SubstructLibrary");
929 PRECONDITION(mols,
"molholder is null in SubstructLibrary");
930 return rdcast<unsigned int>(molholder->size());
935 for (
const auto idx : order) {
936 if (idx >= mols->
size()) {
950 is_tautomerquery =
false;
951 mols = molholder.get();
952 fps = fpholder.get();
954 is_tautomerquery =
true;
Contains general bit-comparison and similarity operations.
RDKIT_DATASTRUCTS_EXPORT bool AllProbeBitsMatch(const char *probe, const char *ref)
#define PRECONDITION(expr, mess)
Defines a class for managing bundles of molecules.
pulls in the core RDKit functionality
a class for bit vectors that are densely occupied
Class to allow us to throw an IndexError from C++ and have it make it back to Python.
Concrete class that holds binary cached molecules in memory.
std::vector< std::string > & getMols()
unsigned int size() const override
Get the current library size.
unsigned int addMol(const ROMol &m) override
boost::shared_ptr< ROMol > getMol(unsigned int idx) const override
const std::vector< std::string > & getMols() const
unsigned int addBinary(const std::string &pickle)
Concrete class that holds smiles strings in memory.
std::vector< std::string > & getMols()
unsigned int addSmiles(const std::string &smiles)
const std::vector< std::string > & getMols() const
unsigned int addMol(const ROMol &m) override
boost::shared_ptr< ROMol > getMol(unsigned int idx) const override
unsigned int size() const override
Get the current library size.
Concrete class that holds trusted smiles strings in memory.
std::vector< std::string > & getMols()
unsigned int addSmiles(const std::string &smiles)
unsigned int addMol(const ROMol &m) override
const std::vector< std::string > & getMols() const
boost::shared_ptr< ROMol > getMol(unsigned int idx) const override
unsigned int size() const override
Get the current library size.
CachedTrustedSmilesMolHolder()
Base FPI for the fingerprinter used to rule out impossible matches.
std::vector< ExplicitBitVect * > & getFingerprints()
unsigned int addMol(const ROMol &m)
Adds a molecule to the fingerprinter.
virtual unsigned int size() const
const std::vector< ExplicitBitVect * > & getFingerprints() const
bool passesFilter(unsigned int idx, const ExplicitBitVect &query) const
Return false if a substructure search can never match the molecule.
unsigned int addFingerprint(ExplicitBitVect *v)
const ExplicitBitVect & getFingerprint(unsigned int idx) const
unsigned int addFingerprint(const ExplicitBitVect &v)
virtual ExplicitBitVect * makeFingerprint(const ROMol &m) const =0
KeyFromPropHolder(const std::string &propname="_Name")
const std::string & getKey(unsigned int idx) const override
unsigned int addKey(const std::string &key) override
unsigned int size() const override
Get the current keyholder size.
const std::vector< std::string > & getKeys() const
std::vector< std::string > & getKeys()
std::vector< std::string > getKeys(const std::vector< unsigned int > &indices) const override
const std::string & getPropName() const
unsigned int addMol(const ROMol &m) override
Add a key to the database getting it from the molecule.
std::string & getPropName()
virtual std::vector< std::string > getKeys(const std::vector< unsigned int > &indices) const =0
virtual const std::string & getKey(unsigned int) const =0
virtual unsigned int addMol(const ROMol &m)=0
Add a key to the database getting it from the molecule.
virtual unsigned int size() const =0
Get the current keyholder size.
virtual unsigned int addKey(const std::string &)=0
MolBundle contains a collection of related ROMols.
Base class API for holding molecules to substructure search.
virtual unsigned int addMol(const ROMol &m)=0
virtual unsigned int size() const =0
Get the current library size.
virtual boost::shared_ptr< ROMol > getMol(unsigned int) const =0
Concrete class that holds molecules in memory.
unsigned int addMol(const ROMol &m) override
const std::vector< boost::shared_ptr< ROMol > > & getMols() const
unsigned int size() const override
Get the current library size.
boost::shared_ptr< ROMol > getMol(unsigned int idx) const override
std::vector< boost::shared_ptr< ROMol > > & getMols()
PatternHolder(unsigned int numBits)
const unsigned int & getNumBits() const
unsigned int & getNumBits()
static unsigned int defaultNumBits()
ExplicitBitVect * makeFingerprint(const ROMol &m) const override
Caller owns the vector!
RWMol is a molecule class that is intended to be edited.
Substructure Search a library of molecules.
unsigned int countMatches(const Query &query, bool recursionPossible=true, bool useChirality=true, bool useQueryQueryMatches=false, int numThreads=-1) const
Return the number of matches for the query.
unsigned int countMatches(const ExtendedQueryMol &query, unsigned int startIdx, unsigned int endIdx, const SubstructMatchParameters &params, int numThreads=-1) const
overload
unsigned int addMol(const ROMol &mol)
Add a molecule to the library.
std::vector< unsigned int > getMatches(const ROMol &query, unsigned int startIdx, unsigned int endIdx, const SubstructMatchParameters &params, int numThreads=-1, int maxResults=-1) const
overload
void initFromStream(std::istream &ss)
initializes from a stream pickle
KeyHolderBase & getKeys()
Get the underlying key holder implementation.
std::vector< unsigned int > getMatches(const ExtendedQueryMol &query, unsigned int startIdx, unsigned int endIdx, const SubstructMatchParameters &params, int numThreads=-1, int maxResults=-1) const
overload
boost::shared_ptr< ROMol > getMol(unsigned int idx) const
Returns the molecule at the given index.
boost::shared_ptr< MolHolderBase > & getMolHolder()
Get the underlying molecule holder implementation.
bool hasMatch(const Query &query, bool recursionPossible=true, bool useChirality=true, bool useQueryQueryMatches=false, int numThreads=-1) const
Returns true if any match exists for the query.
SubstructLibrary(boost::shared_ptr< MolHolderBase > molecules, boost::shared_ptr< FPHolderBase > fingerprints, boost::shared_ptr< KeyHolderBase > keys)
unsigned int countMatches(const Query &query, unsigned int startIdx, unsigned int endIdx, bool recursionPossible=true, bool useChirality=true, bool useQueryQueryMatches=false, int numThreads=-1) const
Return the number of matches for the query.
std::vector< unsigned int > getMatches(const MolBundle &query, unsigned int startIdx, unsigned int endIdx, const SubstructMatchParameters &params, int numThreads=-1, int maxResults=-1) const
overload
std::vector< unsigned int > getMatches(const Query &query, const SubstructMatchParameters &params, int numThreads=-1, int maxResults=-1) const
overload
boost::shared_ptr< FPHolderBase > & getFpHolder()
Get the underlying fingerprint holder implementation.
std::vector< unsigned int > getMatches(const Query &query, bool recursionPossible=true, bool useChirality=true, bool useQueryQueryMatches=false, int numThreads=-1, int maxResults=-1) const
Get the matching indices for the query.
const MolHolderBase & getMolecules() const
void initFromString(const std::string &text)
initializes from a string pickle
unsigned int countMatches(const Query &query, const SubstructMatchParameters &params, int numThreads=-1) const
overload
const KeyHolderBase & getKeys() const
Get the underlying key holder implementation.
const boost::shared_ptr< KeyHolderBase > & getKeyHolder() const
Get the underlying key holder implementation.
bool hasMatch(const ExtendedQueryMol &query, unsigned int startIdx, unsigned int endIdx, const SubstructMatchParameters &params, int numThreads=-1) const
overload
bool hasMatch(const ROMol &query, unsigned int startIdx, unsigned int endIdx, const SubstructMatchParameters &params, int numThreads=-1) const
overload
unsigned int countMatches(const MolBundle &query, unsigned int startIdx, unsigned int endIdx, const SubstructMatchParameters &params, int numThreads=-1) const
overload
FPHolderBase & getFingerprints()
Get the underlying fingerprint implementation.
void setSearchOrder(const std::vector< unsigned int > &order)
does error checking
bool hasMatch(const Query &query, unsigned int startIdx, unsigned int endIdx, bool recursionPossible=true, bool useChirality=true, bool useQueryQueryMatches=false, int numThreads=-1) const
const FPHolderBase & getFingerprints() const
SubstructLibrary(boost::shared_ptr< MolHolderBase > molecules, boost::shared_ptr< KeyHolderBase > keys)
bool hasMatch(const MolBundle &query, unsigned int startIdx, unsigned int endIdx, const SubstructMatchParameters &params, int numThreads=-1) const
overload
unsigned int countMatches(const ROMol &query, unsigned int startIdx, unsigned int endIdx, const SubstructMatchParameters &params, int numThreads=-1) const
overload
bool hasMatch(const Query &query, const SubstructMatchParameters &params, int numThreads=-1) const
overload
SubstructLibrary(boost::shared_ptr< MolHolderBase > molecules, boost::shared_ptr< FPHolderBase > fingerprints)
boost::shared_ptr< KeyHolderBase > & getKeyHolder()
Get the underlying key holder implementation.
bool hasMatch(const TautomerQuery &query, unsigned int startIdx, unsigned int endIdx, const SubstructMatchParameters &params, int numThreads=-1) const
overload
const std::vector< unsigned int > & getSearchOrder() const
void resetHolders()
access required for serialization
unsigned int size() const
return the number of molecules in the library
std::vector< unsigned int > getMatches(const Query &query, unsigned int startIdx, unsigned int endIdx, bool recursionPossible=true, bool useChirality=true, bool useQueryQueryMatches=false, int numThreads=-1, int maxResults=-1) const
Get the matching indices for the query between the given indices.
SubstructLibrary(boost::shared_ptr< MolHolderBase > molecules)
SubstructLibrary(const std::string &pickle)
std::string Serialize() const
returns a string with a serialized (pickled) representation
unsigned int countMatches(const TautomerQuery &query, unsigned int startIdx, unsigned int endIdx, const SubstructMatchParameters &params, int numThreads=-1) const
overload
std::vector< unsigned int > getMatches(const TautomerQuery &query, unsigned int startIdx, unsigned int endIdx, const SubstructMatchParameters &params, int numThreads=-1, int maxResults=-1) const
overload
void toStream(std::ostream &ss) const
serializes (pickles) to a stream
boost::shared_ptr< ROMol > operator[](unsigned int idx)
Returns the molecule at the given index.
const boost::shared_ptr< FPHolderBase > & getFpHolder() const
Get the underlying fingerprint holder implementation.
const boost::shared_ptr< MolHolderBase > & getMolHolder() const
std::vector< unsigned int > & getSearchOrder()
ExplicitBitVect * makeFingerprint(const ROMol &m) const override
Caller owns the vector!
TautomerPatternHolder(unsigned int numBits)
Class to allow us to throw a ValueError from C++ and have it make it back to Python.
#define RDKIT_SUBSTRUCTLIBRARY_EXPORT
RDKit::RWMol * SmilesToMol(const std::string &smi, const SmilesParserParams &ps)
RDKIT_FINGERPRINTS_EXPORT ExplicitBitVect * PatternFingerprintMol(const ROMol &mol, unsigned int fpSize=2048, std::vector< unsigned int > *atomCounts=nullptr, ExplicitBitVect *setOnlyBits=nullptr, bool tautomericFingerprint=false)
Generates a topological fingerprint for a molecule using a series of pre-defined structural patterns.
RDKIT_SUBSTRUCTLIBRARY_EXPORT bool SubstructLibraryCanSerialize()
bool recursionPossible
Allow recursive queries.
bool useQueryQueryMatches