RDKit
Open-source cheminformatics and machine learning.
|
Substructure Search a library of molecules. More...
#include <SubstructLibrary.h>
Public Member Functions | |
SubstructLibrary () | |
SubstructLibrary (boost::shared_ptr< MolHolderBase > molecules) | |
SubstructLibrary (boost::shared_ptr< MolHolderBase > molecules, boost::shared_ptr< FPHolderBase > fingerprints) | |
SubstructLibrary (boost::shared_ptr< MolHolderBase > molecules, boost::shared_ptr< KeyHolderBase > keys) | |
SubstructLibrary (boost::shared_ptr< MolHolderBase > molecules, boost::shared_ptr< FPHolderBase > fingerprints, boost::shared_ptr< KeyHolderBase > keys) | |
SubstructLibrary (const std::string &pickle) | |
boost::shared_ptr< MolHolderBase > & | getMolHolder () |
Get the underlying molecule holder implementation. | |
const boost::shared_ptr< MolHolderBase > & | getMolHolder () const |
boost::shared_ptr< FPHolderBase > & | getFpHolder () |
Get the underlying fingerprint holder implementation. | |
const boost::shared_ptr< FPHolderBase > & | getFpHolder () const |
Get the underlying fingerprint holder implementation. | |
boost::shared_ptr< KeyHolderBase > & | getKeyHolder () |
Get the underlying key holder implementation. | |
const boost::shared_ptr< KeyHolderBase > & | getKeyHolder () const |
Get the underlying key holder implementation. | |
const MolHolderBase & | getMolecules () const |
FPHolderBase & | getFingerprints () |
Get the underlying fingerprint implementation. | |
const FPHolderBase & | getFingerprints () const |
KeyHolderBase & | getKeys () |
Get the underlying key holder implementation. | |
const KeyHolderBase & | getKeys () const |
Get the underlying key holder implementation. | |
unsigned int | addMol (const ROMol &mol) |
Add a molecule to the library. | |
template<class Query > | |
std::vector< unsigned int > | getMatches (const Query &query, bool recursionPossible=true, bool useChirality=true, bool useQueryQueryMatches=false, int numThreads=-1, int maxResults=-1) const |
Get the matching indices for the query. | |
template<class Query > | |
std::vector< unsigned int > | getMatches (const Query &query, const SubstructMatchParameters &params, int numThreads=-1, int maxResults=-1) const |
overload | |
template<class Query > | |
std::vector< unsigned int > | getMatches (const Query &query, unsigned int startIdx, unsigned int endIdx, bool recursionPossible=true, bool useChirality=true, bool useQueryQueryMatches=false, int numThreads=-1, int maxResults=-1) const |
Get the matching indices for the query between the given indices. | |
std::vector< unsigned int > | getMatches (const ROMol &query, unsigned int startIdx, unsigned int endIdx, const SubstructMatchParameters &params, int numThreads=-1, int maxResults=-1) const |
overload | |
std::vector< unsigned int > | getMatches (const MolBundle &query, unsigned int startIdx, unsigned int endIdx, const SubstructMatchParameters &params, int numThreads=-1, int maxResults=-1) const |
overload | |
std::vector< unsigned int > | getMatches (const TautomerQuery &query, unsigned int startIdx, unsigned int endIdx, const SubstructMatchParameters &params, int numThreads=-1, int maxResults=-1) const |
overload | |
std::vector< unsigned int > | getMatches (const ExtendedQueryMol &query, unsigned int startIdx, unsigned int endIdx, const SubstructMatchParameters &params, int numThreads=-1, int maxResults=-1) const |
overload | |
template<class Query > | |
unsigned int | countMatches (const Query &query, bool recursionPossible=true, bool useChirality=true, bool useQueryQueryMatches=false, int numThreads=-1) const |
Return the number of matches for the query. | |
template<class Query > | |
unsigned int | countMatches (const Query &query, const SubstructMatchParameters &params, int numThreads=-1) const |
overload | |
template<class Query > | |
unsigned int | countMatches (const Query &query, unsigned int startIdx, unsigned int endIdx, bool recursionPossible=true, bool useChirality=true, bool useQueryQueryMatches=false, int numThreads=-1) const |
Return the number of matches for the query. | |
unsigned int | countMatches (const ROMol &query, unsigned int startIdx, unsigned int endIdx, const SubstructMatchParameters &params, int numThreads=-1) const |
overload | |
unsigned int | countMatches (const TautomerQuery &query, unsigned int startIdx, unsigned int endIdx, const SubstructMatchParameters &params, int numThreads=-1) const |
overload | |
unsigned int | countMatches (const MolBundle &query, unsigned int startIdx, unsigned int endIdx, const SubstructMatchParameters &params, int numThreads=-1) const |
overload | |
unsigned int | countMatches (const ExtendedQueryMol &query, unsigned int startIdx, unsigned int endIdx, const SubstructMatchParameters &params, int numThreads=-1) const |
overload | |
template<class Query > | |
bool | hasMatch (const Query &query, bool recursionPossible=true, bool useChirality=true, bool useQueryQueryMatches=false, int numThreads=-1) const |
Returns true if any match exists for the query. | |
template<class Query > | |
bool | hasMatch (const Query &query, const SubstructMatchParameters &params, int numThreads=-1) const |
overload | |
template<class Query > | |
bool | hasMatch (const Query &query, unsigned int startIdx, unsigned int endIdx, bool recursionPossible=true, bool useChirality=true, bool useQueryQueryMatches=false, int numThreads=-1) const |
bool | hasMatch (const ROMol &query, unsigned int startIdx, unsigned int endIdx, const SubstructMatchParameters &params, int numThreads=-1) const |
overload | |
bool | hasMatch (const TautomerQuery &query, unsigned int startIdx, unsigned int endIdx, const SubstructMatchParameters &params, int numThreads=-1) const |
overload | |
bool | hasMatch (const MolBundle &query, unsigned int startIdx, unsigned int endIdx, const SubstructMatchParameters &params, int numThreads=-1) const |
overload | |
bool | hasMatch (const ExtendedQueryMol &query, unsigned int startIdx, unsigned int endIdx, const SubstructMatchParameters &params, int numThreads=-1) const |
overload | |
boost::shared_ptr< ROMol > | getMol (unsigned int idx) const |
Returns the molecule at the given index. | |
boost::shared_ptr< ROMol > | operator[] (unsigned int idx) |
Returns the molecule at the given index. | |
unsigned int | size () const |
return the number of molecules in the library | |
void | setSearchOrder (const std::vector< unsigned int > &order) |
does error checking | |
const std::vector< unsigned int > & | getSearchOrder () const |
std::vector< unsigned int > & | getSearchOrder () |
void | resetHolders () |
access required for serialization | |
void | toStream (std::ostream &ss) const |
serializes (pickles) to a stream | |
std::string | Serialize () const |
returns a string with a serialized (pickled) representation | |
void | initFromStream (std::istream &ss) |
initializes from a stream pickle | |
void | initFromString (const std::string &text) |
initializes from a string pickle | |
Performs substructure searches over a library of molecules.
This class allows for multithreaded substructure searches of large datasets.
The implementations can use fingerprints to speed up searches and have molecules cached as binary forms to reduce memory usage.
basic usage:
Using different mol holders and pattern fingerprints.
Cached molecule holders create molecules on demand. There are currently three styles of cached molecules.
CachedMolHolder: stores molecules in the rdkit binary format. CachedSmilesMolHolder: stores molecules in smiles format. CachedTrustedSmilesMolHolder: stores molecules in smiles format.
The CachedTrustedSmilesMolHolder is made to add molecules from a trusted source. This makes the basic assumption that RDKit was used to sanitize and canonicalize the smiles string. In practice this is considerably faster than using arbitrary smiles strings since certain assumptions can be made. Molecules generated from trusted smiles do not have ring information (although this is created in the molecule being searched if necessary).
When loading from external data, as opposed to using the "addMol" API, care must be taken to ensure that the pattern fingerprints and smiles are synchronized.
Each pattern holder has an API point for making its fingerprint. This is useful to ensure that the pattern stored in the database will be compatible with the patterns made when analyzing queries.
Finally, using the KeyFromPropHolder will store user ids or keys. By default, it uses RDKit's default _Name prop, but can be changed to any property.
Definition at line 519 of file SubstructLibrary.h.
|
inline |
Definition at line 530 of file SubstructLibrary.h.
|
inline |
Definition at line 536 of file SubstructLibrary.h.
|
inline |
Definition at line 543 of file SubstructLibrary.h.
|
inline |
Definition at line 556 of file SubstructLibrary.h.
|
inline |
Definition at line 569 of file SubstructLibrary.h.
|
inline |
Definition at line 583 of file SubstructLibrary.h.
Add a molecule to the library.
mol | Molecule to add |
returns index for the molecule in the library
unsigned int RDKit::SubstructLibrary::countMatches | ( | const ExtendedQueryMol & | query, |
unsigned int | startIdx, | ||
unsigned int | endIdx, | ||
const SubstructMatchParameters & | params, | ||
int | numThreads = -1 |
||
) | const |
overload
unsigned int RDKit::SubstructLibrary::countMatches | ( | const MolBundle & | query, |
unsigned int | startIdx, | ||
unsigned int | endIdx, | ||
const SubstructMatchParameters & | params, | ||
int | numThreads = -1 |
||
) | const |
overload
|
inline |
Return the number of matches for the query.
query | Molecule or Tautomer Query to match against molecules |
recursionPossible | flags whether or not recursive matches are allowed [default true] |
useChirality | use atomic CIP codes as part of the comparison [default true] |
useQueryQueryMatches | if set, the contents of atom and bond queries will be used as part of the matching [default false] |
numThreads | If -1 use all available processors [default -1] |
Definition at line 770 of file SubstructLibrary.h.
References RDKit::SubstructMatchParameters::recursionPossible, RDKit::SubstructMatchParameters::useChirality, and RDKit::SubstructMatchParameters::useQueryQueryMatches.
|
inline |
overload
Definition at line 782 of file SubstructLibrary.h.
|
inline |
Return the number of matches for the query.
Return the number of matches for the query between the given indices
query | Query to match against molecules |
startIdx | Start index of the search |
endIdx | Ending idx (non-inclusive) of the search. |
recursionPossible | flags whether or not recursive matches are allowed [default true] |
useChirality | use atomic CIP codes as part of the comparison [default true] |
useQueryQueryMatches | if set, the contents of atom and bond queries will be used as part of the matching [default false] |
numThreads | If -1 use all available processors [default -1] |
Definition at line 805 of file SubstructLibrary.h.
References RDKit::SubstructMatchParameters::recursionPossible, RDKit::SubstructMatchParameters::useChirality, and RDKit::SubstructMatchParameters::useQueryQueryMatches.
unsigned int RDKit::SubstructLibrary::countMatches | ( | const ROMol & | query, |
unsigned int | startIdx, | ||
unsigned int | endIdx, | ||
const SubstructMatchParameters & | params, | ||
int | numThreads = -1 |
||
) | const |
overload
unsigned int RDKit::SubstructLibrary::countMatches | ( | const TautomerQuery & | query, |
unsigned int | startIdx, | ||
unsigned int | endIdx, | ||
const SubstructMatchParameters & | params, | ||
int | numThreads = -1 |
||
) | const |
overload
|
inline |
Get the underlying fingerprint implementation.
Throws a value error if no fingerprints have been set
Definition at line 625 of file SubstructLibrary.h.
|
inline |
Definition at line 632 of file SubstructLibrary.h.
|
inline |
Get the underlying fingerprint holder implementation.
Definition at line 603 of file SubstructLibrary.h.
|
inline |
Get the underlying fingerprint holder implementation.
Definition at line 606 of file SubstructLibrary.h.
|
inline |
Get the underlying key holder implementation.
Definition at line 611 of file SubstructLibrary.h.
|
inline |
Get the underlying key holder implementation.
Definition at line 614 of file SubstructLibrary.h.
|
inline |
Get the underlying key holder implementation.
Throws a value error if no key holder has been set
Definition at line 641 of file SubstructLibrary.h.
|
inline |
Get the underlying key holder implementation.
Throws a value error if no key holder has been set
Definition at line 650 of file SubstructLibrary.h.
std::vector< unsigned int > RDKit::SubstructLibrary::getMatches | ( | const ExtendedQueryMol & | query, |
unsigned int | startIdx, | ||
unsigned int | endIdx, | ||
const SubstructMatchParameters & | params, | ||
int | numThreads = -1 , |
||
int | maxResults = -1 |
||
) | const |
overload
std::vector< unsigned int > RDKit::SubstructLibrary::getMatches | ( | const MolBundle & | query, |
unsigned int | startIdx, | ||
unsigned int | endIdx, | ||
const SubstructMatchParameters & | params, | ||
int | numThreads = -1 , |
||
int | maxResults = -1 |
||
) | const |
overload
|
inline |
Get the matching indices for the query.
query | Query or Tautomer Query to match against molecules |
recursionPossible | flags whether or not recursive matches are allowed [default true] |
useChirality | use atomic CIP codes as part of the comparison [default true] |
useQueryQueryMatches | if set, the contents of atom and bond queries will be used as part of the matching [default false] |
numThreads | If -1 use all available processors [default -1] |
maxResults | Maximum results to return, -1 means return all [default -1] |
Definition at line 680 of file SubstructLibrary.h.
References RDKit::SubstructMatchParameters::recursionPossible, RDKit::SubstructMatchParameters::useChirality, and RDKit::SubstructMatchParameters::useQueryQueryMatches.
|
inline |
overload
Definition at line 694 of file SubstructLibrary.h.
|
inline |
Get the matching indices for the query between the given indices.
query | Query to match against molecules |
startIdx | Start index of the search |
endIdx | Ending idx (non-inclusive) of the search. |
recursionPossible | flags whether or not recursive matches are allowed [default true] |
useChirality | use atomic CIP codes as part of the comparison [default true] |
useQueryQueryMatches | if set, the contents of atom and bond queries will be used as part of the matching [default false] |
numThreads | If -1 use all available processors [default -1] |
maxResults | Maximum results to return, -1 means return all [default -1] |
Definition at line 717 of file SubstructLibrary.h.
References RDKit::SubstructMatchParameters::recursionPossible, RDKit::SubstructMatchParameters::useChirality, and RDKit::SubstructMatchParameters::useQueryQueryMatches.
std::vector< unsigned int > RDKit::SubstructLibrary::getMatches | ( | const ROMol & | query, |
unsigned int | startIdx, | ||
unsigned int | endIdx, | ||
const SubstructMatchParameters & | params, | ||
int | numThreads = -1 , |
||
int | maxResults = -1 |
||
) | const |
overload
std::vector< unsigned int > RDKit::SubstructLibrary::getMatches | ( | const TautomerQuery & | query, |
unsigned int | startIdx, | ||
unsigned int | endIdx, | ||
const SubstructMatchParameters & | params, | ||
int | numThreads = -1 , |
||
int | maxResults = -1 |
||
) | const |
overload
Returns the molecule at the given index.
idx | Index of the molecule in the library (n.b. could contain null) |
Definition at line 910 of file SubstructLibrary.h.
References RDKit::MolHolderBase::getMol(), and PRECONDITION.
|
inline |
Definition at line 618 of file SubstructLibrary.h.
References PRECONDITION.
|
inline |
Get the underlying molecule holder implementation.
Definition at line 596 of file SubstructLibrary.h.
|
inline |
Definition at line 598 of file SubstructLibrary.h.
Definition at line 947 of file SubstructLibrary.h.
Definition at line 943 of file SubstructLibrary.h.
bool RDKit::SubstructLibrary::hasMatch | ( | const ExtendedQueryMol & | query, |
unsigned int | startIdx, | ||
unsigned int | endIdx, | ||
const SubstructMatchParameters & | params, | ||
int | numThreads = -1 |
||
) | const |
overload
bool RDKit::SubstructLibrary::hasMatch | ( | const MolBundle & | query, |
unsigned int | startIdx, | ||
unsigned int | endIdx, | ||
const SubstructMatchParameters & | params, | ||
int | numThreads = -1 |
||
) | const |
overload
|
inline |
Returns true if any match exists for the query.
query | Molecule or Tautomer Query to match against molecules |
recursionPossible | flags whether or not recursive matches are allowed [default true] |
useChirality | use atomic CIP codes as part of the comparison [default true] |
useQueryQueryMatches | if set, the contents of atom and bond queries will be used as part of the matching [default false] |
numThreads | If -1 use all available processors [default -1] |
Definition at line 851 of file SubstructLibrary.h.
References RDKit::SubstructMatchParameters::recursionPossible, RDKit::SubstructMatchParameters::useChirality, and RDKit::SubstructMatchParameters::useQueryQueryMatches.
|
inline |
overload
Definition at line 862 of file SubstructLibrary.h.
|
inline |
Returns true if any match exists for the query between the specified indices
query | Query to match against molecules |
startIdx | Start index of the search |
endIdx | Ending idx (non-inclusive) of the search. |
recursionPossible | flags whether or not recursive matches are allowed [default true] |
useChirality | use atomic CIP codes as part of the comparison [default true] |
useQueryQueryMatches | if set, the contents of atom and bond queries will be used as part of the matching [default false] |
numThreads | If -1 use all available processors [default -1] |
Definition at line 880 of file SubstructLibrary.h.
References RDKit::SubstructMatchParameters::recursionPossible, RDKit::SubstructMatchParameters::useChirality, and RDKit::SubstructMatchParameters::useQueryQueryMatches.
bool RDKit::SubstructLibrary::hasMatch | ( | const ROMol & | query, |
unsigned int | startIdx, | ||
unsigned int | endIdx, | ||
const SubstructMatchParameters & | params, | ||
int | numThreads = -1 |
||
) | const |
overload
bool RDKit::SubstructLibrary::hasMatch | ( | const TautomerQuery & | query, |
unsigned int | startIdx, | ||
unsigned int | endIdx, | ||
const SubstructMatchParameters & | params, | ||
int | numThreads = -1 |
||
) | const |
overload
void RDKit::SubstructLibrary::initFromStream | ( | std::istream & | ss | ) |
initializes from a stream pickle
initializes from a string pickle
Returns the molecule at the given index.
idx | Index of the molecule in the library (n.b. could contain null) |
Definition at line 921 of file SubstructLibrary.h.
References RDKit::MolHolderBase::getMol(), and PRECONDITION.
|
inline |
access required for serialization
Definition at line 949 of file SubstructLibrary.h.
std::string RDKit::SubstructLibrary::Serialize | ( | ) | const |
returns a string with a serialized (pickled) representation
does error checking
Definition at line 934 of file SubstructLibrary.h.
References RDKit::MolHolderBase::size().
return the number of molecules in the library
Definition at line 928 of file SubstructLibrary.h.
References PRECONDITION.
void RDKit::SubstructLibrary::toStream | ( | std::ostream & | ss | ) | const |
serializes (pickles) to a stream