RDKit
Open-source cheminformatics and machine learning.
|
Contains general bit-comparison and similarity operations. More...
Go to the source code of this file.
Functions | |
template<typename T > | |
double | SimilarityWrapper (const T &bv1, const T &bv2, double(*metric)(const T &, const T &), bool returnDistance=false) |
template<typename T > | |
double | SimilarityWrapper (const T &bv1, const T &bv2, double a, double b, double(*metric)(const T &, const T &, double, double), bool returnDistance=false) |
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts. | |
RDKIT_DATASTRUCTS_EXPORT bool | AllProbeBitsMatch (const char *probe, const char *ref) |
RDKIT_DATASTRUCTS_EXPORT bool | AllProbeBitsMatch (const std::string &probe, const std::string &ref) |
RDKIT_DATASTRUCTS_EXPORT bool | AllProbeBitsMatch (const ExplicitBitVect &probe, const ExplicitBitVect &ref) |
template<typename T1 > | |
RDKIT_DATASTRUCTS_EXPORT bool | AllProbeBitsMatch (const T1 &probe, const std::string &pkl) |
template<typename T1 > | |
RDKIT_DATASTRUCTS_EXPORT bool | AllProbeBitsMatch (const T1 &probe, const T1 &ref) |
template<typename T1 , typename T2 > | |
RDKIT_DATASTRUCTS_EXPORT int | NumOnBitsInCommon (const T1 &bv1, const T2 &bv2) |
returns the number of on bits in common between two bit vectors | |
RDKIT_DATASTRUCTS_EXPORT int | NumOnBitsInCommon (const ExplicitBitVect &bv1, const ExplicitBitVect &bv2) |
template<typename T1 , typename T2 > | |
RDKIT_DATASTRUCTS_EXPORT double | TanimotoSimilarity (const T1 &bv1, const T2 &bv2) |
returns the Tanimoto similarity between two bit vects | |
template<typename T1 , typename T2 > | |
RDKIT_DATASTRUCTS_EXPORT double | CosineSimilarity (const T1 &bv1, const T2 &bv2) |
returns the Cosine similarity between two bit vects | |
template<typename T1 , typename T2 > | |
RDKIT_DATASTRUCTS_EXPORT double | KulczynskiSimilarity (const T1 &bv1, const T2 &bv2) |
returns the Kulczynski similarity between two bit vects | |
template<typename T1 , typename T2 > | |
RDKIT_DATASTRUCTS_EXPORT double | DiceSimilarity (const T1 &bv1, const T2 &bv2) |
returns the Dice similarity between two bit vects | |
template<typename T1 , typename T2 > | |
RDKIT_DATASTRUCTS_EXPORT double | TverskySimilarity (const T1 &bv1, const T2 &bv2, double a, double b) |
returns the Tversky similarity between two bit vects | |
template<typename T1 , typename T2 > | |
RDKIT_DATASTRUCTS_EXPORT double | SokalSimilarity (const T1 &bv1, const T2 &bv2) |
returns the Sokal similarity between two bit vects | |
template<typename T1 , typename T2 > | |
RDKIT_DATASTRUCTS_EXPORT double | McConnaugheySimilarity (const T1 &bv1, const T2 &bv2) |
returns the McConnaughey similarity between two bit vects | |
template<typename T1 , typename T2 > | |
RDKIT_DATASTRUCTS_EXPORT double | AsymmetricSimilarity (const T1 &bv1, const T2 &bv2) |
returns the Asymmetric similarity between two bit vects | |
template<typename T1 , typename T2 > | |
RDKIT_DATASTRUCTS_EXPORT double | BraunBlanquetSimilarity (const T1 &bv1, const T2 &bv2) |
returns the Braun-Blanquet similarity between two bit vects | |
template<typename T1 , typename T2 > | |
RDKIT_DATASTRUCTS_EXPORT double | RusselSimilarity (const T1 &bv1, const T2 &bv2) |
returns the Russel similarity between two bit vects | |
template<typename T1 , typename T2 > | |
RDKIT_DATASTRUCTS_EXPORT double | RogotGoldbergSimilarity (const T1 &bv1, const T2 &bv2) |
returns the Rogot-Goldberg similarity between two bit vects | |
template<typename T1 , typename T2 > | |
RDKIT_DATASTRUCTS_EXPORT double | OnBitSimilarity (const T1 &bv1, const T2 &bv2) |
returns the on bit similarity between two bit vects | |
template<typename T1 , typename T2 > | |
RDKIT_DATASTRUCTS_EXPORT int | NumBitsInCommon (const T1 &bv1, const T2 &bv2) |
returns the number of common bits (on and off) between two bit vects | |
RDKIT_DATASTRUCTS_EXPORT int | NumBitsInCommon (const ExplicitBitVect &bv1, const ExplicitBitVect &bv2) |
template<typename T1 , typename T2 > | |
RDKIT_DATASTRUCTS_EXPORT double | AllBitSimilarity (const T1 &bv1, const T2 &bv2) |
template<typename T1 , typename T2 > | |
RDKIT_DATASTRUCTS_EXPORT IntVect | OnBitsInCommon (const T1 &bv1, const T2 &bv2) |
template<typename T1 , typename T2 > | |
RDKIT_DATASTRUCTS_EXPORT IntVect | OffBitsInCommon (const T1 &bv1, const T2 &bv2) |
template<typename T1 , typename T2 > | |
RDKIT_DATASTRUCTS_EXPORT DoubleVect | OnBitProjSimilarity (const T1 &bv1, const T2 &bv2) |
returns the on-bit projected similarities between two bit vects | |
template<typename T1 , typename T2 > | |
RDKIT_DATASTRUCTS_EXPORT DoubleVect | OffBitProjSimilarity (const T1 &bv1, const T2 &bv2) |
returns the off-bit projected similarities between two bit vects | |
template<typename T1 > | |
RDKIT_DATASTRUCTS_EXPORT T1 * | FoldFingerprint (const T1 &bv1, unsigned int factor=2) |
folds a bit vector factor times and returns the result | |
template<typename T1 > | |
RDKIT_DATASTRUCTS_EXPORT std::string | BitVectToText (const T1 &bv1) |
returns a text representation of a bit vector (a string of 0s and 1s) | |
template<typename T1 > | |
RDKIT_DATASTRUCTS_EXPORT std::string | BitVectToFPSText (const T1 &bv1) |
template<typename T1 > | |
RDKIT_DATASTRUCTS_EXPORT std::string | BitVectToBinaryText (const T1 &bv1) |
returns a binary string representation of a bit vector (an array of bytes) | |
template<typename T1 > | |
RDKIT_DATASTRUCTS_EXPORT void | UpdateBitVectFromFPSText (T1 &bv1, const std::string &fps) |
updates a bit vector from Andrew Dalke's FPS format | |
template<typename T1 > | |
RDKIT_DATASTRUCTS_EXPORT void | UpdateBitVectFromBinaryText (T1 &bv1, const std::string &fps) |
RDKIT_DATASTRUCTS_EXPORT unsigned int | CalcBitmapPopcount (const unsigned char *bv1, unsigned int nBytes) |
RDKIT_DATASTRUCTS_EXPORT double | CalcBitmapTanimoto (const unsigned char *bv1, const unsigned char *bv2, unsigned int nBytes) |
RDKIT_DATASTRUCTS_EXPORT double | CalcBitmapDice (const unsigned char *bv1, const unsigned char *bv2, unsigned int nBytes) |
RDKIT_DATASTRUCTS_EXPORT double | CalcBitmapTversky (const unsigned char *bv1, const unsigned char *bv2, unsigned int nBytes, double ca, double cb) |
RDKIT_DATASTRUCTS_EXPORT bool | CalcBitmapAllProbeBitsMatch (const unsigned char *probe, const unsigned char *ref, unsigned int nBytes) |
Contains general bit-comparison and similarity operations.
The notation used to document the similarity metrics is:
V1_n: number of bits in vector 1
V1_o: number of on bits in vector 1
(V1&V2)_o: number of on bits in the intersection of vectors 1 and 2
Definition in file BitOps.h.
RDKIT_DATASTRUCTS_EXPORT double AllBitSimilarity | ( | const T1 & | bv1, |
const T2 & | bv2 | ||
) |
returns the common-bit similarity (on and off) between two bit vects. This is also called Manhattan similarity.
[bv1_n - (bv1^bv2)_o] / bv1_n
RDKIT_DATASTRUCTS_EXPORT bool AllProbeBitsMatch | ( | const char * | probe, |
const char * | ref | ||
) |
Referenced by RDKit::FPHolderBase::passesFilter().
RDKIT_DATASTRUCTS_EXPORT bool AllProbeBitsMatch | ( | const ExplicitBitVect & | probe, |
const ExplicitBitVect & | ref | ||
) |
RDKIT_DATASTRUCTS_EXPORT bool AllProbeBitsMatch | ( | const std::string & | probe, |
const std::string & | ref | ||
) |
RDKIT_DATASTRUCTS_EXPORT bool AllProbeBitsMatch | ( | const T1 & | probe, |
const std::string & | pkl | ||
) |
RDKIT_DATASTRUCTS_EXPORT bool AllProbeBitsMatch | ( | const T1 & | probe, |
const T1 & | ref | ||
) |
RDKIT_DATASTRUCTS_EXPORT double AsymmetricSimilarity | ( | const T1 & | bv1, |
const T2 & | bv2 | ||
) |
returns the Asymmetric similarity between two bit vects
(bv1&bv2)_o / min(bv1_o,bv2_o)
RDKIT_DATASTRUCTS_EXPORT std::string BitVectToBinaryText | ( | const T1 & | bv1 | ) |
returns a binary string representation of a bit vector (an array of bytes)
bv1 | the vector to use |
RDKIT_DATASTRUCTS_EXPORT std::string BitVectToFPSText | ( | const T1 & | bv1 | ) |
returns a hex representation of a bit vector compatible with Andrew Dalke's FPS format
bv1 | the vector to use |
RDKIT_DATASTRUCTS_EXPORT std::string BitVectToText | ( | const T1 & | bv1 | ) |
returns a text representation of a bit vector (a string of 0s and 1s)
bv1 | the vector to use |
RDKIT_DATASTRUCTS_EXPORT double BraunBlanquetSimilarity | ( | const T1 & | bv1, |
const T2 & | bv2 | ||
) |
returns the Braun-Blanquet similarity between two bit vects
(bv1&bv2)_o / max(bv1_o,bv2_o)
RDKIT_DATASTRUCTS_EXPORT bool CalcBitmapAllProbeBitsMatch | ( | const unsigned char * | probe, |
const unsigned char * | ref, | ||
unsigned int | nBytes | ||
) |
RDKIT_DATASTRUCTS_EXPORT double CalcBitmapDice | ( | const unsigned char * | bv1, |
const unsigned char * | bv2, | ||
unsigned int | nBytes | ||
) |
RDKIT_DATASTRUCTS_EXPORT unsigned int CalcBitmapPopcount | ( | const unsigned char * | bv1, |
unsigned int | nBytes | ||
) |
RDKIT_DATASTRUCTS_EXPORT double CalcBitmapTanimoto | ( | const unsigned char * | bv1, |
const unsigned char * | bv2, | ||
unsigned int | nBytes | ||
) |
RDKIT_DATASTRUCTS_EXPORT double CalcBitmapTversky | ( | const unsigned char * | bv1, |
const unsigned char * | bv2, | ||
unsigned int | nBytes, | ||
double | ca, | ||
double | cb | ||
) |
RDKIT_DATASTRUCTS_EXPORT double CosineSimilarity | ( | const T1 & | bv1, |
const T2 & | bv2 | ||
) |
returns the Cosine similarity between two bit vects
(bv1&bv2)_o / sqrt(bv1_o * bv2_o)
RDKIT_DATASTRUCTS_EXPORT double DiceSimilarity | ( | const T1 & | bv1, |
const T2 & | bv2 | ||
) |
returns the Dice similarity between two bit vects
2*(bv1&bv2)_o / [bv1_o + bv2_o]
RDKIT_DATASTRUCTS_EXPORT T1 * FoldFingerprint | ( | const T1 & | bv1, |
unsigned int | factor = 2 |
||
) |
folds a bit vector factor times and returns the result
bv1 | the vector to be folded |
factor | (optional) the number of times to fold it |
Returns a pointer to the folded bit vector, which is bv1_n/factor long.
Note: The caller is responsible for deleting the result.
Referenced by SimilarityWrapper(), and SimilarityWrapper().
RDKIT_DATASTRUCTS_EXPORT double KulczynskiSimilarity | ( | const T1 & | bv1, |
const T2 & | bv2 | ||
) |
returns the Kulczynski similarity between two bit vects
(bv1&bv2)_o * [bv1_o + bv2_o] / [2 * bv1_o * bv2_o]
RDKIT_DATASTRUCTS_EXPORT double McConnaugheySimilarity | ( | const T1 & | bv1, |
const T2 & | bv2 | ||
) |
returns the McConnaughey similarity between two bit vects
[(bv1&bv2)_o * (bv1_o + bv2_o) - (bv1_o * bv2_o)] / (bv1_o * bv2_o)
RDKIT_DATASTRUCTS_EXPORT int NumBitsInCommon | ( | const ExplicitBitVect & | bv1, |
const ExplicitBitVect & | bv2 | ||
) |
RDKIT_DATASTRUCTS_EXPORT int NumBitsInCommon | ( | const T1 & | bv1, |
const T2 & | bv2 | ||
) |
returns the number of common bits (on and off) between two bit vects
bv1_n - (bv1^bv2)_o
RDKIT_DATASTRUCTS_EXPORT int NumOnBitsInCommon | ( | const ExplicitBitVect & | bv1, |
const ExplicitBitVect & | bv2 | ||
) |
RDKIT_DATASTRUCTS_EXPORT int NumOnBitsInCommon | ( | const T1 & | bv1, |
const T2 & | bv2 | ||
) |
returns the number of on bits in common between two bit vectors
RDKIT_DATASTRUCTS_EXPORT DoubleVect OffBitProjSimilarity | ( | const T1 & | bv1, |
const T2 & | bv2 | ||
) |
returns the off-bit projected similarities between two bit vects
[bv1_n - (bv1|bv2)_o] / [bv1_n - bv1_o]
[bv2_n - (bv1|bv2)_o] / [bv2_n - bv2_o]
Note: bv1_n = bv2_n
RDKIT_DATASTRUCTS_EXPORT IntVect OffBitsInCommon | ( | const T1 & | bv1, |
const T2 & | bv2 | ||
) |
returns an IntVect with indices of all off bits in common between two bit vects
RDKIT_DATASTRUCTS_EXPORT DoubleVect OnBitProjSimilarity | ( | const T1 & | bv1, |
const T2 & | bv2 | ||
) |
returns the on-bit projected similarities between two bit vects
(bv1&bv2)_o / bv1_o
(bv1&bv2)_o / bv2_o
RDKIT_DATASTRUCTS_EXPORT double OnBitSimilarity | ( | const T1 & | bv1, |
const T2 & | bv2 | ||
) |
returns the on bit similarity between two bit vects
(bv1&bv2)_o / (bv1|bv2)_o
RDKIT_DATASTRUCTS_EXPORT IntVect OnBitsInCommon | ( | const T1 & | bv1, |
const T2 & | bv2 | ||
) |
returns an IntVect with indices of all on bits in common between two bit vects
RDKIT_DATASTRUCTS_EXPORT double RogotGoldbergSimilarity | ( | const T1 & | bv1, |
const T2 & | bv2 | ||
) |
returns the Rogot-Goldberg similarity between two bit vects
(bv1&bv2)_o / (bv1_o + bv2_o) + (bv1_n - bv1_o - bv2_o + (bv1&bv2)_o) / (2*bv1_n - bv1_o - bv2_o)
RDKIT_DATASTRUCTS_EXPORT double RusselSimilarity | ( | const T1 & | bv1, |
const T2 & | bv2 | ||
) |
returns the Russel similarity between two bit vects
(bv1&bv2)_o / bv1_o
Note that this operation is non-commutative: RusselSimilarity(bv1,bv2) != RusselSimilarity(bv2,bv1)
double SimilarityWrapper | ( | const T & | bv1, |
const T & | bv2, | ||
double | a, | ||
double | b, | ||
double(*)(const T &, const T &, double, double) | metric, | ||
bool | returnDistance = false |
||
) |
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
Definition at line 53 of file BitOps.h.
References FoldFingerprint().
double SimilarityWrapper | ( | const T & | bv1, |
const T & | bv2, | ||
double(*)(const T &, const T &) | metric, | ||
bool | returnDistance = false |
||
) |
general purpose wrapper for calculating the similarity between two bvs that may be of unequal size (will automatically fold as appropriate)
Definition at line 31 of file BitOps.h.
References FoldFingerprint().
Referenced by RDDataManip::TanimotoDistanceMetric(), and RDDataManip::TanimotoSimilarityMetric().
RDKIT_DATASTRUCTS_EXPORT double SokalSimilarity | ( | const T1 & | bv1, |
const T2 & | bv2 | ||
) |
returns the Sokal similarity between two bit vects
(bv1&bv2)_o / [2*bv1_o + 2*bv2_o - 3*(bv1&bv2)_o]
RDKIT_DATASTRUCTS_EXPORT double TanimotoSimilarity | ( | const T1 & | bv1, |
const T2 & | bv2 | ||
) |
returns the Tanimoto similarity between two bit vects
(bv1&bv2)_o / [bv1_o + bv2_o - (bv1&bv2)_o]
Referenced by RDDataManip::TanimotoDistanceMetric(), and RDDataManip::TanimotoSimilarityMetric().
RDKIT_DATASTRUCTS_EXPORT double TverskySimilarity | ( | const T1 & | bv1, |
const T2 & | bv2, | ||
double | a, | ||
double | b | ||
) |
RDKIT_DATASTRUCTS_EXPORT void UpdateBitVectFromBinaryText | ( | T1 & | bv1, |
const std::string & | fps | ||
) |
updates a bit vector from a binary string representation of a bit vector (an array of bytes)
bv1 | the vector to use |
fps | the binary string |
RDKIT_DATASTRUCTS_EXPORT void UpdateBitVectFromFPSText | ( | T1 & | bv1, |
const std::string & | fps | ||
) |
updates a bit vector from Andrew Dalke's FPS format
bv1 | the vector to use |
fps | the FPS hex string |