RDKit
Open-source cheminformatics and machine learning.
|
Functions | |
RDKIT_FINGERPRINTS_EXPORT SparseIntVect< std::int32_t > * | getAtomPairFingerprint (const ROMol &mol, unsigned int minLength, unsigned int maxLength, const std::vector< std::uint32_t > *fromAtoms=nullptr, const std::vector< std::uint32_t > *ignoreAtoms=nullptr, const std::vector< std::uint32_t > *atomInvariants=nullptr, bool includeChirality=false, bool use2D=true, int confId=-1) |
returns the atom-pair fingerprint for a molecule | |
RDKIT_FINGERPRINTS_EXPORT SparseIntVect< std::int32_t > * | getAtomPairFingerprint (const ROMol &mol, const std::vector< std::uint32_t > *fromAtoms=nullptr, const std::vector< std::uint32_t > *ignoreAtoms=nullptr, const std::vector< std::uint32_t > *atomInvariants=nullptr, bool includeChirality=false, bool use2D=true, int confId=-1) |
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts. | |
RDKIT_FINGERPRINTS_EXPORT SparseIntVect< std::int32_t > * | getHashedAtomPairFingerprint (const ROMol &mol, unsigned int nBits=2048, unsigned int minLength=1, unsigned int maxLength=maxPathLen - 1, const std::vector< std::uint32_t > *fromAtoms=nullptr, const std::vector< std::uint32_t > *ignoreAtoms=nullptr, const std::vector< std::uint32_t > *atomInvariants=nullptr, bool includeChirality=false, bool use2D=true, int confId=-1) |
returns the hashed atom-pair fingerprint for a molecule | |
RDKIT_FINGERPRINTS_EXPORT ExplicitBitVect * | getHashedAtomPairFingerprintAsBitVect (const ROMol &mol, unsigned int nBits=2048, unsigned int minLength=1, unsigned int maxLength=maxPathLen - 1, const std::vector< std::uint32_t > *fromAtoms=nullptr, const std::vector< std::uint32_t > *ignoreAtoms=nullptr, const std::vector< std::uint32_t > *atomInvariants=nullptr, unsigned int nBitsPerEntry=4, bool includeChirality=false, bool use2D=true, int confId=-1) |
returns the hashed atom-pair fingerprint for a molecule as a bit vector | |
RDKIT_FINGERPRINTS_EXPORT SparseIntVect< boost::int64_t > * | getTopologicalTorsionFingerprint (const ROMol &mol, unsigned int targetSize=4, const std::vector< std::uint32_t > *fromAtoms=nullptr, const std::vector< std::uint32_t > *ignoreAtoms=nullptr, const std::vector< std::uint32_t > *atomInvariants=nullptr, bool includeChirality=false) |
returns the topological-torsion fingerprint for a molecule | |
RDKIT_FINGERPRINTS_EXPORT SparseIntVect< boost::int64_t > * | getHashedTopologicalTorsionFingerprint (const ROMol &mol, unsigned int nBits=2048, unsigned int targetSize=4, const std::vector< std::uint32_t > *fromAtoms=nullptr, const std::vector< std::uint32_t > *ignoreAtoms=nullptr, const std::vector< std::uint32_t > *atomInvariants=nullptr, bool includeChirality=false) |
returns a hashed topological-torsion fingerprint for a molecule | |
RDKIT_FINGERPRINTS_EXPORT ExplicitBitVect * | getHashedTopologicalTorsionFingerprintAsBitVect (const ROMol &mol, unsigned int nBits=2048, unsigned int targetSize=4, const std::vector< std::uint32_t > *fromAtoms=nullptr, const std::vector< std::uint32_t > *ignoreAtoms=nullptr, const std::vector< std::uint32_t > *atomInvariants=nullptr, unsigned int nBitsPerEntry=4, bool includeChirality=false) |
RDKIT_FINGERPRINTS_EXPORT std::uint32_t | getAtomCode (const Atom *atom, unsigned int branchSubtract=0, bool includeChirality=false) |
RDKIT_FINGERPRINTS_EXPORT std::uint32_t | getAtomPairCode (std::uint32_t codeI, std::uint32_t codeJ, unsigned int dist, bool includeChirality=false) |
RDKIT_FINGERPRINTS_EXPORT std::uint64_t | getTopologicalTorsionCode (const std::vector< std::uint32_t > &atomCodes, bool includeChirality=false) |
RDKIT_FINGERPRINTS_EXPORT std::uint32_t | getTopologicalTorsionHash (const std::vector< std::uint32_t > &pathCodes) |
Variables | |
const std::string | atomPairsVersion = "1.1.0" |
const unsigned int | numTypeBits = 4 |
const unsigned int | atomNumberTypes [1<< numTypeBits] |
const unsigned int | numPiBits = 2 |
const unsigned int | maxNumPi = (1 << numPiBits) - 1 |
const unsigned int | numBranchBits = 3 |
const unsigned int | maxNumBranches = (1 << numBranchBits) - 1 |
const unsigned int | numChiralBits = 2 |
const unsigned int | codeSize = numTypeBits + numPiBits + numBranchBits |
const unsigned int | numPathBits = 5 |
const unsigned int | maxPathLen = (1 << numPathBits) - 1 |
const unsigned int | numAtomPairFingerprintBits |
RDKIT_FINGERPRINTS_EXPORT std::uint32_t RDKit::AtomPairs::getAtomCode | ( | const Atom * | atom, |
unsigned int | branchSubtract = 0 , |
||
bool | includeChirality = false |
||
) |
returns a numeric code for the atom (the atom's hash in the atom-pair scheme)
atom | the atom to be considered |
branchSubtract | (optional) a constant to subtract from the number of neighbors when the hash is calculated (used in the topological torsions code) |
includeChirality | toggles the inclusion of bits indicating R/S chirality |
RDKIT_FINGERPRINTS_EXPORT std::uint32_t RDKit::AtomPairs::getAtomPairCode | ( | std::uint32_t | codeI, |
std::uint32_t | codeJ, | ||
unsigned int | dist, | ||
bool | includeChirality = false |
||
) |
returns an atom pair hash based on two atom hashes and the distance between the atoms.
codeI | the hash for the first atom |
codeJ | the hash for the second atom |
dist | the distance (number of bonds) between the two atoms |
includeChirality | toggles the inclusion of bits indicating R/S chirality |
RDKIT_FINGERPRINTS_EXPORT SparseIntVect< std::int32_t > * RDKit::AtomPairs::getAtomPairFingerprint | ( | const ROMol & | mol, |
const std::vector< std::uint32_t > * | fromAtoms = nullptr , |
||
const std::vector< std::uint32_t > * | ignoreAtoms = nullptr , |
||
const std::vector< std::uint32_t > * | atomInvariants = nullptr , |
||
bool | includeChirality = false , |
||
bool | use2D = true , |
||
int | confId = -1 |
||
) |
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
RDKIT_FINGERPRINTS_EXPORT SparseIntVect< std::int32_t > * RDKit::AtomPairs::getAtomPairFingerprint | ( | const ROMol & | mol, |
unsigned int | minLength, | ||
unsigned int | maxLength, | ||
const std::vector< std::uint32_t > * | fromAtoms = nullptr , |
||
const std::vector< std::uint32_t > * | ignoreAtoms = nullptr , |
||
const std::vector< std::uint32_t > * | atomInvariants = nullptr , |
||
bool | includeChirality = false , |
||
bool | use2D = true , |
||
int | confId = -1 |
||
) |
returns the atom-pair fingerprint for a molecule
The algorithm used is described here: R.E. Carhart, D.H. Smith, R. Venkataraghavan; "Atom Pairs as Molecular Features in Structure-Activity Studies: Definition and Applications" JCICS 25, 64-73 (1985).
mol | the molecule to be fingerprinted |
minLength | minimum distance between atoms to be considered in a pair. Default is 1 bond. |
maxLength | maximum distance between atoms to be considered in a pair. Default is maxPathLen-1 bonds. |
fromAtoms | if provided, only atom pairs that involve the specified atoms will be included in the fingerprint |
ignoreAtoms | if provided, any atom pairs that include the specified atoms will not be included in the fingerprint |
atomInvariants | a list of invariants to use for the atom hashes. Note: only the first codeSize bits of each invariant are used. |
includeChirality | if set, chirality will be used in the atom invariants (note: this is ignored if atomInvariants are provided) |
use2D | if set, the 2D (topological) distance matrix is used. |
confId | the conformation to use if 3D distances are being used |
RDKIT_FINGERPRINTS_EXPORT SparseIntVect< std::int32_t > * RDKit::AtomPairs::getHashedAtomPairFingerprint | ( | const ROMol & | mol, |
unsigned int | nBits = 2048 , |
||
unsigned int | minLength = 1 , |
||
unsigned int | maxLength = maxPathLen - 1 , |
||
const std::vector< std::uint32_t > * | fromAtoms = nullptr , |
||
const std::vector< std::uint32_t > * | ignoreAtoms = nullptr , |
||
const std::vector< std::uint32_t > * | atomInvariants = nullptr , |
||
bool | includeChirality = false , |
||
bool | use2D = true , |
||
int | confId = -1 |
||
) |
returns the hashed atom-pair fingerprint for a molecule
mol | the molecule to be fingerprinted |
nBits | the length of the fingerprint to generate |
minLength | minimum distance between atoms to be considered in a pair. Default is 1 bond. |
maxLength | maximum distance between atoms to be considered in a pair. Default is maxPathLen-1 bonds. |
fromAtoms | if provided, only atom pairs that involve the specified atoms will be included in the fingerprint |
ignoreAtoms | if provided, any atom pairs that include the specified atoms will not be included in the fingerprint |
atomInvariants | a list of invariants to use for the atom hashes. Note: only the first codeSize bits of each invariant are used. |
includeChirality | if set, chirality will be used in the atom invariants (note: this is ignored if atomInvariants are provided) |
use2D | if set, the 2D (topological) distance matrix is used. |
confId | the conformation to use if 3D distances are being used |
RDKIT_FINGERPRINTS_EXPORT ExplicitBitVect * RDKit::AtomPairs::getHashedAtomPairFingerprintAsBitVect | ( | const ROMol & | mol, |
unsigned int | nBits = 2048 , |
||
unsigned int | minLength = 1 , |
||
unsigned int | maxLength = maxPathLen - 1 , |
||
const std::vector< std::uint32_t > * | fromAtoms = nullptr , |
||
const std::vector< std::uint32_t > * | ignoreAtoms = nullptr , |
||
const std::vector< std::uint32_t > * | atomInvariants = nullptr , |
||
unsigned int | nBitsPerEntry = 4 , |
||
bool | includeChirality = false , |
||
bool | use2D = true , |
||
int | confId = -1 |
||
) |
returns the hashed atom-pair fingerprint for a molecule as a bit vector
mol | the molecule to be fingerprinted |
nBits | the length of the fingerprint to generate |
minLength | minimum distance between atoms to be considered in a pair. Default is 1 bond. |
maxLength | maximum distance between atoms to be considered in a pair. Default is maxPathLen-1 bonds. |
fromAtoms | if provided, only atom pairs that involve the specified atoms will be included in the fingerprint |
ignoreAtoms | if provided, any atom pairs that include the specified atoms will not be included in the fingerprint |
atomInvariants | a list of invariants to use for the atom hashes. Note: only the first codeSize bits of each invariant are used. |
nBitsPerEntry | number of bits to use in simulating counts |
includeChirality | if set, chirality will be used in the atom invariants (note: this is ignored if atomInvariants are provided) |
use2D | if set, the 2D (topological) distance matrix is used. |
confId | the conformation to use if 3D distances are being used |
RDKIT_FINGERPRINTS_EXPORT SparseIntVect< boost::int64_t > * RDKit::AtomPairs::getHashedTopologicalTorsionFingerprint | ( | const ROMol & | mol, |
unsigned int | nBits = 2048 , |
||
unsigned int | targetSize = 4 , |
||
const std::vector< std::uint32_t > * | fromAtoms = nullptr , |
||
const std::vector< std::uint32_t > * | ignoreAtoms = nullptr , |
||
const std::vector< std::uint32_t > * | atomInvariants = nullptr , |
||
bool | includeChirality = false |
||
) |
returns a hashed topological-torsion fingerprint for a molecule
The algorithm used is described here: R. Nilakantan, N. Bauman, J. S. Dixon, R. Venkataraghavan; "Topological Torsion: A New Molecular Descriptor for SAR Applications. Comparison with Other Descriptors" JCICS 27, 82-85 (1987).
mol | the molecule to be fingerprinted |
nBits | number of bits to include in the fingerprint |
targetSize | the number of atoms to include in the "torsions" |
fromAtoms | if provided, only torsions that start or end at the specified atoms will be included in the fingerprint |
ignoreAtoms | if provided, any torsions that include the specified atoms will not be included in the fingerprint |
atomInvariants | a list of invariants to use for the atom hashes. Note: only the first codeSize bits of each invariant are used. |
includeChirality | if set, chirality will be used in the atom invariants (note: this is ignored if atomInvariants are provided) |
RDKIT_FINGERPRINTS_EXPORT ExplicitBitVect * RDKit::AtomPairs::getHashedTopologicalTorsionFingerprintAsBitVect | ( | const ROMol & | mol, |
unsigned int | nBits = 2048 , |
||
unsigned int | targetSize = 4 , |
||
const std::vector< std::uint32_t > * | fromAtoms = nullptr , |
||
const std::vector< std::uint32_t > * | ignoreAtoms = nullptr , |
||
const std::vector< std::uint32_t > * | atomInvariants = nullptr , |
||
unsigned int | nBitsPerEntry = 4 , |
||
bool | includeChirality = false |
||
) |
returns a hashed topological-torsion fingerprint for a molecule as a bit vector
mol | the molecule to be fingerprinted |
nBits | number of bits to include in the fingerprint |
targetSize | the number of atoms to include in the "torsions" |
fromAtoms | if provided, only torsions that start or end at the specified atoms will be included in the fingerprint |
ignoreAtoms | if provided, any torsions that include the specified atoms will not be included in the fingerprint |
atomInvariants | a list of invariants to use for the atom hashes. Note: only the first codeSize bits of each invariant are used. |
nBitsPerEntry | number of bits to use in simulating counts |
includeChirality | if set, chirality will be used in the atom invariants (note: this is ignored if atomInvariants are provided) |
RDKIT_FINGERPRINTS_EXPORT std::uint64_t RDKit::AtomPairs::getTopologicalTorsionCode | ( | const std::vector< std::uint32_t > & | atomCodes, |
bool | includeChirality = false |
||
) |
returns a topological torsion hash based on the atom hashes passed in
atomCodes | the vector of atom hashes |
RDKIT_FINGERPRINTS_EXPORT SparseIntVect< boost::int64_t > * RDKit::AtomPairs::getTopologicalTorsionFingerprint | ( | const ROMol & | mol, |
unsigned int | targetSize = 4 , |
||
const std::vector< std::uint32_t > * | fromAtoms = nullptr , |
||
const std::vector< std::uint32_t > * | ignoreAtoms = nullptr , |
||
const std::vector< std::uint32_t > * | atomInvariants = nullptr , |
||
bool | includeChirality = false |
||
) |
returns the topological-torsion fingerprint for a molecule
The algorithm used is described here: R. Nilakantan, N. Bauman, J. S. Dixon, R. Venkataraghavan; "Topological Torsion: A New Molecular Descriptor for SAR Applications. Comparison with Other Descriptors" JCICS 27, 82-85 (1987).
mol | the molecule to be fingerprinted |
targetSize | the number of atoms to include in the "torsions" |
fromAtoms | if provided, only torsions that start or end at the specified atoms will be included in the fingerprint |
ignoreAtoms | if provided, any torsions that include the specified atoms will not be included in the fingerprint |
atomInvariants | a list of invariants to use for the atom hashes. Note: only the first codeSize bits of each invariant are used. |
includeChirality | if set, chirality will be used in the atom invariants (note: this is ignored if atomInvariants are provided) |
RDKIT_FINGERPRINTS_EXPORT std::uint32_t RDKit::AtomPairs::getTopologicalTorsionHash | ( | const std::vector< std::uint32_t > & | pathCodes | ) |
const unsigned int RDKit::AtomPairs::atomNumberTypes[1<< numTypeBits] |
Definition at line 29 of file FingerprintUtil.h.
const std::string RDKit::AtomPairs::atomPairsVersion = "1.1.0" |
Definition at line 44 of file AtomPairs.h.
const unsigned int RDKit::AtomPairs::codeSize = numTypeBits + numPiBits + numBranchBits |
Definition at line 36 of file FingerprintUtil.h.
const unsigned int RDKit::AtomPairs::maxNumBranches = (1 << numBranchBits) - 1 |
Definition at line 34 of file FingerprintUtil.h.
const unsigned int RDKit::AtomPairs::maxNumPi = (1 << numPiBits) - 1 |
Definition at line 32 of file FingerprintUtil.h.
const unsigned int RDKit::AtomPairs::maxPathLen = (1 << numPathBits) - 1 |
Definition at line 38 of file FingerprintUtil.h.
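const unsigned int RDKit::AtomPairs::numAtomPairFingerprintBits |
Definition at line 39 of file FingerprintUtil.h.
const unsigned int RDKit::AtomPairs::numBranchBits = 3 |
Definition at line 33 of file FingerprintUtil.h.
const unsigned int RDKit::AtomPairs::numChiralBits = 2 |
Definition at line 35 of file FingerprintUtil.h.
const unsigned int RDKit::AtomPairs::numPathBits = 5 |
Definition at line 37 of file FingerprintUtil.h.
const unsigned int RDKit::AtomPairs::numPiBits = 2 |
Definition at line 31 of file FingerprintUtil.h.
const unsigned int RDKit::AtomPairs::numTypeBits = 4 |
Definition at line 28 of file FingerprintUtil.h.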