RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
RDKit::AtomPairs Namespace Reference

Functions

RDKIT_FINGERPRINTS_EXPORT SparseIntVect< std::int32_t > * getAtomPairFingerprint (const ROMol &mol, unsigned int minLength, unsigned int maxLength, const std::vector< std::uint32_t > *fromAtoms=nullptr, const std::vector< std::uint32_t > *ignoreAtoms=nullptr, const std::vector< std::uint32_t > *atomInvariants=nullptr, bool includeChirality=false, bool use2D=true, int confId=-1)
 returns the atom-pair fingerprint for a molecule
 
RDKIT_FINGERPRINTS_EXPORT SparseIntVect< std::int32_t > * getAtomPairFingerprint (const ROMol &mol, const std::vector< std::uint32_t > *fromAtoms=nullptr, const std::vector< std::uint32_t > *ignoreAtoms=nullptr, const std::vector< std::uint32_t > *atomInvariants=nullptr, bool includeChirality=false, bool use2D=true, int confId=-1)
 This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
 
RDKIT_FINGERPRINTS_EXPORT SparseIntVect< std::int32_t > * getHashedAtomPairFingerprint (const ROMol &mol, unsigned int nBits=2048, unsigned int minLength=1, unsigned int maxLength=maxPathLen - 1, const std::vector< std::uint32_t > *fromAtoms=nullptr, const std::vector< std::uint32_t > *ignoreAtoms=nullptr, const std::vector< std::uint32_t > *atomInvariants=nullptr, bool includeChirality=false, bool use2D=true, int confId=-1)
 returns the hashed atom-pair fingerprint for a molecule
 
RDKIT_FINGERPRINTS_EXPORT ExplicitBitVect * getHashedAtomPairFingerprintAsBitVect (const ROMol &mol, unsigned int nBits=2048, unsigned int minLength=1, unsigned int maxLength=maxPathLen - 1, const std::vector< std::uint32_t > *fromAtoms=nullptr, const std::vector< std::uint32_t > *ignoreAtoms=nullptr, const std::vector< std::uint32_t > *atomInvariants=nullptr, unsigned int nBitsPerEntry=4, bool includeChirality=false, bool use2D=true, int confId=-1)
 returns the hashed atom-pair fingerprint for a molecule as a bit vector
 
RDKIT_FINGERPRINTS_EXPORT SparseIntVect< boost::int64_t > * getTopologicalTorsionFingerprint (const ROMol &mol, unsigned int targetSize=4, const std::vector< std::uint32_t > *fromAtoms=nullptr, const std::vector< std::uint32_t > *ignoreAtoms=nullptr, const std::vector< std::uint32_t > *atomInvariants=nullptr, bool includeChirality=false)
 returns the topological-torsion fingerprint for a molecule
 
RDKIT_FINGERPRINTS_EXPORT SparseIntVect< boost::int64_t > * getHashedTopologicalTorsionFingerprint (const ROMol &mol, unsigned int nBits=2048, unsigned int targetSize=4, const std::vector< std::uint32_t > *fromAtoms=nullptr, const std::vector< std::uint32_t > *ignoreAtoms=nullptr, const std::vector< std::uint32_t > *atomInvariants=nullptr, bool includeChirality=false)
 returns a hashed topological-torsion fingerprint for a molecule
 
RDKIT_FINGERPRINTS_EXPORT ExplicitBitVect * getHashedTopologicalTorsionFingerprintAsBitVect (const ROMol &mol, unsigned int nBits=2048, unsigned int targetSize=4, const std::vector< std::uint32_t > *fromAtoms=nullptr, const std::vector< std::uint32_t > *ignoreAtoms=nullptr, const std::vector< std::uint32_t > *atomInvariants=nullptr, unsigned int nBitsPerEntry=4, bool includeChirality=false)
 
RDKIT_FINGERPRINTS_EXPORT std::uint32_t getAtomCode (const Atom *atom, unsigned int branchSubtract=0, bool includeChirality=false)
 
RDKIT_FINGERPRINTS_EXPORT std::uint32_t getAtomPairCode (std::uint32_t codeI, std::uint32_t codeJ, unsigned int dist, bool includeChirality=false)
 
RDKIT_FINGERPRINTS_EXPORT std::uint64_t getTopologicalTorsionCode (const std::vector< std::uint32_t > &atomCodes, bool includeChirality=false)
 
RDKIT_FINGERPRINTS_EXPORT std::uint32_t getTopologicalTorsionHash (const std::vector< std::uint32_t > &pathCodes)
 

Variables

const std::string atomPairsVersion = "1.1.0"
 
const unsigned int numTypeBits = 4
 
const unsigned int atomNumberTypes [1<< numTypeBits]
 
const unsigned int numPiBits = 2
 
const unsigned int maxNumPi = (1 << numPiBits) - 1
 
const unsigned int numBranchBits = 3
 
const unsigned int maxNumBranches = (1 << numBranchBits) - 1
 
const unsigned int numChiralBits = 2
 
const unsigned int codeSize = numTypeBits + numPiBits + numBranchBits
 
const unsigned int numPathBits = 5
 
const unsigned int maxPathLen = (1 << numPathBits) - 1
 
const unsigned int numAtomPairFingerprintBits
 

Function Documentation

◆ getAtomCode()

RDKIT_FINGERPRINTS_EXPORT std::uint32_t RDKit::AtomPairs::getAtomCode ( const Atom *  atom,
unsigned int  branchSubtract = 0,
bool  includeChirality = false 
)

returns a numeric code for the atom (the atom's hash in the atom-pair scheme)

Parameters
atom: the atom to be considered
branchSubtract: (optional) a constant to subtract from the number of neighbors when the hash is calculated (used in the topological torsions code)
includeChirality: toggles the inclusion of bits indicating R/S chirality

◆ getAtomPairCode()

RDKIT_FINGERPRINTS_EXPORT std::uint32_t RDKit::AtomPairs::getAtomPairCode ( std::uint32_t  codeI,
std::uint32_t  codeJ,
unsigned int  dist,
bool  includeChirality = false 
)

returns an atom pair hash based on two atom hashes and the distance between the atoms.

Parameters
codeI: the hash for the first atom
codeJ: the hash for the second atom
dist: the distance (number of bonds) between the two atoms
includeChirality: toggles the inclusion of bits indicating R/S chirality

◆ getAtomPairFingerprint() [1/2]

RDKIT_FINGERPRINTS_EXPORT SparseIntVect< std::int32_t > * RDKit::AtomPairs::getAtomPairFingerprint ( const ROMol &  mol,
const std::vector< std::uint32_t > *  fromAtoms = nullptr,
const std::vector< std::uint32_t > *  ignoreAtoms = nullptr,
const std::vector< std::uint32_t > *  atomInvariants = nullptr,
bool  includeChirality = false,
bool  use2D = true,
int  confId = -1 
)

This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.

◆ getAtomPairFingerprint() [2/2]

RDKIT_FINGERPRINTS_EXPORT SparseIntVect< std::int32_t > * RDKit::AtomPairs::getAtomPairFingerprint ( const ROMol &  mol,
unsigned int  minLength,
unsigned int  maxLength,
const std::vector< std::uint32_t > *  fromAtoms = nullptr,
const std::vector< std::uint32_t > *  ignoreAtoms = nullptr,
const std::vector< std::uint32_t > *  atomInvariants = nullptr,
bool  includeChirality = false,
bool  use2D = true,
int  confId = -1 
)

returns the atom-pair fingerprint for a molecule

The algorithm used is described here: R.E. Carhart, D.H. Smith, R. Venkataraghavan; "Atom Pairs as Molecular Features in Structure-Activity Studies: Definition and Applications" JCICS 25, 64-73 (1985).

Parameters
mol: the molecule to be fingerprinted
minLength: minimum distance between atoms to be considered in a pair. Default is 1 bond.
maxLength: maximum distance between atoms to be considered in a pair. Default is maxPathLen-1 bonds.
fromAtoms: if provided, only atom pairs that involve the specified atoms will be included in the fingerprint
ignoreAtoms: if provided, any atom pairs that include the specified atoms will not be included in the fingerprint
atomInvariants: a list of invariants to use for the atom hashes (note: only the first codeSize bits of each invariant are used)
includeChirality: if set, chirality will be used in the atom invariants (note: this is ignored if atomInvariants are provided)
use2D: if set, the 2D (topological) distance matrix is used.
confId: the conformation to use if 3D distances are being used
Returns
a pointer to the fingerprint. The client is responsible for calling delete on this.

◆ getHashedAtomPairFingerprint()

RDKIT_FINGERPRINTS_EXPORT SparseIntVect< std::int32_t > * RDKit::AtomPairs::getHashedAtomPairFingerprint ( const ROMol &  mol,
unsigned int  nBits = 2048,
unsigned int  minLength = 1,
unsigned int  maxLength = maxPathLen - 1,
const std::vector< std::uint32_t > *  fromAtoms = nullptr,
const std::vector< std::uint32_t > *  ignoreAtoms = nullptr,
const std::vector< std::uint32_t > *  atomInvariants = nullptr,
bool  includeChirality = false,
bool  use2D = true,
int  confId = -1 
)

returns the hashed atom-pair fingerprint for a molecule

Parameters
mol: the molecule to be fingerprinted
nBits: the length of the fingerprint to generate
minLength: minimum distance between atoms to be considered in a pair. Default is 1 bond.
maxLength: maximum distance between atoms to be considered in a pair. Default is maxPathLen-1 bonds.
fromAtoms: if provided, only atom pairs that involve the specified atoms will be included in the fingerprint
ignoreAtoms: if provided, any atom pairs that include the specified atoms will not be included in the fingerprint
atomInvariants: a list of invariants to use for the atom hashes (note: only the first codeSize bits of each invariant are used)
includeChirality: if set, chirality will be used in the atom invariants (note: this is ignored if atomInvariants are provided)
use2D: if set, the 2D (topological) distance matrix is used.
confId: the conformation to use if 3D distances are being used
Returns
a pointer to the fingerprint. The client is responsible for calling delete on this.

◆ getHashedAtomPairFingerprintAsBitVect()

RDKIT_FINGERPRINTS_EXPORT ExplicitBitVect * RDKit::AtomPairs::getHashedAtomPairFingerprintAsBitVect ( const ROMol &  mol,
unsigned int  nBits = 2048,
unsigned int  minLength = 1,
unsigned int  maxLength = maxPathLen - 1,
const std::vector< std::uint32_t > *  fromAtoms = nullptr,
const std::vector< std::uint32_t > *  ignoreAtoms = nullptr,
const std::vector< std::uint32_t > *  atomInvariants = nullptr,
unsigned int  nBitsPerEntry = 4,
bool  includeChirality = false,
bool  use2D = true,
int  confId = -1 
)

returns the hashed atom-pair fingerprint for a molecule as a bit vector

Parameters
mol: the molecule to be fingerprinted
nBits: the length of the fingerprint to generate
minLength: minimum distance between atoms to be considered in a pair. Default is 1 bond.
maxLength: maximum distance between atoms to be considered in a pair. Default is maxPathLen-1 bonds.
fromAtoms: if provided, only atom pairs that involve the specified atoms will be included in the fingerprint
ignoreAtoms: if provided, any atom pairs that include the specified atoms will not be included in the fingerprint
atomInvariants: a list of invariants to use for the atom hashes (note: only the first codeSize bits of each invariant are used)
nBitsPerEntry: number of bits to use in simulating counts
includeChirality: if set, chirality will be used in the atom invariants (note: this is ignored if atomInvariants are provided)
use2D: if set, the 2D (topological) distance matrix is used.
confId: the conformation to use if 3D distances are being used
Returns
a pointer to the fingerprint. The client is responsible for calling delete on this.

◆ getHashedTopologicalTorsionFingerprint()

RDKIT_FINGERPRINTS_EXPORT SparseIntVect< boost::int64_t > * RDKit::AtomPairs::getHashedTopologicalTorsionFingerprint ( const ROMol &  mol,
unsigned int  nBits = 2048,
unsigned int  targetSize = 4,
const std::vector< std::uint32_t > *  fromAtoms = nullptr,
const std::vector< std::uint32_t > *  ignoreAtoms = nullptr,
const std::vector< std::uint32_t > *  atomInvariants = nullptr,
bool  includeChirality = false 
)

returns a hashed topological-torsion fingerprint for a molecule

The algorithm used is described here: R. Nilakantan, N. Bauman, J. S. Dixon, R. Venkataraghavan; "Topological Torsion: A New Molecular Descriptor for SAR Applications. Comparison with Other Descriptors" JCICS 27, 82-85 (1987).

Parameters
mol: the molecule to be fingerprinted
nBits: number of bits to include in the fingerprint
targetSize: the number of atoms to include in the "torsions"
fromAtoms: if provided, only torsions that start or end at the specified atoms will be included in the fingerprint
ignoreAtoms: if provided, any torsions that include the specified atoms will not be included in the fingerprint
atomInvariants: a list of invariants to use for the atom hashes (note: only the first codeSize bits of each invariant are used)
includeChirality: if set, chirality will be used in the atom invariants (note: this is ignored if atomInvariants are provided)
Returns
a pointer to the fingerprint. The client is responsible for calling delete on this.

◆ getHashedTopologicalTorsionFingerprintAsBitVect()

RDKIT_FINGERPRINTS_EXPORT ExplicitBitVect * RDKit::AtomPairs::getHashedTopologicalTorsionFingerprintAsBitVect ( const ROMol &  mol,
unsigned int  nBits = 2048,
unsigned int  targetSize = 4,
const std::vector< std::uint32_t > *  fromAtoms = nullptr,
const std::vector< std::uint32_t > *  ignoreAtoms = nullptr,
const std::vector< std::uint32_t > *  atomInvariants = nullptr,
unsigned int  nBitsPerEntry = 4,
bool  includeChirality = false 
)

returns a hashed topological-torsion fingerprint for a molecule as a bit vector

Parameters
mol: the molecule to be fingerprinted
nBits: number of bits to include in the fingerprint
targetSize: the number of atoms to include in the "torsions"
fromAtoms: if provided, only torsions that start or end at the specified atoms will be included in the fingerprint
ignoreAtoms: if provided, any torsions that include the specified atoms will not be included in the fingerprint
atomInvariants: a list of invariants to use for the atom hashes (note: only the first codeSize bits of each invariant are used)
nBitsPerEntry: number of bits to use in simulating counts
includeChirality: if set, chirality will be used in the atom invariants (note: this is ignored if atomInvariants are provided)
Returns
a pointer to the fingerprint. The client is responsible for calling delete on this.

◆ getTopologicalTorsionCode()

RDKIT_FINGERPRINTS_EXPORT std::uint64_t RDKit::AtomPairs::getTopologicalTorsionCode ( const std::vector< std::uint32_t > &  atomCodes,
bool  includeChirality = false 
)

returns a topological torsion hash based on the atom hashes passed in

Parameters
atomCodes: the vector of atom hashes

◆ getTopologicalTorsionFingerprint()

RDKIT_FINGERPRINTS_EXPORT SparseIntVect< boost::int64_t > * RDKit::AtomPairs::getTopologicalTorsionFingerprint ( const ROMol &  mol,
unsigned int  targetSize = 4,
const std::vector< std::uint32_t > *  fromAtoms = nullptr,
const std::vector< std::uint32_t > *  ignoreAtoms = nullptr,
const std::vector< std::uint32_t > *  atomInvariants = nullptr,
bool  includeChirality = false 
)

returns the topological-torsion fingerprint for a molecule

The algorithm used is described here: R. Nilakantan, N. Bauman, J. S. Dixon, R. Venkataraghavan; "Topological Torsion: A New Molecular Descriptor for SAR Applications. Comparison with Other Descriptors" JCICS 27, 82-85 (1987).

Parameters
mol: the molecule to be fingerprinted
targetSize: the number of atoms to include in the "torsions"
fromAtoms: if provided, only torsions that start or end at the specified atoms will be included in the fingerprint
ignoreAtoms: if provided, any torsions that include the specified atoms will not be included in the fingerprint
atomInvariants: a list of invariants to use for the atom hashes (note: only the first codeSize bits of each invariant are used)
includeChirality: if set, chirality will be used in the atom invariants (note: this is ignored if atomInvariants are provided)
Returns
a pointer to the fingerprint. The client is responsible for calling delete on this.

◆ getTopologicalTorsionHash()

RDKIT_FINGERPRINTS_EXPORT std::uint32_t RDKit::AtomPairs::getTopologicalTorsionHash ( const std::vector< std::uint32_t > &  pathCodes)

Variable Documentation

◆ atomNumberTypes

const unsigned int RDKit::AtomPairs::atomNumberTypes[1<< numTypeBits]
Initial value:
= {
5, 6, 7, 8, 9, 14, 15, 16, 17, 33, 34, 35, 51, 52, 43}

Definition at line 29 of file FingerprintUtil.h.

◆ atomPairsVersion

const std::string RDKit::AtomPairs::atomPairsVersion = "1.1.0"

Definition at line 44 of file AtomPairs.h.

◆ codeSize

const unsigned int RDKit::AtomPairs::codeSize = numTypeBits + numPiBits + numBranchBits

Definition at line 36 of file FingerprintUtil.h.

◆ maxNumBranches

const unsigned int RDKit::AtomPairs::maxNumBranches = (1 << numBranchBits) - 1

Definition at line 34 of file FingerprintUtil.h.

◆ maxNumPi

const unsigned int RDKit::AtomPairs::maxNumPi = (1 << numPiBits) - 1

Definition at line 32 of file FingerprintUtil.h.

◆ maxPathLen

const unsigned int RDKit::AtomPairs::maxPathLen = (1 << numPathBits) - 1

Definition at line 38 of file FingerprintUtil.h.

◆ numAtomPairFingerprintBits

const unsigned int RDKit::AtomPairs::numAtomPairFingerprintBits
Initial value:
= numPathBits + 2 * codeSize

Definition at line 39 of file FingerprintUtil.h.

◆ numBranchBits

const unsigned int RDKit::AtomPairs::numBranchBits = 3

Definition at line 33 of file FingerprintUtil.h.

◆ numChiralBits

const unsigned int RDKit::AtomPairs::numChiralBits = 2

Definition at line 35 of file FingerprintUtil.h.

◆ numPathBits

const unsigned int RDKit::AtomPairs::numPathBits = 5

Definition at line 37 of file FingerprintUtil.h.

◆ numPiBits

const unsigned int RDKit::AtomPairs::numPiBits = 2

Definition at line 31 of file FingerprintUtil.h.

◆ numTypeBits

const unsigned int RDKit::AtomPairs::numTypeBits = 4

Definition at line 28 of file FingerprintUtil.h.