24namespace MHFPFingerprints {
33 const unsigned char *ptr = (
const unsigned char *)
str.c_str();
59 MHFPEncoder(
unsigned int n_permutations = 2048,
unsigned int seed = 42);
88 std::vector<uint32_t>
FromArray(
const std::vector<uint32_t> &vec);
115 unsigned char radius = 3,
117 bool isomeric =
false,
118 bool kekulize =
false,
119 unsigned char min_radius = 1);
123 unsigned char radius = 3,
125 bool isomeric =
false,
126 bool kekulize =
false,
127 unsigned char min_radius = 1);
153 std::vector<uint32_t>
Encode(
ROMol &mol,
unsigned char radius = 3,
154 bool rings =
true,
bool isomeric =
false,
155 bool kekulize =
false,
156 unsigned char min_radius = 1);
159 std::vector<std::vector<uint32_t>>
Encode(std::vector<ROMol> &mols,
160 unsigned char radius = 3,
162 bool isomeric =
false,
163 bool kekulize =
false,
164 unsigned char min_radius = 1);
167 std::vector<uint32_t>
Encode(std::string &smiles,
unsigned char radius = 3,
168 bool rings =
true,
bool isomeric =
false,
169 bool kekulize =
false,
170 unsigned char min_radius = 1);
173 std::vector<std::vector<uint32_t>>
Encode(std::vector<std::string> &smiles,
174 unsigned char radius = 3,
176 bool isomeric =
false,
177 bool kekulize =
false,
178 unsigned char min_radius = 1);
206 bool rings =
true,
bool isomeric =
false,
207 bool kekulize =
false,
208 unsigned char min_radius = 1,
209 size_t length = 2048);
213 std::vector<ROMol> &mols,
unsigned char radius = 3,
bool rings =
true,
214 bool isomeric =
false,
bool kekulize =
false,
215 unsigned char min_radius = 1,
size_t length = 2048);
219 bool rings =
true,
bool isomeric =
false,
220 bool kekulize =
false,
221 unsigned char min_radius = 1,
222 size_t length = 2048);
226 std::vector<std::string> &smiles,
unsigned char radius = 3,
227 bool rings =
true,
bool isomeric =
false,
bool kekulize =
false,
228 unsigned char min_radius = 1,
size_t length = 2048);
239 static double Distance(
const std::vector<uint32_t> &a,
240 const std::vector<uint32_t> &b) {
241 size_t mismatches = 0;
243 for (
size_t i = 0; i < a.size(); i++) {
249 return mismatches / (double)a.size();
254 uint64_t FastMod(
const uint64_t input,
const uint64_t ceil) {
255 return input >= ceil ? input % ceil : input;
259 uint32_t length = 2048) {
261 for (
size_t i = 0; i < vec.size(); i++) {
262 ebv.setBit(vec[i] % length);
267 std::vector<uint32_t> HashShingling(std::vector<std::string> vec) {
268 std::vector<uint32_t> result(vec.size());
269 for (
size_t i = 0; i < vec.size(); i++) {
270 result[i] = FNV::hash(vec[i]);
275 unsigned int n_permutations_, seed_;
276 uint64_t prime_ = 2305843009213693951UL;
277 uint32_t max_hash_ = 4294967295;
278 std::vector<uint32_t> perms_a_;
279 std::vector<uint32_t> perms_b_;
Defines the primary molecule class ROMol as well as associated typedefs.
a class for bit vectors that are densely occupied
static double Distance(const std::vector< uint32_t > &a, const std::vector< uint32_t > &b)
Calculates the Hamming distance between two MHFP fingerprints.
ExplicitBitVect EncodeSECFP(std::string &smiles, unsigned char radius=3, bool rings=true, bool isomeric=false, bool kekulize=false, unsigned char min_radius=1, size_t length=2048)
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
std::vector< ExplicitBitVect > EncodeSECFP(std::vector< std::string > &smiles, unsigned char radius=3, bool rings=true, bool isomeric=false, bool kekulize=false, unsigned char min_radius=1, size_t length=2048)
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
std::vector< std::string > CreateShingling(const std::string &smiles, unsigned char radius=3, bool rings=true, bool isomeric=false, bool kekulize=false, unsigned char min_radius=1)
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
std::vector< std::string > CreateShingling(const ROMol &mol, unsigned char radius=3, bool rings=true, bool isomeric=false, bool kekulize=false, unsigned char min_radius=1)
Creates a molecular shingling based on circular substructures.
std::vector< ExplicitBitVect > EncodeSECFP(std::vector< ROMol > &mols, unsigned char radius=3, bool rings=true, bool isomeric=false, bool kekulize=false, unsigned char min_radius=1, size_t length=2048)
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
std::vector< std::vector< uint32_t > > Encode(std::vector< ROMol > &mols, unsigned char radius=3, bool rings=true, bool isomeric=false, bool kekulize=false, unsigned char min_radius=1)
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
std::vector< uint32_t > Encode(std::string &smiles, unsigned char radius=3, bool rings=true, bool isomeric=false, bool kekulize=false, unsigned char min_radius=1)
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
ExplicitBitVect EncodeSECFP(ROMol &mol, unsigned char radius=3, bool rings=true, bool isomeric=false, bool kekulize=false, unsigned char min_radius=1, size_t length=2048)
Creates a binary fingerprint based on circular sub-SMILES.
std::vector< uint32_t > Encode(ROMol &mol, unsigned char radius=3, bool rings=true, bool isomeric=false, bool kekulize=false, unsigned char min_radius=1)
Creates a MinHash vector from a molecule.
MHFPEncoder(unsigned int n_permutations=2048, unsigned int seed=42)
Constructor.
std::vector< uint32_t > FromArray(const std::vector< uint32_t > &vec)
Creates a MinHash from a list of unsigned integers.
std::vector< uint32_t > FromStringArray(const std::vector< std::string > &vec)
Creates a MinHash from a vector of strings.
std::vector< std::vector< uint32_t > > Encode(std::vector< std::string > &smiles, unsigned char radius=3, bool rings=true, bool isomeric=false, bool kekulize=false, unsigned char min_radius=1)
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
a class for bit vectors that are sparsely occupied.
#define RDKIT_FINGERPRINTS_EXPORT
uint32_t hash(const std::string &str, uint32_t hash=seed)
A simple implementation of the Fowler–Noll–Vo hash function.
const std::string mhfpFingerprintVersion
bool rdvalue_is(const RDValue_cast_t)