RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
RascalClusterOptions.h
Go to the documentation of this file.
//
// Copyright (C) David Cosgrove 2023
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
// The contents are covered by the terms of the BSD license
// which is included in the file license.txt, found at the root
// of the RDKit source tree.
//
// Options for Rascal Clustering. In general, the option names and defaults
// are taken from the paper:
// 'A Line Graph Algorithm for Clustering Chemical Structures Based
// on Common Substructural Cores', JW Raymond, PW Willett.
// https://match.pmf.kg.ac.rs/electronic_versions/Match48/match48_197-207.pdf
// https://eprints.whiterose.ac.uk/77598/

17
#include <
RDGeneral/export.h
>
18
19
#ifndef RASCALCLUSTEROPTIONS_H
20
#define RASCALCLUSTEROPTIONS_H
21
22
namespace RDKit {
namespace RascalMCES {

//! Tunable parameters for Rascal clustering.
/*!
  Every member carries an in-class default, so a default-constructed
  instance is immediately usable; callers override only the fields they
  need. Several members are annotated with the symbol used for them in
  "the paper" (p, S_a, S_b).
*/
struct RDKIT_RASCALMCES_EXPORT RascalClusterOptions {
  //! Similarity cutoff for clustering. Initial clusters will have molecule
  //! pairs of at least this similarity.
  double similarityCutoff = 0.7;

  //! Penalty score for each unconnected component in the MCES.
  double a = 0.05;

  //! Weight of matched bonds over matched atoms.
  double b = 2.0;

  //! Minimum number of atoms in a fragment for it to be included in the
  //! MCES. Also p in the paper.
  unsigned int minFragSize = 3;

  //! Two pairs of molecules are included in the same cluster if the
  //! similarity between their MCESs is greater than this. S_a in the paper.
  double minIntraClusterSim = 0.9;

  //! Two clusters are merged if the fraction of molecules they have in
  //! common is greater than this. S_b in the paper.
  double clusterMergeSim = 0.6;

  //! The maximum number of fragments in any MCES. Otherwise the MCES can be
  //! a lot of small fragments scattered across the molecule - it tries too
  //! hard to find a match, sometimes.
  unsigned int maxNumFrags = 2;

  //! The number of threads to use. If > 0, will use that number. If <= 0,
  //! will use the number of hardware threads plus this number. So if the
  //! number of hardware threads is 8, and numThreads is -1, it will use
  //! 7 threads.
  int numThreads = -1;
};

}  // namespace RascalMCES
}  // namespace RDKit
53
#endif
// RASCALCLUSTEROPTIONS_H
export.h
RDKIT_RASCALMCES_EXPORT
#define RDKIT_RASCALMCES_EXPORT
Definition
export.h:441
RDKit
Std stuff.
Definition
Abbreviations.h:19
RDKit::RascalMCES::RascalClusterOptions
Definition
RascalClusterOptions.h:25
GraphMol
RascalMCES
RascalClusterOptions.h
Generated on Mon Sep 30 2024 05:19:34 for RDKit by
1.9.8