RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
ScaffoldNetwork.h
Go to the documentation of this file.
1//
2// Copyright (C) 2019 Greg Landrum and T5 Informatics GmbH
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10#include <RDGeneral/export.h>
11#ifndef RD_SCAFFOLDNETWORK_H
12#define RD_SCAFFOLDNETWORK_H
13
14#include <vector>
15#include <map>
16#include <string>
17#include <sstream>
18#include <memory>
19#include <iostream>
20
21#ifdef RDK_USE_BOOST_SERIALIZATION
22#include <RDGeneral/Invariant.h>
24#include <boost/archive/text_oarchive.hpp>
25#include <boost/archive/text_iarchive.hpp>
26#include <boost/serialization/vector.hpp>
27#include <boost/serialization/shared_ptr.hpp>
28#include <boost/serialization/version.hpp>
30#endif
31
32namespace RDKit {
33class ROMol;
34class ChemicalReaction;
35
36namespace ScaffoldNetwork {
37
39 bool includeGenericScaffolds =
40 true; ///< include scaffolds with all atoms replaced by dummies
41 bool includeGenericBondScaffolds =
42 false; ///< include scaffolds with all bonds replaced by single bonds
43 bool includeScaffoldsWithoutAttachments =
44 true; ///< remove attachment points from scaffolds and include the result
45 bool includeScaffoldsWithAttachments =
46 true; ///< Include the version of the scaffold with attachment points
47 bool keepOnlyFirstFragment =
48 true; ///< keep only the first fragment from the bond breaking rule
49 bool pruneBeforeFragmenting =
50 true; ///< Do a pruning/flattening step before starting fragmenting
51 bool flattenIsotopes = true; ///< remove isotopes when flattening
52 bool flattenChirality =
53 true; ///< remove chirality and bond stereo when flattening
54 bool flattenKeepLargest =
55 true; ///< keep only the largest fragment when doing flattening
56 bool collectMolCounts = true; ///< keep track of the number of molecules each
57 ///< scaffold was reached from
58
59 std::vector<std::shared_ptr<ChemicalReaction>>
60 bondBreakersRxns; ///< the reaction(s) used to fragment. Should expect a
61 ///< single reactant and produce two products
63 : ScaffoldNetworkParams{{"[!#0;R:1]-!@[!#0:2]>>[*:1]-[#0].[#0]-[*:2]"}} {}
64 ScaffoldNetworkParams(const std::vector<std::string> &bondBreakersSmarts);
65};
66
//! Labels the kind of relationship an edge of the network records.
//! The numeric values are explicit; NetworkEdge::serialize archives this
//! enum, so they should be treated as part of the stored format.
enum class EdgeType {
  Fragment = 1,     ///< molecule -> fragment
  Generic = 2,      ///< molecule -> generic molecule (all atoms are dummies)
  GenericBond = 3,  ///< molecule -> generic bond molecule (all bonds single)
  RemoveAttachment = 4,  ///< molecule -> molecule with no attachment points
  Initialize = 5         ///< molecule -> flattened molecule
};
74
76 size_t beginIdx;
77 size_t endIdx;
79 NetworkEdge() : beginIdx(0), endIdx(0), type(EdgeType::Initialize) {}
80 NetworkEdge(size_t bi, size_t ei, EdgeType typ)
81 : beginIdx(bi), endIdx(ei), type(typ) {}
83 return (beginIdx == o.beginIdx) && (endIdx == o.endIdx) && (type == o.type);
84 }
86 return (beginIdx != o.beginIdx) || (endIdx != o.endIdx) || (type != o.type);
87 }
88#ifdef RDK_USE_BOOST_SERIALIZATION
89 private:
90 friend class boost::serialization::access;
91 template <class Archive>
92 void serialize(Archive &ar, const unsigned int version) {
93 RDUNUSED_PARAM(version);
94 ar & beginIdx;
95 ar & endIdx;
96 ar & type;
97 }
98#endif
99};
100
102 std::vector<std::string> nodes; ///< SMILES for the scaffolds
103 std::vector<unsigned>
104 counts; ///< number of times each scaffold was encountered
105 std::vector<unsigned>
106 molCounts; ///< number of molecules each scaffold was found in
107 std::vector<NetworkEdge> edges; ///< edges in the network
109#ifdef RDK_USE_BOOST_SERIALIZATION
110 ScaffoldNetwork(const std::string &pkl) {
111 std::stringstream iss(pkl);
112 boost::archive::text_iarchive ia(iss);
113 ia >> *this;
114 }
115
116 private:
117 friend class boost::serialization::access;
118 template <class Archive>
119 void serialize(Archive &ar, const unsigned int version) {
120 RDUNUSED_PARAM(version);
121 ar & nodes;
122 ar & counts;
123 if (version > 0) {
124 ar & molCounts;
125 }
126 ar & edges;
127 }
128#endif
129};
130
131//! update an existing ScaffoldNetwork using a set of molecules
132template <typename T>
134 const ScaffoldNetworkParams &params);
135
136//! create a new ScaffoldNetwork for a set of molecules
137template <typename T>
139 const ScaffoldNetworkParams &params) {
141 updateScaffoldNetwork(mols, res, params);
142 return res;
143}
144//! allows nodes to output nicely as strings
145inline std::ostream &operator<<(std::ostream &ostr,
147 switch (e) {
149 ostr << "Fragment";
150 break;
152 ostr << "Generic";
153 break;
155 ostr << "GenericBond";
156 break;
158 ostr << "RemoveAttachment";
159 break;
161 ostr << "Initialize";
162 break;
163 default:
164 ostr << "UNKNOWN";
165 break;
166 }
167 return ostr;
168}
169//! allows edges to output nicely as strings
170inline std::ostream &operator<<(std::ostream &ostr,
172 ostr << "NetworkEdge( " << e.beginIdx << "->" << e.endIdx
173 << ", type:" << e.type << " )";
174 return ostr;
175}
176
177//! returns parameters for constructing scaffold networks using BRICS
178//! fragmentation
180
181} // namespace ScaffoldNetwork
182} // namespace RDKit
183
184#ifdef RDK_USE_BOOST_SERIALIZATION
185namespace boost {
186namespace serialization {
187template <>
188struct version<RDKit::ScaffoldNetwork::ScaffoldNetwork> {
189 BOOST_STATIC_CONSTANT(int, value = 1);
190};
191} // namespace serialization
192} // namespace boost
193#endif
194
195#endif
RDKIT_GRAPHMOL_EXPORT std::ostream & operator<<(std::ostream &target, const RDKit::Atom &at)
allows Atom objects to be dumped to streams
#define RDUNUSED_PARAM(x)
Definition Invariant.h:196
#define RDKIT_SCAFFOLDNETWORK_EXPORT
Definition export.h:473
ScaffoldNetwork createScaffoldNetwork(const T &mols, const ScaffoldNetworkParams &params)
create a new ScaffoldNetwork for a set of molecules
void updateScaffoldNetwork(const T &mols, ScaffoldNetwork &network, const ScaffoldNetworkParams &params)
update an existing ScaffoldNetwork using a set of molecules
@ Initialize
molecule -> flattened molecule
@ Fragment
molecule -> fragment
@ Generic
molecule -> generic molecule (all atoms are dummies)
@ RemoveAttachment
molecule -> molecule with no attachment points
@ GenericBond
molecule -> generic bond molecule (all bonds single)
RDKIT_SCAFFOLDNETWORK_EXPORT ScaffoldNetworkParams getBRICSNetworkParams()
Std stuff.
bool rdvalue_is(const RDValue_cast_t)
Definition RDLog.h:25
bool operator==(const RDKit::ScaffoldNetwork::NetworkEdge &o) const
NetworkEdge(size_t bi, size_t ei, EdgeType typ)
bool operator!=(const RDKit::ScaffoldNetwork::NetworkEdge &o) const
std::vector< std::shared_ptr< ChemicalReaction > > bondBreakersRxns
ScaffoldNetworkParams(const std::vector< std::string > &bondBreakersSmarts)
std::vector< NetworkEdge > edges
edges in the network
std::vector< unsigned > molCounts
number of molecules each scaffold was found in
std::vector< std::string > nodes
SMILES for the scaffolds.
std::vector< unsigned > counts
number of times each scaffold was encountered