RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
SynthonSet.h
Go to the documentation of this file.
1//
2// Copyright (C) David Cosgrove 2024.
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10
11#ifndef RDKIT_SYNTHONSET_H
12#define RDKIT_SYNTHONSET_H
13
14#include <iosfwd>
15#include <string>
16#include <vector>
17
18#include <boost/dynamic_bitset.hpp>
19
20#include <RDGeneral/export.h>
24
25namespace RDKit {
26class ROMol;
27
28namespace SynthonSpaceSearch {
29class Synthon;
30struct SynthonSpaceSearchParams;
31
32// This class holds all the synthons for a particular reaction.
34 public:
35 SynthonSet() = default;  // Default construction; d_id starts out as an empty string.
36 explicit SynthonSet(const std::string &id) : d_id(id) {}  // Stores a copy of id as this set's identifier.
37 SynthonSet(const SynthonSet &rhs) = delete;  // Copy construction is disabled.
38 SynthonSet(SynthonSet &&rhs) = delete;  // Move construction is disabled as well.
39
40 const std::string &getId() const { return d_id; }  // Read-only reference to the identifier held in d_id.
41 const std::vector<std::vector<std::unique_ptr<Synthon>>> &getSynthons()
42 const {
43 return d_synthons;  // Read-only view of the synthon lists; the unique_ptrs (and so ownership) stay in this object.
44 }
45 const boost::dynamic_bitset<> &getConnectors() const { return d_connectors; }  // Read-only reference to d_connectors, the bits recording which connectors are present.
46 const std::vector<boost::dynamic_bitset<>> &getSynthonConnectorPatterns()
47 const {
48 return d_synthConnPatts;  // Read-only reference to the connector patterns, one bitset per synthon set.
49 }
50 const std::vector<std::shared_ptr<ROMol>> &getConnectorRegions() const;  // Defined out of line; presumably exposes d_connectorRegions - confirm in the .cpp.
51
52 const std::unique_ptr<ExplicitBitVect> &getConnRegFP() const;  // Presumably d_connRegFP, the folded fingerprint of the connector regions - confirm.
53 const std::unique_ptr<ExplicitBitVect> &getAddFP() const;  // Presumably d_addFP, used to account for the joins in approximate FP similarity - confirm.
54 const std::unique_ptr<ExplicitBitVect> &getSubtractFP() const;  // Presumably d_subtractFP, used to account for the dummy atoms - confirm.
55 const std::vector<int> &getNumConnectors() const;  // Presumably d_numConnectors, the connector count for each synthon set - confirm.
56 bool hasFingerprints() const;  // Presumably true once the synthon fingerprints (d_synthonFPs) have been built - confirm.
58
59 const std::vector<std::vector<std::unique_ptr<ExplicitBitVect>>> &
60 getSynthonFPs() const;  // Declarator restored (it had been dropped from the listing); presumably exposes d_synthonFPs, the fingerprints used for fingerprint similarity searching - confirm in the .cpp.
61 // Write this SynthonSet to, or read it back from, a binary stream.
62 void writeToDBStream(std::ostream &os) const;
63 void readFromDBStream(std::istream &is, std::uint32_t version);  // version: presumably the format version of the data being read - confirm.
64 // Write the enumerated molecules to the stream os in SMILES format.
65 void enumerateToStream(std::ostream &os) const;
66
67 // The SynthonSet takes ownership of newSynthon (hence the unique_ptr) and manages it from then on.
68 void addSynthon(int synthonSetNum, std::unique_ptr<Synthon> newSynthon);  // synthonSetNum: presumably the index of the synthon set receiving newSynthon - confirm.
69
70 // Sometimes the synthon sets are numbered from 1 in the text file,
71 // in which case there'll be an empty set 0.
73
74 // The bonds in the synthons may not be the same as in the products, and
75 // this is a problem for aromatic ring creation in particular. Such as:
76 // [1*]=CC=C[2*] and [1*]Nc1c([2*])cccc1 giving c1ccc2ncccc2c1. So
77 // transfer the types of bonds from the products to the synthons.
79
80 // Build the connector regions and their fingerprints. Only used when
81 // creating a SynthonSpace from a text file.
83
84 // Scan through the connectors ([1*], [2*] etc.) in the synthons
85 // and set bits in d_connectors accordingly. Also removes any empty
86 // reagent sets, which might be because the synthon numbers start from
87 // 1 rather than 0. Only used when creating a SynthonSpace from a text
88 // file.
90
94
95 // Return the molecules of the synthons whose bits are set in reqSynths.
96 // reqSynths must have the same dimensions as d_synthons, i.e. one
97 // bitset per synthon set with one bit per synthon in that set.
98 std::vector<std::vector<ROMol *>> getSynthons(  // Returns raw pointers; presumably non-owning - confirm.
99 const std::vector<boost::dynamic_bitset<>> &reqSynths) const;
100
101 std::string buildProductName(const std::vector<size_t> &synthNums) const;  // synthNums: presumably one synthon index per synthon set - confirm; the name format is set in the .cpp.
102 std::unique_ptr<ROMol> buildProduct(  // The caller owns the returned molecule (returned by unique_ptr).
103 const std::vector<size_t> &synthNums) const;  // synthNums: presumably one synthon index per synthon set - confirm.
104
105 private:
106 std::string d_id;
107 // The lists of synthons. A product of the reaction is created by
108 // combining 1 synthon from each of the outer vectors.
109 std::vector<std::vector<std::unique_ptr<Synthon>>> d_synthons;
110 // 4 bits showing which connectors are present in all the
111 // synthon sets.
112 boost::dynamic_bitset<> d_connectors;
113 // and the connector patterns for each synthon set.
114 std::vector<boost::dynamic_bitset<>> d_synthConnPatts;
115
116 // The connector regions of a molecule are the pieces of up to 3 bonds from
117 // a connector atom into the molecule. We keep a vector of all the ones
118 // present in the synthons in the set, plus a single fingerprint into which
119 // the fingerprints of all those regions are folded. If a query fragment
120 // doesn't have a connector region in common with any of the synthons, it can
121 // be assumed that the fragment won't have a match in this SynthonSet.
122 std::vector<std::shared_ptr<ROMol>> d_connectorRegions;
123 // The fingerprint of the connector regions. Fingerprints for all
124 // connector regions are folded into the same fingerprint.
125 std::unique_ptr<ExplicitBitVect> d_connRegFP;
126
127 // When doing an approximate FP similarity by ORing together
128 // the synthonFPs, adding d_addFP and subtracting d_subtractFP
129 // accounts (a bit) for the joins and the dummy atoms
130 // respectively.
131 std::unique_ptr<ExplicitBitVect> d_addFP;
132 std::unique_ptr<ExplicitBitVect> d_subtractFP;
133
134 // The number of connectors in the synthons in each synthon set.
135 std::vector<int> d_numConnectors;
136
137 // The fingerprints for the synthons for use with a fingerprint similarity
138 // search. They are not properties of the Synthons because they are not
139 // generated directly from them, as explained in buildSynthonFingerprints.
140 std::vector<std::vector<std::unique_ptr<ExplicitBitVect>>> d_synthonFPs;
141
142 // Tag each atom and bond in each synthon with its index and the synthon
143 // set number it came from.
144 void tagSynthonAtomsAndBonds() const;  // NOTE(review): declared const although it tags the synthons; how the tags are stored is not visible here - confirm.
145};
146
147} // namespace SynthonSpaceSearch
148
149} // namespace RDKit
150
151#endif // RDKIT_SYNTHONSET_H
class that generates same fingerprint style for different output formats
const std::unique_ptr< ExplicitBitVect > & getSubtractFP() const
const std::vector< std::vector< std::unique_ptr< Synthon > > > & getSynthons() const
Definition SynthonSet.h:41
const std::unique_ptr< ExplicitBitVect > & getAddFP() const
SynthonSet(SynthonSet &&rhs)=delete
void enumerateToStream(std::ostream &os) const
const std::unique_ptr< ExplicitBitVect > & getConnRegFP() const
void readFromDBStream(std::istream &is, std::uint32_t version)
const std::vector< std::vector< std::unique_ptr< ExplicitBitVect > > > & getSynthonFPs() const
std::unique_ptr< ROMol > buildProduct(const std::vector< size_t > &synthNums) const
void writeToDBStream(std::ostream &os) const
void addSynthon(int synthonSetNum, std::unique_ptr< Synthon > newSynthon)
void buildSynthonFingerprints(const FingerprintGenerator< std::uint64_t > &fpGen)
const std::vector< std::shared_ptr< ROMol > > & getConnectorRegions() const
SynthonSet(const std::string &id)
Definition SynthonSet.h:36
std::string buildProductName(const std::vector< size_t > &synthNums) const
void buildAddAndSubtractFPs(const FingerprintGenerator< std::uint64_t > &fpGen)
const std::vector< int > & getNumConnectors() const
std::vector< std::vector< ROMol * > > getSynthons(const std::vector< boost::dynamic_bitset<> > &reqSynths) const
const std::vector< boost::dynamic_bitset<> > & getSynthonConnectorPatterns() const
Definition SynthonSet.h:46
const boost::dynamic_bitset<> & getConnectors() const
Definition SynthonSet.h:45
SynthonSet(const SynthonSet &rhs)=delete
const std::string & getId() const
Definition SynthonSet.h:40
#define RDKIT_SYNTHONSPACESEARCH_EXPORT
Definition export.h:545
Std stuff.