RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
SmilesWrite.h
Go to the documentation of this file.
1//
2// Copyright (C) 2002-2021 Greg Landrum and other RDKit contributors
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10#include <RDGeneral/export.h>
11#ifndef RD_SMILESWRITE_H_012020
12#define RD_SMILESWRITE_H_012020
13
14#include <string>
15#include <vector>
16#include <memory>
17#include <cstdint>
18#include <limits>
19
20namespace RDKit {
21class Atom;
22class Bond;
23class ROMol;
24
26 bool doIsomericSmiles =
27 true; /**< include stereochemistry and isotope information */
28 bool doKekule = false; /**< kekulize the molecule before generating the SMILES
29 and output single/double bonds. NOTE that the output
30 is not canonical and that this will throw an
31 exception if the molecule cannot be kekulized. */
32 bool canonical = true; /**< generate canonical SMILES */
33 bool allBondsExplicit = false; /**< include symbols for all bonds */
34 bool allHsExplicit = false; /**< provide hydrogen counts for every atom */
35 bool doRandom = false; /**< randomize the output order. The resulting SMILES
36 is not canonical */
37 int rootedAtAtom = -1; /**< make sure the SMILES starts at the specified
38 atom. The resulting SMILES is not canonical */
39 bool includeDativeBonds =
40 true; /**< include the RDKit extension for dative bonds. Otherwise dative
41 bonds will be written as single bonds*/
42 bool ignoreAtomMapNumbers = false; /**< If true, ignores any atom map numbers
43 when canonicalizing the molecule */
44};
45
46namespace SmilesWrite {
47
48#define CXSMILESFIELDS_ENUM_ITEMS \
49 CXSMILESFIELDS_ENUM_ITEM(CX_NONE, 0) \
50 CXSMILESFIELDS_ENUM_ITEM(CX_ATOM_LABELS, 1 << 0) \
51 CXSMILESFIELDS_ENUM_ITEM(CX_MOLFILE_VALUES, 1 << 1) \
52 CXSMILESFIELDS_ENUM_ITEM(CX_COORDS, 1 << 2) \
53 CXSMILESFIELDS_ENUM_ITEM(CX_RADICALS, 1 << 3) \
54 CXSMILESFIELDS_ENUM_ITEM(CX_ATOM_PROPS, 1 << 4) \
55 CXSMILESFIELDS_ENUM_ITEM(CX_LINKNODES, 1 << 5) \
56 CXSMILESFIELDS_ENUM_ITEM(CX_ENHANCEDSTEREO, 1 << 6) \
57 CXSMILESFIELDS_ENUM_ITEM(CX_SGROUPS, 1 << 7) \
58 CXSMILESFIELDS_ENUM_ITEM(CX_POLYMER, 1 << 8) \
59 CXSMILESFIELDS_ENUM_ITEM(CX_BOND_CFG, 1 << 9) \
60 CXSMILESFIELDS_ENUM_ITEM(CX_BOND_ATROPISOMER, 1 << 10) \
61 CXSMILESFIELDS_ENUM_ITEM(CX_COORDINATE_BONDS, 1 << 11) \
62 CXSMILESFIELDS_ENUM_ITEM(CX_ALL, 0x7fffffff) \
63 CXSMILESFIELDS_ENUM_ITEM(CX_ALL_BUT_COORDS, CX_ALL ^ CX_COORDS)
64
65#define CXSMILESFIELDS_ENUM_ITEM(k, v) k = (v),
67#undef CXSMILESFIELDS_ENUM_ITEM
68#define CXSMILESFIELDS_STD_MAP_ITEM(k) {#k, SmilesWrite::CXSmilesFields::k},
69#define CXSMILESFIELDS_ENUM_ITEM(k, v) CXSMILESFIELDS_STD_MAP_ITEM(k)
70#define CXSMILESFIELDS_ITEMS_MAP \
71 std::map<std::string, SmilesWrite::CXSmilesFields> { \
72 CXSMILESFIELDS_ENUM_ITEMS \
73 }
74
75//! \brief returns the cxsmiles data for a molecule
77 const ROMol &mol, std::uint32_t flags = CXSmilesFields::CX_ALL);
78
79//! \brief returns true if the atomic number is in the SMILES organic subset
81
82//! \brief returns the SMILES for an atom
83/*!
84 \param atom : the atom to work with
85 \param ps : the parameters controlling the SMILES generation
86*/
88 const SmilesWriteParams &ps);
89
90//! \brief returns the SMILES for an atom
91/*!
92 \param atom : the atom to work with
93 \param doKekule : we're doing kekulized smiles (e.g. don't use
94 lower case for the atom label)
95 \param bondIn : the bond we came into the atom on (unused)
96 \param allHsExplicit : if true, hydrogen counts will be provided for every
97 atom.
98 \param isomericSmiles : if true, isomeric SMILES will be generated
99*/
100inline std::string GetAtomSmiles(const Atom *atom, bool doKekule = false,
101 const Bond * = nullptr,
102 bool allHsExplicit = false,
103 bool isomericSmiles = true) {
104 // RDUNUSED_PARAM(bondIn);
107 ps.doKekule = doKekule;
108 ps.allHsExplicit = allHsExplicit;
109 return GetAtomSmiles(atom, ps);
110};
111
112//! \brief returns the SMILES for a bond
113/*!
114 \param bond : the bond to work with
115 \param ps : the parameters controlling the SMILES generation
116 \param atomToLeftIdx : the index of the atom preceding \c bond
117 in the SMILES
118*/
120 const SmilesWriteParams &ps,
121 int atomToLeftIdx = -1);
122//! \brief returns the SMILES for a bond
123/*!
124 \param bond : the bond to work with
125 \param atomToLeftIdx : the index of the atom preceding \c bond
126 in the SMILES
127 \param doKekule : we're doing kekulized smiles (e.g. write out
128 bond orders for aromatic bonds)
129 \param allBondsExplicit : if true, symbols will be included for all bonds.
130*/
131inline std::string GetBondSmiles(const Bond *bond, int atomToLeftIdx = -1,
132 bool doKekule = false,
133 bool allBondsExplicit = false) {
135 ps.doKekule = doKekule;
136 ps.allBondsExplicit = allBondsExplicit;
137 ps.doIsomericSmiles = false;
138 return GetBondSmiles(bond, ps, atomToLeftIdx);
139};
140
141namespace detail {
143 const ROMol &mol, const SmilesWriteParams &params, bool doingCXSmiles);
144}
145
146} // namespace SmilesWrite
147
148//! \brief returns canonical SMILES for a molecule
150 const ROMol &mol, const SmilesWriteParams &params);
151
152//! \brief returns SMILES for a molecule, canonical by default
153/*!
154 \param mol : the molecule in question.
155 \param doIsomericSmiles : include stereochemistry and isotope information
156 in the SMILES
157
158 \param doKekule : do Kekule smiles (i.e. don't use aromatic bonds) NOTE that
159 this will throw an exception if the molecule cannot be kekulized.
160
161 \param rootedAtAtom : make sure the SMILES starts at the specified atom.
162 The resulting SMILES is not, of course, canonical.
163 \param canonical : if false, no attempt will be made to canonicalize the
164 SMILES
165 \param allBondsExplicit : if true, symbols will be included for all bonds.
166 \param allHsExplicit : if true, hydrogen counts will be provided for every
167 atom.
168 \param doRandom : if true, the first atom in the SMILES string will be
169 selected at random and the SMILES string will not be canonical
170 \param ignoreAtomMapNumbers : if true, ignores any atom map numbers when
171 canonicalizing the molecule
172 */
173inline std::string MolToSmiles(const ROMol &mol, bool doIsomericSmiles = true,
174 bool doKekule = false, int rootedAtAtom = -1,
175 bool canonical = true,
176 bool allBondsExplicit = false,
177 bool allHsExplicit = false,
178 bool doRandom = false,
179 bool ignoreAtomMapNumbers = false) {
181 ps.doIsomericSmiles = doIsomericSmiles;
182 ps.doKekule = doKekule;
183 ps.rootedAtAtom = rootedAtAtom;
184 ps.canonical = canonical;
185 ps.allBondsExplicit = allBondsExplicit;
186 ps.allHsExplicit = allHsExplicit;
187 ps.doRandom = doRandom;
188 ps.ignoreAtomMapNumbers = ignoreAtomMapNumbers;
189 return MolToSmiles(mol, ps);
190};
191
192//! \brief returns a vector of random SMILES for a molecule (may contain
193//! duplicates)
194/*!
195 \param mol : the molecule in question.
196 \param numSmiles : the number of SMILES to return
197 \param randomSeed : if >0, will be used to seed the random number generator
198 \param doIsomericSmiles : include stereochemistry and isotope information
199 in the SMILES
200 \param doKekule : do Kekule smiles (i.e. don't use aromatic bonds)
201 \param allBondsExplicit : if true, symbols will be included for all bonds.
202 \param allHsExplicit : if true, hydrogen counts will be provided for every
203 atom.
204 */
206 const ROMol &mol, unsigned int numSmiles, unsigned int randomSeed = 0,
207 bool doIsomericSmiles = true, bool doKekule = false,
208 bool allBondsExplicit = false, bool allHsExplicit = false);
209
210//! \brief returns canonical SMILES for part of a molecule
212 const ROMol &mol, const SmilesWriteParams &params,
213 const std::vector<int> &atomsToUse,
214 const std::vector<int> *bondsToUse = nullptr,
215 const std::vector<std::string> *atomSymbols = nullptr,
216 const std::vector<std::string> *bondSymbols = nullptr);
217
218//! \brief returns canonical SMILES for part of a molecule
219/*!
220 \param mol : the molecule in question.
221 \param atomsToUse : indices of the atoms in the fragment
222 \param bondsToUse : indices of the bonds in the fragment. If this is not
223 provided,
224 all bonds between the atoms in atomsToUse will be included
225 \param atomSymbols : symbols to use for the atoms in the output SMILES
226 \param bondSymbols : symbols to use for the bonds in the output SMILES
227 \param doIsomericSmiles : include stereochemistry and isotope information
228 in the SMILES
229 \param doKekule : do Kekule smiles (i.e. don't use aromatic bonds)
230 \param rootedAtAtom : make sure the SMILES starts at the specified atom.
231 The resulting SMILES is not, of course, canonical.
232 \param canonical : if false, no attempt will be made to canonicalize the
233 SMILES
234 \param allBondsExplicit : if true, symbols will be included for all bonds.
235 \param allHsExplicit : if true, hydrogen counts will be provided for every
236 atom.
237 \param doRandom : generate a randomized smiles string by randomly choosing
238 the priority to follow in the DFS traversal. [default false]
239
240 \b NOTE: the bondSymbols are *not* currently used in the canonicalization.
241
242 */
243inline std::string MolFragmentToSmiles(
244 const ROMol &mol, const std::vector<int> &atomsToUse,
245 const std::vector<int> *bondsToUse = nullptr,
246 const std::vector<std::string> *atomSymbols = nullptr,
247 const std::vector<std::string> *bondSymbols = nullptr,
248 bool doIsomericSmiles = true, bool doKekule = false, int rootedAtAtom = -1,
249 bool canonical = true, bool allBondsExplicit = false,
250 bool allHsExplicit = false) {
252 ps.doIsomericSmiles = doIsomericSmiles;
253 ps.doKekule = doKekule;
254 ps.rootedAtAtom = rootedAtAtom;
255 ps.canonical = canonical;
256 ps.allBondsExplicit = allBondsExplicit;
257 ps.allHsExplicit = allHsExplicit;
258 return MolFragmentToSmiles(mol, ps, atomsToUse, bondsToUse, atomSymbols,
260}
261
262#define RESTOREBONDDIROPTION_ENUM_ITEMS \
263 RESTOREBONDDIROPTION_ENUM_ITEM(RestoreBondDirOptionTrue, \
264 0) /*!< DO restore bond dirs */ \
265 RESTOREBONDDIROPTION_ENUM_ITEM(RestoreBondDirOptionClear, \
266 1) /*!< clear all bond dir information */
267
268#define RESTOREBONDDIROPTION_ENUM_ITEM(k, v) k = v,
270#undef RESTOREBONDDIROPTION_ENUM_ITEM
271#define RESTOREBONDDIROPTION_STD_MAP_ITEM(k) {#k, k},
272#define RESTOREBONDDIROPTION_ENUM_ITEM(k, v) \
273 RESTOREBONDDIROPTION_STD_MAP_ITEM(k)
274#define RESTOREBONDDIROPTION_ITEMS_MAP \
275 std::map<std::string, RestoreBondDirOption> { \
276 RESTOREBONDDIROPTION_ENUM_ITEMS \
277 }
278
279//! \brief returns canonical CXSMILES for a molecule
281 const ROMol &mol, const SmilesWriteParams &ps,
282 std::uint32_t flags = SmilesWrite::CXSmilesFields::CX_ALL,
284
285//! \brief returns canonical CXSMILES for a molecule
286/*!
287 \param mol : the molecule in question.
288 \param doIsomericSmiles : include stereochemistry and isotope information
289 in the SMILES
290 \param doKekule : do Kekule smiles (i.e. don't use aromatic bonds)
291 \param rootedAtAtom : make sure the SMILES starts at the specified atom.
292 The resulting SMILES is not, of course, canonical.
293 \param canonical : if false, no attempt will be made to canonicalize the
294 SMILES
295 \param allBondsExplicit : if true, symbols will be included for all bonds.
296 \param allHsExplicit : if true, hydrogen counts will be provided for every
297 atom.
298 \param doRandom : generate a randomized smiles string by randomly choosing
299 the priority to follow in the DFS traversal. [default false]
300 */
301inline std::string MolToCXSmiles(const ROMol &mol, bool doIsomericSmiles = true,
302 bool doKekule = false, int rootedAtAtom = -1,
303 bool canonical = true,
304 bool allBondsExplicit = false,
305 bool allHsExplicit = false,
306 bool doRandom = false) {
308 ps.doIsomericSmiles = doIsomericSmiles;
309 ps.doKekule = doKekule;
310 ps.rootedAtAtom = rootedAtAtom;
311 ps.canonical = canonical;
312 ps.allBondsExplicit = allBondsExplicit;
313 ps.allHsExplicit = allHsExplicit;
314 ps.doRandom = doRandom;
315 return MolToCXSmiles(mol, ps, SmilesWrite::CXSmilesFields::CX_ALL);
316};
317
318//! \brief returns canonical CXSMILES for part of a molecule
320 const ROMol &mol, const SmilesWriteParams &params,
321 const std::vector<int> &atomsToUse,
322 const std::vector<int> *bondsToUse = nullptr,
323 const std::vector<std::string> *atomSymbols = nullptr,
324 const std::vector<std::string> *bondSymbols = nullptr);
325
326//! \brief returns canonical CXSMILES for part of a molecule
327/*!
328 \param mol : the molecule in question.
329 \param atomsToUse : indices of the atoms in the fragment
330 \param bondsToUse : indices of the bonds in the fragment. If this is not
331 provided,
332 all bonds between the atoms in atomsToUse will be included
333 \param atomSymbols : symbols to use for the atoms in the output SMILES
334 \param bondSymbols : symbols to use for the bonds in the output SMILES
335 \param doIsomericSmiles : include stereochemistry and isotope information
336 in the SMILES
337 \param doKekule : do Kekule smiles (i.e. don't use aromatic bonds)
338 \param rootedAtAtom : make sure the SMILES starts at the specified atom.
339 The resulting SMILES is not, of course, canonical.
340 \param canonical : if false, no attempt will be made to canonicalize the
341 SMILES
342 \param allBondsExplicit : if true, symbols will be included for all bonds.
343 \param allHsExplicit : if true, hydrogen counts will be provided for every
344 atom.
345
346 \b NOTE: the bondSymbols are *not* currently used in the canonicalization.
347
348 */
349inline std::string MolFragmentToCXSmiles(
350 const ROMol &mol, const std::vector<int> &atomsToUse,
351 const std::vector<int> *bondsToUse = nullptr,
352 const std::vector<std::string> *atomSymbols = nullptr,
353 const std::vector<std::string> *bondSymbols = nullptr,
354 bool doIsomericSmiles = true, bool doKekule = false, int rootedAtAtom = -1,
355 bool canonical = true, bool allBondsExplicit = false,
356 bool allHsExplicit = false) {
358 ps.doIsomericSmiles = doIsomericSmiles;
359 ps.doKekule = doKekule;
360 ps.rootedAtAtom = rootedAtAtom;
361 ps.canonical = canonical;
362 ps.allBondsExplicit = allBondsExplicit;
363 ps.allHsExplicit = allHsExplicit;
364 return MolFragmentToCXSmiles(mol, ps, atomsToUse, bondsToUse, atomSymbols,
366}
367
369 const std::string &details_json);
371 const char *details_json);
374 const std::string &details_json);
377 const char *details_json);
378
379} // namespace RDKit
380#endif
The class for representing atoms.
Definition Atom.h:75
class for representing a bond
Definition Bond.h:47
#define RDKIT_SMILESPARSE_EXPORT
Definition export.h:497
RDKIT_SMILESPARSE_EXPORT std::string MolToSmiles(const ROMol &mol, const SmilesWriteParams &params, bool doingCXSmiles)
RDKIT_SMILESPARSE_EXPORT std::string GetAtomSmiles(const Atom *atom, const SmilesWriteParams &ps)
returns the SMILES for an atom
RDKIT_SMILESPARSE_EXPORT bool inOrganicSubset(int atomicNumber)
returns true if the atomic number is in the SMILES organic subset
RDKIT_SMILESPARSE_EXPORT std::string GetBondSmiles(const Bond *bond, const SmilesWriteParams &ps, int atomToLeftIdx=-1)
returns the SMILES for a bond
RDKIT_SMILESPARSE_EXPORT std::string getCXExtensions(const ROMol &mol, std::uint32_t flags=CXSmilesFields::CX_ALL)
returns the cxsmiles data for a molecule
Std stuff.
RDKIT_SMILESPARSE_EXPORT std::vector< std::string > MolToRandomSmilesVect(const ROMol &mol, unsigned int numSmiles, unsigned int randomSeed=0, bool doIsomericSmiles=true, bool doKekule=false, bool allBondsExplicit=false, bool allHsExplicit=false)
returns a vector of random SMILES for a molecule (may contain duplicates)
void updateSmilesWriteParamsFromJSON(SmilesWriteParams &params, const std::string &details_json)
bool rdvalue_is(const RDValue_cast_t)
RDKIT_SMILESPARSE_EXPORT std::string MolFragmentToSmiles(const ROMol &mol, const SmilesWriteParams &params, const std::vector< int > &atomsToUse, const std::vector< int > *bondsToUse=nullptr, const std::vector< std::string > *atomSymbols=nullptr, const std::vector< std::string > *bondSymbols=nullptr)
returns canonical SMILES for part of a molecule
RDKIT_SMILESPARSE_EXPORT std::string MolToSmiles(const ROMol &mol, const SmilesWriteParams &params)
returns canonical SMILES for a molecule
RDKIT_SMILESPARSE_EXPORT std::string MolToCXSmiles(const ROMol &mol, const SmilesWriteParams &ps, std::uint32_t flags=SmilesWrite::CXSmilesFields::CX_ALL, RestoreBondDirOption restoreBondDirs=RestoreBondDirOptionClear)
returns canonical CXSMILES for a molecule
void updateCXSmilesFieldsFromJSON(SmilesWrite::CXSmilesFields &cxSmilesFields, RestoreBondDirOption &restoreBondDirs, const std::string &details_json)
RDKIT_SMILESPARSE_EXPORT std::string MolFragmentToCXSmiles(const ROMol &mol, const SmilesWriteParams &params, const std::vector< int > &atomsToUse, const std::vector< int > *bondsToUse=nullptr, const std::vector< std::string > *atomSymbols=nullptr, const std::vector< std::string > *bondSymbols=nullptr)
returns canonical CXSMILES for part of a molecule
RestoreBondDirOption
@ RESTOREBONDDIROPTION_ENUM_ITEMS