RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
MolOps.h
Go to the documentation of this file.
1//
2// Copyright (C) 2001-2024 Greg Landrum and other RDKit contributors
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10#include <RDGeneral/export.h>
11#ifndef RD_MOL_OPS_H
12#define RD_MOL_OPS_H
13
14#include <vector>
15#include <map>
16#include <list>
18#include <boost/smart_ptr.hpp>
19#include <boost/dynamic_bitset.hpp>
21#include <RDGeneral/types.h>
22#include "SanitException.h"
24
25RDKIT_GRAPHMOL_EXPORT extern const int ci_LOCAL_INF;
26namespace RDKit {
27class ROMol;
28class RWMol;
29class Atom;
30class Bond;
31class Conformer;
32typedef std::vector<double> INVAR_VECT;
33typedef INVAR_VECT::iterator INVAR_VECT_I;
34typedef INVAR_VECT::const_iterator INVAR_VECT_CI;
35
36//! \brief Groups a variety of molecular query and transformation operations.
37namespace MolOps {
38
39//! return the number of electrons available on an atom to donate for
40/// aromaticity
41/*!
42 The result is determined using the default valency, number of lone pairs,
43 number of bonds and the formal charge. Note that the atom may not donate
44 all of these electrons to a ring for aromaticity (also used in Conjugation
45 and hybridization code).
46
47 \param at the atom of interest
48
49 \return the number of electrons
50*/
52
53//! sums up all atomic formal charges and returns the result
55
56//! returns whether or not the given Atom is involved in a conjugated bond
58
59//! find fragments (disconnected components of the molecular graph)
60/*!
61
62 \param mol the molecule of interest
63 \param mapping used to return the mapping of Atoms->fragments.
64 On return \c mapping will be <tt>mol->getNumAtoms()</tt> long
65 and will contain the fragment assignment for each Atom
66
67 \return the number of fragments found.
68
69*/
70RDKIT_GRAPHMOL_EXPORT unsigned int getMolFrags(const ROMol &mol,
71 std::vector<int> &mapping);
72//! find fragments (disconnected components of the molecular graph)
73/*!
74
75 \param mol the molecule of interest
76 \param frags used to return the Atoms in each fragment
77 On return \c mapping will be \c numFrags long, and each entry
78 will contain the indices of the Atoms in that fragment.
79
80 \return the number of fragments found.
81
82*/
84 const ROMol &mol, std::vector<std::vector<int>> &frags);
85
86//! splits a molecule into its component fragments
87/// (disconnected components of the molecular graph)
88/*!
89
90 \param mol the molecule of interest
91 \param molFrags used to return the disconnected fragments as molecules.
92 Any contents on input will be cleared.
93 \param sanitizeFrags toggles sanitization of the fragments after
94 they are built
95 \param frags used to return the mapping of Atoms->fragments.
96 if provided, \c frags will be <tt>mol->getNumAtoms()</tt> long
97 on return and will contain the fragment assignment for each Atom.
98 \param fragsMolAtomMapping used to return the Atoms in each fragment
99 On return \c mapping will be \c numFrags long, and each entry
100 will contain the indices of the Atoms in that fragment.
101 \param copyConformers toggles copying conformers of the fragments after
102 they are built
103 \return the number of fragments found.
104
105*/
107 const ROMol &mol, std::vector<std::unique_ptr<ROMol>> &molFrags,
108 bool sanitizeFrags = true, std::vector<int> *frags = nullptr,
109 std::vector<std::vector<int>> *fragsMolAtomMapping = nullptr,
110 bool copyConformers = true);
111
112//! splits a molecule into its component fragments
113/// (disconnected components of the molecular graph)
114/*!
115
116 \param mol the molecule of interest
117 \param sanitizeFrags toggles sanitization of the fragments after
118 they are built
119 \param frags used to return the mapping of Atoms->fragments.
120 if provided, \c frags will be <tt>mol->getNumAtoms()</tt> long
121 on return and will contain the fragment assignment for each Atom
122 \param fragsMolAtomMapping used to return the Atoms in each fragment
123 On return \c mapping will be \c numFrags long, and each entry
124 will contain the indices of the Atoms in that fragment.
125 \param copyConformers toggles copying conformers of the fragments after
126 they are built
127 \return a vector of the fragments as smart pointers to ROMols
128
129*/
130RDKIT_GRAPHMOL_EXPORT std::vector<boost::shared_ptr<ROMol>> getMolFrags(
131 const ROMol &mol, bool sanitizeFrags = true,
132 std::vector<int> *frags = nullptr,
133 std::vector<std::vector<int>> *fragsMolAtomMapping = nullptr,
134 bool copyConformers = true);
135
136//! splits a molecule into pieces based on labels assigned using a query
137/*!
138
139 \param mol the molecule of interest
140 \param query the query used to "label" the molecule for fragmentation
141 \param sanitizeFrags toggles sanitization of the fragments after
142 they are built
143 \param whiteList if provided, only labels in the list will be kept
144 \param negateList if true, the white list logic will be inverted: only labels
145 not in the list will be kept
146
147 \return a map of the fragments and their labels
148
149*/
150
151template <typename T>
152RDKIT_GRAPHMOL_EXPORT std::map<T, boost::shared_ptr<ROMol>>
153getMolFragsWithQuery(const ROMol &mol, T (*query)(const ROMol &, const Atom *),
154 bool sanitizeFrags = true,
155 const std::vector<T> *whiteList = nullptr,
156 bool negateList = false);
157//! splits a molecule into pieces based on labels assigned using a query,
158//! putting them into a map of std::unique_ptr<ROMol>.
159/*!
160
161 \param mol the molecule of interest
162 \param query the query used to "label" the molecule for fragmentation
163 \param molFrags used to return the disconnected fragments as molecules.
164 Any contents on input will be cleared.
165 \param sanitizeFrags toggles sanitization of the fragments after
166 they are built
167 \param whiteList if provided, only labels in the list will be kept
168 \param negateList if true, the white list logic will be inverted: only labels
169 not in the list will be kept
170
171 \return the number of fragments
172
173*/
174template <typename T>
176 const ROMol &mol, T (*query)(const ROMol &, const Atom *),
177 std::map<T, std::unique_ptr<ROMol>> &molFrags, bool sanitizeFrags = true,
178 const std::vector<T> *whiteList = nullptr, bool negateList = false);
179
180#if 0
181 //! finds a molecule's minimum spanning tree (MST)
182 /*!
183 \param mol the molecule of interest
184 \param mst used to return the MST as a vector of bond indices
185 */
186 RDKIT_GRAPHMOL_EXPORT void findSpanningTree(const ROMol &mol,std::vector<int> &mst);
187#endif
188
189//! \name Dealing with hydrogens
190//{@
191
192//! returns a copy of a molecule with hydrogens added in as explicit Atoms
193/*!
194 \param mol the molecule to add Hs to
195 \param explicitOnly (optional) if this \c true, only explicit Hs will be
196 added
197 \param addCoords (optional) If this is true, estimates for the atomic
198 coordinates
199 of the added Hs will be used.
200 \param onlyOnAtoms (optional) if provided, this should be a vector of
201 IDs of the atoms that will be considered for H addition.
202 \param addResidueInfo (optional) if this is true, add residue info to
203 hydrogen atoms (useful for PDB files).
204
205 \return the new molecule
206
207 <b>Notes:</b>
208 - it makes no sense to use the \c addCoords option if the molecule's
209 heavy
210 atoms don't already have coordinates.
211 - the caller is responsible for <tt>delete</tt>ing the pointer this
212 returns.
213 */
215 bool addCoords = false,
216 const UINT_VECT *onlyOnAtoms = nullptr,
217 bool addResidueInfo = false);
218//! \overload
219/// modifies the molecule in place
221 bool addCoords = false,
222 const UINT_VECT *onlyOnAtoms = nullptr,
223 bool addResidueInfo = false);
224
225//! Sets Cartesian coordinates for a terminal atom.
226//! Useful for growing an atom off a molecule with sensible
227//! coordinates based on the geometry of the neighbor.
228/*!
229 NOTE: this sets appropriate coordinates in all of the molecule's conformers.
230 \param mol the molecule the atoms belong to
231 \param idx index of the terminal atom whose coordinates are set
232 \param otherIdx index of the bonded neighbor atom
233*/
234
236 unsigned int otherIdx);
237
238//! returns a copy of a molecule with hydrogens removed
239/*!
240 \param mol the molecule to remove Hs from
241 \param implicitOnly (optional) if this \c true, only implicit Hs will be
242 removed
243 \param updateExplicitCount (optional) If this is \c true, when explicit Hs
244 are removed
245 from the graph, the heavy atom to which they are bound will have its
246 counter of
247 explicit Hs increased.
248 \param sanitize: (optional) If this is \c true, the final molecule will be
249 sanitized
250
251 \return the new molecule
252
253 <b>Notes:</b>
254 - Hydrogens which aren't connected to a heavy atom will not be
255 removed. This prevents molecules like <tt>"[H][H]"</tt> from having
256 all atoms removed.
257 - Labelled hydrogen (e.g. atoms with atomic number=1, but mass > 1),
258 will not be removed.
259 - two coordinate Hs, like the central H in C[H-]C, will not be removed
260 - Hs connected to dummy atoms will not be removed
261 - Hs that are part of the definition of double bond Stereochemistry
262 will not be removed
263 - Hs that are not connected to anything else will not be removed
264 - Hs that have a query defined (i.e. hasQuery() returns true) will not
265 be removed
266
267 - the caller is responsible for <tt>delete</tt>ing the pointer this
268 returns.
269*/
270
272 bool implicitOnly = false,
273 bool updateExplicitCount = false,
274 bool sanitize = true);
275//! \overload
276/// modifies the molecule in place
278 bool updateExplicitCount = false,
279 bool sanitize = true);
281 bool removeDegreeZero = false; /**< hydrogens that have no bonds */
282 bool removeHigherDegrees = false; /**< hydrogens with two (or more) bonds */
283 bool removeOnlyHNeighbors =
284 false; /**< hydrogens with bonds only to other hydrogens */
285 bool removeIsotopes = false; /**< hydrogens with non-default isotopes */
286 bool removeAndTrackIsotopes = false; /**< removes hydrogens with non-default
287 isotopes and keeps track of the heavy atom the isotopes were attached to in
288 the private _isotopicHs atom property, so they are re-added by AddHs() as the
289 original isotopes if possible*/
290 bool removeDummyNeighbors =
291 false; /**< hydrogens with at least one dummy-atom neighbor */
292 bool removeDefiningBondStereo =
293 false; /**< hydrogens defining bond stereochemistry */
294 bool removeWithWedgedBond = true; /**< hydrogens with wedged bonds to them */
295 bool removeWithQuery = false; /**< hydrogens with queries defined */
296 bool removeMapped = true; /**< mapped hydrogens */
297 bool removeInSGroups = true; /**< part of a SubstanceGroup.
298 An H atom will only be removed if it doesn't cause any SGroup to become empty,
299 and if it doesn't play a special role in the SGroup (XBOND, attach point
300 or a CState) */
301 bool showWarnings = true; /**< display warnings for Hs that are not removed */
302 bool removeNonimplicit = true; /**< DEPRECATED equivalent of !implicitOnly */
303 bool updateExplicitCount =
304 false; /**< DEPRECATED equivalent of updateExplicitCount */
305 bool removeHydrides = true; /**< Removing Hydrides */
306 bool removeNontetrahedralNeighbors =
307 false; /**< remove Hs which are bonded to atoms with specified
308 non-tetrahedral stereochemistry */
309};
310//! \overload
311/// modifies the molecule in place
313 bool sanitize = true);
314//! \overload
315/// The caller owns the pointer this returns
317 const RemoveHsParameters &ps,
318 bool sanitize = true);
319
320//! removes all Hs from a molecule
321RDKIT_GRAPHMOL_EXPORT void removeAllHs(RWMol &mol, bool sanitize = true);
322//! \overload
323/// The caller owns the pointer this returns
325 bool sanitize = true);
326
327//! returns a copy of a molecule with hydrogens removed and added as queries
328//! to the heavy atoms to which they are bound.
329/*!
330 This is really intended to be used with molecules that contain QueryAtoms
331
332 \param mol the molecule to remove Hs from
333
334 \return the new molecule
335
336 <b>Notes:</b>
337 - Atoms that do not already have hydrogen count queries will have one
338 added, other H-related queries will not be touched. Examples:
339 - C[H] -> [C;!H0]
340 - [C;H1][H] -> [C;H1]
341 - [C;H2][H] -> [C;H2]
342 - Hydrogens which aren't connected to a heavy atom will not be
343 removed. This prevents molecules like <tt>"[H][H]"</tt> from having
344 all atoms removed.
345 - the caller is responsible for <tt>delete</tt>ing the pointer this
346 returns.
347 - By default all hydrogens are removed, however if
348 mergeUnmappedOnly is true, any hydrogen participating
349 in an atom map will be retained
350
351*/
353 bool mergeUnmappedOnly = false,
354 bool mergeIsotopes = false);
355//! \overload
356/// modifies the molecule in place
358 bool mergeUnmappedOnly = false,
359 bool mergeIsotopes = false);
360
361//! returns a pair of booleans (hasQueryHs, hasUnmergaebleQueryHs)
362/*!
363 This is really intended to be used with molecules that contain QueryAtoms
364 such as when checking smarts patterns for explicit hydrogens
365
366
367 \param mol the molecule to check for query Hs from
368 \return std::pair if pair.first is true if the molecule has query hydrogens,
369 if pair.second is true, the queryHs cannot be removed my mergeQueryHs
370*/
371RDKIT_GRAPHMOL_EXPORT std::pair<bool, bool> hasQueryHs(const ROMol &mol);
372
382
383//! Parameters controlling the behavior of MolOps::adjustQueryProperties
384/*!
385
386 Note that some of the options here are either directly contradictory or make
387 no sense when combined with each other. We generally assume that client code
388 is doing something sensible and don't attempt to detect possible conflicts or
389 problems.
390
391*/
393 bool adjustDegree = true; /**< add degree queries */
394 std::uint32_t adjustDegreeFlags = ADJUST_IGNOREDUMMIES | ADJUST_IGNORECHAINS;
395
396 bool adjustRingCount = false; /**< add ring-count queries */
397 std::uint32_t adjustRingCountFlags =
399
400 bool makeDummiesQueries = true; /**< convert dummy atoms without isotope
401 labels to any-atom queries */
402
403 bool aromatizeIfPossible = true; /**< perceive and set aromaticity */
404
405 bool makeBondsGeneric =
406 false; /**< convert bonds to generic queries (any bonds) */
407 std::uint32_t makeBondsGenericFlags = ADJUST_IGNORENONE;
408
409 bool makeAtomsGeneric =
410 false; /**< convert atoms to generic queries (any atoms) */
411 std::uint32_t makeAtomsGenericFlags = ADJUST_IGNORENONE;
412
413 bool adjustHeavyDegree = false; /**< adjust the heavy-atom degree instead of
414 overall degree */
415 std::uint32_t adjustHeavyDegreeFlags =
417
418 bool adjustRingChain = false; /**< add ring-chain queries */
419 std::uint32_t adjustRingChainFlags = ADJUST_IGNORENONE;
420
421 bool useStereoCareForBonds =
422 false; /**< remove stereochemistry info from double bonds that do not have
423 the stereoCare property set */
424
425 bool adjustConjugatedFiveRings =
426 false; /**< sets bond queries in conjugated five-rings to
427 SINGLE|DOUBLE|AROMATIC */
428
429 bool setMDLFiveRingAromaticity =
430 false; /**< uses the 5-ring aromaticity behavior of the (former) MDL
431 software as documented in the Chemical Representation Guide */
432
433 bool adjustSingleBondsToDegreeOneNeighbors =
434 false; /**< sets single bonds between aromatic or conjugated atoms and
435 degree one neighbors to SINGLE|AROMATIC */
436
437 bool adjustSingleBondsBetweenAromaticAtoms =
438 false; /**< sets non-ring single bonds between two aromatic or conjugated
439 atoms to SINGLE|AROMATIC */
440
441 //! \brief returns an AdjustQueryParameters object with all adjustments
442 //! disabled
445 res.adjustDegree = false;
446 res.makeDummiesQueries = false;
447 res.aromatizeIfPossible = false;
448 return res;
449 }
451};
452
453//! updates an AdjustQueryParameters object from a JSON string
455 MolOps::AdjustQueryParameters &p, const std::string &json);
456
457//! returns a copy of a molecule with query properties adjusted
458/*!
459 \param mol the molecule to adjust
460 \param params controls the adjustments made
461
462 \return the new molecule, the caller owns the memory
463*/
465 const ROMol &mol, const AdjustQueryParameters *params = nullptr);
466//! \overload
467/// modifies the molecule in place
469 RWMol &mol, const AdjustQueryParameters *params = nullptr);
470
471//! returns a copy of a molecule with the atoms renumbered
472/*!
473
474 \param mol the molecule to work with
475 \param newOrder the new ordering of the atoms (should be numAtoms long)
476 for example: if newOrder is [3,2,0,1], then atom 3 in the original
477 molecule will be atom 0 in the new one
478
479 \return the new molecule
480
481 <b>Notes:</b>
482 - the caller is responsible for <tt>delete</tt>ing the pointer this
483 returns.
484
485*/
487 const ROMol &mol, const std::vector<unsigned int> &newOrder);
488
489//! @}
490
491//! \name Sanitization
492/// {
493
510
511//! \brief carries out a collection of tasks for cleaning up a molecule and
512//! ensuring that it makes "chemical sense"
513/*!
514 This functions calls the following in sequence
515 -# MolOps::cleanUp()
516 -# mol.updatePropertyCache()
517 -# MolOps::symmetrizeSSSR()
518 -# MolOps::Kekulize()
519 -# MolOps::assignRadicals()
520 -# MolOps::setAromaticity()
521 -# MolOps::setConjugation()
522 -# MolOps::setHybridization()
523 -# MolOps::cleanupChirality()
524 -# MolOps::adjustHs()
525 -# mol.updatePropertyCache()
526
527 \param mol : the RWMol to be cleaned
528
529 \param operationThatFailed : the first (if any) sanitization operation that
530 fails is set here.
531 The values are taken from the \c SanitizeFlags
532 enum. On success, the value is \c
533 SanitizeFlags::SANITIZE_NONE
534
535 \param sanitizeOps : the bits here are used to set which sanitization
536 operations are carried out. The elements of the \c
537 SanitizeFlags enum define the operations.
538
539 <b>Notes:</b>
540 - If there is a failure in the sanitization, a \c MolSanitizeException
541 will be thrown.
542 - in general the user of this function should cast the molecule following
543 this function to a ROMol, so that new atoms and bonds cannot be added to
544 the molecule and screw up the sanitizing that has been done here
545*/
547 unsigned int &operationThatFailed,
548 unsigned int sanitizeOps = SANITIZE_ALL);
549//! \overload
551
552//! \brief Identifies chemistry problems (things that don't make chemical
553//! sense) in a molecule
554/*!
555 This functions uses the operations in sanitizeMol but does not change
556 the input structure and returns a list of the problems encountered instead
557 of stopping at the first failure,
558
559 The problems this looks for come from the sanitization operations:
560 -# mol.updatePropertyCache() : Unreasonable valences
561 -# MolOps::Kekulize() : Unkekulizable ring systems, aromatic atoms not
562 in rings, aromatic bonds to non-aromatic atoms.
563
564 \param mol : the ROMol to be cleaned
565
566 \param sanitizeOps : the bits here are used to set which sanitization
567 operations are carried out. The elements of the \c
568 SanitizeFlags enum define the operations.
569
570 \return a vector of \c MolSanitizeException values that indicate what
571 problems were encountered
572
573*/
575std::vector<std::unique_ptr<MolSanitizeException>> detectChemistryProblems(
576 const ROMol &mol, unsigned int sanitizeOps = SANITIZE_ALL);
577
578//! Possible aromaticity models
579/*!
580- \c AROMATICITY_DEFAULT at the moment always uses \c AROMATICITY_RDKIT
581- \c AROMATICITY_RDKIT is the standard RDKit model (as documented in the RDKit
582Book)
583- \c AROMATICITY_SIMPLE only considers 5- and 6-membered simple rings (it
584does not consider the outer envelope of fused rings)
585- \c AROMATICITY_MDL
586- \c AROMATICITY_CUSTOM uses a caller-provided function
587*/
588typedef enum {
589 AROMATICITY_DEFAULT = 0x0, ///< future proofing
594 AROMATICITY_CUSTOM = 0xFFFFFFF ///< use a function
596
598
599//! Sets up the aromaticity for a molecule
600/*!
601
602 This is what happens here:
603 -# find all the simple rings by calling the findSSSR function
604 -# loop over all the Atoms in each ring and mark them if they are
605 candidates
606 for aromaticity. A ring atom is a candidate if it can spare electrons
607 to the ring and if it's from the first two rows of the periodic table.
608 -# based on the candidate atoms, mark the rings to be either candidates
609 or non-candidates. A ring is a candidate only if all its atoms are
610 candidates
611 -# apply Hueckel rule to each of the candidate rings to check if the ring
612 can be
613 aromatic
614
615 \param mol the RWMol of interest
616 \param model the aromaticity model to use
617 \param func a custom function for assigning aromaticity (only used when
618 model=\c AROMATICITY_CUSTOM)
619
620 \return >0 on success, <= 0 otherwise
621
622 <b>Assumptions:</b>
623 - Kekulization has been done (i.e. \c MolOps::Kekulize() has already
624 been called)
625
626*/
629 int (*func)(RWMol &) = nullptr);
630
631//! Designed to be called by the sanitizer to handle special cases before
632/// anything is done.
633/*!
634
635 Currently this:
636 - modifies nitro groups, so that the nitrogen does not have an
637 unreasonable valence of 5, as follows:
638 - the nitrogen gets a positive charge
639 - one of the oxygens gets a negative chage and the double bond to
640 this oxygen is changed to a single bond The net result is that nitro groups
641 can be counted on to be: \c "[N+](=O)[O-]"
642 - modifies halogen-oxygen containing species as follows:
643 \c [Cl,Br,I](=O)(=O)(=O)O -> [X+3]([O-])([O-])([O-])O
644 \c [Cl,Br,I](=O)(=O)O -> [X+3]([O-])([O-])O
645 \c [Cl,Br,I](=O)O -> [X+]([O-])O
646 - converts the substructure [N,C]=P(=O)-* to [N,C]=[P+](-[O-])-*
647
648 \param mol the molecule of interest
649
650*/
652
653//! Designed to be called by the sanitizer to handle special cases for
654//! organometallic species before valence is perceived
655/*!
656
657 \b Note that this function is experimental and may either change in behavior
658 or be replaced with something else in future releases.
659
660 Currently this:
661 - replaces single bonds between "hypervalent" organic atoms and metals with
662 dative bonds (this is following an IUPAC recommendation:
663 https://iupac.qmul.ac.uk/tetrapyrrole/TP8.html)
664
665 \param mol the molecule of interest
666
667*/
669
670//! Called by the sanitizer to assign radical counts to atoms
672
673//! adjust the number of implicit and explicit Hs for special cases
674/*!
675
676 Currently this:
677 - modifies aromatic nitrogens so that, when appropriate, they have an
678 explicit H marked (e.g. so that we get things like \c "c1cc[nH]cc1"
679
680 \param mol the molecule of interest
681
682 <b>Assumptions</b>
683 - this is called after the molecule has been sanitized,
684 aromaticity has been perceived, and the implicit valence of
685 everything has been calculated.
686
687*/
689
690//! Kekulizes the molecule
691/*!
692
693 \param mol the molecule of interest
694
695 \param markAtomsBonds if this is set to true, \c isAromatic boolean
696 settings on both the Bonds and Atoms are turned to false following the
697 Kekulization, otherwise they are left alone in their original state.
698
699 \param maxBackTracks the maximum number of attempts at back-tracking. The
700 algorithm uses a back-tracking procedure to revisit a previous setting of
701 double bond if we hit a wall in the kekulization process
702
703 <b>Notes:</b>
704 - this does not modify query bonds which have bond type queries (like
705 those which come from SMARTS) or rings containing them.
706 - even if \c markAtomsBonds is \c false the \c BondType for all modified
707 aromatic bonds will be changed from \c RDKit::Bond::AROMATIC to \c
708 RDKit::Bond::SINGLE or RDKit::Bond::DOUBLE during Kekulization.
709
710*/
712 unsigned int maxBackTracks = 100);
713//! Kekulizes the molecule if possible. If the kekulization fails the molecule
714//! will not be modified
715/*!
716
717 \param mol the molecule of interest
718
719 \param markAtomsBonds if this is set to true, \c isAromatic boolean
720 settings on both the Bonds and Atoms are turned to false following the
721 Kekulization, otherwise they are left alone in their original state.
722
723 \param maxBackTracks the maximum number of attempts at back-tracking. The
724 algorithm uses a back-tracking procedure to revisit a previous setting of
725 double bond if we hit a wall in the kekulization process
726
727 \returns whether or not the kekulization succeeded
728
729 <b>Notes:</b>
730 - even if \c markAtomsBonds is \c false the \c BondType for all aromatic
731 bonds will be changed from \c RDKit::Bond::AROMATIC to \c
732 RDKit::Bond::SINGLE or RDKit::Bond::DOUBLE during Kekulization.
733
734*/
736 bool markAtomsBonds = true,
737 unsigned int maxBackTracks = 100);
738
739//! flags the molecule's conjugated bonds
741
742//! calculates and sets the hybridization of all a molecule's Stoms
744
745//! @}
746
747//! \name Ring finding and SSSR
748//! @{
749
750//! finds a molecule's Smallest Set of Smallest Rings
751/*!
752 Currently this implements a modified form of Figueras algorithm
753 (JCICS - Vol. 36, No. 5, 1996, 986-991)
754
755 \param mol the molecule of interest
756 \param res used to return the vector of rings. Each entry is a vector with
757 atom indices. This information is also stored in the molecule's
758 RingInfo structure, so this argument is optional (see overload)
759 \param includeDativeBonds - determines whether or not dative bonds are used in
760 the ring finding.
761
762 \return number of smallest rings found
763
764 Base algorithm:
765 - The original algorithm starts by finding representative degree 2
766 nodes.
767 - Representative because if a series of deg 2 nodes are found only
768 one of them is picked.
769 - The smallest ring around each of them is found.
770 - The bonds that connect to this degree 2 node are them chopped off,
771 yielding
772 new deg two nodes
773 - The process is repeated on the new deg 2 nodes.
774 - If no deg 2 nodes are found, a deg 3 node is picked. The smallest ring
775 with it is found. A bond from this is "carefully" (look in the paper)
776 selected and chopped, yielding deg 2 nodes. The process is same as
777 above once this is done.
778
779 Our Modifications:
780 - If available, more than one smallest ring around a representative deg 2
781 node will be computed and stored
782 - Typically 3 rings are found around a degree 3 node (when no deg 2s are
783 available)
784 and all the bond to that node are chopped.
785 - The extra rings that were found in this process are removed after all
786 the nodes have been covered.
787
788 These changes were motivated by several factors:
789 - We believe the original algorithm fails to find the correct SSSR
790 (finds the correct number of them but the wrong ones) on some sample
791 mols
792 - Since SSSR may not be unique, a post-SSSR step to symmetrize may be
793 done. The extra rings this process adds can be quite useful.
794*/
796 std::vector<std::vector<int>> &res,
797 bool includeDativeBonds = false);
798//! \overload
800 std::vector<std::vector<int>> *res = nullptr,
801 bool includeDativeBonds = false);
802
803//! use a DFS algorithm to identify ring bonds and atoms in a molecule
804/*!
805 \b NOTE: though the RingInfo structure is populated by this function,
806 the only really reliable calls that can be made are to check if
807 mol.getRingInfo().numAtomRings(idx) or mol.getRingInfo().numBondRings(idx)
808 return values >0
809*/
811
813
814//! symmetrize the molecule's Smallest Set of Smallest Rings
815/*!
816 SSSR rings obatined from "findSSSR" can be non-unique in some case.
817 For example, cubane has five SSSR rings, not six as one would hope.
818
819 This function adds additional rings to the SSSR list if necessary
820 to make the list symmetric, e.g. all atoms in cubane will be part of the
821 same number of SSSRs. This function choses these extra rings from the extra
822 rings computed and discarded during findSSSR. The new ring are chosen such
823 that:
824 - replacing a same sized ring in the SSSR list with an extra ring yields
825 the same union of bond IDs as the original SSSR list
826
827 \param mol - the molecule of interest
828 \param res used to return the vector of rings. Each entry is a vector with
829 atom indices. This information is also stored in the molecule's
830 RingInfo structure, so this argument is optional (see overload)
831 \param includeDativeBonds - determines whether or not dative bonds are used in
832 the ring finding.
833
834 \return the total number of rings = (new rings + old SSSRs)
835
836 <b>Notes:</b>
837 - if no SSSR rings are found on the molecule - MolOps::findSSSR() is called
838 first
839*/
841 std::vector<std::vector<int>> &res,
842 bool includeDativeBonds = false);
843//! \overload
845 bool includeDativeBonds = false);
846
847//! @}
848
849//! \name Shortest paths and other matrices
850//! @{
851
852//! returns a molecule's adjacency matrix
853/*!
854 \param mol the molecule of interest
855 \param useBO toggles use of bond orders in the matrix
856 \param emptyVal sets the empty value (for non-adjacent atoms)
857 \param force forces calculation of the matrix, even if already
858 computed
859 \param propNamePrefix used to set the cached property name
860
861 \return the adjacency matrix.
862
863 <b>Notes</b>
864 - The result of this is cached in the molecule's local property
865 dictionary, which will handle deallocation. The caller should <b>not</b> \c
866 delete this pointer.
867
868*/
870 const ROMol &mol, bool useBO = false, int emptyVal = 0, bool force = false,
871 const char *propNamePrefix = nullptr,
872 const boost::dynamic_bitset<> *bondsToUse = nullptr);
873
874//! Computes the molecule's topological distance matrix
875/*!
876 Uses the Floyd-Warshall all-pairs-shortest-paths algorithm.
877
878 \param mol the molecule of interest
879 \param useBO toggles use of bond orders in the matrix
880 \param useAtomWts sets the diagonal elements of the result to
881 6.0/(atomic number) so that the matrix can be used to calculate
882 Balaban J values. This does not affect the bond weights.
883 \param force forces calculation of the matrix, even if already
884 computed
885 \param propNamePrefix used to set the cached property name
886
887 \return the distance matrix.
888
889 <b>Notes</b>
890 - The result of this is cached in the molecule's local property
891 dictionary, which will handle deallocation. The caller should <b>not</b> \c
892 delete this pointer.
893
894
895*/
897 const ROMol &mol, bool useBO = false, bool useAtomWts = false,
898 bool force = false, const char *propNamePrefix = nullptr);
899
900//! Computes the molecule's topological distance matrix
901/*!
902 Uses the Floyd-Warshall all-pairs-shortest-paths algorithm.
903
904 \param mol the molecule of interest
905 \param activeAtoms only elements corresponding to these atom indices
906 will be included in the calculation
907 \param bonds only bonds found in this list will be included in the
908 calculation
909 \param useBO toggles use of bond orders in the matrix
910 \param useAtomWts sets the diagonal elements of the result to
911 6.0/(atomic number) so that the matrix can be used to calculate
912 Balaban J values. This does not affect the bond weights.
913
914 \return the distance matrix.
915
916 <b>Notes</b>
917 - The results of this call are not cached, the caller <b>should</b> \c
918 delete
919 this pointer.
920
921
922*/
924 const ROMol &mol, const std::vector<int> &activeAtoms,
925 const std::vector<const Bond *> &bonds, bool useBO = false,
926 bool useAtomWts = false);
927
928//! Computes the molecule's 3D distance matrix
929/*!
930
931 \param mol the molecule of interest
932 \param confId the conformer to use
933 \param useAtomWts sets the diagonal elements of the result to
934 6.0/(atomic number)
935 \param force forces calculation of the matrix, even if already
936 computed
937 \param propNamePrefix used to set the cached property name
938 (if set to an empty string, the matrix will not be
939 cached)
940
941 \return the distance matrix.
942
943 <b>Notes</b>
944 - If propNamePrefix is not empty the result of this is cached in the
945 molecule's local property dictionary, which will handle deallocation.
946 In other cases the caller is responsible for freeing the memory.
947
948*/
950 const ROMol &mol, int confId = -1, bool useAtomWts = false,
951 bool force = false, const char *propNamePrefix = nullptr);
952//! Find the shortest path between two atoms
953/*!
954 Uses the Bellman-Ford algorithm
955
956 \param mol molecule of interest
957 \param aid1 index of the first atom
958 \param aid2 index of the second atom
959
960 \return an std::list with the indices of the atoms along the shortest
961 path
962
963 <b>Notes:</b>
964 - the starting and end atoms are included in the path
965 - if no path is found, an empty path is returned
966
967*/
968RDKIT_GRAPHMOL_EXPORT std::list<int> getShortestPath(const ROMol &mol, int aid1,
969 int aid2);
970
971//! @}
972
973//! \name Stereochemistry
974//! @{
975
976// class to hold hybridizations
977
979 public:
981 throw FileParseException("not to be called without a mol parameter");
982 };
985 throw FileParseException("not to be called without a mol parameter");
986 };
987
988 ~Hybridizations() = default;
989
991 return static_cast<Atom::HybridizationType>(d_hybridizations[idx]);
992 }
993 // Atom::HybridizationType &operator[](unsigned int idx) {
994 // return static_cast<Atom::HybridizationType>(d_hybridizations[idx]);
995 // d_hybridizations[d_hybridizations[idx]];
996 // }
997
998 // // void clear() { d_hybridizations.clear(); }
999 // // void resize(unsigned int sz) { d_hybridizations.resize(sz); }
1000 unsigned int size() const { return d_hybridizations.size(); }
1001
1002 private:
1003 std::vector<int> d_hybridizations;
1004};
1005
1006//! removes bogus chirality markers (e.g. tetrahedral flags on non-sp3 centers):
1008
1009//! \overload
1011//! removes bogus atropisomeric markers (e.g. those without sp2 begin and end
1012//! atoms):
1015
1016//! \brief Uses a conformer to assign ChiralTypes to a molecule's atoms
1017/*!
1018 \param mol the molecule of interest
1019 \param confId the conformer to use
1020 \param replaceExistingTags if this flag is true, any existing atomic chiral
1021 tags will be replaced
1022
1023 If the conformer provided is not a 3D conformer, nothing will be done.
1024
1025
1026 NOTE that this does not check to see if atoms are chiral centers (i.e. all
1027 substituents are different), it merely sets the chiral type flags based on
1028 the coordinates and atom ordering. Use \c assignStereochemistryFrom3D() if
1029 you want chiral flags only on actual stereocenters.
1030*/
1032 ROMol &mol, int confId = -1, bool replaceExistingTags = true);
1033
1034//! \brief Uses a conformer to assign ChiralTypes to a molecule's atoms and
1035//! stereo flags to its bonds
1036/*!
1037
1038 \param mol the molecule of interest
1039 \param confId the conformer to use
1040 \param replaceExistingTags if this flag is true, any existing info about
1041 stereochemistry will be replaced
1042
1043 If the conformer provided is not a 3D conformer, nothing will be done.
1044*/
1046 ROMol &mol, int confId = -1, bool replaceExistingTags = true);
1047
1048//! \brief Use bond directions to assign ChiralTypes to a molecule's atoms and
1049//! stereo flags to its bonds
1050/*!
1051
1052 \param mol the molecule of interest
1053 \param confId the conformer to use
1054 \param replaceExistingTags if this flag is true, any existing info about
1055 stereochemistry will be replaced
1056*/
1058 ROMol &mol, int confId = -1, bool replaceExistingTags = true);
1059
1060//! \deprecated: this function will be removed in a future release. Use
1061//! setDoubleBondNeighborDirections() instead
1063 int confId = -1);
1064//! Sets bond directions based on double bond stereochemistry
1066 ROMol &mol, const Conformer *conf = nullptr);
1067//! removes directions from single bonds. Wiggly bonds will have the property
1068//! _UnknownStereo set on them
1070 bool onlyWedgeFlags = false);
1071
1072//! removes directions from all bonds. Wiggly bonds and cross bonds will have
1073//! the property _UnknownStereo set on them
1076 bool onlyWedgeFlags = false);
1077
1078//! Assign CIS/TRANS bond stereochemistry tags based on neighboring
1079//! directions
1081
1082//! Assign stereochemistry tags to atoms and bonds.
1083/*!
1084 If useLegacyStereoPerception is true, it also does the CIP stereochemistry
1085 assignment for the molecule's atoms (R/S) and double bonds (Z/E).
1086 This assignment is based on legacy code which is fast, but is
1087 known to incorrectly assign CIP labels in some cases.
1088 instead, to assign CIP labels based on an accurate, though slower,
1089 implementation of the CIP rules, call CIPLabeler::assignCIPLabels().
1090 Chiral atoms will have a property '_CIPCode' indicating their chiral code.
1091
1092 \param mol the molecule to use
1093 \param cleanIt if true, any existing values of the property `_CIPCode`
1094 will be cleared, atoms with a chiral specifier that aren't
1095 actually chiral (e.g. atoms with duplicate
1096 substituents or only 2 substituents, etc.) will have
1097 their chiral code set to CHI_UNSPECIFIED. Bonds with
1098 STEREOCIS/STEREOTRANS specified that have duplicate
1099 substituents based upon the CIP atom ranks will be
1100 marked STEREONONE.
1101 \param force causes the calculation to be repeated even if it has
1102 already been done
1103 \param flagPossibleStereoCenters set the _ChiralityPossible property on
1104 atoms that are possible stereocenters
1105
1106 <b>Notes:M</b>
1107 - Throughout we assume that we're working with a hydrogen-suppressed
1108 graph.
1109
1110*/
1112 ROMol &mol, bool cleanIt = false, bool force = false,
1113 bool flagPossibleStereoCenters = false);
1114//! Removes all stereochemistry information from atoms (i.e. R/S) and bonds
1115/// i.e. Z/E)
1116/*!
1117
1118 \param mol the molecule of interest
1119*/
1121
1122//! \brief finds bonds that could be cis/trans in a molecule and mark them as
1123//! Bond::STEREOANY.
1124/*!
1125 \param mol the molecule of interest
1126 \param cleanIt toggles removal of stereo flags from double bonds that can
1127 not have stereochemistry
1128
1129 This function finds any double bonds that can potentially be part of
1130 a cis/trans system. No attempt is made here to mark them cis or
1131 trans. No attempt is made to detect double bond stereo in ring systems.
1132
1133 This function is useful in the following situations:
1134 - when parsing a mol file; for the bonds marked here, coordinate
1135 information on the neighbors can be used to indentify cis or trans
1136 states
1137 - when writing a mol file; bonds that can be cis/trans but not marked as
1138 either need to be specially marked in the mol file
1139 - finding double bonds with unspecified stereochemistry so they
1140 can be enumerated for downstream 3D tools
1141
1142 The CIPranks on the neighboring atoms are checked in this function. The
1143 _CIPCode property if set to any on the double bond.
1144*/
1146 bool cleanIt = false);
1147//! \brief Uses the molParity atom property to assign ChiralType to a
1148//! molecule's atoms
1149/*!
1150 \param mol the molecule of interest
1151 \param replaceExistingTags if this flag is true, any existing atomic chiral
1152 tags will be replaced
1153*/
1155 ROMol &mol, bool replaceExistingTags = true);
1156
1157//! @}
1158
1159//! returns the number of atoms which have a particular property set
1161 const ROMol &mol, std::string prop);
1162
1163//! returns whether or not a molecule needs to have Hs added to it.
1165
1166//! \brief Replaces haptic bond with explicit dative bonds.
1167/*!
1168 *
1169 * @param mol the molecule of interest
1170 *
1171 * One way of showing haptic bonds (such as cyclopentadiene to iron in
1172 * ferrocene) is to use a dummy atom with a dative bond to the iron atom with
1173 * the bond labelled with the atoms involved in the organic end of the bond.
1174 * Another way is to have explicit dative bonds from the atoms of the haptic
1175 * group to the metal atom. This function converts the former representation to
1176 * the latter.
1177 */
1179
1180//! \overload modifies molecule in place.
1182
1183//! \brief Replaces explicit dative bonds with haptic.
1184/*!
1185 *
1186 * @param mol the molecule of interest
1187 *
1188 * Does the reverse of hapticBondsToDative. If there are multiple contiguous
1189 * atoms attached by dative bonds to an atom (probably a metal atom), the dative
1190 * bonds will be replaced by a dummy atom in their centre attached to the
1191 * (metal) atom by a dative bond, which is labelled with ENDPTS of the atoms
1192 * that had the original dative bonds.
1193 */
1195
1196//! \overload modifies molecule in place.
1198
1199/*!
1200 Calculates a molecule's average molecular weight
1201
1202 \param mol the molecule of interest
1203 \param onlyHeavy (optional) if this is true (the default is false),
1204 only heavy atoms will be included in the MW calculation
1205
1206 \return the AMW
1207*/
1209 bool onlyHeavy = false);
1210/*!
1211 Calculates a molecule's exact molecular weight
1212
1213 \param mol the molecule of interest
1214 \param onlyHeavy (optional) if this is true (the default is false),
1215 only heavy atoms will be included in the MW calculation
1216
1217 \return the exact MW
1218*/
1220 bool onlyHeavy = false);
1221
1222/*!
1223 Calculates a molecule's formula
1224
1225 \param mol the molecule of interest
1226 \param separateIsotopes if true, isotopes will show up separately in the
1227 formula. So C[13CH2]O will give the formula: C[13C]H6O
1228 \param abbreviateHIsotopes if true, 2H and 3H will be represented as
1229 D and T instead of [2H] and [3H]. This only applies if \c separateIsotopes
1230 is true
1231
1232 \return the formula as a string
1233*/
1235 const ROMol &mol, bool separateIsotopes = false,
1236 bool abbreviateHIsotopes = true);
1237
1238namespace details {
1239//! not recommended for use in other code
1240RDKIT_GRAPHMOL_EXPORT void KekulizeFragment(
1241 RWMol &mol, const boost::dynamic_bitset<> &atomsToUse,
1242 boost::dynamic_bitset<> bondsToUse, bool markAtomsBonds = true,
1243 unsigned int maxBackTracks = 100);
1244
1245// If the bond is dative, and it has a common_properties::MolFileBondEndPts
1246// prop, returns a vector of the indices of the atoms mentioned in the prop.
1247RDKIT_GRAPHMOL_EXPORT std::vector<int> hapticBondEndpoints(const Bond *bond);
1248
1249} // namespace details
1250
1251//! attachment points encoded as attachPt properties are added to the graph as
1252/// dummy atoms
1253/*!
1254 *
1255 * @param mol the molecule of interest
1256 * @param addAsQueries if true, the dummy atoms will be added as null queries
1257 * (i.e. they will match any atom in a substructure search)
1258 * @param addCoords if true and the molecule has one or more conformers,
1259 * positions for the attachment points will be added to the conformer(s).
1260 *
1261 */
1263 bool addAsQueries = true,
1264 bool addCoords = true);
1265//! dummy atoms in the graph are removed and replaced with attachment point
1266//! annotations on the attached atoms
1267/*!
1268 *
1269 * @param mol the molecule of interest
1270 * @param markedOnly if true, only dummy atoms with the _fromAttachPoint
1271 * property will be collapsed
1272 *
1273 * In order for a dummy atom to be considered for collapsing it must have:
1274 * - degree 1 with a single or unspecified bond
1275 * - the bond to it can not be wedged
1276 * - either no query or be an AtomNullQuery
1277 *
1278 */
1280 bool markedOnly = true);
1281
1282namespace details {
1283//! attachment points encoded as attachPt properties are added to the graph as
1284/// dummy atoms
1285/*!
1286 *
1287 * @param mol the molecule of interest
1288 * @param atomIdx the index of the atom to which the attachment point should be
1289 * added
1290 * @param val the attachment point value. Should be 1 or 2
1291 * @param addAsQueries if true, the dummy atoms will be added as null queries
1292 * (i.e. they will match any atom in a substructure search)
1293 * @param addCoords if true and the molecule has one or more conformers,
1294 * positions for the attachment points will be added to the conformer(s).
1295 *
1296 */
1297RDKIT_GRAPHMOL_EXPORT unsigned int addExplicitAttachmentPoint(
1298 RWMol &mol, unsigned int atomIdx, unsigned int val, bool addAsQuery = true,
1299 bool addCoords = true);
1300
1301//! returns whether or not an atom is an attachment point
1302/*!
1303 *
1304 * @param mol the molecule of interest
1305 * @param markedOnly if true, only dummy atoms with the _fromAttachPoint
1306 * property will be collapsed
1307 *
1308 * In order for a dummy atom to be considered for collapsing it must have:
1309 * - degree 1 with a single or unspecified bond
1310 * - the bond to it can not be wedged
1311 * - either no query or be an AtomNullQuery
1312 *
1313 */
1314RDKIT_GRAPHMOL_EXPORT bool isAttachmentPoint(const Atom *atom,
1315 bool markedOnly = true);
1316
1317} // namespace details
1318
1319} // namespace MolOps
1320} // namespace RDKit
1321
1322#endif
RDKIT_GRAPHMOL_EXPORT const int ci_LOCAL_INF
The class for representing atoms.
Definition Atom.h:75
HybridizationType
store hybridization
Definition Atom.h:86
class for representing a bond
Definition Bond.h:47
The class for representing 2D or 3D conformation of a molecule.
Definition Conformer.h:46
used by various file parsing classes to indicate a parse error
unsigned int size() const
Definition MolOps.h:1000
Atom::HybridizationType operator[](int idx)
Definition MolOps.h:990
Hybridizations(const Hybridizations &)
Definition MolOps.h:984
Hybridizations(const ROMol &mol)
RWMol is a molecule class that is intended to be edited.
Definition RWMol.h:32
#define RDKIT_GRAPHMOL_EXPORT
Definition export.h:233
RDKIT_GRAPHMOL_EXPORT void cleanUp(RWMol &mol)
RDKIT_GRAPHMOL_EXPORT void assignStereochemistry(ROMol &mol, bool cleanIt=false, bool force=false, bool flagPossibleStereoCenters=false)
Assign stereochemistry tags to atoms and bonds.
RDKIT_GRAPHMOL_EXPORT bool KekulizeIfPossible(RWMol &mol, bool markAtomsBonds=true, unsigned int maxBackTracks=100)
RDKIT_GRAPHMOL_EXPORT int findSSSR(const ROMol &mol, std::vector< std::vector< int > > &res, bool includeDativeBonds=false)
finds a molecule's Smallest Set of Smallest Rings
RDKIT_GRAPHMOL_EXPORT ROMol * renumberAtoms(const ROMol &mol, const std::vector< unsigned int > &newOrder)
returns a copy of a molecule with the atoms renumbered
RDKIT_GRAPHMOL_EXPORT std::string getMolFormula(const ROMol &mol, bool separateIsotopes=false, bool abbreviateHIsotopes=true)
RDKIT_GRAPHMOL_EXPORT void assignChiralTypesFromBondDirs(ROMol &mol, int confId=-1, bool replaceExistingTags=true)
Use bond directions to assign ChiralTypes to a molecule's atoms and stereo flags to its bonds.
RDKIT_GRAPHMOL_EXPORT int setAromaticity(RWMol &mol, AromaticityModel model=AROMATICITY_DEFAULT, int(*func)(RWMol &)=nullptr)
Sets up the aromaticity for a molecule.
RDKIT_GRAPHMOL_EXPORT void findRingFamilies(const ROMol &mol)
RDKIT_GRAPHMOL_EXPORT double getExactMolWt(const ROMol &mol, bool onlyHeavy=false)
RDKIT_GRAPHMOL_EXPORT bool needsHs(const ROMol &mol)
returns whether or not a molecule needs to have Hs added to it.
RDKIT_GRAPHMOL_EXPORT void fastFindRings(const ROMol &mol)
use a DFS algorithm to identify ring bonds and atoms in a molecule
RDKIT_GRAPHMOL_EXPORT std::pair< bool, bool > hasQueryHs(const ROMol &mol)
returns a pair of booleans (hasQueryHs, hasUnmergaebleQueryHs)
RDKIT_GRAPHMOL_EXPORT std::map< T, boost::shared_ptr< ROMol > > getMolFragsWithQuery(const ROMol &mol, T(*query)(const ROMol &, const Atom *), bool sanitizeFrags=true, const std::vector< T > *whiteList=nullptr, bool negateList=false)
splits a molecule into pieces based on labels assigned using a query
RDKIT_GRAPHMOL_EXPORT int getFormalCharge(const ROMol &mol)
sums up all atomic formal charges and returns the result
AromaticityModel
Possible aromaticity models.
Definition MolOps.h:588
@ AROMATICITY_RDKIT
Definition MolOps.h:590
@ AROMATICITY_MDL
Definition MolOps.h:592
@ AROMATICITY_CUSTOM
use a function
Definition MolOps.h:594
@ AROMATICITY_DEFAULT
future proofing
Definition MolOps.h:589
@ AROMATICITY_MMFF94
Definition MolOps.h:593
@ AROMATICITY_SIMPLE
Definition MolOps.h:591
RDKIT_GRAPHMOL_EXPORT void cleanUpOrganometallics(RWMol &mol)
RDKIT_GRAPHMOL_EXPORT double * getDistanceMat(const ROMol &mol, bool useBO=false, bool useAtomWts=false, bool force=false, const char *propNamePrefix=nullptr)
Computes the molecule's topological distance matrix.
RDKIT_GRAPHMOL_EXPORT ROMol * hapticBondsToDative(const ROMol &mol)
Replaces haptic bond with explicit dative bonds.
RDKIT_GRAPHMOL_EXPORT void setTerminalAtomCoords(ROMol &mol, unsigned int idx, unsigned int otherIdx)
RDKIT_GRAPHMOL_EXPORT void removeStereochemistry(ROMol &mol)
RDKIT_GRAPHMOL_EXPORT void clearSingleBondDirFlags(ROMol &mol, bool onlyWedgeFlags=false)
RDKIT_GRAPHMOL_EXPORT ROMol * adjustQueryProperties(const ROMol &mol, const AdjustQueryParameters *params=nullptr)
returns a copy of a molecule with query properties adjusted
RDKIT_GRAPHMOL_EXPORT void assignChiralTypesFromMolParity(ROMol &mol, bool replaceExistingTags=true)
Uses the molParity atom property to assign ChiralType to a molecule's atoms.
RDKIT_GRAPHMOL_EXPORT ROMol * mergeQueryHs(const ROMol &mol, bool mergeUnmappedOnly=false, bool mergeIsotopes=false)
RDKIT_GRAPHMOL_EXPORT void expandAttachmentPoints(RWMol &mol, bool addAsQueries=true, bool addCoords=true)
RDKIT_GRAPHMOL_EXPORT unsigned int getMolFrags(const ROMol &mol, std::vector< int > &mapping)
find fragments (disconnected components of the molecular graph)
RDKIT_GRAPHMOL_EXPORT void adjustHs(RWMol &mol)
adjust the number of implicit and explicit Hs for special cases
RDKIT_GRAPHMOL_EXPORT ROMol * dativeBondsToHaptic(const ROMol &mol)
Replaces explicit dative bonds with haptic.
RDKIT_GRAPHMOL_EXPORT void assignStereochemistryFrom3D(ROMol &mol, int confId=-1, bool replaceExistingTags=true)
Uses a conformer to assign ChiralTypes to a molecule's atoms and stereo flags to its bonds.
@ SANITIZE_SETAROMATICITY
Definition MolOps.h:501
@ SANITIZE_CLEANUPATROPISOMERS
Definition MolOps.h:507
@ SANITIZE_PROPERTIES
Definition MolOps.h:497
@ SANITIZE_CLEANUP_ORGANOMETALLICS
Definition MolOps.h:506
@ SANITIZE_SETCONJUGATION
Definition MolOps.h:502
@ SANITIZE_SYMMRINGS
Definition MolOps.h:498
@ SANITIZE_ADJUSTHS
Definition MolOps.h:505
@ SANITIZE_CLEANUPCHIRALITY
Definition MolOps.h:504
@ SANITIZE_FINDRADICALS
Definition MolOps.h:500
@ SANITIZE_KEKULIZE
Definition MolOps.h:499
@ SANITIZE_SETHYBRIDIZATION
Definition MolOps.h:503
@ SANITIZE_CLEANUP
Definition MolOps.h:496
RDKIT_GRAPHMOL_EXPORT int countAtomElec(const Atom *at)
RDKIT_GRAPHMOL_EXPORT void detectBondStereochemistry(ROMol &mol, int confId=-1)
RDKIT_GRAPHMOL_EXPORT void setMMFFAromaticity(RWMol &mol)
RDKIT_GRAPHMOL_EXPORT void sanitizeMol(RWMol &mol, unsigned int &operationThatFailed, unsigned int sanitizeOps=SANITIZE_ALL)
carries out a collection of tasks for cleaning up a molecule and ensuring that it makes "chemical sen...
RDKIT_GRAPHMOL_EXPORT void cleanupAtropisomers(RWMol &)
This is an overloaded member function, provided for convenience. It differs from the above function o...
RDKIT_GRAPHMOL_EXPORT void parseAdjustQueryParametersFromJSON(MolOps::AdjustQueryParameters &p, const std::string &json)
updates an AdjustQueryParameters object from a JSON string
RDKIT_GRAPHMOL_EXPORT void removeAllHs(RWMol &mol, bool sanitize=true)
removes all Hs from a molecule
RDKIT_GRAPHMOL_EXPORT void clearAllBondDirFlags(ROMol &mol)
RDKIT_GRAPHMOL_EXPORT void setBondStereoFromDirections(ROMol &mol)
RDKIT_GRAPHMOL_EXPORT double * get3DDistanceMat(const ROMol &mol, int confId=-1, bool useAtomWts=false, bool force=false, const char *propNamePrefix=nullptr)
Computes the molecule's 3D distance matrix.
RDKIT_GRAPHMOL_EXPORT bool atomHasConjugatedBond(const Atom *at)
returns whether or not the given Atom is involved in a conjugated bond
RDKIT_GRAPHMOL_EXPORT int symmetrizeSSSR(ROMol &mol, std::vector< std::vector< int > > &res, bool includeDativeBonds=false)
symmetrize the molecule's Smallest Set of Smallest Rings
RDKIT_GRAPHMOL_EXPORT void clearDirFlags(ROMol &mol, bool onlyWedgeFlags=false)
RDKIT_GRAPHMOL_EXPORT void cleanupChirality(RWMol &mol)
removes bogus chirality markers (e.g. tetrahedral flags on non-sp3 centers):
RDKIT_GRAPHMOL_EXPORT double * getAdjacencyMatrix(const ROMol &mol, bool useBO=false, int emptyVal=0, bool force=false, const char *propNamePrefix=nullptr, const boost::dynamic_bitset<> *bondsToUse=nullptr)
returns a molecule's adjacency matrix
RDKIT_GRAPHMOL_EXPORT void Kekulize(RWMol &mol, bool markAtomsBonds=true, unsigned int maxBackTracks=100)
Kekulizes the molecule.
RDKIT_GRAPHMOL_EXPORT void assignRadicals(RWMol &mol)
Called by the sanitizer to assign radical counts to atoms.
RDKIT_GRAPHMOL_EXPORT std::vector< std::unique_ptr< MolSanitizeException > > detectChemistryProblems(const ROMol &mol, unsigned int sanitizeOps=SANITIZE_ALL)
Identifies chemistry problems (things that don't make chemical sense) in a molecule.
RDKIT_GRAPHMOL_EXPORT void findPotentialStereoBonds(ROMol &mol, bool cleanIt=false)
finds bonds that could be cis/trans in a molecule and mark them as Bond::STEREOANY.
RDKIT_GRAPHMOL_EXPORT void setHybridization(ROMol &mol)
calculates and sets the hybridization of all a molecule's Stoms
RDKIT_GRAPHMOL_EXPORT void collapseAttachmentPoints(RWMol &mol, bool markedOnly=true)
RDKIT_GRAPHMOL_EXPORT unsigned getNumAtomsWithDistinctProperty(const ROMol &mol, std::string prop)
returns the number of atoms which have a particular property set
RDKIT_GRAPHMOL_EXPORT void assignChiralTypesFrom3D(ROMol &mol, int confId=-1, bool replaceExistingTags=true)
Uses a conformer to assign ChiralTypes to a molecule's atoms.
RDKIT_GRAPHMOL_EXPORT std::list< int > getShortestPath(const ROMol &mol, int aid1, int aid2)
Find the shortest path between two atoms.
RDKIT_GRAPHMOL_EXPORT double getAvgMolWt(const ROMol &mol, bool onlyHeavy=false)
RDKIT_GRAPHMOL_EXPORT void setConjugation(ROMol &mol)
flags the molecule's conjugated bonds
RDKIT_GRAPHMOL_EXPORT void setDoubleBondNeighborDirections(ROMol &mol, const Conformer *conf=nullptr)
Sets bond directions based on double bond stereochemistry.
RDKIT_GRAPHMOL_EXPORT ROMol * removeHs(const ROMol &mol, bool implicitOnly=false, bool updateExplicitCount=false, bool sanitize=true)
returns a copy of a molecule with hydrogens removed
RDKIT_GRAPHMOL_EXPORT ROMol * addHs(const ROMol &mol, bool explicitOnly=false, bool addCoords=false, const UINT_VECT *onlyOnAtoms=nullptr, bool addResidueInfo=false)
returns a copy of a molecule with hydrogens added in as explicit Atoms
AdjustQueryWhichFlags
Definition MolOps.h:373
@ ADJUST_IGNORERINGS
Definition MolOps.h:376
@ ADJUST_IGNORENONE
Definition MolOps.h:374
@ ADJUST_IGNOREMAPPED
Definition MolOps.h:379
@ ADJUST_IGNORENONDUMMIES
Definition MolOps.h:378
@ ADJUST_IGNOREDUMMIES
Definition MolOps.h:377
@ ADJUST_IGNORECHAINS
Definition MolOps.h:375
@ ADJUST_IGNOREALL
Definition MolOps.h:380
Std stuff.
std::vector< double > INVAR_VECT
Definition MolOps.h:32
bool rdvalue_is(const RDValue_cast_t)
INVAR_VECT::iterator INVAR_VECT_I
Definition MolOps.h:33
INVAR_VECT::const_iterator INVAR_VECT_CI
Definition MolOps.h:34
std::vector< UINT > UINT_VECT
Definition types.h:308
Parameters controlling the behavior of MolOps::adjustQueryProperties.
Definition MolOps.h:392
static AdjustQueryParameters noAdjustments()
returns an AdjustQueryParameters object with all adjustments disabled
Definition MolOps.h:443