RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
Abbreviations.h
Go to the documentation of this file.
1//
2// Copyright (C) 2020 Greg Landrum and T5 Informatics GmbH
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10#include <RDGeneral/export.h>
11#ifndef RD_ABBREVIATIONS_H
12#define RD_ABBREVIATIONS_H
14#include <utility>
15#include <vector>
16#include <string>
17#include <memory>
18
19namespace RDKit {
20class ROMol;
21class RWMol;
22
23namespace Abbreviations {
25 std::string label;
26 std::string displayLabel;
27 std::string displayLabelW;
28 std::string smarts;
29 std::shared_ptr<ROMol> mol; //!< optional
30 std::vector<unsigned int> extraAttachAtoms; //!< optional
31 bool operator==(const AbbreviationDefinition &other) const {
32 return label == other.label && displayLabel == other.displayLabel &&
33 displayLabelW == other.displayLabelW && smarts == other.smarts;
34 }
35 bool operator!=(const AbbreviationDefinition &other) const {
36 return !(*this == other);
37 }
38};
42 AbbreviationMatch(std::vector<std::pair<int, int>> matchArg,
43 AbbreviationDefinition abbrevArg)
44 : match(std::move(matchArg)), abbrev(std::move(abbrevArg)) {}
45 AbbreviationMatch() : match(), abbrev() {}
46 bool operator==(const AbbreviationMatch &other) const {
47 return abbrev == other.abbrev && match == other.match;
48 }
49 bool operator!=(const AbbreviationMatch &other) const {
50 return !(*this == other);
51 }
52};
53namespace common_properties {
54RDKIT_ABBREVIATIONS_EXPORT extern const std::string numDummies;
55RDKIT_ABBREVIATIONS_EXPORT extern const std::string origAtomMapping;
56RDKIT_ABBREVIATIONS_EXPORT extern const std::string origBondMapping;
57} // namespace common_properties
58namespace Utils {
59//! returns the default set of abbreviation definitions
60RDKIT_ABBREVIATIONS_EXPORT std::vector<AbbreviationDefinition>
62//! returns the default set of linker definitions
63RDKIT_ABBREVIATIONS_EXPORT std::vector<AbbreviationDefinition>
65
66//! parses a string describing abbreviation matches and returns the result
67/*!
68
69\param text the data to be parsed, see below for the format
70\param removeExtraDummies controls whether or not dummy atoms beyond atom 0 are
71 removed. Set this to true to create abbreviations for linkers
72\param allowConnectionToDummies allows abbreviations to directly connect to
73 abbreviations. set this to true for linkers
74
75Format of the text data:
76 A series of lines, each of which contains:
77
78 label SMARTS displayLabel displayLabelW
79
80 The "displayLabel" and "displayLabelW" fields are optional.
81 Here, label is the label used for the abbreviation,
82 SMARTS is the SMARTS definition of the abbreviation,
83 displayLabel is used in drawings to render the abbreviation, and
84 displayLabelW is the display label used if a bond comes in from the right.
85
86 Use dummies to indicate attachment points. The assumption is that the first
87 atom is a dummy (one will be added if this is not true) and that the second
88 atom is the surrogate for the rest of the group.
89
90*/
91RDKIT_ABBREVIATIONS_EXPORT std::vector<AbbreviationDefinition>
92parseAbbreviations(const std::string &text, bool removeExtraDummies = false,
93 bool allowConnectionToDummies = false);
94//! \brief equivalent to calling \c parseAbbreviations(text,true,true)
95inline std::vector<AbbreviationDefinition> parseLinkers(
96 const std::string &text) {
97 return parseAbbreviations(text, true, true);
98};
99} // namespace Utils
100
101//! returns all matches for the abbreviations across the molecule
102/*!
103
104 \param abbrevs the abbreviations to look for. This list is used in order.
105 \param maxCoverage any abbreviation that covers more than this fraction
106 of the molecule's atoms (not counting dummies) will not be returned.
107*/
108RDKIT_ABBREVIATIONS_EXPORT std::vector<AbbreviationMatch>
110 const ROMol &mol, const std::vector<AbbreviationDefinition> &abbrevs,
111 double maxCoverage = 0.4);
112//! applies the abbreviation matches to a molecule, modifying it in place.
113//! the modified molecule is not sanitized
115 RWMol &mol, const std::vector<AbbreviationMatch> &matches);
116//! creates "SUP" SubstanceGroups on the molecule describing the abbreviation
118 RWMol &mol, const std::vector<AbbreviationMatch> &matches);
119//! convenience function for finding and applying abbreviations
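120//! the modified molecule is not sanitized (NOTE(review): this conflicts with the \c sanitize argument below, which defaults to true - confirm)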
122 RWMol &mol, const std::vector<AbbreviationDefinition> &abbrevs,
123 double maxCoverage = 0.4, bool sanitize = true);
124//! convenience function for finding and labeling abbreviations as SUP
125//! SubstanceGroups
127 RWMol &mol, const std::vector<AbbreviationDefinition> &abbrevs,
128 double maxCoverage = 0.4);
129//! collapses abbreviation (i.e. "SUP") substance groups
130//! the modified molecule is not sanitized
132
133} // namespace Abbreviations
134} // namespace RDKit
135#endif
RWMol is a molecule class that is intended to be edited.
Definition RWMol.h:32
#define RDKIT_ABBREVIATIONS_EXPORT
Definition export.h:9
std::vector< AbbreviationDefinition > parseLinkers(const std::string &text)
equivalent to calling parseAbbreviations(text,true,true)
RDKIT_ABBREVIATIONS_EXPORT std::vector< AbbreviationDefinition > getDefaultLinkers()
returns the default set of linker definitions
RDKIT_ABBREVIATIONS_EXPORT std::vector< AbbreviationDefinition > parseAbbreviations(const std::string &text, bool removeExtraDummies=false, bool allowConnectionToDummies=false)
parses a string describing abbreviation matches and returns the result
RDKIT_ABBREVIATIONS_EXPORT std::vector< AbbreviationDefinition > getDefaultAbbreviations()
returns the default set of abbreviation definitions
RDKIT_ABBREVIATIONS_EXPORT const std::string origAtomMapping
RDKIT_ABBREVIATIONS_EXPORT const std::string origBondMapping
RDKIT_ABBREVIATIONS_EXPORT const std::string numDummies
RDKIT_ABBREVIATIONS_EXPORT void applyMatches(RWMol &mol, const std::vector< AbbreviationMatch > &matches)
RDKIT_ABBREVIATIONS_EXPORT void condenseAbbreviationSubstanceGroups(RWMol &mol)
RDKIT_ABBREVIATIONS_EXPORT void labelMolAbbreviations(RWMol &mol, const std::vector< AbbreviationDefinition > &abbrevs, double maxCoverage=0.4)
RDKIT_ABBREVIATIONS_EXPORT void condenseMolAbbreviations(RWMol &mol, const std::vector< AbbreviationDefinition > &abbrevs, double maxCoverage=0.4, bool sanitize=true)
RDKIT_ABBREVIATIONS_EXPORT void labelMatches(RWMol &mol, const std::vector< AbbreviationMatch > &matches)
creates "SUP" SubstanceGroups on the molecule describing the abbreviation
RDKIT_ABBREVIATIONS_EXPORT std::vector< AbbreviationMatch > findApplicableAbbreviationMatches(const ROMol &mol, const std::vector< AbbreviationDefinition > &abbrevs, double maxCoverage=0.4)
returns all matches for the abbreviations across the molecule
Std stuff.
std::vector< std::pair< int, int > > MatchVectType
used to return matches from substructure searching, The format is (queryAtomIdx, molAtomIdx)
bool rdvalue_is(const RDValue_cast_t)
std::vector< unsigned int > extraAttachAtoms
optional
bool operator==(const AbbreviationDefinition &other) const
std::shared_ptr< ROMol > mol
optional
bool operator!=(const AbbreviationDefinition &other) const
AbbreviationMatch(std::vector< std::pair< int, int > > matchArg, AbbreviationDefinition abbrevArg)
bool operator!=(const AbbreviationMatch &other) const
bool operator==(const AbbreviationMatch &other) const