RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
Subgraphs.h
Go to the documentation of this file.
1//
2// Copyright (C) 2003-2022 Greg Landrum and other RDKit contributors
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10
11/*! \file Subgraphs.h
12
13 \brief functionality for finding subgraphs and paths in molecules
14
15 Difference between _subgraphs_ and _paths_ :
16 Subgraphs are potentially branched, whereas paths (in our
17 terminology at least) cannot be. So, the following graph:
18\verbatim
19 C--0--C--1--C--3--C
20       |
21       2
22       |
23       C
24\endverbatim
25 has 3 _subgraphs_ of length 3: (0,1,2),(0,1,3),(2,1,3)
26 but only 2 _paths_ of length 3: (0,1,3),(2,1,3)
27*/
28#include <RDGeneral/export.h>
29#ifndef RD_SUBGRAPHS_H
30#define RD_SUBGRAPHS_H
31
32#include <vector>
33#include <list>
34#include <map>
35#include <unordered_map>
36
37namespace RDKit {
38class ROMol;
39// NOTE: before replacing the defn of PATH_TYPE: be aware that
40// we do occasionally use reverse iterators on these things, so
41// replacing with a slist would probably be a bad idea.
42typedef std::vector<int> PATH_TYPE;
43typedef std::list<PATH_TYPE> PATH_LIST;
44typedef PATH_LIST::const_iterator PATH_LIST_CI;
45
46typedef std::map<int, PATH_LIST> INT_PATH_LIST_MAP;
47typedef INT_PATH_LIST_MAP::const_iterator INT_PATH_LIST_MAP_CI;
48typedef INT_PATH_LIST_MAP::iterator INT_PATH_LIST_MAP_I;
49
50// --- --- --- --- --- --- --- --- --- --- --- --- ---
51//
52//
53// --- --- --- --- --- --- --- --- --- --- --- --- ---
54
55//! \brief find all bond subgraphs in a range of sizes
56/*!
57 * \param mol - the molecule to be considered
58 * \param lowerLen - the minimum subgraph size to find
59 * \param upperLen - the maximum subgraph size to find
60 * \param useHs - if set, hydrogens in the graph will be considered
61 * eligible to be in paths. NOTE: this will not add
62 * Hs to the graph.
63 * \param rootedAtAtom - if non-negative, only subgraphs that start at
64 * this atom will be returned.
65 *
66 * The result is a map from subgraph size -> list of paths
67 * (i.e. list of list of bond indices)
68 */
70 const ROMol &mol, unsigned int lowerLen, unsigned int upperLen,
71 bool useHs = false, int rootedAtAtom = -1);
72
73//! \brief find all bond subgraphs of a particular size
74/*!
75 * \param mol - the molecule to be considered
76 * \param targetLen - the length of the subgraphs to be returned
77 * \param useHs - if set, hydrogens in the graph will be considered
78 * eligible to be in paths. NOTE: this will not add
79 * Hs to the graph.
80 * \param rootedAtAtom - if non-negative, only subgraphs that start at
81 * this atom will be returned.
82 *
83 *
84 * The result is a list of paths (i.e. list of list of bond indices)
85 */
87findAllSubgraphsOfLengthN(const ROMol &mol, unsigned int targetLen,
88 bool useHs = false, int rootedAtAtom = -1);
89
90//! \brief find unique bond subgraphs of a particular size
91/*!
92 * \param mol - the molecule to be considered
93 * \param targetLen - the length of the subgraphs to be returned
94 * \param useHs - if set, hydrogens in the graph will be considered
95 * eligible to be in paths. NOTE: this will not add
96 * Hs to the graph.
97 * \param useBO - if set, bond orders will be considered when uniquifying
98 * the paths
99 * \param rootedAtAtom - if non-negative, only subgraphs that start at
100 * this atom will be returned.
101 *
102 * The result is a list of paths (i.e. list of list of bond indices)
103 */
105 const ROMol &mol, unsigned int targetLen, bool useHs = false,
106 bool useBO = true, int rootedAtAtom = -1);
107//! \brief find all paths of a particular size
108/*!
109 * \param mol - the molecule to be considered
110 * \param targetLen - the length of the paths to be returned
111 * \param useBonds - if set, the path indices will be bond indices,
112 * not atom indices
113 * \param useHs - if set, hydrogens in the graph will be considered
114 * eligible to be in paths. NOTE: this will not add
115 * Hs to the graph.
116 * \param rootedAtAtom - if non-negative, only paths that start at
117 * this atom will be returned.
118 * \param onlyShortestPaths - if set then only paths whose length is <= the
119 * length of the shortest path between their begin and end
120 * atoms will be included in the results
121 *
122 * The result is a list of paths (i.e. list of list of bond indices, or of atom indices if useBonds is not set)
123 */
125 const ROMol &mol, unsigned int targetLen, bool useBonds = true,
126 bool useHs = false, int rootedAtAtom = -1, bool onlyShortestPaths = false);
128 const ROMol &mol, unsigned int lowerLen, unsigned int upperLen,
129 bool useBonds = true, bool useHs = false, int rootedAtAtom = -1,
130 bool onlyShortestPaths = false);
131
132//! \brief Find bond subgraphs of a particular radius around an atom.
133//! Return empty result if there is no bond at the requested radius.
134/*!
135 * \param mol - the molecule to be considered
136 * \param radius - the radius of the subgraphs to be considered
137 * \param rootedAtAtom - the atom to consider
138 * \param useHs - if set, hydrogens in the graph will be considered
139 * eligible to be in paths. NOTE: this will not add
140 * Hs to the graph.
141 * \param enforceSize - If false, all the bonds within the requested radius
142 * (<= radius) are collected. Otherwise, at least one bond
143 * located at the requested radius must be found and added.
144 * \param atomMap - Optional: If provided, it will record the minimum
145 * distance of each atom from the rooted atom (starting with 0 for the rooted
146 * atom). Each entry is a pair of the atom ID and the distance.
147 * The result is a path (a vector of bond indices)
148 */
150 const ROMol &mol, unsigned int radius, unsigned int rootedAtAtom,
151 bool useHs = false, bool enforceSize = true,
152 std::unordered_map<unsigned int, unsigned int> *atomMap = nullptr);
153
154} // namespace RDKit
155
156#endif
#define RDKIT_SUBGRAPHS_EXPORT
Definition export.h:513
Std stuff.
RDKIT_SUBGRAPHS_EXPORT PATH_LIST findUniqueSubgraphsOfLengthN(const ROMol &mol, unsigned int targetLen, bool useHs=false, bool useBO=true, int rootedAtAtom=-1)
find unique bond subgraphs of a particular size
bool rdvalue_is(const RDValue_cast_t)
RDKIT_SUBGRAPHS_EXPORT PATH_LIST findAllPathsOfLengthN(const ROMol &mol, unsigned int targetLen, bool useBonds=true, bool useHs=false, int rootedAtAtom=-1, bool onlyShortestPaths=false)
find all paths of a particular size
std::list< PATH_TYPE > PATH_LIST
Definition Subgraphs.h:43
RDKIT_SUBGRAPHS_EXPORT PATH_TYPE findAtomEnvironmentOfRadiusN(const ROMol &mol, unsigned int radius, unsigned int rootedAtAtom, bool useHs=false, bool enforceSize=true, std::unordered_map< unsigned int, unsigned int > *atomMap=nullptr)
Find bond subgraphs of a particular radius around an atom. Return empty result if there is no bond at...
std::vector< int > PATH_TYPE
Definition Subgraphs.h:42
RDKIT_SUBGRAPHS_EXPORT PATH_LIST findAllSubgraphsOfLengthN(const ROMol &mol, unsigned int targetLen, bool useHs=false, int rootedAtAtom=-1)
find all bond subgraphs of a particular size
std::map< int, PATH_LIST > INT_PATH_LIST_MAP
Definition Subgraphs.h:46
RDKIT_SUBGRAPHS_EXPORT INT_PATH_LIST_MAP findAllPathsOfLengthsMtoN(const ROMol &mol, unsigned int lowerLen, unsigned int upperLen, bool useBonds=true, bool useHs=false, int rootedAtAtom=-1, bool onlyShortestPaths=false)
INT_PATH_LIST_MAP::iterator INT_PATH_LIST_MAP_I
Definition Subgraphs.h:48
INT_PATH_LIST_MAP::const_iterator INT_PATH_LIST_MAP_CI
Definition Subgraphs.h:47
RDKIT_SUBGRAPHS_EXPORT INT_PATH_LIST_MAP findAllSubgraphsOfLengthsMtoN(const ROMol &mol, unsigned int lowerLen, unsigned int upperLen, bool useHs=false, int rootedAtAtom=-1)
find all bond subgraphs in a range of sizes
PATH_LIST::const_iterator PATH_LIST_CI
Definition Subgraphs.h:44