RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
new_canon.h
Go to the documentation of this file.
1//
2// Copyright (C) 2014 Greg Landrum
3// Adapted from pseudo-code from Roger Sayle
4//
5// @@ All Rights Reserved @@
6// This file is part of the RDKit.
7// The contents are covered by the terms of the BSD license
8// which is included in the file license.txt, found at the root
9// of the RDKit source tree.
10//
11
12#include <RDGeneral/export.h>
13#include <RDGeneral/hanoiSort.h>
14#include <GraphMol/ROMol.h>
15#include <GraphMol/RingInfo.h>
18#include <cstdint>
19#include <boost/dynamic_bitset.hpp>
21#include <cstring>
22#include <iostream>
23#include <cassert>
24#include <cstring>
25#include <vector>
26
27// #define VERBOSE_CANON 1
28
29namespace RDKit {
30namespace Canon {
31struct canon_atom;
32
34 Bond::BondType bondType{Bond::BondType::UNSPECIFIED};
35 unsigned int bondStereo{
36 static_cast<unsigned int>(Bond::BondStereo::STEREONONE)};
37 unsigned int nbrSymClass{0};
38 unsigned int nbrIdx{0};
39 Bond::BondStereo stype{Bond::BondStereo::STEREONONE};
40 const canon_atom *controllingAtoms[4]{nullptr, nullptr, nullptr, nullptr};
41 const std::string *p_symbol{
42 nullptr}; // if provided, this is used to order bonds
43 unsigned int bondIdx{0};
44
47 unsigned int nsc, unsigned int bidx)
48 : bondType(bt),
49 bondStereo(static_cast<unsigned int>(bs)),
50 nbrSymClass(nsc),
51 nbrIdx(ni),
52 bondIdx(bidx) {}
53 bondholder(Bond::BondType bt, unsigned int bs, unsigned int ni,
54 unsigned int nsc, unsigned int bidx)
55 : bondType(bt),
56 bondStereo(bs),
57 nbrSymClass(nsc),
58 nbrIdx(ni),
59 bondIdx(bidx) {}
60
61 int compareStereo(const bondholder &o) const;
62
63 bool operator<(const bondholder &o) const { return compare(*this, o) < 0; }
64 static bool greater(const bondholder &lhs, const bondholder &rhs) {
65 return compare(lhs, rhs) > 0;
66 }
67
68 static int compare(const bondholder &x, const bondholder &y,
69 unsigned int div = 1) {
70 if (x.p_symbol && y.p_symbol) {
71 if ((*x.p_symbol) < (*y.p_symbol)) {
72 return -1;
73 } else if ((*x.p_symbol) > (*y.p_symbol)) {
74 return 1;
75 }
76 }
77 if (x.bondType < y.bondType) {
78 return -1;
79 } else if (x.bondType > y.bondType) {
80 return 1;
81 }
82 if (x.bondStereo < y.bondStereo) {
83 return -1;
84 } else if (x.bondStereo > y.bondStereo) {
85 return 1;
86 }
87 auto scdiv = x.nbrSymClass / div - y.nbrSymClass / div;
88 if (scdiv) {
89 return scdiv;
90 }
91 if (x.bondStereo && y.bondStereo) {
92 auto cs = x.compareStereo(y);
93 if (cs) {
94 return cs;
95 }
96 }
97 return 0;
98 }
99};
101 const Atom *atom{nullptr};
102 int index{-1};
103 unsigned int degree{0};
104 unsigned int totalNumHs{0};
105 bool hasRingNbr{false};
106 bool isRingStereoAtom{false};
107 unsigned int whichStereoGroup{0};
108 StereoGroupType typeOfStereoGroup{StereoGroupType::STEREO_ABSOLUTE};
109 std::unique_ptr<int[]> nbrIds;
110 const std::string *p_symbol{
111 nullptr}; // if provided, this is used to order atoms
112 std::vector<int> neighborNum;
113 std::vector<int> revistedNeighbors;
114 std::vector<bondholder> bonds;
115};
116
118 canon_atom *atoms, std::vector<bondholder> &nbrs);
119
121 canon_atom *atoms, std::vector<bondholder> &nbrs, unsigned int atomIdx,
122 std::vector<std::pair<unsigned int, unsigned int>> &result);
123
124/*
125 * Different types of atom compare functions:
126 *
127 * - SpecialChiralityAtomCompareFunctor: Allows canonizing molecules exhibiting
128 *dependent chirality
129 * - SpecialSymmetryAtomCompareFunctor: Very specialized, allows canonizing
130 *highly symmetrical graphs/molecules
131 * - AtomCompareFunctor: Basic atom compare function which also allows to
132 *include neighbors within the ranking
133 */
134
136 public:
137 Canon::canon_atom *dp_atoms{nullptr};
138 const ROMol *dp_mol{nullptr};
139 const boost::dynamic_bitset<> *dp_atomsInPlay{nullptr},
140 *dp_bondsInPlay{nullptr};
141
144 Canon::canon_atom *atoms, const ROMol &m,
145 const boost::dynamic_bitset<> *atomsInPlay = nullptr,
146 const boost::dynamic_bitset<> *bondsInPlay = nullptr)
147 : dp_atoms(atoms),
148 dp_mol(&m),
149 dp_atomsInPlay(atomsInPlay),
150 dp_bondsInPlay(bondsInPlay) {}
151 int operator()(int i, int j) const {
152 PRECONDITION(dp_atoms, "no atoms");
153 PRECONDITION(dp_mol, "no molecule");
154 PRECONDITION(i != j, "bad call");
155 if (dp_atomsInPlay && !((*dp_atomsInPlay)[i] || (*dp_atomsInPlay)[j])) {
156 return 0;
157 }
158
159 if (!dp_atomsInPlay || (*dp_atomsInPlay)[i]) {
160 updateAtomNeighborIndex(dp_atoms, dp_atoms[i].bonds);
161 }
162 if (!dp_atomsInPlay || (*dp_atomsInPlay)[j]) {
163 updateAtomNeighborIndex(dp_atoms, dp_atoms[j].bonds);
164 }
165 for (unsigned int ii = 0;
166 ii < dp_atoms[i].bonds.size() && ii < dp_atoms[j].bonds.size(); ++ii) {
167 int cmp =
168 bondholder::compare(dp_atoms[i].bonds[ii], dp_atoms[j].bonds[ii]);
169 if (cmp) {
170 return cmp;
171 }
172 }
173
174 std::vector<std::pair<unsigned int, unsigned int>> swapsi;
175 std::vector<std::pair<unsigned int, unsigned int>> swapsj;
176 if (!dp_atomsInPlay || (*dp_atomsInPlay)[i]) {
177 updateAtomNeighborNumSwaps(dp_atoms, dp_atoms[i].bonds, i, swapsi);
178 }
179 if (!dp_atomsInPlay || (*dp_atomsInPlay)[j]) {
180 updateAtomNeighborNumSwaps(dp_atoms, dp_atoms[j].bonds, j, swapsj);
181 }
182 for (unsigned int ii = 0; ii < swapsi.size() && ii < swapsj.size(); ++ii) {
183 int cmp = swapsi[ii].second - swapsj[ii].second;
184 if (cmp) {
185 return cmp;
186 }
187 }
188 return 0;
189 }
190};
191
193 public:
194 Canon::canon_atom *dp_atoms{nullptr};
195 const ROMol *dp_mol{nullptr};
196 const boost::dynamic_bitset<> *dp_atomsInPlay{nullptr},
197 *dp_bondsInPlay{nullptr};
198
201 Canon::canon_atom *atoms, const ROMol &m,
202 const boost::dynamic_bitset<> *atomsInPlay = nullptr,
203 const boost::dynamic_bitset<> *bondsInPlay = nullptr)
204 : dp_atoms(atoms),
205 dp_mol(&m),
206 dp_atomsInPlay(atomsInPlay),
207 dp_bondsInPlay(bondsInPlay) {}
208 int operator()(int i, int j) const {
209 PRECONDITION(dp_atoms, "no atoms");
210 PRECONDITION(dp_mol, "no molecule");
211 PRECONDITION(i != j, "bad call");
212 if (dp_atomsInPlay && !((*dp_atomsInPlay)[i] || (*dp_atomsInPlay)[j])) {
213 return 0;
214 }
215
216 if (dp_atoms[i].neighborNum < dp_atoms[j].neighborNum) {
217 return -1;
218 } else if (dp_atoms[i].neighborNum > dp_atoms[j].neighborNum) {
219 return 1;
220 }
221
222 if (dp_atoms[i].revistedNeighbors < dp_atoms[j].revistedNeighbors) {
223 return -1;
224 } else if (dp_atoms[i].revistedNeighbors > dp_atoms[j].revistedNeighbors) {
225 return 1;
226 }
227
228 if (!dp_atomsInPlay || (*dp_atomsInPlay)[i]) {
229 updateAtomNeighborIndex(dp_atoms, dp_atoms[i].bonds);
230 }
231 if (!dp_atomsInPlay || (*dp_atomsInPlay)[j]) {
232 updateAtomNeighborIndex(dp_atoms, dp_atoms[j].bonds);
233 }
234 for (unsigned int ii = 0;
235 ii < dp_atoms[i].bonds.size() && ii < dp_atoms[j].bonds.size(); ++ii) {
236 int cmp =
237 bondholder::compare(dp_atoms[i].bonds[ii], dp_atoms[j].bonds[ii]);
238 if (cmp) {
239 return cmp;
240 }
241 }
242
243 if (dp_atoms[i].bonds.size() < dp_atoms[j].bonds.size()) {
244 return -1;
245 } else if (dp_atoms[i].bonds.size() > dp_atoms[j].bonds.size()) {
246 return 1;
247 }
248 return 0;
249 }
250};
251
252namespace {
253unsigned int getChiralRank(const ROMol *dp_mol, canon_atom *dp_atoms,
254 unsigned int i) {
255 unsigned int res = 0;
256 std::vector<unsigned int> perm;
257 perm.reserve(dp_atoms[i].atom->getDegree());
258 for (const auto nbr : dp_mol->atomNeighbors(dp_atoms[i].atom)) {
259 auto rnk = dp_atoms[nbr->getIdx()].index;
260 // make sure we don't have duplicate ranks
261 if (std::find(perm.begin(), perm.end(), rnk) != perm.end()) {
262 break;
263 } else {
264 perm.push_back(rnk);
265 }
266 }
267 if (perm.size() == dp_atoms[i].atom->getDegree()) {
268 auto ctag = dp_atoms[i].atom->getChiralTag();
271 auto sortedPerm = perm;
272 std::sort(sortedPerm.begin(), sortedPerm.end());
275 if (nswaps % 2) {
276 res = res == 2 ? 1 : 2;
277 }
278 }
279 }
280 return res;
281}
282} // namespace
284 unsigned int getAtomRingNbrCode(unsigned int i) const {
285 if (!dp_atoms[i].hasRingNbr) {
286 return 0;
287 }
288
289 auto nbrs = dp_atoms[i].nbrIds.get();
290 unsigned int code = 0;
291 for (unsigned j = 0; j < dp_atoms[i].degree; ++j) {
292 if (dp_atoms[nbrs[j]].isRingStereoAtom) {
293 code += dp_atoms[nbrs[j]].index * 10000 + 1; // j;
294 }
295 }
296 return code;
297 }
298
299 int basecomp(int i, int j) const {
300 unsigned int ivi, ivj;
301
302 // always start with the current class:
303 ivi = dp_atoms[i].index;
304 ivj = dp_atoms[j].index;
305 if (ivi < ivj) {
306 return -1;
307 } else if (ivi > ivj) {
308 return 1;
309 }
310 if (df_useAtomMaps) {
311 // use the atom-mapping numbers if they were assigned
312 int molAtomMapNumber_i = 0;
313 int molAtomMapNumber_j = 0;
314 dp_atoms[i].atom->getPropIfPresent(common_properties::molAtomMapNumber,
315 molAtomMapNumber_i);
316 dp_atoms[j].atom->getPropIfPresent(common_properties::molAtomMapNumber,
317 molAtomMapNumber_j);
318 if (molAtomMapNumber_i < molAtomMapNumber_j) {
319 return -1;
320 } else if (molAtomMapNumber_i > molAtomMapNumber_j) {
321 return 1;
322 }
323 }
324 // start by comparing degree
325 ivi = dp_atoms[i].degree;
326 ivj = dp_atoms[j].degree;
327 if (ivi < ivj) {
328 return -1;
329 } else if (ivi > ivj) {
330 return 1;
331 }
332 if (dp_atoms[i].p_symbol && dp_atoms[j].p_symbol) {
333 if (*(dp_atoms[i].p_symbol) < *(dp_atoms[j].p_symbol)) {
334 return -1;
335 } else if (*(dp_atoms[i].p_symbol) > *(dp_atoms[j].p_symbol)) {
336 return 1;
337 } else {
338 return 0;
339 }
340 }
341
342 // move onto atomic number
343 ivi = dp_atoms[i].atom->getAtomicNum();
344 ivj = dp_atoms[j].atom->getAtomicNum();
345 if (ivi < ivj) {
346 return -1;
347 } else if (ivi > ivj) {
348 return 1;
349 }
350 // isotopes if we're using them
351 if (df_useIsotopes) {
352 ivi = dp_atoms[i].atom->getIsotope();
353 ivj = dp_atoms[j].atom->getIsotope();
354 if (ivi < ivj) {
355 return -1;
356 } else if (ivi > ivj) {
357 return 1;
358 }
359 }
360
361 // nHs
362 ivi = dp_atoms[i].totalNumHs;
363 ivj = dp_atoms[j].totalNumHs;
364 if (ivi < ivj) {
365 return -1;
366 } else if (ivi > ivj) {
367 return 1;
368 }
369 // charge
370 ivi = dp_atoms[i].atom->getFormalCharge();
371 ivj = dp_atoms[j].atom->getFormalCharge();
372 if (ivi < ivj) {
373 return -1;
374 } else if (ivi > ivj) {
375 return 1;
376 }
377 // presence of specified chirality if it's being used
378 if (df_useChiralPresence) {
379 ivi =
380 dp_atoms[i].atom->getChiralTag() != Atom::ChiralType::CHI_UNSPECIFIED;
381 ivj =
382 dp_atoms[j].atom->getChiralTag() != Atom::ChiralType::CHI_UNSPECIFIED;
383 if (ivi < ivj) {
384 return -1;
385 } else if (ivi > ivj) {
386 return 1;
387 }
388 }
389 // chirality if we're using it
390 if (df_useChirality) {
391 // look at enhanced stereo
392 ivi = dp_atoms[i].whichStereoGroup; // can't use the index itself, but if
393 // it's set then we're in an SG
394 ivj = dp_atoms[j].whichStereoGroup;
395 if (ivi || ivj) {
396 if (ivi && !ivj) {
397 return 1;
398 } else if (ivj && !ivi) {
399 return -1;
400 } else if (ivi && ivj) {
401 ivi = static_cast<unsigned int>(dp_atoms[i].typeOfStereoGroup);
402 ivj = static_cast<unsigned int>(dp_atoms[j].typeOfStereoGroup);
403 if (ivi < ivj) {
404 return -1;
405 } else if (ivi > ivj) {
406 return 1;
407 }
408 ivi = dp_atoms[i].whichStereoGroup - 1;
409 ivj = dp_atoms[j].whichStereoGroup - 1;
410 // now check the current classes of the other members of the SG
411 std::set<unsigned int> sgi;
412 for (auto sgat : dp_mol->getStereoGroups()[ivi].getAtoms()) {
413 sgi.insert(dp_atoms[sgat->getIdx()].index);
414 }
415 std::set<unsigned int> sgj;
416 for (auto sgat : dp_mol->getStereoGroups()[ivj].getAtoms()) {
417 sgj.insert(dp_atoms[sgat->getIdx()].index);
418 }
419 if (sgi < sgj) {
420 return -1;
421 } else if (sgi > sgj) {
422 return 1;
423 }
424 }
425 } else {
426 // if there's no stereogroup, then use whatever atom stereochem is
427 // specfied:
428 ivi = 0;
429 ivj = 0;
430 // can't actually use values here, because they are arbitrary
431 ivi = dp_atoms[i].atom->getChiralTag() != 0;
432 ivj = dp_atoms[j].atom->getChiralTag() != 0;
433 if (ivi < ivj) {
434 return -1;
435 } else if (ivi > ivj) {
436 return 1;
437 }
438 // stereo set
439 if (ivi && ivj) {
440 if (ivi) {
441 ivi = getChiralRank(dp_mol, dp_atoms, i);
442 }
443 if (ivj) {
444 ivj = getChiralRank(dp_mol, dp_atoms, j);
445 }
446 if (ivi < ivj) {
447 return -1;
448 } else if (ivi > ivj) {
449 return 1;
450 }
451 }
452 }
453 }
454
455 if (df_useChiralityRings) {
456 // ring stereochemistry
457 ivi = getAtomRingNbrCode(i);
458 ivj = getAtomRingNbrCode(j);
459 if (ivi < ivj) {
460 return -1;
461 } else if (ivi > ivj) {
462 return 1;
463 } // bond stereo is taken care of in the neighborhood comparison
464 }
465 return 0;
466 }
467
468 public:
469 Canon::canon_atom *dp_atoms{nullptr};
470 const ROMol *dp_mol{nullptr};
471 const boost::dynamic_bitset<> *dp_atomsInPlay{nullptr},
472 *dp_bondsInPlay{nullptr};
473 bool df_useNbrs{false};
474 bool df_useIsotopes{true};
475 bool df_useChirality{true};
476 bool df_useChiralityRings{true};
477 bool df_useAtomMaps{true};
478 bool df_useChiralPresence{true};
479
482 const boost::dynamic_bitset<> *atomsInPlay = nullptr,
483 const boost::dynamic_bitset<> *bondsInPlay = nullptr)
484 : dp_atoms(atoms),
485 dp_mol(&m),
486 dp_atomsInPlay(atomsInPlay),
487 dp_bondsInPlay(bondsInPlay) {}
488 int operator()(int i, int j) const {
489 if (dp_atomsInPlay && !((*dp_atomsInPlay)[i] || (*dp_atomsInPlay)[j])) {
490 return 0;
491 }
492 int v = basecomp(i, j);
493 if (v) {
494 return v;
495 }
496
497 if (df_useNbrs) {
498 if (!dp_atomsInPlay || (*dp_atomsInPlay)[i]) {
499 updateAtomNeighborIndex(dp_atoms, dp_atoms[i].bonds);
500 }
501 if (!dp_atomsInPlay || (*dp_atomsInPlay)[j]) {
502 updateAtomNeighborIndex(dp_atoms, dp_atoms[j].bonds);
503 }
504
505 for (unsigned int ii = 0;
506 ii < dp_atoms[i].bonds.size() && ii < dp_atoms[j].bonds.size();
507 ++ii) {
508 int cmp =
509 bondholder::compare(dp_atoms[i].bonds[ii], dp_atoms[j].bonds[ii]);
510 if (cmp) {
511 return cmp;
512 }
513 }
514
515 if (dp_atoms[i].bonds.size() < dp_atoms[j].bonds.size()) {
516 return -1;
517 } else if (dp_atoms[i].bonds.size() > dp_atoms[j].bonds.size()) {
518 return 1;
519 }
520 }
521 return 0;
522 }
523};
524
525/*
526 * A compare function to discriminate chiral atoms, similar to the CIP rules.
527 * This functionality is currently not used.
528 */
529
530const unsigned int ATNUM_CLASS_OFFSET = 10000;
532 void getAtomNeighborhood(std::vector<bondholder> &nbrs) const {
533 for (unsigned j = 0; j < nbrs.size(); ++j) {
534 unsigned int nbrIdx = nbrs[j].nbrIdx;
535 if (nbrIdx == ATNUM_CLASS_OFFSET) {
536 // Ignore the Hs
537 continue;
538 }
539 const Atom *nbr = dp_atoms[nbrIdx].atom;
540 nbrs[j].nbrSymClass =
541 nbr->getAtomicNum() * ATNUM_CLASS_OFFSET + dp_atoms[nbrIdx].index + 1;
542 }
543 std::sort(nbrs.begin(), nbrs.end(), bondholder::greater);
544 // FIX: don't want to be doing this long-term
545 }
546
547 int basecomp(int i, int j) const {
548 PRECONDITION(dp_atoms, "no atoms");
549 unsigned int ivi, ivj;
550
551 // always start with the current class:
552 ivi = dp_atoms[i].index;
553 ivj = dp_atoms[j].index;
554 if (ivi < ivj) {
555 return -1;
556 } else if (ivi > ivj) {
557 return 1;
558 }
559
560 // move onto atomic number
561 ivi = dp_atoms[i].atom->getAtomicNum();
562 ivj = dp_atoms[j].atom->getAtomicNum();
563 if (ivi < ivj) {
564 return -1;
565 } else if (ivi > ivj) {
566 return 1;
567 }
568
569 // isotopes:
570 ivi = dp_atoms[i].atom->getIsotope();
571 ivj = dp_atoms[j].atom->getIsotope();
572 if (ivi < ivj) {
573 return -1;
574 } else if (ivi > ivj) {
575 return 1;
576 }
577
578 // atom stereochem:
579 ivi = 0;
580 ivj = 0;
581 std::string cipCode;
582 if (dp_atoms[i].atom->getPropIfPresent(common_properties::_CIPCode,
583 cipCode)) {
584 ivi = cipCode == "R" ? 2 : 1;
585 }
586 if (dp_atoms[j].atom->getPropIfPresent(common_properties::_CIPCode,
587 cipCode)) {
588 ivj = cipCode == "R" ? 2 : 1;
589 }
590 if (ivi < ivj) {
591 return -1;
592 } else if (ivi > ivj) {
593 return 1;
594 }
595
596 // bond stereo is taken care of in the neighborhood comparison
597 return 0;
598 }
599
600 public:
601 Canon::canon_atom *dp_atoms{nullptr};
602 const ROMol *dp_mol{nullptr};
603 bool df_useNbrs{false};
606 : dp_atoms(atoms), dp_mol(&m), df_useNbrs(false) {}
607 int operator()(int i, int j) const {
608 PRECONDITION(dp_atoms, "no atoms");
609 PRECONDITION(dp_mol, "no molecule");
610 PRECONDITION(i != j, "bad call");
611 int v = basecomp(i, j);
612 if (v) {
613 return v;
614 }
615
616 if (df_useNbrs) {
617 getAtomNeighborhood(dp_atoms[i].bonds);
618 getAtomNeighborhood(dp_atoms[j].bonds);
619
620 // we do two passes through the neighbor lists. The first just uses the
621 // atomic numbers (by passing the optional 10000 to bondholder::compare),
622 // the second takes the already-computed index into account
623 for (unsigned int ii = 0;
624 ii < dp_atoms[i].bonds.size() && ii < dp_atoms[j].bonds.size();
625 ++ii) {
626 int cmp = bondholder::compare(
627 dp_atoms[i].bonds[ii], dp_atoms[j].bonds[ii], ATNUM_CLASS_OFFSET);
628 if (cmp) {
629 return cmp;
630 }
631 }
632 for (unsigned int ii = 0;
633 ii < dp_atoms[i].bonds.size() && ii < dp_atoms[j].bonds.size();
634 ++ii) {
635 int cmp =
636 bondholder::compare(dp_atoms[i].bonds[ii], dp_atoms[j].bonds[ii]);
637 if (cmp) {
638 return cmp;
639 }
640 }
641 if (dp_atoms[i].bonds.size() < dp_atoms[j].bonds.size()) {
642 return -1;
643 } else if (dp_atoms[i].bonds.size() > dp_atoms[j].bonds.size()) {
644 return 1;
645 }
646 }
647 return 0;
648 }
649};
650
651/*
652 * Basic canonicalization function to organize the partitions which will be
653 * sorted next.
654 * */
655
656template <typename CompareFunc>
658 int mode, int *order, int *count, int &activeset,
659 int *next, int *changed, char *touchedPartitions) {
660 unsigned int nAtoms = mol.getNumAtoms();
661 int partition;
662 int symclass = 0;
663 int *start;
664 int offset;
665 int index;
666 int len;
667 int i;
668 // std::vector<char> touchedPartitions(mol.getNumAtoms(),0);
669
670 // std::cerr<<"&&&&&&&&&&&&&&&& RP"<<std::endl;
671 while (activeset != -1) {
672 // std::cerr<<"ITER: "<<activeset<<" next: "<<next[activeset]<<std::endl;
673 // std::cerr<<" next: ";
674 // for(unsigned int ii=0;ii<nAtoms;++ii){
675 // std::cerr<<ii<<":"<<next[ii]<<" ";
676 // }
677 // std::cerr<<std::endl;
678 // for(unsigned int ii=0;ii<nAtoms;++ii){
679 // std::cerr<<order[ii]<<" count: "<<count[order[ii]]<<" index:
680 // "<<atoms[order[ii]].index<<std::endl;
681 // }
682
684 activeset = next[partition];
685 next[partition] = -2;
686
688 offset = atoms[partition].index;
689 start = order + offset;
690 // std::cerr<<"\n\n**************************************************************"<<std::endl;
691 // std::cerr<<" sort - class:"<<atoms[partition].index<<" len: "<<len<<":";
692 // for(unsigned int ii=0;ii<len;++ii){
693 // std::cerr<<" "<<order[offset+ii]+1;
694 // }
695 // std::cerr<<std::endl;
696 // for(unsigned int ii=0;ii<nAtoms;++ii){
697 // std::cerr<<order[ii]+1<<" count: "<<count[order[ii]]<<" index:
698 // "<<atoms[order[ii]].index<<std::endl;
699 // }
701 // std::cerr<<"*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*"<<std::endl;
702 // std::cerr<<" result:";
703 // for(unsigned int ii=0;ii<nAtoms;++ii){
704 // std::cerr<<order[ii]+1<<" count: "<<count[order[ii]]<<" index:
705 // "<<atoms[order[ii]].index<<std::endl;
706 // }
707 for (int k = 0; k < len; ++k) {
708 changed[start[k]] = 0;
709 }
710
711 index = start[0];
712 // std::cerr<<" len:"<<len<<" index:"<<index<<"
713 // count:"<<count[index]<<std::endl;
714 for (i = count[index]; i < len; i++) {
715 index = start[i];
716 if (count[index]) {
717 symclass = offset + i;
718 }
719 atoms[index].index = symclass;
720 // std::cerr<<" "<<index+1<<"("<<symclass<<")";
721 // if(mode && (activeset<0 || count[index]>count[activeset]) ){
722 // activeset=index;
723 //}
724 for (unsigned j = 0; j < atoms[index].degree; ++j) {
725 changed[atoms[index].nbrIds[j]] = 1;
726 }
727 }
728 // std::cerr<<std::endl;
729
730 if (mode) {
731 index = start[0];
732 for (i = count[index]; i < len; i++) {
733 index = start[i];
734 for (unsigned j = 0; j < atoms[index].degree; ++j) {
735 unsigned int nbor = atoms[index].nbrIds[j];
736 touchedPartitions[atoms[nbor].index] = 1;
737 }
738 }
739 for (unsigned int ii = 0; ii < nAtoms; ++ii) {
740 if (touchedPartitions[ii]) {
741 partition = order[ii];
742 if ((count[partition] > 1) && (next[partition] == -2)) {
743 next[partition] = activeset;
745 }
747 }
748 }
749 }
750 }
751} // end of RefinePartitions()
752
753template <typename CompareFunc>
754void BreakTies(const ROMol &mol, canon_atom *atoms, CompareFunc compar,
755 int mode, int *order, int *count, int &activeset, int *next,
756 int *changed, char *touchedPartitions) {
757 unsigned int nAtoms = mol.getNumAtoms();
758 int partition;
759 int offset;
760 int index;
761 int len;
762 int oldPart = 0;
763
764 for (unsigned int i = 0; i < nAtoms; i++) {
765 partition = order[i];
766 oldPart = atoms[partition].index;
767 while (count[partition] > 1) {
769 offset = atoms[partition].index + len - 1;
770 index = order[offset];
771 atoms[index].index = offset;
772 count[partition] = len - 1;
773 count[index] = 1;
774
775 // test for ions, water molecules with no
776 if (atoms[index].degree < 1) {
777 continue;
778 }
779 for (unsigned j = 0; j < atoms[index].degree; ++j) {
780 unsigned int nbor = atoms[index].nbrIds[j];
781 touchedPartitions[atoms[nbor].index] = 1;
782 changed[nbor] = 1;
783 }
784
785 for (unsigned int ii = 0; ii < nAtoms; ++ii) {
786 if (touchedPartitions[ii]) {
787 int npart = order[ii];
788 if ((count[npart] > 1) && (next[npart] == -2)) {
789 next[npart] = activeset;
791 }
793 }
794 }
795 RefinePartitions(mol, atoms, compar, mode, order, count, activeset, next,
797 }
798 // not sure if this works each time
799 if (atoms[partition].index != oldPart) {
800 i -= 1;
801 }
802 }
803} // end of BreakTies()
804
806 int *order, int *count,
807 canon_atom *atoms);
808
809RDKIT_GRAPHMOL_EXPORT void ActivatePartitions(unsigned int nAtoms, int *order,
810 int *count, int &activeset,
811 int *next, int *changed);
812
814 const ROMol &mol, std::vector<unsigned int> &res, bool breakTies = true,
815 bool includeChirality = true, bool includeIsotopes = true,
816 bool includeAtomMaps = true, bool includeChiralPresence = false,
817 bool includeStereoGroups = true);
818
820 const ROMol &mol, std::vector<unsigned int> &res,
821 const boost::dynamic_bitset<> &atomsInPlay,
822 const boost::dynamic_bitset<> &bondsInPlay,
823 const std::vector<std::string> *atomSymbols,
824 const std::vector<std::string> *bondSymbols, bool breakTies,
827
829 const ROMol &mol, std::vector<unsigned int> &res,
830 const boost::dynamic_bitset<> &atomsInPlay,
831 const boost::dynamic_bitset<> &bondsInPlay,
832 const std::vector<std::string> *atomSymbols = nullptr,
833 bool breakTies = true, bool includeChirality = true,
834 bool includeIsotopes = true, bool includeAtomMaps = true,
835 bool includeChiralPresence = false) {
836 rankFragmentAtoms(mol, res, atomsInPlay, bondsInPlay, atomSymbols, nullptr,
839};
840
842 std::vector<unsigned int> &res);
843
845 std::vector<Canon::canon_atom> &atoms,
846 bool includeChirality = true,
847 bool includeStereoGroups = true);
848
849namespace detail {
851 std::vector<Canon::canon_atom> &atoms,
852 bool includeChirality,
853 const std::vector<std::string> *atomSymbols,
854 const std::vector<std::string> *bondSymbols,
855 const boost::dynamic_bitset<> &atomsInPlay,
856 const boost::dynamic_bitset<> &bondsInPlay,
857 bool needsInit);
858template <typename T>
859void rankWithFunctor(T &ftor, bool breakTies, int *order,
860 bool useSpecial = false, bool useChirality = false,
861 const boost::dynamic_bitset<> *atomsInPlay = nullptr,
862 const boost::dynamic_bitset<> *bondsInPlay = nullptr);
863
864} // namespace detail
865
866} // namespace Canon
867} // namespace RDKit
#define PRECONDITION(expr, mess)
Definition Invariant.h:109
Defines the primary molecule class ROMol as well as associated typedefs.
Defines the class StereoGroup which stores relationships between the absolute configurations of atoms...
The class for representing atoms.
Definition Atom.h:75
int getAtomicNum() const
returns our atomic number
Definition Atom.h:134
@ CHI_TETRAHEDRAL_CW
tetrahedral: clockwise rotation (SMILES @@)
Definition Atom.h:101
@ CHI_TETRAHEDRAL_CCW
tetrahedral: counter-clockwise rotation (SMILES
Definition Atom.h:102
BondType
the type of Bond
Definition Bond.h:56
BondStereo
the nature of the bond's stereochem (for cis/trans)
Definition Bond.h:95
AtomCompareFunctor(Canon::canon_atom *atoms, const ROMol &m, const boost::dynamic_bitset<> *atomsInPlay=nullptr, const boost::dynamic_bitset<> *bondsInPlay=nullptr)
Definition new_canon.h:481
int operator()(int i, int j) const
Definition new_canon.h:488
ChiralAtomCompareFunctor(Canon::canon_atom *atoms, const ROMol &m)
Definition new_canon.h:605
int operator()(int i, int j) const
Definition new_canon.h:607
SpecialChiralityAtomCompareFunctor(Canon::canon_atom *atoms, const ROMol &m, const boost::dynamic_bitset<> *atomsInPlay=nullptr, const boost::dynamic_bitset<> *bondsInPlay=nullptr)
Definition new_canon.h:143
SpecialSymmetryAtomCompareFunctor(Canon::canon_atom *atoms, const ROMol &m, const boost::dynamic_bitset<> *atomsInPlay=nullptr, const boost::dynamic_bitset<> *bondsInPlay=nullptr)
Definition new_canon.h:200
const std::vector< StereoGroup > & getStereoGroups() const
Gets a reference to the groups of atoms with relative stereochemistry.
Definition ROMol.h:790
unsigned int getNumAtoms() const
returns our number of atoms
Definition ROMol.h:421
#define RDKIT_GRAPHMOL_EXPORT
Definition export.h:233
void rankWithFunctor(T &ftor, bool breakTies, int *order, bool useSpecial=false, bool useChirality=false, const boost::dynamic_bitset<> *atomsInPlay=nullptr, const boost::dynamic_bitset<> *bondsInPlay=nullptr)
void initFragmentCanonAtoms(const ROMol &mol, std::vector< Canon::canon_atom > &atoms, bool includeChirality, const std::vector< std::string > *atomSymbols, const std::vector< std::string > *bondSymbols, const boost::dynamic_bitset<> &atomsInPlay, const boost::dynamic_bitset<> &bondsInPlay, bool needsInit)
RDKIT_GRAPHMOL_EXPORT void rankMolAtoms(const ROMol &mol, std::vector< unsigned int > &res, bool breakTies=true, bool includeChirality=true, bool includeIsotopes=true, bool includeAtomMaps=true, bool includeChiralPresence=false, bool includeStereoGroups=true)
RDKIT_GRAPHMOL_EXPORT void initCanonAtoms(const ROMol &mol, std::vector< Canon::canon_atom > &atoms, bool includeChirality=true, bool includeStereoGroups=true)
RDKIT_GRAPHMOL_EXPORT void CreateSinglePartition(unsigned int nAtoms, int *order, int *count, canon_atom *atoms)
RDKIT_GRAPHMOL_EXPORT void rankFragmentAtoms(const ROMol &mol, std::vector< unsigned int > &res, const boost::dynamic_bitset<> &atomsInPlay, const boost::dynamic_bitset<> &bondsInPlay, const std::vector< std::string > *atomSymbols, const std::vector< std::string > *bondSymbols, bool breakTies, bool includeChirality, bool includeIsotope, bool includeAtomMaps, bool includeChiralPresence)
RDKIT_GRAPHMOL_EXPORT void ActivatePartitions(unsigned int nAtoms, int *order, int *count, int &activeset, int *next, int *changed)
const unsigned int ATNUM_CLASS_OFFSET
Definition new_canon.h:530
RDKIT_GRAPHMOL_EXPORT void updateAtomNeighborNumSwaps(canon_atom *atoms, std::vector< bondholder > &nbrs, unsigned int atomIdx, std::vector< std::pair< unsigned int, unsigned int > > &result)
void BreakTies(const ROMol &mol, canon_atom *atoms, CompareFunc compar, int mode, int *order, int *count, int &activeset, int *next, int *changed, char *touchedPartitions)
Definition new_canon.h:754
void RefinePartitions(const ROMol &mol, canon_atom *atoms, CompareFunc compar, int mode, int *order, int *count, int &activeset, int *next, int *changed, char *touchedPartitions)
Definition new_canon.h:657
RDKIT_GRAPHMOL_EXPORT void chiralRankMolAtoms(const ROMol &mol, std::vector< unsigned int > &res)
RDKIT_GRAPHMOL_EXPORT void updateAtomNeighborIndex(canon_atom *atoms, std::vector< bondholder > &nbrs)
Std stuff.
StereoGroupType
Definition StereoGroup.h:31
bool rdvalue_is(const RDValue_cast_t)
void hanoisort(int *base, int nel, int *count, int *changed, CompareFunc compar)
Definition hanoiSort.h:151
unsigned int countSwapsToInterconvert(const T &ref, T probe)
Definition utils.h:54
const std::string * p_symbol
Definition new_canon.h:41
Bond::BondType bondType
Definition new_canon.h:34
static bool greater(const bondholder &lhs, const bondholder &rhs)
Definition new_canon.h:64
bool operator<(const bondholder &o) const
Definition new_canon.h:63
int compareStereo(const bondholder &o) const
bondholder(Bond::BondType bt, unsigned int bs, unsigned int ni, unsigned int nsc, unsigned int bidx)
Definition new_canon.h:53
bondholder(Bond::BondType bt, Bond::BondStereo bs, unsigned int ni, unsigned int nsc, unsigned int bidx)
Definition new_canon.h:46
unsigned int bondStereo
Definition new_canon.h:35
static int compare(const bondholder &x, const bondholder &y, unsigned int div=1)
Definition new_canon.h:68
unsigned int nbrSymClass
Definition new_canon.h:37
std::vector< bondholder > bonds
Definition new_canon.h:114
std::unique_ptr< int[]> nbrIds
Definition new_canon.h:109
std::vector< int > revistedNeighbors
Definition new_canon.h:113
std::vector< int > neighborNum
Definition new_canon.h:112