RDKit
Open-source cheminformatics and machine learning.
MolOps.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2001-2019 Greg Landrum and Rational Discovery LLC
3 // Copyright (c) 2014, Novartis Institutes for BioMedical Research Inc.
4 //
5 // @@ All Rights Reserved @@
6 // This file is part of the RDKit.
7 // The contents are covered by the terms of the BSD license
8 // which is included in the file license.txt, found at the root
9 // of the RDKit source tree.
10 //
11 #include <RDGeneral/export.h>
12 #ifndef _RD_MOL_OPS_H_
13 #define _RD_MOL_OPS_H_
14 
15 #include <vector>
16 #include <map>
17 #include <list>
19 #include <boost/smart_ptr.hpp>
20 #include <boost/dynamic_bitset.hpp>
22 #include <RDGeneral/types.h>
23 #include "SanitException.h"
24 
25 RDKIT_GRAPHMOL_EXPORT extern const int ci_LOCAL_INF;
26 namespace RDKit {
27 class ROMol;
28 class RWMol;
29 class Atom;
30 class Bond;
31 class Conformer;
32 typedef std::vector<double> INVAR_VECT;  //!< vector of doubles (presumably atom invariants, going by the name - TODO confirm)
33 typedef INVAR_VECT::iterator INVAR_VECT_I;  //!< mutable iterator over an INVAR_VECT
34 typedef INVAR_VECT::const_iterator INVAR_VECT_CI;  //!< read-only iterator over an INVAR_VECT
35 
36 //! \brief Groups a variety of molecular query and transformation operations.
37 namespace MolOps {
38 
39 //! return the number of electrons available on an atom to donate for
40 //! aromaticity
41 /*!
42  The result is determined using the default valency, number of lone pairs,
43  number of bonds and the formal charge. Note that the atom may not donate
44  all of these electrons to a ring for aromaticity (also used in Conjugation
45  and hybridization code).
46 
47  \param at the atom of interest
48 
49  \return the number of electrons
50 */
52 
53 //! sums up all atomic formal charges and returns the result
55 
56 //! returns whether or not the given Atom is involved in a conjugated bond
58 
59 //! finds the fragments (disconnected components of the molecular graph)
60 /*!
61 
62  \param mol the molecule of interest
63  \param mapping used to return the mapping of Atoms->fragments.
64  On return \c mapping will be <tt>mol.getNumAtoms()</tt> long
65  and will contain the fragment assignment for each Atom
66 
67  \return the number of fragments found.
68 
69 */
70 RDKIT_GRAPHMOL_EXPORT unsigned int getMolFrags(const ROMol &mol,
71  std::vector<int> &mapping);
72 //! find fragments (disconnected components of the molecular graph)
73 /*!
74 
75  \param mol the molecule of interest
76  \param frags used to return the Atoms in each fragment
77  On return \c frags will be \c numFrags long, and each entry
78  will contain the indices of the Atoms in that fragment.
79 
80  \return the number of fragments found.
81 
82 */
84  const ROMol &mol, std::vector<std::vector<int>> &frags);
85 
86 //! splits a molecule into its component fragments
87 //! (disconnected components of the molecular graph)
88 /*!
89 
90  \param mol the molecule of interest
91  \param sanitizeFrags toggles sanitization of the fragments after
92  they are built
93  \param frags used to return the mapping of Atoms->fragments.
94  if provided, \c frags will be <tt>mol.getNumAtoms()</tt> long
95  on return and will contain the fragment assignment for each Atom
96  \param fragsMolAtomMapping used to return the Atoms in each fragment.
97  If provided, \c fragsMolAtomMapping will be \c numFrags long on return, and each entry
98  will contain the indices of the Atoms in that fragment.
99  \param copyConformers toggles copying conformers of the fragments after
100  they are built
101  \return a vector of the fragments as smart pointers to ROMols
102 
103 */
104 RDKIT_GRAPHMOL_EXPORT std::vector<boost::shared_ptr<ROMol>> getMolFrags(
105  const ROMol &mol, bool sanitizeFrags = true,
106  std::vector<int> *frags = nullptr,
107  std::vector<std::vector<int>> *fragsMolAtomMapping = nullptr,
108  bool copyConformers = true);
109 
110 //! splits a molecule into pieces based on labels assigned using a query
111 /*!
112 
113  \param mol the molecule of interest
114  \param query the function (taking the molecule and an Atom, returning a label of type \c T) used to "label" the molecule for fragmentation
115  \param sanitizeFrags toggles sanitization of the fragments after
116  they are built
117  \param whiteList if provided, only labels in the list will be kept
118  \param negateList if true, the white list logic will be inverted: only labels
119  not in the list will be kept
120 
121  \return a map from each label to the corresponding fragment (as a smart pointer to an ROMol)
122 
123 */
124 template <typename T>
125 RDKIT_GRAPHMOL_EXPORT std::map<T, boost::shared_ptr<ROMol>>
126 getMolFragsWithQuery(const ROMol &mol, T (*query)(const ROMol &, const Atom *),
127  bool sanitizeFrags = true,
128  const std::vector<T> *whiteList = nullptr,
129  bool negateList = false);
130 
131 #if 0
132  //! finds a molecule's minimum spanning tree (MST)
133  /*!
134  \param mol the molecule of interest
135  \param mst used to return the MST as a vector of bond indices
136  */
137  RDKIT_GRAPHMOL_EXPORT void findSpanningTree(const ROMol &mol,std::vector<int> &mst);
138 #endif
139 
140 //! calculates Balaban's J index for the molecule
141 /*!
142  \param mol the molecule of interest
143  \param useBO toggles inclusion of the bond order in the calculation
144  (when false, we're not really calculating the J value)
145  \param force forces the calculation (instead of using cached results)
146  \param bondPath when included, only paths using bonds whose indices occur
147  in this vector will be included in the calculation
148  \param cacheIt If this is true, the calculated value will be cached
149  as a property on the molecule
150  \return the J index
151 
152 */
154  const ROMol &mol, bool useBO = true, bool force = false,
155  const std::vector<int> *bondPath = nullptr, bool cacheIt = true);
156 //! \overload
157 RDKIT_GRAPHMOL_EXPORT double computeBalabanJ(double *distMat, int nb, int nAts);  // NOTE(review): nb/nAts are presumably the bond and atom counts for distMat - confirm
158 
159 //! \name Dealing with hydrogens
160 //@{
161 
162 //! returns a copy of a molecule with hydrogens added in as explicit Atoms
163 /*!
164  \param mol the molecule to add Hs to
165  \param explicitOnly (optional) if this is \c true, only explicit Hs will be
166  added
167  \param addCoords (optional) if this is \c true, estimates for the
168  atomic coordinates
169  of the added Hs will be used.
170  \param onlyOnAtoms (optional) if provided, this should be a vector of
171  IDs of the atoms that will be considered for H addition.
172  \param addResidueInfo (optional) if this is \c true, add residue info to
173  hydrogen atoms (useful for PDB files).
174 
175  \return the new molecule
176 
177  <b>Notes:</b>
178  - it makes no sense to use the \c addCoords option if the
179  molecule's heavy
180  atoms don't already have coordinates.
181  - the caller is responsible for <tt>delete</tt>ing the pointer this
182  returns.
183  */
184 RDKIT_GRAPHMOL_EXPORT ROMol *addHs(const ROMol &mol, bool explicitOnly = false,
185  bool addCoords = false,
186  const UINT_VECT *onlyOnAtoms = nullptr,
187  bool addResidueInfo = false);
188 //! \overload
189 //! modifies the molecule in place
190 RDKIT_GRAPHMOL_EXPORT void addHs(RWMol &mol, bool explicitOnly = false,
191  bool addCoords = false,
192  const UINT_VECT *onlyOnAtoms = nullptr,
193  bool addResidueInfo = false);
194 
195 //! returns a copy of a molecule with hydrogens removed
196 /*!
197  \param mol the molecule to remove Hs from
198  \param implicitOnly (optional) if this is \c true, only implicit Hs will be
199  removed
200  \param updateExplicitCount (optional) If this is \c true, when explicit Hs
201  are removed
202  from the graph, the heavy atom to which they are bound will have its
203  counter of
204  explicit Hs increased.
205  \param sanitize: (optional) If this is \c true, the final molecule will be
206  sanitized
207 
208  \return the new molecule
209 
210  <b>Notes:</b>
211  - Hydrogens which aren't connected to a heavy atom will not be
212  removed. This prevents molecules like <tt>"[H][H]"</tt> from having
213  all atoms removed.
214  - Labelled hydrogen (e.g. atoms with atomic number=1, but mass > 1),
215  will not be removed.
216  - two coordinate Hs, like the central H in C[H-]C, will not be removed
217  - Hs connected to dummy atoms will not be removed
218  - Hs that are part of the definition of double bond Stereochemistry
219  will not be removed
220  - Hs that are not connected to anything else will not be removed
221  - Hs that have a query defined (i.e. hasQuery() returns true) will not
222  be removed
223 
224  - the caller is responsible for <tt>delete</tt>ing the pointer this
225  returns.
226 */
228  bool implicitOnly = false,
229  bool updateExplicitCount = false,
230  bool sanitize = true);
231 //! \overload
232 //! modifies the molecule in place
233 RDKIT_GRAPHMOL_EXPORT void removeHs(RWMol &mol, bool implicitOnly = false,
234  bool updateExplicitCount = false,
235  bool sanitize = true);
237  bool removeDegreeZero = false; /**< hydrogens that have no bonds */
238  bool removeHigherDegrees = false; /**< hydrogens with two (or more) bonds */
239  bool removeOnlyHNeighbors =
240  false; /**< hydrogens with bonds only to other hydrogens */
241  bool removeIsotopes = false; /**< hydrogens with non-default isotopes */
242  bool removeAndTrackIsotopes = false; /**< removes hydrogens with non-default
243  isotopes and keeps track of the heavy atom the isotopes were attached to in
244  the private _isotopicHs atom property, so they are re-added by AddHs() as the
245  original isotopes if possible*/
246  bool removeDummyNeighbors =
247  false; /**< hydrogens with at least one dummy-atom neighbor */
248  bool removeDefiningBondStereo =
249  false; /**< hydrogens defining bond stereochemistry */
250  bool removeWithWedgedBond = true; /**< hydrogens with wedged bonds to them */
251  bool removeWithQuery = false; /**< hydrogens with queries defined */
252  bool removeMapped = true; /**< mapped hydrogens */
253  bool removeInSGroups = false; /**< part of a SubstanceGroup */
254  bool showWarnings = true; /**< display warnings for Hs that are not removed */
255  bool removeNonimplicit = true; /**< DEPRECATED equivalent of implicitOnly */
256  bool updateExplicitCount =
257  false; /**< DEPRECATED equivalent of updateExplicitCount */
258  bool removeHydrides = true; /**< Removing Hydrides */
259 };
260 //! \overload
261 // modifies the molecule in place
263  bool sanitize = true);
264 //! \overload
265 // The caller owns the pointer this returns
267  const RemoveHsParameters &ps,
268  bool sanitize = true);
269 
270 //! removes all Hs from a molecule; operates on the RWMol passed in (nothing is returned)
271 RDKIT_GRAPHMOL_EXPORT void removeAllHs(RWMol &mol, bool sanitize = true);
272 //! \overload
273 // The caller owns the pointer this returns
275  bool sanitize = true);
276 
277 //! returns a copy of a molecule with hydrogens removed and added as queries
278 //! to the heavy atoms to which they are bound.
279 /*!
280  This is really intended to be used with molecules that contain QueryAtoms
281 
282  \param mol the molecule to remove Hs from
283 
284  \return the new molecule
285 
286  <b>Notes:</b>
287  - Atoms that do not already have hydrogen count queries will have one
288  added, other H-related queries will not be touched. Examples:
289  - C[H] -> [C;!H0]
290  - [C;H1][H] -> [C;H1]
291  - [C;H2][H] -> [C;H2]
292  - Hydrogens which aren't connected to a heavy atom will not be
293  removed. This prevents molecules like <tt>"[H][H]"</tt> from having
294  all atoms removed.
295  - the caller is responsible for <tt>delete</tt>ing the pointer this
296  returns.
297  - By default all hydrogens are removed, however if
298  mergeUnmappedOnly is true, any hydrogen participating
299  in an atom map will be retained
300 
301 */
303  bool mergeUnmappedOnly = false);
304 //! \overload
305 // modifies the molecule in place
307  bool mergeUnmappedOnly = false);
308 
309 typedef enum {
316  ADJUST_IGNOREALL = 0xFFFFFFF
318 
319 //! Parameters controlling the behavior of MolOps::adjustQueryProperties
320 /*!
321 
322  Note that some of the options here are either directly contradictory or make
323  no sense when combined with each other. We generally assume that client code
324  is doing something sensible and don't attempt to detect possible conflicts or
325  problems.
326 
327 */
329  bool adjustDegree = true; /**< add degree queries */
330  std::uint32_t adjustDegreeFlags = ADJUST_IGNOREDUMMIES | ADJUST_IGNORECHAINS;
331 
332  bool adjustRingCount = false; /**< add ring-count queries */
333  std::uint32_t adjustRingCountFlags =
335 
336  bool makeDummiesQueries = true; /**< convert dummy atoms without isotope
337  labels to any-atom queries */
338 
339  bool aromatizeIfPossible = true; /**< perceive and set aromaticity */
340 
341  bool makeBondsGeneric =
342  false; /**< convert bonds to generic queries (any bonds) */
343  std::uint32_t makeBondsGenericFlags = ADJUST_IGNORENONE;
344 
345  bool makeAtomsGeneric =
346  false; /**< convert atoms to generic queries (any atoms) */
347  std::uint32_t makeAtomsGenericFlags = ADJUST_IGNORENONE;
348 
349  bool adjustHeavyDegree = false; /**< adjust the heavy-atom degree instead of
350  overall degree */
351  std::uint32_t adjustHeavyDegreeFlags =
353 
354  bool adjustRingChain = false; /**< add ring-chain queries */
355  std::uint32_t adjustRingChainFlags = ADJUST_IGNORENONE;
356 
357  bool useStereoCareForBonds =
358  false; /**< remove stereochemistry info from double bonds that do not have
359  the stereoCare property set */
360 
361  bool adjustConjugatedFiveRings =
362  false; /**< sets bond queries in conjugated five-rings to
363  SINGLE|DOUBLE|AROMATIC */
364 
365  bool setMDLFiveRingAromaticity =
366  false; /**< uses the 5-ring aromaticity behavior of the (former) MDL
367  software as documented in the Chemical Representation Guide */
368 
369  bool adjustSingleBondsToDegreeOneNeighbors =
370  false; /**< sets single bonds between aromatic atoms and degree one
371  neighbors to SINGLE|AROMATIC */
372 
373  bool adjustSingleBondsBetweenAromaticAtoms =
374  false; /**< sets non-ring single bonds between two aromatic atoms to
375  SINGLE|AROMATIC */
376  //! \brief returns an AdjustQueryParameters object with all adjustments
377  //! disabled
380  res.adjustDegree = false;
381  res.makeDummiesQueries = false;
382  res.aromatizeIfPossible = false;
383  return res;
384  }
386 };
387 
388 //! updates an AdjustQueryParameters object from a JSON string
390  MolOps::AdjustQueryParameters &p, const std::string &json);
391 
392 //! returns a copy of a molecule with query properties adjusted
393 /*!
394  \param mol the molecule to adjust
395  \param params controls the adjustments made
396 
397  \return the new molecule, the caller owns the memory
398 */
400  const ROMol &mol, const AdjustQueryParameters *params = nullptr);
401 //! \overload
402 // modifies the molecule in place
404  RWMol &mol, const AdjustQueryParameters *params = nullptr);
405 
406 //! returns a copy of a molecule with the atoms renumbered
407 /*!
408 
409  \param mol the molecule to work with
410  \param newOrder the new ordering of the atoms (should be numAtoms long)
411  for example: if newOrder is [3,2,0,1], then atom 3 in the original
412  molecule will be atom 0 in the new one
413 
414  \return the new molecule
415 
416  <b>Notes:</b>
417  - the caller is responsible for <tt>delete</tt>ing the pointer this
418  returns.
419 
420 */
422  const ROMol &mol, const std::vector<unsigned int> &newOrder);
423 
424 //@}
425 
426 //! \name Sanitization
427 //@{
428 
429 typedef enum {
441  SANITIZE_ALL = 0xFFFFFFF
443 
444 //! \brief carries out a collection of tasks for cleaning up a molecule and
445 //! ensuring
446 //! that it makes "chemical sense"
447 /*!
448  This function calls the following in sequence
449  -# MolOps::cleanUp()
450  -# mol.updatePropertyCache()
451  -# MolOps::symmetrizeSSSR()
452  -# MolOps::Kekulize()
453  -# MolOps::assignRadicals()
454  -# MolOps::setAromaticity()
455  -# MolOps::setConjugation()
456  -# MolOps::setHybridization()
457  -# MolOps::cleanupChirality()
458  -# MolOps::adjustHs()
459 
460  \param mol : the RWMol to be cleaned
461 
462  \param operationThatFailed : the first (if any) sanitization operation that
463  fails is set here.
464  The values are taken from the \c SanitizeFlags
465  enum. On success, the value is \c
466  SanitizeFlags::SANITIZE_NONE
467 
468  \param sanitizeOps : the bits here are used to set which sanitization
469  operations are carried out. The elements of the \c
470  SanitizeFlags enum define the operations.
471 
472  <b>Notes:</b>
473  - If there is a failure in the sanitization, a \c MolSanitizeException
474  will be thrown.
475  - in general the user of this function should cast the molecule following
476  this function to a ROMol, so that new atoms and bonds cannot be added to
477  the molecule and screw up the sanitizing that has been done here
478 */
480  unsigned int &operationThatFailed,
481  unsigned int sanitizeOps = SANITIZE_ALL);
482 //! \overload
484 
485 //! \brief Identifies chemistry problems (things that don't make chemical
486 //! sense) in a molecule
487 /*!
488  This function uses the operations in sanitizeMol but does not change
489  the input structure and returns a list of the problems encountered instead
490  of stopping at the first failure.
491 
492  The problems this looks for come from the sanitization operations:
493  -# mol.updatePropertyCache() : Unreasonable valences
494  -# MolOps::Kekulize() : Unkekulizable ring systems, aromatic atoms not
495  in rings, aromatic bonds to non-aromatic atoms.
496 
497  \param mol : the molecule to be checked (it is not modified)
498 
499  \param sanitizeOps : the bits here are used to set which sanitization
500  operations are carried out. The elements of the \c
501  SanitizeFlags enum define the operations.
502 
503  \return a vector of \c MolSanitizeException values that indicate what
504  problems were encountered
505 
506 */
508 std::vector<std::unique_ptr<MolSanitizeException>> detectChemistryProblems(
509  const ROMol &mol, unsigned int sanitizeOps = SANITIZE_ALL);
510 
511 //! Possible aromaticity models
512 /*!
513 - \c AROMATICITY_DEFAULT at the moment always uses \c AROMATICITY_RDKIT
514 - \c AROMATICITY_RDKIT is the standard RDKit model (as documented in the RDKit
515 Book)
516 - \c AROMATICITY_SIMPLE only considers 5- and 6-membered simple rings (it
517 does not consider the outer envelope of fused rings)
518 - \c AROMATICITY_MDL
519 - \c AROMATICITY_CUSTOM uses a caller-provided function
520 */
521 typedef enum {
522  AROMATICITY_DEFAULT = 0x0, ///< future proofing
526  AROMATICITY_CUSTOM = 0xFFFFFFF ///< use a function
528 
529 //! Sets up the aromaticity for a molecule
530 /*!
531 
532  This is what happens here:
533  -# find all the simple rings by calling the findSSSR function
534  -# loop over all the Atoms in each ring and mark them if they are
535  candidates
536  for aromaticity. A ring atom is a candidate if it can spare electrons
537  to the ring and if it's from the first two rows of the periodic table.
538  -# based on the candidate atoms, mark the rings to be either candidates
539  or non-candidates. A ring is a candidate only if all its atoms are
540  candidates
541  -# apply Hueckel rule to each of the candidate rings to check if the ring
542  can be
543  aromatic
544 
545  \param mol the RWMol of interest
546  \param model the aromaticity model to use
547  \param func a custom function for assigning aromaticity (only used when
548  model=\c AROMATICITY_CUSTOM)
549 
550  \return >0 on success, <= 0 otherwise
551 
552  <b>Assumptions:</b>
553  - Kekulization has been done (i.e. \c MolOps::Kekulize() has already
554  been called)
555 
556 */
559  int (*func)(RWMol &) = nullptr);
560 
561 //! Designed to be called by the sanitizer to handle special cases before
562 //! anything is done.
563 /*!
564 
565  Currently this:
566  - modifies nitro groups, so that the nitrogen does not have an
567  unreasonable valence of 5, as follows:
568  - the nitrogen gets a positive charge
569  - one of the oxygens gets a negative charge and the double bond to
570  this oxygen is changed to a single bond. The net result is that nitro groups
571  can be counted on to be: \c "[N+](=O)[O-]"
572  - modifies halogen-oxygen containing species as follows:
573  \c [Cl,Br,I](=O)(=O)(=O)O -> [X+3]([O-])([O-])([O-])O
574  \c [Cl,Br,I](=O)(=O)O -> [X+3]([O-])([O-])O
575  \c [Cl,Br,I](=O)O -> [X+]([O-])O
576  - converts the substructure [N,C]=P(=O)-* to [N,C]=[P+](-[O-])-*
577 
578  \param mol the molecule of interest
579 
580 */
582 
583 //! Called by the sanitizer to assign radical counts to atoms
585 
586 //! adjust the number of implicit and explicit Hs for special cases
587 /*!
588 
589  Currently this:
590  - modifies aromatic nitrogens so that, when appropriate, they have an
591  explicit H marked (e.g. so that we get things like \c "c1cc[nH]cc1")
592 
593  \param mol the molecule of interest
594 
595  <b>Assumptions</b>
596  - this is called after the molecule has been sanitized,
597  aromaticity has been perceived, and the implicit valence of
598  everything has been calculated.
599 
600 */
602 
603 //! Kekulizes the molecule
604 /*!
605 
606  \param mol the molecule of interest
607  \param markAtomsBonds if this is set to true, \c isAromatic boolean
608  settings on both the Bonds and Atoms are turned to false following the
609  Kekulization, otherwise they are left alone in their original state.
610  \param maxBackTracks the maximum number of attempts at
611  back-tracking (the declared default is 100).
612  The algorithm uses a back-tracking procedure to
613  revisit a previous setting of a double bond if
614  we hit a wall in the kekulization
615  process
616 
617  <b>Notes:</b>
618  - even if \c markAtomsBonds is \c false the \c BondType for all aromatic
619  bonds will be changed from \c RDKit::Bond::AROMATIC to \c
620  RDKit::Bond::SINGLE
621  or \c RDKit::Bond::DOUBLE during Kekulization.
622 
623 */
624 RDKIT_GRAPHMOL_EXPORT void Kekulize(RWMol &mol, bool markAtomsBonds = true,
625  unsigned int maxBackTracks = 100);
626 
627 //! flags the molecule's conjugated bonds
629 
630 //! calculates and sets the hybridization of all a molecule's Atoms
632 
633 // @}
634 
635 //! \name Ring finding and SSSR
636 //@{
637 
638 //! finds a molecule's Smallest Set of Smallest Rings
639 /*!
640  Currently this implements a modified form of Figueras algorithm
641  (JCICS - Vol. 36, No. 5, 1996, 986-991)
642 
643  \param mol the molecule of interest
644  \param res used to return the vector of rings. Each entry is a vector with
645  atom indices. This information is also stored in the molecule's
646  RingInfo structure, so this argument is optional (see overload)
647 
648  \return number of smallest rings found
649 
650  Base algorithm:
651  - The original algorithm starts by finding representative degree 2
652  nodes.
653  - Representative because if a series of deg 2 nodes are found only
654  one of them is picked.
655  - The smallest ring around each of them is found.
656  - The bonds that connect to this degree 2 node are then chopped off,
657  yielding
658  new deg two nodes
659  - The process is repeated on the new deg 2 nodes.
660  - If no deg 2 nodes are found, a deg 3 node is picked. The smallest ring
661  with it is found. A bond from this is "carefully" (look in the paper)
662  selected and chopped, yielding deg 2 nodes. The process is same as
663  above once this is done.
664 
665  Our Modifications:
666  - If available, more than one smallest ring around a representative deg 2
667  node will be computed and stored
668  - Typically 3 rings are found around a degree 3 node (when no deg 2s are
669  available)
670  and all the bonds to that node are chopped.
671  - The extra rings that were found in this process are removed after all
672  the nodes have been covered.
673 
674  These changes were motivated by several factors:
675  - We believe the original algorithm fails to find the correct SSSR
676  (finds the correct number of them but the wrong ones) on some sample
677  mols
678  - Since SSSR may not be unique, a post-SSSR step to symmetrize may be
679  done. The extra rings this process adds can be quite useful.
680 */
682  std::vector<std::vector<int>> &res);
683 //! \overload
685  const ROMol &mol, std::vector<std::vector<int>> *res = nullptr);
686 
687 //! use a DFS algorithm to identify ring bonds and atoms in a molecule
688 /*!
689  \b NOTE: though the RingInfo structure is populated by this function,
690  the only really reliable calls that can be made are to check if
691  mol.getRingInfo().numAtomRings(idx) or mol.getRingInfo().numBondRings(idx)
692  return values >0
693 */
695 
697 
698 //! symmetrize the molecule's Smallest Set of Smallest Rings
699 /*!
700  SSSR rings obtained from "findSSSR" can be non-unique in some cases.
701  For example, cubane has five SSSR rings, not six as one would hope.
702 
703  This function adds additional rings to the SSSR list if necessary
704  to make the list symmetric, e.g. all atoms in cubane will be part of the
705  same number of SSSRs. This function chooses these extra rings from the extra
706  rings computed and discarded during findSSSR. The new rings are chosen such
707  that:
708  - replacing a same sized ring in the SSSR list with an extra ring yields
709  the same union of bond IDs as the original SSSR list
710 
711  \param mol - the molecule of interest
712  \param res used to return the vector of rings. Each entry is a vector with
713  atom indices. This information is also stored in the molecule's
714  RingInfo structure, so this argument is optional (see overload)
715 
716  \return the total number of rings = (new rings + old SSSRs)
717 
718  <b>Notes:</b>
719  - if no SSSR rings are found on the molecule - MolOps::findSSSR() is called
720  first
721 */
723  std::vector<std::vector<int>> &res);
724 //! \overload
726 
727 //@}
728 
729 //! \name Shortest paths and other matrices
730 //@{
731 
732 //! returns a molecule's adjacency matrix
733 /*!
734  \param mol the molecule of interest
735  \param useBO toggles use of bond orders in the matrix
736  \param emptyVal sets the empty value (for non-adjacent atoms)
737  \param force forces calculation of the matrix, even if already
738  computed
739  \param propNamePrefix used to set the cached property name
740 
741  \return the adjacency matrix.
742 
743  <b>Notes</b>
744  - The result of this is cached in the molecule's local property
745  dictionary, which will handle deallocation. The caller should <b>not</b> \c
746  delete this pointer.
747 
748 */
750  const ROMol &mol, bool useBO = false, int emptyVal = 0, bool force = false,
751  const char *propNamePrefix = nullptr,
752  const boost::dynamic_bitset<> *bondsToUse = nullptr);
753 
754 //! Computes the molecule's topological distance matrix
755 /*!
756  Uses the Floyd-Warshall all-pairs-shortest-paths algorithm.
757 
758  \param mol the molecule of interest
759  \param useBO toggles use of bond orders in the matrix
760  \param useAtomWts sets the diagonal elements of the result to
761  6.0/(atomic number) so that the matrix can be used to calculate
762  Balaban J values. This does not affect the bond weights.
763  \param force forces calculation of the matrix, even if already
764  computed
765  \param propNamePrefix used to set the cached property name
766 
767  \return the distance matrix.
768 
769  <b>Notes</b>
770  - The result of this is cached in the molecule's local property
771  dictionary, which will handle deallocation. The caller should <b>not</b> \c
772  delete this pointer.
773 
774 
775 */
777  const ROMol &mol, bool useBO = false, bool useAtomWts = false,
778  bool force = false, const char *propNamePrefix = nullptr);
779 
780 //! Computes the molecule's topological distance matrix
781 /*!
782  Uses the Floyd-Warshall all-pairs-shortest-paths algorithm.
783 
784  \param mol the molecule of interest
785  \param activeAtoms only elements corresponding to these atom indices
786  will be included in the calculation
787  \param bonds only bonds found in this list will be included in the
788  calculation
789  \param useBO toggles use of bond orders in the matrix
790  \param useAtomWts sets the diagonal elements of the result to
791  6.0/(atomic number) so that the matrix can be used to calculate
792  Balaban J values. This does not affect the bond weights.
793 
794  \return the distance matrix.
795 
796  <b>Notes</b>
797  - The results of this call are not cached, the caller <b>should</b> \c
798  delete
799  this pointer.
800 
801 
802 */
804  const ROMol &mol, const std::vector<int> &activeAtoms,
805  const std::vector<const Bond *> &bonds, bool useBO = false,
806  bool useAtomWts = false);
807 
808 //! Computes the molecule's 3D distance matrix
809 /*!
810 
811  \param mol the molecule of interest
812  \param confId the conformer to use
813  \param useAtomWts sets the diagonal elements of the result to
814  6.0/(atomic number)
815  \param force forces calculation of the matrix, even if already
816  computed
817  \param propNamePrefix used to set the cached property name
818  (if set to an empty string, the matrix will not be
819  cached)
820 
821  \return the distance matrix.
822 
823  <b>Notes</b>
824  - If propNamePrefix is not empty the result of this is cached in the
825  molecule's local property dictionary, which will handle deallocation.
826  In other cases the caller is responsible for freeing the memory.
827 
828 */
830  const ROMol &mol, int confId = -1, bool useAtomWts = false,
831  bool force = false, const char *propNamePrefix = nullptr);
832 //! Finds the shortest path between two atoms
833 /*!
834  Uses the Bellman-Ford algorithm
835 
836  \param mol molecule of interest
837  \param aid1 index of the first atom
838  \param aid2 index of the second atom
839 
840  \return a std::list with the indices of the atoms along the shortest
841  path
842 
843  <b>Notes:</b>
844  - the starting and ending atoms are included in the path
845  - if no path is found, an empty path is returned
846 
847 */
848 RDKIT_GRAPHMOL_EXPORT std::list<int> getShortestPath(const ROMol &mol, int aid1,
849  int aid2);
850 
851 //@}
852 
853 #if 0
854  //! \name Canonicalization
855  //@{
856 
857  //! assign a canonical ordering to a molecule's atoms
858  /*!
859  The algorithm used here is a modification of the published Daylight canonical
860  smiles algorithm (i.e. it uses atom invariants and products of primes).
861 
862  \param mol the molecule of interest
863  \param ranks used to return the ranks
864  \param breakTies toggles breaking of ties (see below)
865  \param includeChirality toggles inclusion of chirality in the invariants
866  \param includeIsotopes toggles inclusion of isotopes in the invariants
867  \param rankHistory used to return the rank history (see below)
868 
869  <b>Notes:</b>
870  - Tie breaking should be done when it's important to have a full ordering
871  of the atoms (e.g. when generating canonical traversal trees). If it's
872  acceptable to have ties between symmetry-equivalent atoms (e.g. when
873  generating CIP codes), tie breaking can/should be skipped.
874  - if the \c rankHistory argument is provided, the evolution of the ranks of
875  individual atoms will be tracked. The \c rankHistory pointer should be
876  to a VECT_INT_VECT that has at least \c mol.getNumAtoms() elements.
877  */
878  RDKIT_GRAPHMOL_EXPORT void rankAtoms(const ROMol &mol,std::vector<int> &ranks,
879  bool breakTies=true,
880  bool includeChirality=true,
881  bool includeIsotopes=true,
882  std::vector<std::vector<int> > *rankHistory=0);
883  //! assign a canonical ordering to a sub-molecule's atoms
884  /*!
885  The algorithm used here is a modification of the published Daylight canonical
886  smiles algorithm (i.e. it uses atom invariants and products of primes).
887 
888  \param mol the molecule of interest
889  \param atomsToUse atoms to be included
890  \param bondsToUse bonds to be included
891  \param atomSymbols symbols to use for the atoms in the output (these are
892  used in place of atomic number and isotope information)
893  \param ranks used to return the ranks
894  \param breakTies toggles breaking of ties (see below)
895  \param rankHistory used to return the rank history (see below)
896 
897  <b>Notes:</b>
898  - Tie breaking should be done when it's important to have a full ordering
899  of the atoms (e.g. when generating canonical traversal trees). If it's
900  acceptable to have ties between symmetry-equivalent atoms (e.g. when
901  generating CIP codes), tie breaking can/should be skipped.
902  - if the \c rankHistory argument is provided, the evolution of the ranks of
903  individual atoms will be tracked. The \c rankHistory pointer should be
904  to a VECT_INT_VECT that has at least \c mol.getNumAtoms() elements.
905  */
906  RDKIT_GRAPHMOL_EXPORT void rankAtomsInFragment(const ROMol &mol,std::vector<int> &ranks,
907  const boost::dynamic_bitset<> &atomsToUse,
908  const boost::dynamic_bitset<> &bondsToUse,
909  const std::vector<std::string> *atomSymbols=0,
910  const std::vector<std::string> *bondSymbols=0,
911  bool breakTies=true,
912  std::vector<std::vector<int> > *rankHistory=0);
913 
914  // @}
915 #endif
916 //! \name Stereochemistry
917 //@{
918 
919 //! removes bogus chirality markers (those on non-sp3 centers):
921 
922 //! \brief Uses a conformer to assign ChiralType to a molecule's atoms
923 /*!
924  \param mol the molecule of interest
925  \param confId the conformer to use
926  \param replaceExistingTags if this flag is true, any existing atomic chiral
927  tags will be replaced
928 
929  If the conformer provided is not a 3D conformer, nothing will be done.
930 */
932  ROMol &mol, int confId = -1, bool replaceExistingTags = true);
933 
934 //! \brief Uses a conformer to assign ChiralTypes to a molecule's atoms and
935 //! stereo flags to its bonds
936 /*!
937 
938  \param mol the molecule of interest
939  \param confId the conformer to use
940  \param replaceExistingTags if this flag is true, any existing info about
941  stereochemistry will be replaced
942 
943  If the conformer provided is not a 3D conformer, nothing will be done.
944 */
946  ROMol &mol, int confId = -1, bool replaceExistingTags = true);
947 
948 //! \brief Use bond directions to assign ChiralTypes to a molecule's atoms and
949 //! stereo flags to its bonds
950 /*!
951 
952  \param mol the molecule of interest
953  \param confId the conformer to use
954  \param replaceExistingTags if this flag is true, any existing info about
955  stereochemistry will be replaced
956 */
958  ROMol &mol, int confId = -1, bool replaceExistingTags = true);
959 
960 //! \deprecated: this function will be removed in a future release. Use
961 //! setDoubleBondNeighborDirections() instead
963  int confId = -1);
964 //! Sets bond directions based on double bond stereochemistry
966  ROMol &mol, const Conformer *conf = nullptr);
967 
968 //! Assign CIS/TRANS bond stereochemistry tags based on neighboring directions
970 
971 //! Assign stereochemistry tags to atoms (i.e. R/S) and bonds (i.e. Z/E)
972 /*!
973  Does the CIP stereochemistry assignment for the molecule's atoms
974  (R/S) and double bond (Z/E). Chiral atoms will have a property
975  '_CIPCode' indicating their chiral code.
976 
977  \param mol the molecule to use
978  \param cleanIt if true, any existing values of the property `_CIPCode`
979  will be cleared, atoms with a chiral specifier that aren't
980  actually chiral (e.g. atoms with duplicate
981  substituents or only 2 substituents, etc.) will have
982  their chiral code set to CHI_UNSPECIFIED. Bonds with
983  STEREOCIS/STEREOTRANS specified that have duplicate
984  substituents based upon the CIP atom ranks will be
985  marked STEREONONE.
986  \param force causes the calculation to be repeated even if it has
987  already been done
988  \param flagPossibleStereoCenters set the _ChiralityPossible property on
989  atoms that are possible stereocenters
990 
991  <b>Notes:</b>
992  - Throughout we assume that we're working with a hydrogen-suppressed
993  graph.
994 
995 */
997  ROMol &mol, bool cleanIt = false, bool force = false,
998  bool flagPossibleStereoCenters = false);
999 //! Removes all stereochemistry information from atoms (i.e. R/S) and bonds
1000 //! (i.e. Z/E)
1001 /*!
1002 
1003  \param mol the molecule of interest
1004 */
1006 
1007 //! \brief finds bonds that could be cis/trans in a molecule and mark them as
1008 //! Bond::STEREOANY.
1009 /*!
1010  \param mol the molecule of interest
1011  \param cleanIt toggles removal of stereo flags from double bonds that can
1012  not have stereochemistry
1013 
1014  This function finds any double bonds that can potentially be part of
1015  a cis/trans system. No attempt is made here to mark them cis or
1016  trans. No attempt is made to detect double bond stereo in ring systems.
1017 
1018  This function is useful in the following situations:
1019  - when parsing a mol file; for the bonds marked here, coordinate
1020  information on the neighbors can be used to identify cis or trans states
1021  - when writing a mol file; bonds that can be cis/trans but not marked as
1022  either need to be specially marked in the mol file
1023  - finding double bonds with unspecified stereochemistry so they
1024  can be enumerated for downstream 3D tools
1025 
1026  The CIPranks on the neighboring atoms are checked in this function. The
1027  _CIPCode property is set to any on the double bond.
1028 */
1030  bool cleanIt = false);
1031 //@}
1032 
1033 //! \brief Uses the molParity atom property to assign ChiralType to a molecule's
1034 //! atoms
1035 /*!
1036  \param mol the molecule of interest
1037  \param replaceExistingTags if this flag is true, any existing atomic chiral
1038  tags will be replaced
1039 */
1041  ROMol &mol, bool replaceExistingTags = true);
1042 
1043 //! returns the number of atoms which have a particular property set
1045  const ROMol &mol, std::string prop);
1046 
1047 }; // end of namespace MolOps
1048 }; // end of namespace RDKit
1049 
1050 #endif
RDKIT_GRAPHMOL_EXPORT const int ci_LOCAL_INF
The class for representing atoms.
Definition: Atom.h:69
The class for representing 2D or 3D conformation of a molecule.
Definition: Conformer.h:43
RWMol is a molecule class that is intended to be edited.
Definition: RWMol.h:31
#define RDKIT_GRAPHMOL_EXPORT
Definition: export.h:346
RDKIT_GRAPHMOL_EXPORT double * get3DDistanceMat(const ROMol &mol, int confId=-1, bool useAtomWts=false, bool force=false, const char *propNamePrefix=nullptr)
Computes the molecule's 3D distance matrix.
RDKIT_GRAPHMOL_EXPORT void cleanUp(RWMol &mol)
Designed to be called by the sanitizer to handle special cases before.
RDKIT_GRAPHMOL_EXPORT void assignStereochemistry(ROMol &mol, bool cleanIt=false, bool force=false, bool flagPossibleStereoCenters=false)
Assign stereochemistry tags to atoms (i.e. R/S) and bonds (i.e. Z/E)
RDKIT_GRAPHMOL_EXPORT std::vector< std::unique_ptr< MolSanitizeException > > detectChemistryProblems(const ROMol &mol, unsigned int sanitizeOps=SANITIZE_ALL)
Identifies chemistry problems (things that don't make chemical sense) in a molecule.
RDKIT_GRAPHMOL_EXPORT double * getAdjacencyMatrix(const ROMol &mol, bool useBO=false, int emptyVal=0, bool force=false, const char *propNamePrefix=nullptr, const boost::dynamic_bitset<> *bondsToUse=nullptr)
returns a molecule's adjacency matrix
RDKIT_GRAPHMOL_EXPORT ROMol * mergeQueryHs(const ROMol &mol, bool mergeUnmappedOnly=false)
RDKIT_GRAPHMOL_EXPORT void assignChiralTypesFromBondDirs(ROMol &mol, int confId=-1, bool replaceExistingTags=true)
Use bond directions to assign ChiralTypes to a molecule's atoms and stereo flags to its bonds.
RDKIT_GRAPHMOL_EXPORT int setAromaticity(RWMol &mol, AromaticityModel model=AROMATICITY_DEFAULT, int(*func)(RWMol &)=nullptr)
Sets up the aromaticity for a molecule.
RDKIT_GRAPHMOL_EXPORT void findRingFamilies(const ROMol &mol)
RDKIT_GRAPHMOL_EXPORT void fastFindRings(const ROMol &mol)
use a DFS algorithm to identify ring bonds and atoms in a molecule
RDKIT_GRAPHMOL_EXPORT double * getDistanceMat(const ROMol &mol, bool useBO=false, bool useAtomWts=false, bool force=false, const char *propNamePrefix=nullptr)
Computes the molecule's topological distance matrix.
RDKIT_GRAPHMOL_EXPORT int getFormalCharge(const ROMol &mol)
sums up all atomic formal charges and returns the result
AromaticityModel
Possible aromaticity models.
Definition: MolOps.h:521
@ AROMATICITY_RDKIT
Definition: MolOps.h:523
@ AROMATICITY_MDL
Definition: MolOps.h:525
@ AROMATICITY_CUSTOM
use a function
Definition: MolOps.h:526
@ AROMATICITY_DEFAULT
future proofing
Definition: MolOps.h:522
@ AROMATICITY_SIMPLE
Definition: MolOps.h:524
RDKIT_GRAPHMOL_EXPORT std::map< T, boost::shared_ptr< ROMol > > getMolFragsWithQuery(const ROMol &mol, T(*query)(const ROMol &, const Atom *), bool sanitizeFrags=true, const std::vector< T > *whiteList=nullptr, bool negateList=false)
splits a molecule into pieces based on labels assigned using a query
RDKIT_GRAPHMOL_EXPORT void removeStereochemistry(ROMol &mol)
Removes all stereochemistry information from atoms (i.e. R/S) and bonds (i.e. Z/E).
RDKIT_GRAPHMOL_EXPORT ROMol * addHs(const ROMol &mol, bool explicitOnly=false, bool addCoords=false, const UINT_VECT *onlyOnAtoms=nullptr, bool addResidueInfo=false)
returns a copy of a molecule with hydrogens added in as explicit Atoms
RDKIT_GRAPHMOL_EXPORT double computeBalabanJ(const ROMol &mol, bool useBO=true, bool force=false, const std::vector< int > *bondPath=nullptr, bool cacheIt=true)
calculates Balaban's J index for the molecule
RDKIT_GRAPHMOL_EXPORT void assignChiralTypesFromMolParity(ROMol &mol, bool replaceExistingTags=true)
Uses the molParity atom property to assign ChiralType to a molecule's atoms.
RDKIT_GRAPHMOL_EXPORT unsigned int getMolFrags(const ROMol &mol, std::vector< int > &mapping)
find fragments (disconnected components of the molecular graph)
RDKIT_GRAPHMOL_EXPORT void adjustHs(RWMol &mol)
adjust the number of implicit and explicit Hs for special cases
RDKIT_GRAPHMOL_EXPORT void assignStereochemistryFrom3D(ROMol &mol, int confId=-1, bool replaceExistingTags=true)
Uses a conformer to assign ChiralTypes to a molecule's atoms and stereo flags to its bonds.
@ SANITIZE_ALL
Definition: MolOps.h:441
@ SANITIZE_SETAROMATICITY
Definition: MolOps.h:436
@ SANITIZE_NONE
Definition: MolOps.h:430
@ SANITIZE_PROPERTIES
Definition: MolOps.h:432
@ SANITIZE_SETCONJUGATION
Definition: MolOps.h:437
@ SANITIZE_SYMMRINGS
Definition: MolOps.h:433
@ SANITIZE_ADJUSTHS
Definition: MolOps.h:440
@ SANITIZE_CLEANUPCHIRALITY
Definition: MolOps.h:439
@ SANITIZE_FINDRADICALS
Definition: MolOps.h:435
@ SANITIZE_KEKULIZE
Definition: MolOps.h:434
@ SANITIZE_SETHYBRIDIZATION
Definition: MolOps.h:438
@ SANITIZE_CLEANUP
Definition: MolOps.h:431
RDKIT_GRAPHMOL_EXPORT int countAtomElec(const Atom *at)
return the number of electrons available on an atom to donate for aromaticity
RDKIT_GRAPHMOL_EXPORT void detectBondStereochemistry(ROMol &mol, int confId=-1)
RDKIT_GRAPHMOL_EXPORT void sanitizeMol(RWMol &mol, unsigned int &operationThatFailed, unsigned int sanitizeOps=SANITIZE_ALL)
carries out a collection of tasks for cleaning up a molecule and
RDKIT_GRAPHMOL_EXPORT ROMol * removeHs(const ROMol &mol, bool implicitOnly=false, bool updateExplicitCount=false, bool sanitize=true)
returns a copy of a molecule with hydrogens removed
RDKIT_GRAPHMOL_EXPORT void parseAdjustQueryParametersFromJSON(MolOps::AdjustQueryParameters &p, const std::string &json)
updates an AdjustQueryParameters object from a JSON string
RDKIT_GRAPHMOL_EXPORT void removeAllHs(RWMol &mol, bool sanitize=true)
removes all Hs from a molecule
RDKIT_GRAPHMOL_EXPORT ROMol * adjustQueryProperties(const ROMol &mol, const AdjustQueryParameters *params=nullptr)
returns a copy of a molecule with query properties adjusted
RDKIT_GRAPHMOL_EXPORT void setBondStereoFromDirections(ROMol &mol)
Assign CIS/TRANS bond stereochemistry tags based on neighboring directions.
RDKIT_GRAPHMOL_EXPORT ROMol * renumberAtoms(const ROMol &mol, const std::vector< unsigned int > &newOrder)
returns a copy of a molecule with the atoms renumbered
RDKIT_GRAPHMOL_EXPORT int findSSSR(const ROMol &mol, std::vector< std::vector< int >> &res)
finds a molecule's Smallest Set of Smallest Rings
RDKIT_GRAPHMOL_EXPORT bool atomHasConjugatedBond(const Atom *at)
returns whether or not the given Atom is involved in a conjugated bond
RDKIT_GRAPHMOL_EXPORT void cleanupChirality(RWMol &mol)
removes bogus chirality markers (those on non-sp3 centers):
RDKIT_GRAPHMOL_EXPORT void Kekulize(RWMol &mol, bool markAtomsBonds=true, unsigned int maxBackTracks=100)
Kekulizes the molecule.
RDKIT_GRAPHMOL_EXPORT void assignRadicals(RWMol &mol)
Called by the sanitizer to assign radical counts to atoms.
RDKIT_GRAPHMOL_EXPORT void findPotentialStereoBonds(ROMol &mol, bool cleanIt=false)
finds bonds that could be cis/trans in a molecule and mark them as Bond::STEREOANY.
RDKIT_GRAPHMOL_EXPORT void setHybridization(ROMol &mol)
calculates and sets the hybridization of all of a molecule's Atoms
RDKIT_GRAPHMOL_EXPORT std::list< int > getShortestPath(const ROMol &mol, int aid1, int aid2)
Find the shortest path between two atoms.
RDKIT_GRAPHMOL_EXPORT unsigned getNumAtomsWithDistinctProperty(const ROMol &mol, std::string prop)
returns the number of atoms which have a particular property set
RDKIT_GRAPHMOL_EXPORT void assignChiralTypesFrom3D(ROMol &mol, int confId=-1, bool replaceExistingTags=true)
Uses a conformer to assign ChiralType to a molecule's atoms.
RDKIT_GRAPHMOL_EXPORT int symmetrizeSSSR(ROMol &mol, std::vector< std::vector< int >> &res)
symmetrize the molecule's Smallest Set of Smallest Rings
RDKIT_GRAPHMOL_EXPORT void setConjugation(ROMol &mol)
flags the molecule's conjugated bonds
RDKIT_GRAPHMOL_EXPORT void setDoubleBondNeighborDirections(ROMol &mol, const Conformer *conf=nullptr)
Sets bond directions based on double bond stereochemistry.
AdjustQueryWhichFlags
Definition: MolOps.h:309
@ ADJUST_IGNORERINGS
Definition: MolOps.h:312
@ ADJUST_IGNORENONE
Definition: MolOps.h:310
@ ADJUST_IGNOREMAPPED
Definition: MolOps.h:315
@ ADJUST_IGNORENONDUMMIES
Definition: MolOps.h:314
@ ADJUST_IGNOREDUMMIES
Definition: MolOps.h:313
@ ADJUST_IGNORECHAINS
Definition: MolOps.h:311
@ ADJUST_IGNOREALL
Definition: MolOps.h:316
Std stuff.
Definition: Abbreviations.h:17
std::vector< double > INVAR_VECT
Definition: MolOps.h:31
INVAR_VECT::iterator INVAR_VECT_I
Definition: MolOps.h:33
INVAR_VECT::const_iterator INVAR_VECT_CI
Definition: MolOps.h:34
std::vector< UINT > UINT_VECT
Definition: types.h:290
Parameters controlling the behavior of MolOps::adjustQueryProperties.
Definition: MolOps.h:328
static AdjustQueryParameters noAdjustments()
returns an AdjustQueryParameters object with all adjustments disabled
Definition: MolOps.h:378