RDKit
Open-source cheminformatics and machine learning.
Reaction.h
Go to the documentation of this file.
1 //
2 // Copyright (c) 2007-2018, Novartis Institutes for BioMedical Research Inc.
3 // All rights reserved.
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 // * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 // * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following
13 // disclaimer in the documentation and/or other materials provided
14 // with the distribution.
15 // * Neither the name of Novartis Institutes for BioMedical Research Inc.
16 // nor the names of its contributors may be used to endorse or promote
17 // products derived from this software without specific prior written
18 // permission.
19 //
20 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 //
32 
33 #include <RDGeneral/export.h>
34 #ifndef RD_REACTION_H_17Aug2006
35 #define RD_REACTION_H_17Aug2006
36 
37 #include <GraphMol/RDKitBase.h>
38 #include <RDGeneral/RDProps.h>
40 #include <vector>
41 
42 namespace RDKit {
43 class ReactionPickler;
44 
45 //! used to indicate an error in the chemical reaction engine
47  : public std::exception {
48  public:
49  //! construct with an error message
50  explicit ChemicalReactionException(const char *msg) : _msg(msg){};
51  //! construct with an error message
52  explicit ChemicalReactionException(const std::string msg) : _msg(msg){};
53  //! get the error message
54  const char *what() const noexcept override { return _msg.c_str(); };
56 
57  private:
58  std::string _msg;
59 };
60 
61 //! This is a class for storing and applying general chemical reactions.
62 /*!
63  basic usage will be something like:
64 
65  \verbatim
66  ChemicalReaction rxn;
67  rxn.addReactantTemplate(r1);
68  rxn.addReactantTemplate(r2);
69  rxn.addProductTemplate(p1);
70  rxn.initReactantMatchers();
71 
72  MOL_SPTR_VECT prods;
73  for(MOL_SPTR_VECT::const_iterator r1It=reactantSet1.begin();
74  r1It!=reactantSet1.end();++r1It;){
75  for(MOL_SPTR_VECT::const_iterator r2It=reactantSet2.begin();
76  r2It!=reactantSet2.end();++r2It;){
77  MOL_SPTR_VECT rVect(2);
78  rVect[0] = *r1It;
79  rVect[1] = *r2It;
80 
81  std::vector<MOL_SPTR_VECT> lprods;
82  lprods = rxn.runReactants(rVect);
83  for(std::vector<MOL_SPTR_VECT>::const_iterator lpIt=lprods.begin();
84  lpIt!=lprods.end();++lpIt){
85  // we know this is a single-product reaction:
86  prods.push_back((*lpIt)[0]);
87  }
88  }
89  }
90  \endverbatim
91 
92  NOTES:
93  - to allow more control over the reaction, it is possible to flag reactant
94  atoms as being protected by setting the common_properties::_protected
95  property on those
96  atoms. Here's an example:
97  \verbatim
98  std::string smi="[O:1]>>[N:1]";
99  ChemicalReaction *rxn = RxnSmartsToChemicalReaction(smi);
100  rxn->initReactantMatchers();
101 
102  MOL_SPTR_VECT reacts;
103  reacts.clear();
104  smi = "OCO";
105  ROMol *mol = SmilesToMol(smi);
106  reacts.push_back(ROMOL_SPTR(mol));
107  std::vector<MOL_SPTR_VECT> prods;
108  prods = rxn->runReactants(reacts);
109  // here prods has two entries, because there are two Os in the
110  // reactant.
111 
112  reacts[0]->getAtomWithIdx(0)->setProp(common_properties::_protected,1);
113  prods = rxn->runReactants(reacts);
114  // here prods only has one entry, the reaction at atom 0
115  // has been blocked by the _protected property
116  \endverbatim
117 
118 */
120  friend class ReactionPickler;
121 
122  public:
125  df_needsInit = other.df_needsInit;
126  df_implicitProperties = other.df_implicitProperties;
127  for (MOL_SPTR_VECT::const_iterator iter = other.beginReactantTemplates();
128  iter != other.endReactantTemplates(); ++iter) {
129  ROMol *reactant = new ROMol(**iter);
130  m_reactantTemplates.push_back(ROMOL_SPTR(reactant));
131  }
132  for (MOL_SPTR_VECT::const_iterator iter = other.beginProductTemplates();
133  iter != other.endProductTemplates(); ++iter) {
134  ROMol *product = new ROMol(**iter);
135  m_productTemplates.push_back(ROMOL_SPTR(product));
136  }
137  for (MOL_SPTR_VECT::const_iterator iter = other.beginAgentTemplates();
138  iter != other.endAgentTemplates(); ++iter) {
139  ROMol *agent = new ROMol(**iter);
140  m_agentTemplates.push_back(ROMOL_SPTR(agent));
141  }
142  d_props = other.d_props;
143  }
144  //! construct a reaction from a pickle string
145  ChemicalReaction(const std::string &binStr);
146 
147  //! Adds a new reactant template
148  /*!
149  \return the number of reactants
150 
151  */
152  unsigned int addReactantTemplate(ROMOL_SPTR mol) {
153  this->df_needsInit = true;
154  this->m_reactantTemplates.push_back(mol);
155  return rdcast<unsigned int>(this->m_reactantTemplates.size());
156  }
157 
158  //! Adds a new agent template
159  /*!
160  \return the number of agent
161 
162  */
163  unsigned int addAgentTemplate(ROMOL_SPTR mol) {
164  this->m_agentTemplates.push_back(mol);
165  return rdcast<unsigned int>(this->m_agentTemplates.size());
166  }
167 
168  //! Adds a new product template
169  /*!
170  \return the number of products
171 
172  */
173  unsigned int addProductTemplate(ROMOL_SPTR mol) {
174  this->m_productTemplates.push_back(mol);
175  return rdcast<unsigned int>(this->m_productTemplates.size());
176  }
177 
178  //! Removes the reactant templates from a reaction if atom mapping ratio is
179  // below a given threshold
180  /*! By default the removed reactant templates were attached to the agent
181  templates.
182  An alternative will be to provide a pointer to a molecule vector where
183  these reactants should be saved.
184  */
185  void removeUnmappedReactantTemplates(double thresholdUnmappedAtoms = 0.2,
186  bool moveToAgentTemplates = true,
187  MOL_SPTR_VECT *targetVector = nullptr);
188 
189  //! Removes the product templates from a reaction if its atom mapping ratio is
190  // below a given threshold
191  /*! By default the removed products templates were attached to the agent
192  templates.
193  An alternative will be to provide a pointer to a molecule vector where
194  these products should be saved.
195  */
196  void removeUnmappedProductTemplates(double thresholdUnmappedAtoms = 0.2,
197  bool moveToAgentTemplates = true,
198  MOL_SPTR_VECT *targetVector = nullptr);
199 
200  /*! Removes the agent templates from a reaction if a pointer to a
201  molecule vector is provided the agents are stored therein.*/
202  void removeAgentTemplates(MOL_SPTR_VECT *targetVector = nullptr);
203 
204  //! Runs the reaction on a set of reactants
205  /*!
206 
207  \param reactants the reactants to be used. The length of this must be equal
208  to this->getNumReactantTemplates()
209  \param maxProducts: if non zero, the maximum number of products to generate
210  before stopping. If hit a warning will be generated.
211 
212  \return a vector of vectors of products. Each subvector will be
213  this->getNumProductTemplates() long.
214 
215  We return a vector of vectors of products because each individual template
216  may map multiple times onto its reactant. This leads to multiple possible
217  result sets.
218  */
219  std::vector<MOL_SPTR_VECT> runReactants(
220  const MOL_SPTR_VECT reactants, unsigned int numProducts = 1000) const;
221 
222  //! Runs a single reactant against a single reactant template
223  /*!
224  \param reactant The single reactant to use
225 
226  \param reactantTemplateIdx the reactant template to target in the reaction
227  */
228  std::vector<MOL_SPTR_VECT> runReactant(
229  ROMOL_SPTR reactant, unsigned int reactantTemplateIdx) const;
230 
231  const MOL_SPTR_VECT &getReactants() const {
232  return this->m_reactantTemplates;
233  }
234  const MOL_SPTR_VECT &getAgents() const { return this->m_agentTemplates; }
235  const MOL_SPTR_VECT &getProducts() const { return this->m_productTemplates; }
236 
237  MOL_SPTR_VECT::const_iterator beginReactantTemplates() const {
238  return this->m_reactantTemplates.begin();
239  }
240  MOL_SPTR_VECT::const_iterator endReactantTemplates() const {
241  return this->m_reactantTemplates.end();
242  }
243 
244  MOL_SPTR_VECT::const_iterator beginProductTemplates() const {
245  return this->m_productTemplates.begin();
246  }
247  MOL_SPTR_VECT::const_iterator endProductTemplates() const {
248  return this->m_productTemplates.end();
249  }
250 
251  MOL_SPTR_VECT::const_iterator beginAgentTemplates() const {
252  return this->m_agentTemplates.begin();
253  }
254  MOL_SPTR_VECT::const_iterator endAgentTemplates() const {
255  return this->m_agentTemplates.end();
256  }
257 
258  MOL_SPTR_VECT::iterator beginReactantTemplates() {
259  return this->m_reactantTemplates.begin();
260  }
261  MOL_SPTR_VECT::iterator endReactantTemplates() {
262  return this->m_reactantTemplates.end();
263  }
264 
265  MOL_SPTR_VECT::iterator beginProductTemplates() {
266  return this->m_productTemplates.begin();
267  }
268  MOL_SPTR_VECT::iterator endProductTemplates() {
269  return this->m_productTemplates.end();
270  }
271 
272  MOL_SPTR_VECT::iterator beginAgentTemplates() {
273  return this->m_agentTemplates.begin();
274  }
275  MOL_SPTR_VECT::iterator endAgentTemplates() {
276  return this->m_agentTemplates.end();
277  }
278  unsigned int getNumReactantTemplates() const {
279  return rdcast<unsigned int>(this->m_reactantTemplates.size());
280  };
281  unsigned int getNumProductTemplates() const {
282  return rdcast<unsigned int>(this->m_productTemplates.size());
283  };
284  unsigned int getNumAgentTemplates() const {
285  return rdcast<unsigned int>(this->m_agentTemplates.size());
286  };
287 
288  //! initializes our internal reactant-matching datastructures.
289  /*!
290  This must be called after adding reactants and before calling
291  runReactants.
292 
293  \param silent: If this bool is true, no messages will be logged during the
294  validation. By default, validation problems are reported to the warning
295  and error logs depending on their severity.
296  */
297  void initReactantMatchers(bool silent = false);
298 
299  bool isInitialized() const { return !df_needsInit; };
300 
301  //! validates the reactants and products to make sure the reaction seems
302  //"reasonable"
303  /*!
304  \return true if the reaction validates without errors (warnings do not
305  stop validation)
306 
307  \param numWarnings used to return the number of validation warnings
308  \param numErrors used to return the number of validation errors
309 
310  \param silent: If this bool is true, no messages will be logged during the
311  validation. By default, validation problems are reported to the warning
312  and error logs depending on their severity.
313 
314  */
315  bool validate(unsigned int &numWarnings, unsigned int &numErrors,
316  bool silent = false) const;
317 
318  //! returns whether or not the reaction uses implicit
319  //! properties on the product atoms
320  /*!
321 
322  This toggles whether or not unspecified atomic properties in the
323  products are considered to be implicit and should be copied from
324  the actual reactants. This is necessary due to a semantic difference
325  between the "reaction SMARTS" approach and the MDL RXN
326  approach:
327  In "reaction SMARTS", this reaction:
328  [C:1]-[Br:2].[O-:3]>>[C:1]-[O:3].[Br-:2]
329  applied to [CH4+]Br should yield [CH4+]O
330  Something similar drawn in an rxn file, and applied to
331  [CH4+]Br should yield [CH3]O.
332  In rxn there is no charge on the product C because nothing is
333  specified in the rxn file; in "SMARTS" the charge from the
334  actual reactants is not *removed* because no charge is
335  specified in the reaction.
336 
337  */
338  bool getImplicitPropertiesFlag() const { return df_implicitProperties; };
339  //! sets the implicit properties flag. See the documentation for
340  //! getImplicitProertiesFlag() for a discussion of what this means.
341  void setImplicitPropertiesFlag(bool val) { df_implicitProperties = val; };
342 
343  private:
344  bool df_needsInit{true};
345  bool df_implicitProperties{false};
346  MOL_SPTR_VECT m_reactantTemplates, m_productTemplates, m_agentTemplates;
347  ChemicalReaction &operator=(const ChemicalReaction &); // disable assignment
348 };
349 
350 //! tests whether or not the molecule has a substructure match
351 //! to any of the reaction's reactants
352 //! the \c which argument is used to return which of the reactants
353 //! the molecule matches. If there's no match, it is equal to the number
354 //! of reactants on return
356  const ChemicalReaction &rxn, const ROMol &mol, unsigned int &which);
357 //! \overload
359  const ChemicalReaction &rxn, const ROMol &mol);
360 
361 //! tests whether or not the molecule has a substructure match
362 //! to any of the reaction's products
363 //! the \c which argument is used to return which of the products
364 //! the molecule matches. If there's no match, it is equal to the number
365 //! of products on return
367  const ChemicalReaction &rxn, const ROMol &mol, unsigned int &which);
368 //! \overload
370  const ChemicalReaction &rxn, const ROMol &mol);
371 
372 //! tests whether or not the molecule has a substructure match
373 //! to any of the reaction's agents
374 //! the \c which argument is used to return which of the agents
375 //! the molecule matches. If there's no match, it is equal to the number
376 //! of agents on return
378  const ChemicalReaction &rxn, const ROMol &mol, unsigned int &which);
379 //! \overload
381  const ChemicalReaction &rxn, const ROMol &mol);
382 
383 //! returns indices of the atoms in each reactant that are changed
384 //! in the reaction
385 /*!
386  \param rxn the reaction we are interested in
387 
388  \param mappedAtomsOnly if set, atoms that are not mapped will not be included
389  in the list of changed atoms (otherwise they are automatically included)
390 
391  How are changed atoms recognized?
392  1) Atoms whose degree changes
393  2) Atoms whose bonding pattern changes
394  3) unmapped atoms (unless the mappedAtomsOnly flag is set)
395  4) Atoms connected to unmapped atoms
396  5) Atoms whose atomic number changes (unless the
397  corresponding product atom is a dummy)
398  6) Atoms with more than one atomic number query (unless the
399  corresponding product atom is a dummy)
400 
401  Note that the atomic number of a query atom depends on how it's constructed.
402  When coming from SMARTS: if the first query is an atomic label/number that
403  sets the atomic number, otherwise it's zero.
404  For example [O;$(OC)] is atomic number 8 while [$(OC);O] is atomic
405  number 0.
406  When coming from RXN: the atomic number of the atom in the rxn file sets
407  the value.
408  */
410 getReactingAtoms(const ChemicalReaction &rxn, bool mappedAtomsOnly = false);
411 
412 //! add the recursive queries to the reactants of a reaction
413 /*!
414  This does its work using RDKit::addRecursiveQueries()
415 
416  \param rxn the reaction we are interested in
417  \param queries - the dictionary of named queries to add
418  \param propName - the atom property to use to get query names
419  optional:
420  \param reactantLabels - to store pairs of (atom index, query string)
421  per reactant
422 
423  NOTES:
424  - existing query information, if present, will be supplemented (AND logic)
425  - non-query atoms will be replaced with query atoms using only the query
426  logic
427  - query names can be present as comma separated lists, they will then
428  be combined using OR logic.
429  - throws a KeyErrorException if a particular query name is not present
430  in \c queries
431 
432  */
434  ChemicalReaction &rxn, const std::map<std::string, ROMOL_SPTR> &queries,
435  const std::string &propName,
436  std::vector<std::vector<std::pair<unsigned int, std::string>>>
437  *reactantLabels = nullptr);
438 
439 } // namespace RDKit
440 
441 namespace RDDepict {
442 //! \brief Generate 2D coordinates (a depiction) for a reaction
443 /*!
444 
445  \param rxn the reaction we are interested in
446 
447  \param spacing the spacing between components of the reaction
448 
449  \param updateProps if set, properties such as conjugation and
450  hybridization will be calculated for the reactant and product
451  templates before generating coordinates. This should result in
452  better depictions, but can lead to errors in some cases.
453 
454  \param canonOrient canonicalize the orientation so that the long
455  axes align with the x-axis etc.
456 
457  \param nFlipsPerSample - the number of rotatable bonds that are
458  flipped at random for each sample
459 
460  \param nSamples - the number of samples
461 
462  \param sampleSeed - seed for the random sampling process
463 
464  \param permuteDeg4Nodes - try permuting the drawing order of bonds around
465  atoms with four neighbors in order to improve the depiction
466 
467  for the other parameters see the documentation for compute2DCoords()
468 
469 */
471  RDKit::ChemicalReaction &rxn, double spacing = 2.0, bool updateProps = true,
472  bool canonOrient = false, unsigned int nFlipsPerSample = 0,
473  unsigned int nSamples = 0, int sampleSeed = 0,
474  bool permuteDeg4Nodes = false);
475 
476 } // namespace RDDepict
477 
478 #endif
pulls in the core RDKit functionality
used to indicate an error in the chemical reaction engine
Definition: Reaction.h:47
ChemicalReactionException(const char *msg)
construct with an error message
Definition: Reaction.h:50
ChemicalReactionException(const std::string msg)
construct with an error message
Definition: Reaction.h:52
~ChemicalReactionException() noexcept
Definition: Reaction.h:55
const char * what() const noexcept override
get the error message
Definition: Reaction.h:54
This is a class for storing and applying general chemical reactions.
Definition: Reaction.h:119
unsigned int addProductTemplate(ROMOL_SPTR mol)
Adds a new product template.
Definition: Reaction.h:173
unsigned int addAgentTemplate(ROMOL_SPTR mol)
Adds a new agent template.
Definition: Reaction.h:163
unsigned int addReactantTemplate(ROMOL_SPTR mol)
Adds a new reactant template.
Definition: Reaction.h:152
unsigned int getNumAgentTemplates() const
Definition: Reaction.h:284
bool getImplicitPropertiesFlag() const
Definition: Reaction.h:338
std::vector< MOL_SPTR_VECT > runReactants(const MOL_SPTR_VECT reactants, unsigned int numProducts=1000) const
Runs the reaction on a set of reactants.
unsigned int getNumReactantTemplates() const
Definition: Reaction.h:278
ChemicalReaction(const std::string &binStr)
construct a reaction from a pickle string
MOL_SPTR_VECT::iterator beginProductTemplates()
Definition: Reaction.h:265
void removeUnmappedReactantTemplates(double thresholdUnmappedAtoms=0.2, bool moveToAgentTemplates=true, MOL_SPTR_VECT *targetVector=nullptr)
Removes the reactant templates from a reaction if atom mapping ratio is.
const MOL_SPTR_VECT & getProducts() const
Definition: Reaction.h:235
std::vector< MOL_SPTR_VECT > runReactant(ROMOL_SPTR reactant, unsigned int reactantTemplateIdx) const
Runs a single reactant against a single reactant template.
MOL_SPTR_VECT::const_iterator beginProductTemplates() const
Definition: Reaction.h:244
void initReactantMatchers(bool silent=false)
initializes our internal reactant-matching datastructures.
MOL_SPTR_VECT::const_iterator endReactantTemplates() const
Definition: Reaction.h:240
void setImplicitPropertiesFlag(bool val)
Definition: Reaction.h:341
MOL_SPTR_VECT::iterator endProductTemplates()
Definition: Reaction.h:268
unsigned int getNumProductTemplates() const
Definition: Reaction.h:281
const MOL_SPTR_VECT & getAgents() const
Definition: Reaction.h:234
const MOL_SPTR_VECT & getReactants() const
Definition: Reaction.h:231
MOL_SPTR_VECT::const_iterator endProductTemplates() const
Definition: Reaction.h:247
bool isInitialized() const
Definition: Reaction.h:299
ChemicalReaction(const ChemicalReaction &other)
Definition: Reaction.h:124
MOL_SPTR_VECT::const_iterator beginAgentTemplates() const
Definition: Reaction.h:251
void removeAgentTemplates(MOL_SPTR_VECT *targetVector=nullptr)
MOL_SPTR_VECT::const_iterator beginReactantTemplates() const
Definition: Reaction.h:237
MOL_SPTR_VECT::iterator beginReactantTemplates()
Definition: Reaction.h:258
MOL_SPTR_VECT::iterator endAgentTemplates()
Definition: Reaction.h:275
MOL_SPTR_VECT::iterator beginAgentTemplates()
Definition: Reaction.h:272
MOL_SPTR_VECT::iterator endReactantTemplates()
Definition: Reaction.h:261
void removeUnmappedProductTemplates(double thresholdUnmappedAtoms=0.2, bool moveToAgentTemplates=true, MOL_SPTR_VECT *targetVector=nullptr)
Removes the product templates from a reaction if its atom mapping ratio is.
MOL_SPTR_VECT::const_iterator endAgentTemplates() const
Definition: Reaction.h:254
bool validate(unsigned int &numWarnings, unsigned int &numErrors, bool silent=false) const
validates the reactants and products to make sure the reaction seems
Dict d_props
Definition: RDProps.h:16
handles pickling (serializing) reactions
#define RDKIT_CHEMREACTIONS_EXPORT
Definition: export.h:86
RDKIT_CHEMREACTIONS_EXPORT void compute2DCoordsForReaction(RDKit::ChemicalReaction &rxn, double spacing=2.0, bool updateProps=true, bool canonOrient=false, unsigned int nFlipsPerSample=0, unsigned int nSamples=0, int sampleSeed=0, bool permuteDeg4Nodes=false)
Generate 2D coordinates (a depiction) for a reaction.
Std stuff.
Definition: Abbreviations.h:17
RDKIT_CHEMREACTIONS_EXPORT bool isMoleculeProductOfReaction(const ChemicalReaction &rxn, const ROMol &mol, unsigned int &which)
std::vector< INT_VECT > VECT_INT_VECT
Definition: types.h:285
RDKIT_CHEMREACTIONS_EXPORT bool isMoleculeAgentOfReaction(const ChemicalReaction &rxn, const ROMol &mol, unsigned int &which)
RDKIT_CHEMREACTIONS_EXPORT VECT_INT_VECT getReactingAtoms(const ChemicalReaction &rxn, bool mappedAtomsOnly=false)
boost::shared_ptr< ROMol > ROMOL_SPTR
RDKIT_CHEMREACTIONS_EXPORT void addRecursiveQueriesToReaction(ChemicalReaction &rxn, const std::map< std::string, ROMOL_SPTR > &queries, const std::string &propName, std::vector< std::vector< std::pair< unsigned int, std::string >>> *reactantLabels=nullptr)
add the recursive queries to the reactants of a reaction
std::vector< boost::shared_ptr< ROMol > > MOL_SPTR_VECT
Definition: FragCatParams.h:20
RDKIT_CHEMREACTIONS_EXPORT bool isMoleculeReactantOfReaction(const ChemicalReaction &rxn, const ROMol &mol, unsigned int &which)