Пример #1
0
// Fills the InChI input structure for the given molecule.
//
// mol    - molecule to convert
// input  - receives raw pointers into `atoms` and `stereo` (and into the
//          `options` member), so those arrays must stay alive and must not be
//          resized while `input` is in use
// atoms  - output storage: one inchi_Atom record per molecule vertex
// stereo - output storage: 0D stereo records for cis-trans bonds and
//          tetrahedral stereocenters
//
// Throws IndigoError if the molecule contains a pseudoatom or an R-site.
void IndigoInchi::generateInchiInput (Molecule &mol, inchi_Input &input,
   Array<inchi_Atom> &atoms, Array<inchi_Stereo0D> &stereo)
{
   // mapping[v] is the consecutive zero-based index of molecule vertex v in
   // the `atoms` array; slots that do not correspond to a vertex keep the
   // value written by fffill()
   QS_DEF(Array<int>, mapping);
   mapping.clear_resize(mol.vertexEnd());
   mapping.fffill();
   int index = 0;
   for (int v = mol.vertexBegin(); v != mol.vertexEnd(); v = mol.vertexNext(v))
      mapping[v] = index++;
   atoms.clear_resize(index);
   atoms.zerofill();

   stereo.clear();
   for (int v = mol.vertexBegin(); v != mol.vertexEnd(); v = mol.vertexNext(v))
   {
      inchi_Atom &atom = atoms[mapping[v]];

      int atom_number = mol.getAtomNumber(v);
      if (atom_number == ELEM_PSEUDO)
         throw IndigoError("Molecule with pseudoatom (%s) cannot be converted into InChI", mol.getPseudoAtom(v));
      if (atom_number == ELEM_RSITE)
         throw IndigoError("Molecule with RGroups cannot be converted into InChI");
      strncpy(atom.elname, Element::toString(atom_number), ATOM_EL_LEN);

      Vec3f &c = mol.getAtomXyz(v);
      atom.x = c.x;
      atom.y = c.y;
      atom.z = c.z;

      // Connectivity: every bond is recorded on both of its end atoms
      const Vertex &vtx = mol.getVertex(v);
      int nei_idx = 0;
      for (int nei = vtx.neiBegin(); nei != vtx.neiEnd(); nei = vtx.neiNext(nei))
      {
         int v_nei = vtx.neiVertex(nei);
         atom.neighbor[nei_idx] = mapping[v_nei];
         int edge_idx = vtx.neiEdge(nei);
         atom.bond_type[nei_idx] = getInchiBondType(mol.getBondOrder(edge_idx));

         int bond_stereo = INCHI_BOND_STEREO_NONE;
         if (mol.cis_trans.isIgnored(edge_idx))
            bond_stereo = INCHI_BOND_STEREO_DOUBLE_EITHER;
         else
         {
            // The 1EITHER/2EITHER variants tell which end of the bond
            // carries the "either" mark: this atom or its neighbor
            if (mol.getBondDirection2(v, v_nei) == BOND_EITHER)
               bond_stereo = INCHI_BOND_STEREO_SINGLE_1EITHER;
            else if (mol.getBondDirection2(v_nei, v) == BOND_EITHER)
               bond_stereo = INCHI_BOND_STEREO_SINGLE_2EITHER;
         }
         atom.bond_stereo[nei_idx] = bond_stereo;
         nei_idx++;
      }
      atom.num_bonds = vtx.degree();

      // Other properties
      atom.isotopic_mass = mol.getAtomIsotope(v);
      atom.radical = mol.getAtomRadical(v);
      atom.charge = mol.getAtomCharge(v);

      // Hydrogens: -1 tells InChI to add implicit hydrogens automatically
      int hcount = -1;
      if (Molecule::shouldWriteHCount(mol, v) || mol.isExplicitValenceSet(v) || mol.isImplicitHSet(v))
      {
         // Do not set the number of implicit hydrogens for a neutral,
         // non-radical aromatic carbon, as InChI throws an exception on
         // the molecule B1=CB=c2cc3B=CC=c3cc12
         const bool plain_aromatic_carbon =
            mol.getAtomAromaticity(v) == ATOM_AROMATIC &&
            atom_number == ELEM_C && atom.charge == 0 && atom.radical == 0;

         if (!plain_aromatic_carbon)
            hcount = mol.getImplicitH_NoThrow(v, -1);
      }
      atom.num_iso_H[0] = hcount;
   }

   // Process cis-trans bonds
   for (int e = mol.edgeBegin(); e != mol.edgeEnd(); e = mol.edgeNext(e))
   {
      if (mol.cis_trans.getParity(e) == 0)
         continue;

      int subst[4];
      mol.cis_trans.getSubstituents_All(e, subst);

      const Edge &edge = mol.getEdge(e);

      inchi_Stereo0D &st = stereo.push();

      // Write it as
      // #0 - #1 = #2 - #3
      st.neighbor[0] = mapping[subst[0]];
      st.neighbor[1] = mapping[edge.beg];
      st.neighbor[2] = mapping[edge.end];
      st.neighbor[3] = mapping[subst[2]];

      if (mol.cis_trans.getParity(e) == MoleculeCisTrans::CIS)
         st.parity = INCHI_PARITY_ODD;
      else
         st.parity = INCHI_PARITY_EVEN;

      st.central_atom = NO_ATOM;
      st.type = INCHI_StereoType_DoubleBond;
   }

   // Process tetrahedral stereocenters
   for (int i = mol.stereocenters.begin(); i != mol.stereocenters.end(); i = mol.stereocenters.next(i))
   {
      int v = mol.stereocenters.getAtomIndex(i);

      int type, group, pyramid[4];
      mol.stereocenters.get(v, type, group, pyramid);
      if (type == MoleculeStereocenters::ATOM_ANY)
         continue;

      inchi_Stereo0D &st = stereo.push();

      /*
         4 neighbors

                  X                    neighbor[4] : {#W, #X, #Y, #Z}
                  |                    central_atom: #A
               W--A--Y                 type        : INCHI_StereoType_Tetrahedral
                  |
                  Z
         parity: if (X,Y,Z) are clockwise when seen from W then parity is 'e' otherwise 'o'
         Example (see AXYZW above): if W is above the plane XYZ then parity = 'e'

         3 neighbors

                    Y          Y       neighbor[4] : {#A, #X, #Y, #Z}
                   /          /        central_atom: #A
               X--A  (e.g. O=S   )     type        : INCHI_StereoType_Tetrahedral
                   \          \
                    Z          Z
      */

      // pyramid[] holds molecule vertex indices (-1 in the last slot when the
      // center has only three neighbors). Each index is translated through
      // `mapping` exactly once, at the point where it is stored; translating
      // twice would look up an InChI index in a table keyed by vertex index.
      int offset = 0;
      if (pyramid[3] == -1)
         offset = 1;

      st.neighbor[offset] = mapping[pyramid[0]];
      st.neighbor[offset + 1] = mapping[pyramid[1]];
      st.neighbor[offset + 2] = mapping[pyramid[2]];
      if (offset == 0)
         st.neighbor[3] = mapping[pyramid[3]];
      else
         // With three neighbors the central atom itself takes the first slot
         st.neighbor[0] = mapping[v];

      if (offset != 0)
         st.parity = INCHI_PARITY_ODD;
      else
         st.parity = INCHI_PARITY_EVEN;
      st.central_atom = mapping[v];
      st.type = INCHI_StereoType_Tetrahedral;
   }

   input.atom = atoms.ptr();
   input.num_atoms = atoms.size();
   input.stereo0D = stereo.ptr();
   input.num_stereo0D = stereo.size();
   input.szOptions = options.ptr();
}
Пример #2
0
// Saves the molecule as canonical SMILES into the saver's output.
//
// Does nothing for a molecule without vertices. Throws Error if the molecule
// has repeating units. The input molecule may have its hydrogens restored
// (see below); all other modifications are made on an internal copy.
void CanonicalSmilesSaver::saveMolecule (Molecule &mol_) const
{
   if (mol_.vertexCount() < 1)
      return;

   QS_DEF(Array<int>, ignored);
   QS_DEF(Array<int>, order);
   QS_DEF(Array<int>, ranks);
   QS_DEF(Molecule, mol);

   if (mol_.repeating_units.size() > 0)
      throw Error("can not canonicalize a polymer");

   // If some ordinary atom has an undefined implicit hydrogen count, try to
   // restore hydrogens through a unique dearomatization of the input molecule
   bool has_undefined_h = false;
   for (int v = mol_.vertexBegin(); v != mol_.vertexEnd(); v = mol_.vertexNext(v))
   {
      if (!mol_.isRSite(v) && !mol_.isPseudoAtom(v) &&
          mol_.getImplicitH_NoThrow(v, -1) == -1)
         has_undefined_h = true;
   }
   if (has_undefined_h)
   {
      AromaticityOptions arom_options;
      arom_options.method = AromaticityOptions::GENERIC;
      arom_options.unique_dearomatization = true;
      MoleculeDearomatizer::restoreHydrogens(mol_, arom_options);
   }

   // From here on only the working copy is modified
   mol.clone(mol_, 0, 0);

   // TODO: canonicalize allenes properly
   mol.allene_stereo.clear();

   // Mark the atoms that can be turned into implicit hydrogens; the
   // automorphism search skips them
   ignored.clear_resize(mol.vertexEnd());
   ignored.zerofill();

   for (int v = mol.vertexBegin(); v != mol.vertexEnd(); v = mol.vertexNext(v))
   {
      if (mol.convertableToImplicitHydrogen(v))
         ignored[v] = 1;
   }

   // Drop cis-trans parity from ring bonds that should not be written
   for (int e = mol.edgeBegin(); e != mol.edgeEnd(); e = mol.edgeNext(e))
   {
      if (mol.getBondTopology(e) != TOPOLOGY_RING || mol.cis_trans.getParity(e) == 0)
         continue;

      // Ring cis/trans bonds are saved into SMILES only when they do not
      // take part in bigger ring systems
      const Edge &bond = mol.getEdge(e);

      if (mol.getAtomRingBondsCount(bond.beg) != 2 ||
          mol.getAtomRingBondsCount(bond.end) != 2)
      {
         mol.cis_trans.setParity(e, 0);
         continue;
      }

      // Also discard the cis-trans bonds that have been converted to
      // aromatic: both ends must still have at least one single bond
      const Vertex &beg_vertex = mol.getVertex(bond.beg);
      const Vertex &end_vertex = mol.getVertex(bond.end);
      bool beg_has_single = false;
      bool end_has_single = false;

      for (int k = beg_vertex.neiBegin(); k != beg_vertex.neiEnd(); k = beg_vertex.neiNext(k))
      {
         if (mol.getBondOrder(beg_vertex.neiEdge(k)) == BOND_SINGLE)
            beg_has_single = true;
      }

      for (int k = end_vertex.neiBegin(); k != end_vertex.neiEnd(); k = end_vertex.neiNext(k))
      {
         if (mol.getBondOrder(end_vertex.neiEdge(k)) == BOND_SINGLE)
            end_has_single = true;
      }

      if (!(beg_has_single && end_has_single))
         mol.cis_trans.setParity(e, 0);
   }

   // Canonical numbering of the atoms that are not ignored
   MoleculeAutomorphismSearch automorphism;

   automorphism.detect_invalid_cistrans_bonds = find_invalid_stereo;
   automorphism.detect_invalid_stereocenters = find_invalid_stereo;
   automorphism.find_canonical_ordering = true;
   automorphism.ignored_vertices = ignored.ptr();
   automorphism.process(mol);
   automorphism.getCanonicalNumbering(order);

   // Remove the stereo marks that the automorphism search reported as invalid
   for (int e = mol.edgeBegin(); e != mol.edgeEnd(); e = mol.edgeNext(e))
   {
      if (mol.cis_trans.getParity(e) != 0 && automorphism.invalidCisTransBond(e))
         mol.cis_trans.setParity(e, 0);
   }

   for (int v = mol.vertexBegin(); v != mol.vertexEnd(); v = mol.vertexNext(v))
   {
      if (mol.stereocenters.getType(v) > MoleculeStereocenters::ATOM_ANY &&
          automorphism.invalidStereocenter(v))
         mol.stereocenters.remove(v);
   }

   // Invert the ordering: ranks[v] is the position of vertex v in `order`
   ranks.clear_resize(mol.vertexEnd());

   for (int pos = 0; pos < order.size(); pos++)
      ranks[order[pos]] = pos;

   SmilesSaver saver(_output);

   saver.ignore_invalid_hcount = false;
   saver.vertex_ranks = ranks.ptr();
   saver.ignore_hydrogens = true;
   saver.canonize_chiralities = true;
   saver.saveMolecule(mol);
}
// Fills _hcount and _degree for every vertex of the molecule and initializes
// _independent_component_index.
//
// _hcount[v] is the implicit hydrogen count (or a substitute value when it is
// undefined) plus the number of neighboring non-isotopic hydrogen atoms.
// _degree[v] counts the neighbors that are not listed in ignored_vertices;
// it stays zero for vertices that are themselves ignored.
void MoleculeAutomorphismSearch::_calculateHydrogensAndDegree (Molecule &mol)
{
   _hcount.clear_resize(mol.vertexEnd());
   _degree.clear_resize(mol.vertexEnd());
   _degree.zerofill();

   for (int v = mol.vertexBegin(); v != mol.vertexEnd(); v = mol.vertexNext(v))
   {
      // R-sites, pseudoatoms and template atoms carry no implicit hydrogens
      const bool special_atom = mol.isRSite(v) || mol.isPseudoAtom(v) || mol.isTemplateAtom(v);
      _hcount[v] = special_atom ? 0 : mol.getImplicitH_NoThrow(v, -1);

      if (_hcount[v] < 0)
      {
         if (mol.getAtomAromaticity(v) != ATOM_AROMATIC)
         {
            // The number of hydrogens is undefined, but all the other
            // properties (connectivity, charge, etc.) are known, so such
            // atoms are still comparable with each other.
            // For example, this cis-trans bond is invalid even though the
            // number of hydrogens is undefined: CC=C(N(C)=O)N(C)=O
            //
            // Any big number will do. Later it can grow by the number of
            // neighboring hydrogens, and that is correct because the
            // nitrogens in these molecules are different:
            // C[N](C)=O and [H][N]([H])(C)(C)=O
            _hcount[v] = 100;
         }
         else if (mol.getAtomNumber(v) == ELEM_C && mol.getAtomCharge(v) == 0)
         {
            // Neutral aromatic carbon: derive the count from the degree;
            // for any other degree the value is left as it is
            const int carbon_degree = mol.getVertex(v).degree();
            if (carbon_degree == 3)
               _hcount[v] = 0;
            else if (carbon_degree == 2)
               _hcount[v] = 1;
         }
         else if (mol.getAtomNumber(v) == ELEM_O && mol.getAtomCharge(v) == 0)
         {
            // Neutral aromatic oxygen
            _hcount[v] = 0;
         }
         else if (allow_undefined)
         {
            // Give every such atom its own hydrogen count so that it
            // cannot be matched with any other atom
            _hcount[v] = 101 + v;
         }
         else
         {
            // This call will throw an error with a good explanation
            _hcount[v] = mol.getImplicitH(v);
         }
      }

      // _degree was zero-filled above, so an ignored vertex keeps degree 0
      // and does not get its explicit hydrogens counted
      if (ignored_vertices != 0 && ignored_vertices[v])
         continue;

      const Vertex &vertex = mol.getVertex(v);
      for (int nei = vertex.neiBegin(); nei != vertex.neiEnd(); nei = vertex.neiNext(nei))
      {
         const int nei_atom = vertex.neiVertex(nei);

         // Explicit hydrogens without an isotope label add to the count
         if (mol.getAtomNumber(nei_atom) == ELEM_H && mol.getAtomIsotope(nei_atom) == 0)
            _hcount[v]++;

         if (ignored_vertices == 0 || ignored_vertices[nei_atom] == 0)
            _degree[v]++;
      }
   }

   _independent_component_index.clear_resize(mol.vertexEnd());
   if (find_canonical_ordering)
   {
      // No independent components are used when the canonical ordering
      // is required
      _independent_component_index.fffill();
      return;
   }

   // Different connected components can be marked as independent
   GraphDecomposer decomposer(mol);
   decomposer.decompose();
   _independent_component_index.copy(decomposer.getDecomposition());
}