// Fills _hcount and _degree for every vertex of mol, then prepares
// _independent_component_index.
//
// _hcount[v] - implicit hydrogens (or a substitute value when they are
//              undefined) plus the number of non-isotopic explicit hydrogen
//              neighbours; the neighbours are not counted for ignored vertices.
// _degree[v] - number of neighbours that are not ignored; 0 for ignored vertices.
void MoleculeAutomorphismSearch::_calculateHydrogensAndDegree (Molecule &mol)
{
   const int vertex_end = mol.vertexEnd();
   const bool has_ignored = (ignored_vertices != 0);

   _hcount.clear_resize(vertex_end);
   _degree.clear_resize(vertex_end);
   _degree.zerofill();

   for (int atom = mol.vertexBegin(); atom != vertex_end; atom = mol.vertexNext(atom))
   {
      // R-sites and pseudoatoms carry no implicit hydrogens; for everything
      // else ask the molecule, getting -1 when the count cannot be computed.
      _hcount[atom] = (mol.isRSite(atom) || mol.isPseudoAtom(atom))
                         ? 0
                         : mol.getImplicitH_NoThrow(atom, -1);

      if (_hcount[atom] < 0)
      {
         if (mol.getAtomAromaticity(atom) != ATOM_AROMATIC)
         {
            // The hydrogen count is undefined, but connectivity, charge and
            // the other properties are known, so such atoms can still be
            // compared with each other. For example, the cis-trans bond in
            // CC=C(N(C)=O)N(C)=O is invalid even though the hydrogen counts
            // are undefined.
            // Any big number works here. Explicit hydrogen neighbours are
            // added on top of it below, which is correct: the nitrogens in
            // C[N](C)=O and [H][N]([H])(C)(C)=O are different.
            _hcount[atom] = 100;
         }
         else
         {
            const int element = mol.getAtomNumber(atom);

            if (element == ELEM_C && mol.getAtomCharge(atom) == 0)
            {
               // Neutral aromatic carbon: derive the count from the degree.
               switch (mol.getVertex(atom).degree())
               {
                  case 3:
                     _hcount[atom] = 0;
                     break;
                  case 2:
                     _hcount[atom] = 1;
                     break;
                  default:
                     // Any other degree leaves the negative value untouched
                     break;
               }
            }
            else if (element == ELEM_O && mol.getAtomCharge(atom) == 0)
               _hcount[atom] = 0;
            else if (allow_undefined)
               // Give each such atom its own hydrogen count so that
               // these atoms are all treated as different
               _hcount[atom] = 101 + atom;
            else
               // Per the original author's note, this call is expected to
               // throw an error with a good explanation
               _hcount[atom] = mol.getImplicitH(atom);
         }
      }

      _degree[atom] = 0;
      if (has_ignored && ignored_vertices[atom])
         continue;

      const Vertex &around = mol.getVertex(atom);

      for (int nei = around.neiBegin(); nei != around.neiEnd(); nei = around.neiNext(nei))
      {
         const int nei_atom = around.neiVertex(nei);

         // Explicit hydrogens without an isotope label count as hydrogens
         if (mol.getAtomNumber(nei_atom) == ELEM_H && mol.getAtomIsotope(nei_atom) == 0)
            _hcount[atom]++;

         if (!has_ignored || ignored_vertices[nei_atom] == 0)
            _degree[atom]++;
      }
   }

   _independent_component_index.clear_resize(vertex_end);

   if (find_canonical_ordering)
   {
      // Canonical ordering is required: no independent components are computed
      _independent_component_index.fffill();
      return;
   }

   // Canonical ordering is not required, so different connected components
   // can be marked as independent
   GraphDecomposer components(mol);
   components.decompose();
   _independent_component_index.copy(components.getDecomposition());
}
// Tries to expand the token tokens[offset] into atoms of m and to bond it to
// the attachment point attach_to, consuming following tokens as substituents.
//
// Parameters:
//   tokens    - parsed token chain
//   offset    - in: index of the token to expand;
//               out (success only): index of the first token not consumed
//   m         - molecule being built; atoms and bonds are added to it
//   attach_to - in: point in m the new fragment is bonded to;
//               out (success only): point the next token should be bonded to,
//               or AttPoint(-1, 0) when nothing is left to attach
//
// Returns true on success. Returns false when the token cannot be expanded
// (multiplier other than 1, unsupported token type, impossible valence, or
// the substituents do not fit the free connection points).
//
// Throws Exception when an abbreviation pattern contains an R-site that
// allows more than one R-group.
bool AbbreviationExpander::tryExpandToken (TokenChain &tokens, size_t &offset, Molecule &m, AttPoint &attach_to)
{
   Token &cur = tokens[offset];

   if (cur.multiplier != 1)
      return false;

   // connection_points[k] is the atom of m that the k-th bond of this
   // fragment starts from; index 0 is used for the bond to attach_to
   Array<int> connection_points;
   if (cur.type == Token::Element)
   {
      if (cur.index == ELEM_H)
      {
         // Hydrogen adds no atom; it only terminates the attachment point
         offset++;
         attach_to = AttPoint(-1, 0);
         return true;
      }
      int added = m.addAtom(cur.index);

      // Get the number of bonds to connect
      int valence, hyd;
      int conn = attach_to.order;
      if (offset + 1 < tokens.size())
      {
         Token &next = tokens[offset + 1];
         conn += next.multiplier;
      }

      if (!Element::calcValence(cur.index, 0, 0, conn, valence, hyd, false))
      {
         // Ignore next atom
         // Appears in the OH3C case when H3 belongs to C
         conn = attach_to.order;
         if (!Element::calcValence(cur.index, 0, 0, conn, valence, hyd, false))
            return false;
      }

      // Every bond (explicit connection or implicit hydrogen) starts at the new atom
      for (int i = 0; i < hyd + conn; i++)
         connection_points.push(added);
   }
   else if (cur.type == Token::Pattern)
   {
      // Add pattern
      BufferScanner scanner(abbreviations[cur.index].expansion.c_str());
      SmilesLoader loader(scanner);

      Molecule abbr;
      loader.loadMolecule(abbr);

      Array<int> mapping;
      Array<int> rsites;
      m.mergeWithMolecule(abbr, &mapping);
      for (int v = abbr.vertexBegin(); v != abbr.vertexEnd(); v = abbr.vertexNext(v))
      {
         int mapped = mapping[v];
         if (m.isRSite(mapped))
         {
            dword bits = m.getRSiteBits(mapped);

            // An R-site in an abbreviation must name a single R-group.
            // The previous check compared bitGetOneHOIndex(bits) with itself
            // and therefore could never fail; test for "more than one bit
            // set" directly instead.
            if ((bits & (bits - 1)) != 0)
               throw Exception("Invalid abbreviations specification: %s", 
                  abbreviations[cur.index].expansion.c_str());

            int id1 = bitGetOneHOIndex(bits);
            if (id1 != 0)
               id1--; // R == R1

            const Vertex &vertex = m.getVertex(mapped);
            int nei = vertex.neiBegin();

            // Slots for R-group numbers that the pattern does not use stay at -1
            connection_points.expandFill(id1 + 1, -1);
            connection_points[id1] = vertex.neiVertex(nei); // Point connected to the RSite

            rsites.push(mapped);
         }
      }
      // The R-site placeholders are replaced by real bonds below
      m.removeAtoms(rsites);
   }
   else
      return false;

   bool rollback = false;
   // NOTE(review): the bound is taken after this token's own atoms were
   // added, so the rollback below removes only vertices with index >= this
   // count - confirm that the caller restores the rest.
   int atom_bound = m.vertexCount();
   size_t offset2 = offset + 1;

   attachBond(m, attach_to, connection_points[0]);
   // The first attach_to.order connection points are taken by the bond above
   int i = attach_to.order;
   while (i < connection_points.size() - 1 && !rollback)
   {
      if (offset2 >= tokens.size())
      {
         // If we are at the end then there can be an implicit double bond
         // Example: -CH2CH=
         // When we read C H there are no more tokens
         break;
      }

      Token &next = tokens[offset2];
      // A token with multiplier N occupies N connection points
      for (int j = 0; j < next.multiplier; j++)
      {
         if (i >= connection_points.size())
         {
            rollback = true;
            break;
         }

         if (next.type == Token::Branch)
         {
            AttPoint point(connection_points[i], 1);

            // A substituent must be fully consumed (point.index == -1)
            if (!expandParsedTokensWithRev(next.branch, m, point) || point.index != -1)
            {
               rollback = true;
               break;
            }
         }
         else
         {
            // Expand a single copy of the token as a one-element chain
            TokenChain chain;
            chain.push_back(next);
            chain[0].multiplier = 1;
            size_t local_offset = 0;
            AttPoint point(connection_points[i], 1);
            if (!tryExpandToken(chain, local_offset, m, point) || point.index != -1)
            {
               rollback = true;
               break;
            }
         }
         i++;
      }
      offset2++;
   }

   if (i > connection_points.size())
      rollback = true;
   if (!rollback)
   {
      if (i == connection_points.size())
      {
         // This is terminal
         attach_to = AttPoint(-1, 0);
      }
      else if (i == connection_points.size() - 1)
         attach_to = AttPoint(connection_points[i], 1); // Last attachment point
      else
      {
         // The number of tokens is incomplete, which means that there are double bonds after
         attach_to = AttPoint(connection_points[i], connection_points.size() - i);
      }
   }

   if (rollback)
   {
      // Rollback
      Array<int> new_atoms;
      for (int v = m.vertexBegin(); v != m.vertexEnd(); v = m.vertexNext(v))
         if (v >= atom_bound)
            new_atoms.push(v);
      m.removeAtoms(new_atoms);
      return false;
   }
   offset = offset2;
   return true;
}
// Example #3
// 0
// Converts one molfile into a block of pattern macros written to cpp_file.
//
// Parameters:
//   path     - directory of the molfile
//   filename - name of the molfile inside path; also printed to stdout and
//              used as the pattern name
//   cpp_file - output the macro lines are written to
//
// Steps:
//   1. Load the molecule and require BiconnectedDecomposer::decompose() to
//      return 1; otherwise print an error and write nothing.
//   2. Normalise coordinates: the atom with the smallest y becomes the origin
//      and everything is scaled by 1 / (xy-length of the middle element of
//      the edges sorted with edge_cmp; presumably that is the median bond
//      length - confirm against edge_cmp).
//   3. Write BEGIN_PATTERN, one ADD_ATOM per atom, one ADD_BOND per bond.
//   4. Walk around the molecule starting from the lowest atom, writing an
//      OUTLINE_POINT for every visited atom and for every point where the
//      current segment crosses another bond, until the walk returns to the
//      start atom.
//   5. Write END_PATTERN.
void convertMolfile (char *path, char *filename, FileOutput &cpp_file)
{
   FileScanner molfile("%s\\%s", path, filename);
   MolfileLoader mf_loader(molfile);
   Molecule mol;
   QS_DEF(Array<int>, edges);

   printf("%s\n", filename);

   mf_loader.loadMolecule(mol, true);

   BiconnectedDecomposer bd(mol); 

   if (bd.decompose() != 1)
   {
      printf("Error: %s is not biconnected\n", filename);
      return;
   }

   int i, j;

   edges.clear_reserve(mol.edgeCount());

   for (i = mol.edgeBegin() ; i < mol.edgeEnd(); i = mol.edgeNext(i))
      edges.push(i);

   edges.qsort(edge_cmp, &mol);

   // The middle element of the sorted edges defines the unit length
   const Edge &edge = mol.getEdge(edges[edges.size() / 2]);

   Vec3f v1 = mol.getAtomPos(edge.beg);
   Vec3f v2 = mol.getAtomPos(edge.end);

   // Measure the bond in the xy plane only
   v1.z = 0.f;
   v2.z = 0.f;

   float scale = Vec3f::dist(v1, v2);

   if (scale < 0.0001f)
   {
      printf("Error: %s has zero bond\n", filename);
      return;
   }

   scale = 1.f / scale;

   // Find the atom with the smallest y coordinate; it becomes the origin
   // and the starting point of the outline
   int first_idx = mol.vertexBegin();
   Vec3f pos = mol.getAtomPos(first_idx);

   for (i = mol.vertexNext(first_idx); i < mol.vertexEnd(); i = mol.vertexNext(i))
   {
      if (mol.getAtomPos(i).y < pos.y)
      {
         pos = mol.getAtomPos(i);
         first_idx = i;
      }
   }


   for (i = mol.vertexBegin() ; i < mol.vertexEnd(); i = mol.vertexNext(i))
   {
      mol.getAtom2(i).pos.sub(pos);
      mol.getAtom2(i).pos.scale(scale);
   }

   char buf[1024];

   sprintf_s(buf, "BEGIN_PATTERN(\"%s\")", filename);
   cpp_file.writeStringCR(buf);   

   for (i = mol.vertexBegin(); i < mol.vertexEnd(); i = mol.vertexNext(i))
   {
      sprintf_s(buf, "   ADD_ATOM(%d, %ff, %ff)", i, mol.getAtomPos(i).x, mol.getAtomPos(i).y);
      cpp_file.writeStringCR(buf);   
   }

   for (i = mol.edgeBegin(); i < mol.edgeEnd(); i = mol.edgeNext(i))
   {
      const Edge &edge = mol.getEdge(i);
      int type = mol.getBond(i).type;
      int qtype = mol.getQueryBond(i).type;

      // A non-zero query bond type takes precedence over the plain bond type
      sprintf_s(buf, "   ADD_BOND(%d, %d, %d)", edge.beg, edge.end, qtype != 0 ? qtype : type);
      cpp_file.writeStringCR(buf);   
   }

   Vec2f v, inter;
   Vec2f pos_i;

   i = first_idx;

   float max_angle, cur_angle;
   float i_angle = 0;
   int next_nei = 0;
   int point_idx = 0;

   pos_i.set(mol.getAtomPos(i).x, mol.getAtomPos(i).y);

   while (true)
   {
      const Vertex &vert = mol.getVertex(i);

      if (i != first_idx)
      {
         // Direction from the current atom back to the previous outline
         // position; neighbour angles are measured relative to it
         v.set(pos_i.x, pos_i.y);
         pos_i.set(mol.getAtomPos(i).x, mol.getAtomPos(i).y);
         v.sub(pos_i);

         i_angle = v.tiltAngle2();
      } else if (point_idx > 0)
         break; // Back at the start atom: the outline is closed

      sprintf_s(buf, "   OUTLINE_POINT(%d, %ff, %ff)", point_idx++, pos_i.x, pos_i.y);
      cpp_file.writeStringCR(buf);

      max_angle = 0.f;

      // Pick the neighbour with the largest angle (in [0, 2*PI)) relative to i_angle
      for (j = vert.neiBegin(); j < vert.neiEnd(); j = vert.neiNext(j))
      {
         const Vec3f &pos_nei = mol.getAtomPos(vert.neiVertex(j));

         v.set(pos_nei.x - pos_i.x, pos_nei.y - pos_i.y);

         cur_angle = v.tiltAngle2() - i_angle;

         if (cur_angle < 0.f)
            cur_angle += 2 * PI;

         if (max_angle < cur_angle)
         {
            max_angle = cur_angle;
            next_nei = j;
         }
      }
      
      i = vert.neiVertex(next_nei);

      float dist, min_dist = 0.f;
      int int_edge = -1; // Set whenever min_dist drops below 10000
      Vec2f cur_v1 = pos_i;
      Vec2f cur_v2(mol.getAtomPos(i).x, mol.getAtomPos(i).y);

      // Follow the segment cur_v1 -> cur_v2; every time it crosses another
      // bond, emit the crossing point and continue along that bond.
      // min_dist >= 10000 means that no crossing was found.
      while (min_dist < 10000.f)
      {
         min_dist = 10001.f;

         // Find the crossing nearest to cur_v1
         for (j = mol.edgeBegin(); j < mol.edgeEnd(); j = mol.edgeNext(j))
         {
            const Edge &edge = mol.getEdge(j);
            Vec2f cur_v3(mol.getAtomPos(edge.beg).x, mol.getAtomPos(edge.beg).y);
            Vec2f cur_v4(mol.getAtomPos(edge.end).x, mol.getAtomPos(edge.end).y);

            if (Vec2f::intersection(cur_v1, cur_v2, cur_v3, cur_v4, v))
               if ((dist = Vec2f::dist(cur_v1, v)) < min_dist)
               {
                  inter = v;
                  min_dist = dist;
                  int_edge = j;
               }
         }

         if (min_dist < 10000.f)
         {
            // Emit the nearest crossing (inter). v is only the scratch output
            // of the last Vec2f::intersection call and need not be the
            // crossing that was selected.
            sprintf_s(buf, "   OUTLINE_POINT(%d, %ff, %ff)", point_idx++, inter.x, inter.y);
            cpp_file.writeStringCR(buf);   

            const Edge &edge = mol.getEdge(int_edge);
            Vec2f cur_v3(mol.getAtomPos(edge.beg).x, mol.getAtomPos(edge.beg).y);
            Vec2f cur_v4(mol.getAtomPos(edge.end).x, mol.getAtomPos(edge.end).y);

            Vec2f cur_v1v;
            Vec2f cur_v3v;
            Vec2f cur_v4v;

            cur_v1v.diff(cur_v1, inter);
            cur_v3v.diff(cur_v3, inter);
            cur_v4v.diff(cur_v4, inter);

            // Angles of both ends of the crossed bond relative to the
            // direction we came from, normalised to [0, 2*PI)
            float angle1 = cur_v1v.tiltAngle2();
            float angle3 = cur_v3v.tiltAngle2() - angle1;
            float angle4 = cur_v4v.tiltAngle2() - angle1;

            if (angle3 < 0)
               angle3 += 2 * PI;
            if (angle4 < 0)
               angle4 += 2 * PI;

            cur_v1 = inter;

            // Continue towards the end of the crossed bond with the larger angle
            if (angle3 > angle4)
            {
               cur_v2 = cur_v3;
               i = edge.beg;
            } else
            {
               cur_v2 = cur_v4;
               i = edge.end;
            }
         }
      }
   }

   cpp_file.writeStringCR("END_PATTERN()");   
}