Exemple #1
0
// Emits the CMF codes for atom `idx` of `mol` through _encode()/_output.
//
// Codes are written in this order: atom label (pseudo-atom string, R-site
// bits, or element number), charge, isotope, radical, tetrahedral stereo,
// allene stereo, implicit hydrogen count, valence (selected aromatic atoms
// only), attachment points, user atom flags, highlighting.
//
// `mapping` is indexed by atom index of `mol`; it is passed to
// MoleculeStereocenters::isPyramidMappingRigid and compared entry-wise to
// decide whether the allene parity has to be flipped.
//
// Throws Error when a value does not fit the encoding (empty or too long
// pseudo-atom label, element number out of range, charge, isotope, radical,
// stereogroup or implicit hydrogen count out of range).
void CmfSaver::_encodeAtom (Molecule &mol, int idx, const int *mapping)
{
   // Stays 0 for pseudo-atoms and R-sites; only real elements set it.
   int number = 0;

   if (mol.isPseudoAtom(idx))
   {
      const char *str = mol.getPseudoAtom(idx);
      size_t len = strlen(str);

      if (len < 1)
         throw Error("empty pseudo-atom");
      if (len > 255)
         // len is size_t; cast so that it matches the %d conversion
         throw Error("pseudo-atom labels %d characters long are not supported (255 is the limit)", (int)len);

      _encode(CMF_PSEUDOATOM);
      _encode((byte)len);
      
      // len >= 1 is guaranteed above, so the first character is always valid
      do
      {
         _encode(*str);
      } while (*(++str) != 0);
   }
   else if (mol.isRSite(idx))
   {
      int bits = mol.getRSiteBits(idx);
      if (bits > 255)
      {
         // Does not fit a single code: use the extended form
         _encode(CMF_RSITE_EXT);
         _output->writePackedUInt((unsigned int)bits);
      }
      else
      {
         _encode(CMF_RSITE);
         _encode(bits);
      }
   }
   else
   {
      number = mol.getAtomNumber(idx);

      if (number <= 0 || number >= ELEM_MAX)
         throw Error("unexpected atom label");

      _encode(number);
   }

   int charge = mol.getAtomCharge(idx);

   if (charge != 0)
   {
      int charge2 = charge - CMF_MIN_CHARGE;
         
      if (charge2 < 0 || charge2 >= CMF_NUM_OF_CHARGES)
      {
         // Outside the compact range: write the charge shifted by 128
         _encode(CMF_CHARGE_EXT);
         int charge3 = charge + 128;
         // The range check has to be done on the shifted value that is
         // actually written, not on the raw charge
         if (charge3 < 0 || charge3 >= 256)
            throw Error("unexpected atom charge: %d", charge);
         _encode(charge3);
      }
      else
         _encode(charge2 + CMF_CHARGES);
   }

   int isotope = mol.getAtomIsotope(idx);

   if (isotope > 0)
   {
      // NOTE(review): for pseudo-atoms and R-sites `number` is still 0 here;
      // confirm Element::getDefaultIsotope accepts that.
      int deviation = isotope - Element::getDefaultIsotope(number);

      if (deviation == 0)
         _encode(CMF_ISOTOPE_ZERO);
      else if (deviation == 1)
         _encode(CMF_ISOTOPE_PLUS1);
      else if (deviation == 2)
         _encode(CMF_ISOTOPE_PLUS2);
      else if (deviation == -1)
         _encode(CMF_ISOTOPE_MINUS1);
      else if (deviation == -2)
         _encode(CMF_ISOTOPE_MINUS2);
      else
      {
         // Generic form: deviation shifted by 100
         deviation += 100;
         if (deviation < 0 || deviation > 255)
            throw Error("unexpected %s isotope: %d", Element::toString(number), isotope);
         _encode(CMF_ISOTOPE_OTHER);
         _encode(deviation);
      }
   }

   int radical = 0;

   if (!mol.isPseudoAtom(idx) && !mol.isRSite(idx))
   {
      try
      {
         radical = mol.getAtomRadical(idx);
      }
      catch (Element::Error &)
      {
         // Deliberately ignored: the atom is then written without a radical
      }
   }

   if (radical > 0)
   {
      if (radical == RADICAL_SINGLET)
         _encode(CMF_RADICAL_SINGLET);
      else if (radical == RADICAL_DOUBLET)
         _encode(CMF_RADICAL_DOUBLET);
      else if (radical == RADICAL_TRIPLET)
         _encode(CMF_RADICAL_TRIPLET);
      else
         throw Error("bad radical value: %d", radical);
   }
   
   MoleculeStereocenters &stereo = mol.stereocenters;
   
   int stereo_type = stereo.getType(idx);
   
   if (stereo_type == MoleculeStereocenters::ATOM_ANY)
      _encode(CMF_STEREO_ANY);
   else if (stereo_type != 0)
   {
      bool rigid;
      int code;
      const int *pyramid = stereo.getPyramid(idx);
      
      // pyramid[3] == -1 means only three pyramid entries are used
      if (pyramid[3] == -1)
         rigid = MoleculeStereocenters::isPyramidMappingRigid(pyramid, 3, mapping);
      else
         rigid = MoleculeStereocenters::isPyramidMappingRigid(pyramid, 4, mapping);
      
      if (stereo_type == MoleculeStereocenters::ATOM_ABS)
         code = CMF_STEREO_ABS_0;
      else 
      {
         int group = stereo.getGroup(idx);

         if (group < 1 || group > CMF_MAX_STEREOGROUPS)
            throw Error("stereogroup number %d out of range", group);

         if (stereo_type == MoleculeStereocenters::ATOM_AND)
            code = CMF_STEREO_AND_0 + group - 1;
         else // stereo_type == MoleculeStereocenters::ATOM_OR
            code = CMF_STEREO_OR_0 + group - 1;
      }
      
      if (!rigid)
         // CMF_STEREO_*_0 -> CMF_STEREO_*_1
         code += CMF_MAX_STEREOGROUPS * 2 + 1;
      
      _encode(code);
   }

   if (mol.allene_stereo.isCenter(idx))
   {
      int left, right, parity, subst[4];

      mol.allene_stereo.getByAtomIdx(idx, left, right, subst, parity);
      // Flip the parity (1 <-> 2) for each substituent pair whose order is
      // reversed by the mapping
      if (subst[1] != -1 && mapping[subst[1]] != -1 && mapping[subst[1]] < mapping[subst[0]])
         parity = 3 - parity;
      if (subst[3] != -1 && mapping[subst[3]] != -1 && mapping[subst[3]] < mapping[subst[2]])
         parity = 3 - parity;
      if (parity == 1)
         _encode(CMF_STEREO_ALLENE_0);
      else
         _encode(CMF_STEREO_ALLENE_1);
   }


   int impl_h = 0;

   if (!mol.isPseudoAtom(idx) && !mol.isRSite(idx) && Molecule::shouldWriteHCount(mol, idx))
   {
      try
      {
         impl_h = mol.getImplicitH(idx);

         // This is CmfSaver's Error and is not swallowed by the handler below
         if (impl_h < 0 || impl_h > CMF_MAX_IMPLICIT_H)
            throw Error("implicit hydrogen count %d out of range", impl_h);

         _encode(CMF_IMPLICIT_H + impl_h);
      }
      catch (Element::Error &)
      {
         // Deliberately ignored: no implicit hydrogen code is written
      }
   }

   if (!mol.isRSite(idx) && !mol.isPseudoAtom(idx))
   {
      // Valence is written only for aromatic atoms that are charged or are
      // neither carbon nor oxygen
      if (mol.getAtomAromaticity(idx) == ATOM_AROMATIC && (charge != 0 || (number != ELEM_C && number != ELEM_O)))
      {
         try
         {
            int valence = mol.getAtomValence(idx);
            if (valence < 0 || valence > CMF_MAX_VALENCE)
            {
               _encode(CMF_VALENCE_EXT);
               _output->writePackedUInt(valence);
            }
            else
               _encode(CMF_VALENCE + valence);
         }
         catch (Element::Error &)
         {
            // Deliberately ignored: no valence code is written
         }
      }
   }

   // Attachment point groups are numbered from 1; each group's list of atoms
   // is terminated by -1
   for (int i = 1; i <= mol.attachmentPointCount(); i++)
   {
      int aidx;

      for (int j = 0; (aidx = mol.getAttachmentPoint(i, j)) != -1; j++)
         if (aidx == idx)
         {
            _encode(CMF_ATTACHPT);
            _encode(i);
         }
   }

   if (atom_flags != 0)
   {
      int flags = atom_flags[idx];

      // One code per set flag bit
      for (int i = 0; i < CMF_NUM_OF_ATOM_FLAGS; i++)
         if (flags & (1 << i))
            _encode(CMF_ATOM_FLAGS + i);
   }

   if (save_highlighting)
      if (mol.isAtomHighlighted(idx))
         _encode(CMF_HIGHLIGHTED);
}
// Tries to expand the token tokens[offset] into `m` and to attach it to
// `attach_to`, then consumes following tokens as substituents on the
// remaining connection points.
//
// On success returns true, advances `offset` past every consumed token and
// sets `attach_to` to the point the rest of the chain should continue from
// (index -1 when the expanded fragment is terminal).
// On failure returns false and leaves `offset` unchanged. Atoms added by
// nested expansions are removed again, but atoms added for this token itself
// remain in `m`.
//
// Throws Exception when an abbreviation pattern contains an R-site with more
// than one R-group bit set.
bool AbbreviationExpander::tryExpandToken (TokenChain &tokens, size_t &offset, Molecule &m, AttPoint &attach_to)
{
   Token &cur = tokens[offset];

   if (cur.multiplier != 1)
      return false;

   // Atoms of `m` that can still accept a bond, one entry per free connection
   Array<int> connection_points;
   if (cur.type == Token::Element)
   {
      if (cur.index == ELEM_H)
      {
         // Hydrogen is not added as an atom; it just terminates the chain
         offset++;
         attach_to = AttPoint(-1, 0);
         return true;
      }
      int added = m.addAtom(cur.index);

      // Get the number of bonds to connect
      int valence, hyd;
      int conn = attach_to.order;
      if (offset + 1 < tokens.size())
      {
         Token &next = tokens[offset + 1];
         conn += next.multiplier;
      }

      if (!Element::calcValence(cur.index, 0, 0, conn, valence, hyd, false))
      {
         // Ignore the next token
         // Happens in the OH3C case, where H3 belongs to C
         conn = attach_to.order;
         if (!Element::calcValence(cur.index, 0, 0, conn, valence, hyd, false))
            return false;
      }

      for (int i = 0; i < hyd + conn; i++)
         connection_points.push(added);
   }
   else if (cur.type == Token::Pattern)
   {
      // Add pattern
      BufferScanner scanner(abbreviations[cur.index]->expansion.c_str());
      SmilesLoader loader(scanner);

      Molecule abbr;
      loader.loadMolecule(abbr);

      Array<int> mapping;
      Array<int> rsites;
      m.mergeWithMolecule(abbr, &mapping);
      for (int v = abbr.vertexBegin(); v != abbr.vertexEnd(); v = abbr.vertexNext(v))
      {
         int mapped = mapping[v];
         if (m.isRSite(mapped))
         {
            dword bits = m.getRSiteBits(mapped);
            // An R-site of an abbreviation must name a single R-group, i.e.
            // at most one bit may be set. (The previous check compared the
            // highest set bit index with itself and could never fail.)
            if ((bits & (bits - 1)) != 0)
               throw Exception("Invalid abbreviations specification: %s", 
                  abbreviations[cur.index]->expansion.c_str());

            int id = bitGetOneHOIndex(bits);
            if (id != 0)
               id--; // R == R1

            const Vertex &vertex = m.getVertex(mapped);
            int nei = vertex.neiBegin();

            connection_points.expandFill(id + 1, -1);
            connection_points[id] = vertex.neiVertex(nei); // Point connected to the RSite

            rsites.push(mapped);
         }
      }
      m.removeAtoms(rsites);
   }
   else
      return false;

   // Nothing to bond through (e.g. an element with no free valence or a
   // pattern without R-sites): fail like the other early exits above instead
   // of indexing an empty array below.
   if (connection_points.size() == 0)
      return false;

   bool rollback = false;
   // Atoms with an index >= atom_bound are treated as added by nested expansions
   int atom_bound = m.vertexCount();
   size_t offset2 = offset + 1;

   attachBond(m, attach_to, connection_points[0]);
   // The first attach_to.order connection points are used by the bond above
   int i = attach_to.order;
   while (i < connection_points.size() - 1 && !rollback)
   {
      if (offset2 >= tokens.size())
      {
         // If we are at the end then there can be an implicit double bond
         // Example: -CH2CH=
         // When we read C H there are no more tokens
         break;
      }

      Token &next = tokens[offset2];
      for (int j = 0; j < next.multiplier; j++)
      {
         if (i >= connection_points.size())
         {
            // More substituents than free connection points
            rollback = true;
            break;
         }

         if (next.type == Token::Branch)
         {
            AttPoint point(connection_points[i], 1);

            // A substituent must be fully consumed (terminal attachment point)
            if (!expandParsedTokensWithRev(next.branch, m, point) || point.index != -1)
            {
               rollback = true;
               break;
            }
         }
         else
         {
            // Expand a single copy of the token on its own
            TokenChain chain;
            chain.push_back(next);
            chain[0].multiplier = 1;
            size_t local_offset = 0;
            AttPoint point(connection_points[i], 1);
            if (!tryExpandToken(chain, local_offset, m, point) || point.index != -1)
            {
               rollback = true;
               break;
            }
         }
         i++;
      }
      offset2++;
   }

   if (i > connection_points.size())
      rollback = true;
   if (!rollback)
   {
      if (i == connection_points.size())
      {
         // This is terminal
         attach_to = AttPoint(-1, 0);
      }
      else if (i == connection_points.size() - 1)
         attach_to = AttPoint(connection_points[i], 1); // Last attachment point
      else
      {
         // The token list is incomplete, which means a multiple bond follows
         attach_to = AttPoint(connection_points[i], connection_points.size() - i);
      }
   }

   if (rollback)
   {
      // Rollback
      Array<int> new_atoms;
      for (int v = m.vertexBegin(); v != m.vertexEnd(); v = m.vertexNext(v))
         if (v >= atom_bound)
            new_atoms.push(v);
      m.removeAtoms(new_atoms);
      return false;
   }
   offset = offset2;
   return true;
}