bool OBMoleculeFormat::ReadChemObjectImpl(OBConversion* pConv, OBFormat* pFormat)
  {
    std::istream &ifs = *pConv->GetInStream();
    if (!ifs.good()) //Possible to omit? ifs.peek() == EOF || 
      return false;

    OBMol* pmol = new OBMol;

    std::string auditMsg = "OpenBabel::Read molecule ";
    std::string description(pFormat->Description());
    auditMsg += description.substr(0,description.find('\n'));
    obErrorLog.ThrowError(__FUNCTION__,
                          auditMsg,
                          obAuditMsg);

    if(pConv->IsOption("C",OBConversion::GENOPTIONS))
      return DeferMolOutput(pmol, pConv, pFormat);

    bool ret;
   if(pConv->IsOption("separate",OBConversion::GENOPTIONS))
   {
     //On first call, separate molecule and put fragments in MolArray.
     //On subsequent calls, remove a fragment from MolArray and send it for writing
     //Done this way so that each fragment can be written to its own file (with -m option)
     if(!StoredMolsReady)
     {
       ret = pFormat->ReadMolecule(pmol,pConv); 
       if(ret && (pmol->NumAtoms() > 0 || (pFormat->Flags()&ZEROATOMSOK)))
         MolArray = pmol->Separate(); //use un-transformed molecule
       //Add an appropriate title to each fragment
       for(int i=0;i<MolArray.size();++i)
       {
         stringstream ss;
         ss << pmol->GetTitle() << '#' << i+1;
         string title = ss.str();
         MolArray[i].SetTitle(title);
       }
       reverse(MolArray.begin(),MolArray.end());
       StoredMolsReady = true;
       //Clear the flags of the input stream(which may have found eof) to ensure will
       //try to read anothe molecule and allow the stored ones to be sent for output.
       pConv->GetInStream()->clear();
     }

     if(MolArray.empty()) //normal end of fragments
       ret =false;
     else
     {
       // Copying is needed because the OBMol passed to AddChemObject will be deleted.
       // The OBMol in the vector is deleted here.
       OBMol* pMolCopy = new OBMol( MolArray.back());
       MolArray.pop_back();
       ret = pConv->AddChemObject(
           pMolCopy->DoTransformations(pConv->GetOptions(OBConversion::GENOPTIONS)))!=0;
     }
     if(!ret)
       StoredMolsReady = false;

     delete pmol;
     return ret;
   }

    ret=pFormat->ReadMolecule(pmol,pConv); 

    OBMol* ptmol = NULL;
    //Molecule is valid if it has some atoms 
    //or the format allows zero-atom molecules and it has a title
    if(ret && (pmol->NumAtoms() > 0 || (pFormat->Flags()&ZEROATOMSOK && *pmol->GetTitle())))
    {
      ptmol = static_cast<OBMol*>(pmol->DoTransformations(pConv->GetOptions(OBConversion::GENOPTIONS)));
      if(ptmol && (pConv->IsOption("j",OBConversion::GENOPTIONS) 
                || pConv->IsOption("join",OBConversion::GENOPTIONS)))
      {
        //With j option, accumulate all mols in one stored in this class
        if(pConv->IsFirstInput())
          _jmol = new OBMol;
        pConv->AddChemObject(_jmol);
        //will be discarded in WriteChemObjectImpl until the last input mol. This complication
        //is needed to allow joined molecules to be from different files. pOb1 in AddChem Object
        //is zeroed at the end of a file and _jmol is in danger of not being output.
        *_jmol += *ptmol;
        delete ptmol;
        return true;
      }
    }
    else
      delete pmol;

    // Normal operation - send molecule to be written
    ret = ret && (pConv->AddChemObject(ptmol)!=0); //success of both writing and reading
    return ret;
  }
Exemple #2
0
int main(int argc,char *argv[])
{
  // turn off slow sync with C-style output (we don't use it anyway).
  std::ios::sync_with_stdio(false);

  OBConversion conv;
  OBFormat *inFormat, *canFormat;
  OBMol mol;
  ifstream ifs;
  vector<OBMol> fragments;
  unsigned int fragmentCount = 0; // track how many in library -- give a running count
  map<string, int> index; // index of cansmi
  string currentCAN;
  unsigned int size;
  OBAtom *atom;
  OBBond *bond;
  bool nonRingAtoms, nonRingBonds;
  char buffer[BUFF_SIZE];

  canFormat = conv.FindFormat("can");
  conv.SetOutFormat(canFormat);

  if (argc < 2)
    {
      cout << "Usage: obfragment <file>" << endl;
      return(-1);
    }

  for (int i = 1; i < argc; i++) {
    cerr << " Reading file " << argv[i] << endl;

    inFormat = conv.FormatFromExt(argv[i]);
    if(inFormat==NULL || !conv.SetInFormat(inFormat))
      {
        cerr << " Cannot read file format for " << argv[i] << endl;
        continue; // try next file
      }
    
    ifs.open(argv[i]);
    
    if (!ifs)
      {
        cerr << "Cannot read input file: " << argv[i] << endl;
        continue;
      }
    
    
    while(ifs.peek() != EOF && ifs.good())
      {
        conv.Read(&mol, &ifs);
        if (!mol.Has3D()) continue; // invalid coordinates!
        mol.DeleteHydrogens(); // remove these before we do anything else
        
        do {
          nonRingAtoms = false;
          size = mol.NumAtoms();
          for (unsigned int i = 1; i <= size; ++i)
            {
              atom = mol.GetAtom(i);
              if (!atom->IsInRing()) {
                mol.DeleteAtom(atom);
                nonRingAtoms = true;
                break; // don't know how many atoms there are
              } 
              // Previously, we changed atoms to carbon here.
              // Now we perform this alchemy in terms of string-rewriting
              // once the canonical SMILES is generated
            }
        } while (nonRingAtoms);
        
        if (mol.NumAtoms() < 3)
          continue;
        
        if (mol.NumBonds() == 0)
          continue;
        
        do {
          nonRingBonds = false;
          size = mol.NumBonds();
          for (unsigned int i = 0; i < size; ++i)
            {
              bond = mol.GetBond(i);
              if (!bond->IsInRing()) {
                mol.DeleteBond(bond);
                nonRingBonds = true;
                break; // don't know how many bonds there are
              }
            }        
        } while (nonRingBonds);

        fragments = mol.Separate();
        for (unsigned int i = 0; i < fragments.size(); ++i)
          {
            if (fragments[i].NumAtoms() < 3) // too small to care
              continue;
              
            currentCAN = conv.WriteString(&fragments[i], true);
            currentCAN = RewriteSMILES(currentCAN); // change elements to "a/A" for compression
            if (index.find(currentCAN) != index.end()) { // already got this
              index[currentCAN] += 1; // add to the count for bookkeeping
              continue;
            }

            index[currentCAN] = 1; // don't ever write this ring fragment again

            // OK, now retrieve the canonical ordering for the fragment
            vector<string> canonical_order;
            if (fragments[i].HasData("Canonical Atom Order")) {
              OBPairData *data = (OBPairData*)fragments[i].GetData("Canonical Atom Order");
              tokenize(canonical_order, data->GetValue().c_str());
            }

            // Write out an XYZ-style file with the CANSMI as the title
            cout << fragments[i].NumAtoms() << '\n';
            cout << currentCAN << '\n'; // endl causes a flush

            vector<string>::iterator can_iter;
            unsigned int order;
            OBAtom *atom;

            fragments[i].Center();
            fragments[i].ToInertialFrame();

            for (unsigned int index = 0; index < canonical_order.size(); 
                 ++index) {
              order = atoi(canonical_order[index].c_str());
              atom = fragments[i].GetAtom(order);
              
              snprintf(buffer, BUFF_SIZE, "C%8.3f%8.3f%8.3f\n",
                       atom->x(), atom->y(), atom->z());
              cout << buffer;
            }

          }
        fragments.clear();
        if (index.size() > fragmentCount) {
          fragmentCount = index.size();
          cerr << " Fragments: " << fragmentCount << endl;
        }

      } // while reading molecules (in this file)
    ifs.close();
    ifs.clear();
  } // while reading files

  // loop through the map and output frequencies
  map<string, int>::const_iterator indexItr;
  for (indexItr = index.begin(); indexItr != index.end(); ++indexItr) {
    cerr << (*indexItr).second << " INDEX " << (*indexItr).first << "\n";
  }
    
  return(0);
}