Пример #1
0
int main(int argc,char *argv[])
{
  // turn off slow sync with C-style output (we don't use it anyway).
  std::ios::sync_with_stdio(false);

  OBConversion conv;
  OBFormat *inFormat, *canFormat;
  OBMol mol;
  ifstream ifs;
  vector<OBMol> fragments;
  unsigned int fragmentCount = 0; // track how many in library -- give a running count
  map<string, int> index; // index of cansmi
  string currentCAN;
  unsigned int size;
  OBAtom *atom;
  OBBond *bond;
  bool nonRingAtoms, nonRingBonds;
  char buffer[BUFF_SIZE];

  canFormat = conv.FindFormat("can");
  conv.SetOutFormat(canFormat);

  if (argc < 2)
    {
      cout << "Usage: obfragment <file>" << endl;
      return(-1);
    }

  for (int i = 1; i < argc; i++) {
    cerr << " Reading file " << argv[i] << endl;

    inFormat = conv.FormatFromExt(argv[i]);
    if(inFormat==NULL || !conv.SetInFormat(inFormat))
      {
        cerr << " Cannot read file format for " << argv[i] << endl;
        continue; // try next file
      }
    
    ifs.open(argv[i]);
    
    if (!ifs)
      {
        cerr << "Cannot read input file: " << argv[i] << endl;
        continue;
      }
    
    
    while(ifs.peek() != EOF && ifs.good())
      {
        conv.Read(&mol, &ifs);
        if (!mol.Has3D()) continue; // invalid coordinates!
        mol.DeleteHydrogens(); // remove these before we do anything else
        
        do {
          nonRingAtoms = false;
          size = mol.NumAtoms();
          for (unsigned int i = 1; i <= size; ++i)
            {
              atom = mol.GetAtom(i);
              if (!atom->IsInRing()) {
                mol.DeleteAtom(atom);
                nonRingAtoms = true;
                break; // don't know how many atoms there are
              } 
              // Previously, we changed atoms to carbon here.
              // Now we perform this alchemy in terms of string-rewriting
              // once the canonical SMILES is generated
            }
        } while (nonRingAtoms);
        
        if (mol.NumAtoms() < 3)
          continue;
        
        if (mol.NumBonds() == 0)
          continue;
        
        do {
          nonRingBonds = false;
          size = mol.NumBonds();
          for (unsigned int i = 0; i < size; ++i)
            {
              bond = mol.GetBond(i);
              if (!bond->IsInRing()) {
                mol.DeleteBond(bond);
                nonRingBonds = true;
                break; // don't know how many bonds there are
              }
            }        
        } while (nonRingBonds);

        fragments = mol.Separate();
        for (unsigned int i = 0; i < fragments.size(); ++i)
          {
            if (fragments[i].NumAtoms() < 3) // too small to care
              continue;
              
            currentCAN = conv.WriteString(&fragments[i], true);
            currentCAN = RewriteSMILES(currentCAN); // change elements to "a/A" for compression
            if (index.find(currentCAN) != index.end()) { // already got this
              index[currentCAN] += 1; // add to the count for bookkeeping
              continue;
            }

            index[currentCAN] = 1; // don't ever write this ring fragment again

            // OK, now retrieve the canonical ordering for the fragment
            vector<string> canonical_order;
            if (fragments[i].HasData("Canonical Atom Order")) {
              OBPairData *data = (OBPairData*)fragments[i].GetData("Canonical Atom Order");
              tokenize(canonical_order, data->GetValue().c_str());
            }

            // Write out an XYZ-style file with the CANSMI as the title
            cout << fragments[i].NumAtoms() << '\n';
            cout << currentCAN << '\n'; // endl causes a flush

            vector<string>::iterator can_iter;
            unsigned int order;
            OBAtom *atom;

            fragments[i].Center();
            fragments[i].ToInertialFrame();

            for (unsigned int index = 0; index < canonical_order.size(); 
                 ++index) {
              order = atoi(canonical_order[index].c_str());
              atom = fragments[i].GetAtom(order);
              
              snprintf(buffer, BUFF_SIZE, "C%8.3f%8.3f%8.3f\n",
                       atom->x(), atom->y(), atom->z());
              cout << buffer;
            }

          }
        fragments.clear();
        if (index.size() > fragmentCount) {
          fragmentCount = index.size();
          cerr << " Fragments: " << fragmentCount << endl;
        }

      } // while reading molecules (in this file)
    ifs.close();
    ifs.clear();
  } // while reading files

  // loop through the map and output frequencies
  map<string, int>::const_iterator indexItr;
  for (indexItr = index.begin(); indexItr != index.end(); ++indexItr) {
    cerr << (*indexItr).second << " INDEX " << (*indexItr).first << "\n";
  }
    
  return(0);
}
Пример #2
0
///////////////////////////////////////////////////////////////////////////////
//! \brief  Generate rough 3D coordinates for SMILES (or other 0D files).
//!
//! Usage: obgen <filename> [-ff <forcefield>]
//!
//! The input format is taken from the extension of the first remaining
//! argument; every molecule in the file is built with OBBuilder, gets
//! hydrogens added, is refined with the selected force field (default
//! "MMFF94") and is written to stdout in SDF format.
//!
//! \return 0 on success; the program exits with -1 on usage, format, file or
//!         force field errors.
//
int main(int argc,char **argv)
{
  char *program_name= argv[0];
  string filename, ff = "MMFF94";

  list<string> argl(argv+1, argv+argc);

  // Pull "-ff <name>" out of the argument list wherever it appears. A
  // trailing "-ff" without a name is dropped and the default is kept.
  list<string>::iterator optff = find(argl.begin(), argl.end(), "-ff");
  if (optff != argl.end()) {
    list<string>::iterator optffarg = optff;
    ++optffarg;

    if (optffarg != argl.end()) {
      ff = *optffarg;

      argl.erase(optff,++optffarg);
    } else {
      argl.erase(optff);
    }
  }

  if (argl.empty()) {
    cout << "Usage: obgen <filename> [options]" << endl;
    cout << endl;
    cout << "options:      description:" << endl;
    cout << endl;
    cout << "  -ff         select a forcefield" << endl;
    cout << endl;
    OBPlugin::List("forcefields", "verbose");
    exit(-1);
  }

  filename = argl.front();

  // Find Input filetype
  OBConversion conv;
  OBFormat *format_in = conv.FormatFromExt(filename.c_str());
  OBFormat *format_out = conv.FindFormat("sdf");

  if (!format_in || !format_out || !conv.SetInAndOutFormats(format_in, format_out)) {
    cerr << program_name << ": cannot read input/output format!" << endl;
    exit (-1);
  }

  // Read the file
  ifstream ifs;
  ifs.open(filename.c_str());
  if (!ifs) {
    cerr << program_name << ": cannot read input file!" << endl;
    exit (-1);
  }

  // The force field does not depend on the molecule, so look it up and
  // configure its logging once instead of once per molecule.
  OBForceField* pFF = OBForceField::FindForceField(ff);
  if (!pFF) {
    cerr << program_name << ": could not find forcefield '" << ff << "'." <<endl;
    exit (-1);
  }
  pFF->SetLogFile(&cerr);
  pFF->SetLogLevel(OBFF_LOGLVL_LOW);

  OBMol mol;

  for (;;) {
      mol.Clear();
      if (!conv.Read(&mol, &ifs))
        break;
      if (mol.Empty())
        break;

      OBBuilder builder;
      builder.Build(mol);

      mol.AddHydrogens(false, true); // hydrogens must be added before Setup(mol) is called
      if (!pFF->Setup(mol)) {
        cerr << program_name << ": could not setup force field." << endl;
        exit (-1);
      }

      // Coarse minimization, rotor search, then a tighter minimization.
      pFF->SteepestDescent(500, 1.0e-4);
      pFF->WeightedRotorSearch(250, 50);
      pFF->SteepestDescent(500, 1.0e-6);

      pFF->UpdateCoordinates(mol);

      conv.Write(&mol, &cout);
  } // end for loop

  return(0);
}
Пример #3
0
// obminimize: minimize the energy of every molecule in a file.
//
// Usage: obminimize [options] <filename>
//
// Options that take a value consume the following argument. The first
// argument that is not a recognized option is used as the input file name,
// so options may appear before or after it. The input format is derived from
// the file extension; the output format defaults to "pdb" and can be changed
// with -o<ext> or -o <ext>. Minimized molecules are written to stdout, timing
// information and force field logging go to stderr.
//
// Returns 0 on success and 1 when the force field reports an explosion; the
// program exits with -1 on usage, format, file or force field errors.
int main(int argc,char **argv)
{
  char *program_name= argv[0];
  int steps = 2500;
  double crit = 1e-6;
  bool sd = false;
  bool cut = false;
  bool newton = false;
  bool hydrogens = false;
  double rvdw = 6.0;
  double rele = 10.0;
  int freq = 10;
  string filename, option, ff = "MMFF94";
  bool haveFilename = false;
  OBConversion conv;
  OBFormat *format_out = conv.FindFormat("pdb"); // default output format

  if (argc < 2) {
    cout << "Usage: obminimize [options] <filename>" << endl;
    cout << endl;
    cout << "options:      description:" << endl;
    cout << endl;
    cout << "  -c crit     set convergence criteria (default=1e-6)" << endl;
    cout << endl;
    cout << "  -cg         use conjugate gradients algorithm (default)" << endl;
    cout << endl;
    cout << "  -sd         use steepest descent algorithm" << endl;
    cout << endl;
    cout << "  -newton     use Newton2Num linesearch (default=Simple)" << endl;
    cout << endl;
    cout << "  -ff ffid    select a forcefield:" << endl;
    cout << endl;
    cout << "  -h          add hydrogen atoms" << endl;
    cout << endl;
    cout << "  -n steps    specify the maximum numer of steps (default=2500)" << endl;
    cout << endl;
    cout << "  -cut        use cut-off (default=don't use cut-off)" << endl;
    cout << endl;
    cout << "  -rvdw rvdw  specify the VDW cut-off distance (default=6.0)" << endl;
    cout << endl;
    cout << "  -rele rele  specify the Electrostatic cut-off distance (default=10.0)" << endl;
    cout << endl;
    cout << "  -pf freq    specify the frequency to update the non-bonded pairs (default=10)" << endl;
    cout << endl;
    OBPlugin::List("forcefields", "verbose");
    exit(-1);
  }

  for (int i = 1; i < argc; i++) {
    option = argv[i];
    // True when another argument follows that can serve as this option's value.
    const bool hasValue = (i + 1 < argc);

    if ((option == "-n") && hasValue) {
      steps = atoi(argv[++i]); // steps
    } else if ((option == "-rvdw") && hasValue) {
      rvdw = atof(argv[++i]); // vdw cut-off
    } else if ((option == "-rele") && hasValue) {
      rele = atof(argv[++i]); // ele cut-off
    } else if ((option == "-pf") && hasValue) {
      freq = atoi(argv[++i]); // pair update frequency
    } else if ((option == "-c") && hasValue) {
      crit = atof(argv[++i]); // convergence criteria
    } else if ((option == "-ff") && hasValue) {
      ff = argv[++i]; // force field id
    } else if (option == "-sd") {
      sd = true; // steepest descent
    } else if (option == "-cg") {
      sd = false; // conjugate gradients
    } else if (option == "-cut") {
      cut = true; // enable cut-off
    } else if (option == "-newton") {
      newton = true; // enable Newton2Num
    } else if (option == "-h") {
      hydrogens = true;
    } else if (strncmp(option.c_str(), "-o", 2) == 0) {
      const char *oext = argv[i] + 2;
      if (!*oext) {
        // space left after -o: use next argument
        if (!hasValue) {
          cerr << program_name << ": option -o requires an output format!" << endl;
          exit (-1);
        }
        oext = argv[++i];
      }
      format_out = conv.FindFormat(oext);
    } else if (!haveFilename) {
      // First argument that is not a recognized option: the input file.
      filename = option;
      haveFilename = true;
    }
  }

  // Previously argv[] was indexed past its end here when only options were
  // given; report the missing file name instead.
  if (!haveFilename) {
    cerr << program_name << ": no input file specified!" << endl;
    exit (-1);
  }

  // Find Input filetype
  OBFormat *format_in = conv.FormatFromExt(filename.c_str());

  if (!format_in || !format_out || !conv.SetInAndOutFormats(format_in, format_out)) {
    cerr << program_name << ": cannot read input/output format!" << endl;
    exit (-1);
  }

  // Read the file
  ifstream ifs;
  ifs.open(filename.c_str());
  if (!ifs) {
    cerr << program_name << ": cannot read input file!" << endl;
    exit (-1);
  }

  OBForceField* pFF = OBForceField::FindForceField(ff);
  if (!pFF) {
    cerr << program_name << ": could not find forcefield '" << ff << "'." <<endl;
    exit (-1);
  }

  // set some force field variables
  pFF->SetLogFile(&cerr);
  pFF->SetLogLevel(OBFF_LOGLVL_LOW);
  pFF->SetVDWCutOff(rvdw);
  pFF->SetElectrostaticCutOff(rele);
  pFF->SetUpdateFrequency(freq);
  pFF->EnableCutOff(cut);
  if (newton)
    pFF->SetLineSearchType(LineSearchType::Newton2Num);

  OBMol mol;

  for (;;) {
    mol.Clear();
    if (!conv.Read(&mol, &ifs))
      break;
    if (mol.Empty())
      break;

    if (hydrogens)
      mol.AddHydrogens();

    if (!pFF->Setup(mol)) {
      cerr << program_name << ": could not setup force field." << endl;
      exit (-1);
    }

    OBStopwatch timer;
    timer.Start();
    if (sd) {
      pFF->SteepestDescentInitialize(steps, crit);
    } else {
      pFF->ConjugateGradientsInitialize(steps, crit);
    }

    // The loop runs for as long as the TakeNSteps call returns true.
    bool keepGoing = true;
    unsigned int totalSteps = 1;
    while (keepGoing) {
      if (sd)
        keepGoing = pFF->SteepestDescentTakeNSteps(1);
      else
        keepGoing = pFF->ConjugateGradientsTakeNSteps(1);
      totalSteps++;

      if (pFF->DetectExplosion()) {
        // mol still holds the coordinates copied after the previous step.
        cerr << "explosion has occured!" << endl;
        conv.Write(&mol, &cout);
        return(1);
      } else
        pFF->GetCoordinates(mol);
    }
    double timeElapsed = timer.Elapsed();

    pFF->GetCoordinates(mol);

    conv.Write(&mol, &cout);
    cerr << "Time: " << timeElapsed << "seconds. Iterations per second: " <<  double(totalSteps) / timeElapsed << endl;
  } // end for loop

  return(0);
}
Пример #4
0
void residue_test()
{
  cout << "# Unit tests for OBResidue \n";

  // OBResidue isolation tests
  OBResidue emptyResidue, testRes1;

  // chains parser tests

  // PR#1515198
  static const string loopTest1("C1(C(NC(C(N1C(C(NC(C=Cc1ccccc1)=O)C)=O)Cc1ccccc1)=O)Cc1ccccc1)=O");
  OBConversion conv;
  OBMol mol;
  OBFormat *inFormat = conv.FindFormat("SMI");
  
  conv.SetInFormat(inFormat);
  conv.ReadString(&mol, loopTest1);
  chainsparser.PerceiveChains(mol);

  // parse common residues
  unsigned int testCount = 3;
  static const string ala("NC(C)C(O)(=O)");
  CheckValidResidue(conv, ala, ++testCount);
  static const string arg("NC(CCCNC(N)=N)C(O)(=O)");
  CheckValidResidue(conv, arg, ++testCount);
  static const string asn("NC(CC(N)=O)C(O)(=O)");
  CheckValidResidue(conv, asn, ++testCount);
  static const string asp("NC(CC(O)=O)C(O)(=O)");
  CheckValidResidue(conv, asp, ++testCount);
  static const string cys("NC(CS)C(O)(=O)");
  CheckValidResidue(conv, cys, ++testCount);
  static const string glu("NC(CCC(O)=O)C(O)(=O)");
  CheckValidResidue(conv, glu, ++testCount);
  static const string gln("NC(CCC(N)=O)C(O)(=O)");
  CheckValidResidue(conv, gln, ++testCount);
  static const string gly("NC([H])C(O)(=O)");
  CheckValidResidue(conv, gly, ++testCount);
  static const string his("NC(CC1=CNC=N1)C(O)(=O)");
  CheckValidResidue(conv, his, ++testCount);
  static const string ile("NC(C(CC)C)C(O)(=O)");
  CheckValidResidue(conv, ile, ++testCount);
  static const string leu("NC(CC(C)C)C(O)(=O)");
  CheckValidResidue(conv, leu, ++testCount);
  static const string lys("NC(CCCCN)C(O)(=O)");
  CheckValidResidue(conv, lys, ++testCount);
  static const string met("NC(CCSC)C(O)(=O)");
  CheckValidResidue(conv, met, ++testCount);
  static const string phe("NC(CC1=CC=CC=C1)C(O)(=O)");
  CheckValidResidue(conv, phe, ++testCount);
  static const string pro("OC(C1CCCN1)(=O)");
  CheckValidResidue(conv, pro, ++testCount);
  static const string ser("NC(CO)C(O)(=O)");
  CheckValidResidue(conv, ser, ++testCount);
  static const string thr("NC(C(C)O)C(O)(=O)");
  CheckValidResidue(conv, thr, ++testCount);
  static const string trp("NC(CC1=CNC2=C1C=CC=C2)C(O)(=O)");
  CheckValidResidue(conv, trp, ++testCount);
  static const string tyr("NC(CC1=CC=C(O)C=C1)C(O)(=O)");
  CheckValidResidue(conv, tyr, ++testCount);
  static const string val("NC(C(C)C)C(O)(=O)");
  CheckValidResidue(conv, val, ++testCount);

  // nucleics
  static const string a("OC[C@H]1O[C@H](C[C@@H]1O)n1cnc2c(ncnc12)N");
  CheckValidResidue(conv, a, ++testCount);
  static const string g("OC[C@H]1O[C@H](C[C@@H]1O)n1c(nc(cc1)N)=O");
  CheckValidResidue(conv, g, ++testCount);
  static const string c("OC[C@H]1O[C@H](C[C@@H]1O)n1cnc2c([nH]c(nc12)N)=O");
  CheckValidResidue(conv, c, ++testCount);
  static const string t("OC[C@H]1O[C@H](C[C@@H]1O)n1c([nH]c(c(c1)C)=O)=O");
  CheckValidResidue(conv, t, ++testCount);
  static const string u("OC[C@H]1O[C@H]([C@@H]([C@@H]1O)O)n1c([nH]c(cc1)=O)=O");
  CheckValidResidue(conv, u, ++testCount);

  // invalid residues
  static const string benzene("c1ccccc1");
  CheckInvalidResidue(conv, benzene, ++testCount);
  static const string pyrrole("c1cccn[H]1");
  CheckInvalidResidue(conv, pyrrole, ++testCount);
  static const string amine("CC(=O)CCN");
  CheckInvalidResidue(conv, amine, ++testCount);

  // check some dipeptides
  static const string ala_val("NC(C)C(=O)NC(C(C)C)C(=O)O");
  CheckValidDipeptide(conv, ala_val, ++testCount);
  static const string cys_leu("NC(CS)C(=O)NC(CC(C)C)C(=O)O");
  CheckValidDipeptide(conv, cys_leu, ++testCount);
 
}
Пример #5
0
int main(int argc,char *argv[])
{
  // turn off slow sync with C-style output (we don't use it anyway).
  std::ios::sync_with_stdio(false);

  if (argc != 1)
    {
      cout << "Usage: residue" << endl;
      cout << " Unit tests for OBResidue " << endl;
      return(-1);
    }

  cout << "# Unit tests for OBResidue \n";

  cout << "ok 1\n"; // for loading tests

  // OBResidue isolation tests
  OBResidue emptyResidue, testRes1;
  cout << "ok 2\n"; // ctor works

  // chains parser tests

  // PR#1515198
  static const string loopTest1("C1(C(NC(C(N1C(C(NC(C=Cc1ccccc1)=O)C)=O)Cc1ccccc1)=O)Cc1ccccc1)=O");
  OBConversion conv;
  OBMol mol;
  OBFormat *inFormat = conv.FindFormat("SMI");
  
  conv.SetInFormat(inFormat);
  conv.ReadString(&mol, loopTest1);
  chainsparser.PerceiveChains(mol);

  // OK if it doesn't crash
  cout << "ok 3\n";

  // parse common residues
  unsigned int testCount = 3;
  static const string ala("NC(C)C(O)(=O)");
  CheckValidResidue(conv, ala, ++testCount);
  static const string arg("NC(CCCNC(N)=N)C(O)(=O)");
  CheckValidResidue(conv, arg, ++testCount);
  static const string asn("NC(CC(N)=O)C(O)(=O)");
  CheckValidResidue(conv, asn, ++testCount);
  static const string asp("NC(CC(O)=O)C(O)(=O)");
  CheckValidResidue(conv, asp, ++testCount);
  static const string cys("NC(CS)C(O)(=O)");
  CheckValidResidue(conv, cys, ++testCount);
  static const string glu("NC(CCC(O)=O)C(O)(=O)");
  CheckValidResidue(conv, glu, ++testCount);
  static const string gln("NC(CCC(N)=O)C(O)(=O)");
  CheckValidResidue(conv, gln, ++testCount);
  static const string gly("NC([H])C(O)(=O)");
  CheckValidResidue(conv, gly, ++testCount);
  static const string his("NC(CC1=CNC=N1)C(O)(=O)");
  CheckValidResidue(conv, his, ++testCount);
  static const string ile("NC(C(CC)C)C(O)(=O)");
  CheckValidResidue(conv, ile, ++testCount);
  static const string leu("NC(CC(C)C)C(O)(=O)");
  CheckValidResidue(conv, leu, ++testCount);
  static const string lys("NC(CCCCN)C(O)(=O)");
  CheckValidResidue(conv, lys, ++testCount);
  static const string met("NC(CCSC)C(O)(=O)");
  CheckValidResidue(conv, met, ++testCount);
  static const string phe("NC(CC1=CC=CC=C1)C(O)(=O)");
  CheckValidResidue(conv, phe, ++testCount);
  static const string pro("OC(C1CCCN1)(=O)");
  CheckValidResidue(conv, pro, ++testCount);
  static const string ser("NC(CO)C(O)(=O)");
  CheckValidResidue(conv, ser, ++testCount);
  static const string thr("NC(C(C)O)C(O)(=O)");
  CheckValidResidue(conv, thr, ++testCount);
  static const string trp("NC(CC1=CNC2=C1C=CC=C2)C(O)(=O)");
  CheckValidResidue(conv, trp, ++testCount);
  static const string tyr("NC(CC1=CC=C(O)C=C1)C(O)(=O)");
  CheckValidResidue(conv, tyr, ++testCount);
  static const string val("NC(C(C)C)C(O)(=O)");
  CheckValidResidue(conv, val, ++testCount);

  // nucleics
  static const string a("OC[C@H]1O[C@H](C[C@@H]1O)n1cnc2c(ncnc12)N");
  CheckValidResidue(conv, a, ++testCount);
  static const string g("OC[C@H]1O[C@H](C[C@@H]1O)n1c(nc(cc1)N)=O");
  CheckValidResidue(conv, g, ++testCount);
  static const string c("OC[C@H]1O[C@H](C[C@@H]1O)n1cnc2c([nH]c(nc12)N)=O");
  CheckValidResidue(conv, c, ++testCount);
  static const string t("OC[C@H]1O[C@H](C[C@@H]1O)n1c([nH]c(c(c1)C)=O)=O");
  CheckValidResidue(conv, t, ++testCount);
  static const string u("OC[C@H]1O[C@H]([C@@H]([C@@H]1O)O)n1c([nH]c(cc1)=O)=O");
  CheckValidResidue(conv, u, ++testCount);

  // invalid residues
  static const string benzene("c1ccccc1");
  CheckInvalidResidue(conv, benzene, ++testCount);
  static const string pyrrole("c1cccn[H]1");
  CheckInvalidResidue(conv, pyrrole, ++testCount);
  static const string amine("CC(=O)CCN");
  CheckInvalidResidue(conv, amine, ++testCount);

  // check some dipeptides
  static const string ala_val("NC(C)C(=O)NC(C(C)C)C(=O)O");
  CheckValidDipeptide(conv, ala_val, ++testCount);
  static const string cys_leu("NC(CS)C(=O)NC(CC(C)C)C(=O)O");
  CheckValidDipeptide(conv, cys_leu, ++testCount);
 
  // the number of tests for "prove"
  cout << "1.." << testCount << "\n";

  return(0);
}
Пример #6
0
void mmff94_validate()
{
  OBForceField* pFF = OBForceField::FindForceField("MMFF94");

  OBConversion conv;
  OBFormat *format_in = conv.FindFormat("mol2");
  vector<string> vs;
  vector<int> types;
  vector<double> fcharges, pcharges;
  vector<double> bond_lengths;
  char buffer[BUFF_SIZE], _logbuf[BUFF_SIZE];
  bool molfound, atomfound, bondfound, fchgfound, pchgfound;
  double etot, ebond, eangle, eoop, estbn, etor, evdw, eeq;
  double termcount; //1=bond, 2=angle, 3=strbnd, 4=torsion, 5=oop
  int n = 0;

  BOOST_REQUIRE_MESSAGE( format_in && conv.SetInFormat(format_in), "Could not set mol2 input format" );

  ifstream ifs, ifs2;
  ofstream ofs;

  ifs.open("MMFF94_dative.mol2");
  BOOST_REQUIRE_MESSAGE( ifs, "Could not open ./MMFF94_dative.mol2" );
 
  ifs2.open("MMFF94_opti.log");
  BOOST_REQUIRE_MESSAGE( ifs2, "Could not open ./MMFF94_opti.log" );
    
  ofs.open("MMFF94_openbabel.log");
  BOOST_REQUIRE_MESSAGE( ofs, "Could not open ./MMFF94_openbabel.log" );
    
  pFF->SetLogFile(&ofs);
  pFF->SetLogLevel(OBFF_LOGLVL_HIGH);
   
  OBMol mol;
  for (unsigned int c=1;; c++) {
    mol.Clear();
    types.clear();
    fcharges.clear();
    pcharges.clear();
    bond_lengths.clear();

    if (!conv.Read(&mol, &ifs))
      break;
    if (mol.Empty())
      break;
     
    BOOST_CHECK_MESSAGE( pFF->Setup(mol), "Could not setup calculations (missing parameters...)" );

    pFF->GetAtomTypes(mol);
    //pFF->GetFormalCharges(mol);
    pFF->GetPartialCharges(mol);

    termcount = 0;
    molfound = false;
    atomfound = false;
    bondfound = false;
    fchgfound = false;
    pchgfound = false;

    // Parse log file for types, charges, energies, ..
    while (ifs2.getline(buffer, 150)) {
      tokenize(vs, buffer);
      if (vs.size() == 0) {
        bondfound = false;
        continue;
      }
	
      string str(buffer);
      if (string::npos != str.find(mol.GetTitle(),0))
        molfound = true;

      // read atom types
      if (atomfound) {
        if (n) {
          types.push_back(atoi(vs[2].c_str()));
          types.push_back(atoi(vs[5].c_str()));
          types.push_back(atoi(vs[8].c_str()));
          types.push_back(atoi(vs[11].c_str()));
        } else {
          if (vs.size() > 2)
            types.push_back(atoi(vs[2].c_str()));
          if (vs.size() > 5)
            types.push_back(atoi(vs[5].c_str()));
          if (vs.size() > 8)
            types.push_back(atoi(vs[8].c_str()));
   
          atomfound = false;
        }
        n--;
      }
        
      // read formal charges
      if (fchgfound) {
        if (n) {
          fcharges.push_back(atof(vs[2].c_str()));
          fcharges.push_back(atof(vs[5].c_str()));
          fcharges.push_back(atof(vs[8].c_str()));
          fcharges.push_back(atof(vs[11].c_str()));
        } else {
          if (vs.size() > 2)
            fcharges.push_back(atof(vs[2].c_str()));
          if (vs.size() > 5)
            fcharges.push_back(atof(vs[5].c_str()));
          if (vs.size() > 8)
            fcharges.push_back(atof(vs[8].c_str()));
   
          fchgfound = false;
        }
        n--;
      }
 
      // read partial charges
      if (pchgfound) {
        if (n) {
          pcharges.push_back(atof(vs[2].c_str()));
          pcharges.push_back(atof(vs[5].c_str()));
          pcharges.push_back(atof(vs[8].c_str()));
          pcharges.push_back(atof(vs[11].c_str()));
        } else {
          if (vs.size() > 2)
            pcharges.push_back(atof(vs[2].c_str()));
          if (vs.size() > 5)
            pcharges.push_back(atof(vs[5].c_str()));
          if (vs.size() > 8)
            pcharges.push_back(atof(vs[8].c_str()));
   
          pchgfound = false;
        }
        n--;
      }
 
      // identify blocks
      if (molfound && EQn(buffer, " ATOM NAME  TYPE", 16)) {
        atomfound = true;
        n = mol.NumAtoms() / 4;
      }
      if (molfound && EQn(buffer, "   ATOM   FCHARGE", 17)) {
        fchgfound = true;
        n = mol.NumAtoms() / 4;
      }
      if (molfound && EQn(buffer, "   ATOM    CHARGE", 17)) {
        pchgfound = true;
        n = mol.NumAtoms() / 4;
      }

      if (bondfound)
        bond_lengths.push_back(atof(vs[7].c_str()));

      // Get the energies
      if (molfound) {
        if (EQn(buffer, " Total ENERGY", 13))
          etot = atof(vs[3].c_str());
        if (EQn(buffer, " Bond Stretching", 16))
          ebond = atof(vs[2].c_str());
        if (EQn(buffer, " Angle Bending", 14))
          eangle = atof(vs[2].c_str());
        if (EQn(buffer, " Out-of-Plane Bending", 21))
          eoop = atof(vs[2].c_str());
        if (EQn(buffer, " Stretch-Bend", 13))
          estbn = atof(vs[1].c_str());
        if (EQn(buffer, "     Total Torsion", 18))
          etor = atof(vs[2].c_str());
        if (EQn(buffer, "     Net vdW", 12))
          evdw = atof(vs[2].c_str());
        if (EQn(buffer, " Electrostatic", 14))
          eeq = atof(vs[1].c_str());
        if (EQn(buffer, " ---------------------", 22) && (termcount == 0)) {
          termcount++;
        bondfound = true;
        }
        if (EQn(buffer, " OPTIMOL>  # read next", 22))
          break;
      }
    } // while (getline)
      
    ostringstream os;
    vector<int>::iterator i;
    vector<double>::iterator di;
    unsigned int ni;
    bool failed;

    cout << "--------------------------------------------------------------------------------" << endl;
    cout << "                                                                                " << endl;
    cout << "  VALIDATE MOLECULE " << c << ": " << mol.GetTitle() << endl;
    cout << "                                                                                " << endl;
    cout << "IDX  HYB  AROM  OB_TYPE  LOG_TYPE       RESULT                                  " << endl;
    cout << "----------------------------------------------                                  " << endl;
 
    // 
    // validate atom types
    //
    ni = 1;
    failed = false;
    for (i = types.begin(); i != types.end();i++) {
      if (ni > mol.NumAtoms())
        continue;

      OBPairData *type = (OBPairData*) mol.GetAtom(ni)->GetData("FFAtomType");
      if (!type)
	continue;

      os.str("");
      os << "In molecule " << mol.GetTitle() << ": Wrong atom type for atom ";
      os << ni << " # found " << type->GetValue() << ", expected " << *i;
      BOOST_CHECK_MESSAGE( atoi(type->GetValue().c_str()) == (*i), os.str().c_str());
      
      if (atoi(type->GetValue().c_str()) == (*i))
        snprintf(_logbuf, BUFF_SIZE, "%2d   %3d  %4d    %3d      %3d          PASSED", 
                  mol.GetAtom(ni)->GetIdx(), mol.GetAtom(ni)->GetHyb(), 
                  mol.GetAtom(ni)->IsAromatic(), atoi(mol.GetAtom(ni)->GetType()), *i);
      else {
        snprintf(_logbuf, BUFF_SIZE, "%2d   %3d  %4d    %3d      %3d      XXX FAILED XXX", 
                  mol.GetAtom(ni)->GetIdx(), mol.GetAtom(ni)->GetHyb(), 
                  mol.GetAtom(ni)->IsAromatic(), atoi(type->GetValue().c_str()), *i);
        failed = true;
      }
      
      cout << _logbuf << endl;
        
      ni++;
    }

    /*
    cout << endl;
    cout << "IDX  OB_FCARGE  LOG_FCHARGE       RESULT" << endl;
    cout << "----------------------------------------" << endl;
            
    // 
    // validate formal charges
    //
    ni = 1;
    for (di = fcharges.begin(); di != fcharges.end(); di++) {
      if (ni > mol.NumAtoms())
        continue;
	
      if (fabs((*di) - mol.GetAtom(ni)->GetPartialCharge()) <= 0.001)
        snprintf(_logbuf, BUFF_SIZE, "%2d   %7.4f     %7.4f          PASSED", mol.GetAtom(ni)->GetIdx(), mol.GetAtom(ni)->GetPartialCharge(), *di);
      else {
        snprintf(_logbuf, BUFF_SIZE, "%2d   %7.4f     %7.4f      XXX FAILED XXX", mol.GetAtom(ni)->GetIdx(), mol.GetAtom(ni)->GetPartialCharge(), *di);
        failed = true;
      }
      
      cout << _logbuf << endl;
        
      ni++;
    }

    */

    cout << endl;
    cout << "IDX  OB_PCARGE  LOG_PCHARGE       RESULT" << endl;
    cout << "----------------------------------------" << endl;
      
    // 
    // validate partial charges
    //
    ni = 1;
    for (di = pcharges.begin(); di != pcharges.end(); di++) {
      if (ni > mol.NumAtoms())
        continue;
	
      OBPairData *chg = (OBPairData*) mol.GetAtom(ni)->GetData("FFPartialCharge");
      if (!chg)
	continue;

      os.str("");
      os << "In molecule " << mol.GetTitle() << ": Wrong partial charge for atom ";
      os << ni << " # found " << chg->GetValue() << ", expected " << *di;
      BOOST_CHECK_MESSAGE( fabs((*di) - atof(chg->GetValue().c_str())) <= 0.001, os.str().c_str());

      if (fabs((*di) - atof(chg->GetValue().c_str())) <= 0.001)
        snprintf(_logbuf, BUFF_SIZE, "%2d   %7.4f     %7.4f          PASSED", 
		  mol.GetAtom(ni)->GetIdx(), atof(chg->GetValue().c_str()), *di);
      else {
        snprintf(_logbuf, BUFF_SIZE, "%2d   %7.4f     %7.4f      XXX FAILED XXX",
                  mol.GetAtom(ni)->GetIdx(), atof(chg->GetValue().c_str()), *di);
        failed = true;
      }
      
      cout << _logbuf << endl;
        
      ni++;
    }

    double ene, delta;
    cout << endl;
    cout << "TERM                     OB ENERGY     LOG ENERGY         DELTA" << endl;
    cout << "---------------------------------------------------------------" << endl;
 
    // 
    // validate energies
    //
    
    // bond stretching
    ene = pFF->E_Bond();
    delta = (ene - ebond);
    
    os.str("");
    os << "In molecule " << mol.GetTitle() << ": Wrong bond stretching energy ";
    os << " # found " << ene << ", expected " << ebond;
    BOOST_CHECK_MESSAGE( delta < 0.005, os.str().c_str());

    snprintf(_logbuf, BUFF_SIZE, "Bond Stretching        %11.5f    %11.5f   %11.5f", ene, ebond, delta);
    cout << _logbuf << endl;
    
    // angle bending
    ene = pFF->E_Angle();
    delta = (ene - eangle);

    os.str("");
    os << "In molecule " << mol.GetTitle() << ": Wrong angle bending energy ";
    os << " # found " << ene << ", expected " << eangle;
    BOOST_CHECK_MESSAGE( delta < 0.005, os.str().c_str());

    snprintf(_logbuf, BUFF_SIZE, "Angle Bending          %11.5f    %11.5f   %11.5f", ene, eangle, delta);
    cout << _logbuf << endl;
    
    // stretch bending
    ene = pFF->E_StrBnd();
    delta = (ene - estbn);

    os.str("");
    os << "In molecule " << mol.GetTitle() << ": Wrong stretch bending energy ";
    os << " # found " << ene << ", expected " << estbn;
    BOOST_CHECK_MESSAGE( delta < 0.005, os.str().c_str());

    snprintf(_logbuf, BUFF_SIZE, "Stretch-Bending        %11.5f    %11.5f   %11.5f", ene, estbn, delta);
    cout << _logbuf << endl;
    
    // OOP
    ene = pFF->E_OOP();
    delta = (ene - eoop);

    os.str("");
    os << "In molecule " << mol.GetTitle() << ": Wrong out-of-plane bending energy ";
    os << " # found " << ene << ", expected " << eoop;
    BOOST_CHECK_MESSAGE( delta < 0.005, os.str().c_str());

    snprintf(_logbuf, BUFF_SIZE, "Out-Of-Plane Bending   %11.5f    %11.5f   %11.5f", ene, eoop, delta);
    cout << _logbuf << endl;
    
    // Torsional
    ene = pFF->E_Torsion();
    delta = (ene - etor);

    os.str("");
    os << "In molecule " << mol.GetTitle() << ": Wrong torsional energy ";
    os << " # found " << ene << ", expected " << etor;
    BOOST_CHECK_MESSAGE( delta < 0.005, os.str().c_str());
 
    snprintf(_logbuf, BUFF_SIZE, "Torsional              %11.5f    %11.5f   %11.5f", ene, etor, delta);
    cout << _logbuf << endl;
    
    // VDW
    ene = pFF->E_VDW();
    delta = (ene - evdw);

    os.str("");
    os << "In molecule " << mol.GetTitle() << ": Wrong van der waals energy ";
    os << " # found " << ene << ", expected " << evdw;
    BOOST_CHECK_MESSAGE( delta < 0.005, os.str().c_str());
 
    snprintf(_logbuf, BUFF_SIZE, "Van der Waals          %11.5f    %11.5f   %11.5f", ene, evdw, delta);
    cout << _logbuf << endl;
      
    // Electrostatic 
    ene = pFF->E_Electrostatic();
    delta = (ene - eeq);

    os << "In molecule " << mol.GetTitle() << ": Wrong electrostatic energy ";
    os << " # found " << ene << ", expected " << eeq;
    BOOST_CHECK_MESSAGE( delta < 0.005, os.str().c_str());
 
    snprintf(_logbuf, BUFF_SIZE, "Electrostatic          %11.5f    %11.5f   %11.5f", ene, eeq, delta);
    cout << _logbuf << endl;

    cout << endl;
    ene = pFF->Energy();
    delta = (ene - etot);

    os.str("");
    os << "In molecule " << mol.GetTitle() << ": Wrong total energy ";
    os << " # found " << ene << ", expected " << etot;
    BOOST_CHECK_MESSAGE( delta < 0.005, os.str().c_str());
 
    snprintf(_logbuf, BUFF_SIZE, "Total ENERGY           %11.5f    %11.5f   %11.5f", ene, etot, delta);
    cout << _logbuf << endl;

  } // for (unsigned int c;; c++ )
   
  if (ifs)
    ifs.close();
  if (ifs2)
    ifs2.close();
  if (ofs)
    ofs.close();
}
Пример #7
0
int main(int argc,char **argv)
{
  char *program_name= argv[0];
  int c;
  char *FileIn = NULL;

  if (argc != 2)
    {
      string err = "Usage: ";
      err += program_name;
      err += " <filename>\n"
      "Output format:\n"
        "name NAME\n"
        "formula  FORMULA\n"
        "mol_weight MOLECULAR_WEIGHT\n"
        "exact_mass ISOTOPIC MASS\n"
        "canonical_SMILES STRING\n"
        "InChI  STRING\n"
        "num_atoms  NUM\n"
        "num_bonds  NUM\n"
        "num_residues  NUM\n"
	"num_rotors NUM\n"
        "sequence RESIDUE_SEQUENCE\n"
        "num_rings NUMBER_OF_RING_(SSSR)\n"
        "logP   NUM\n"
        "PSA    POLAR_SURFACE_AREA\n"
        "MR     MOLAR REFRACTIVITY";
      err += "$$$$";
//      ThrowError(err); wasn't being output because error level too low
      cerr << err; //Why not do directly
      exit(-1);
    }
  else
    {
      FileIn  = argv[1];
    }

  // Find Input filetype
  OBConversion conv;
  OBFormat *format = conv.FormatFromExt(FileIn);
    
  if (!format || !conv.SetInFormat(format))
    {
      cerr << program_name << ": cannot read input format!" << endl;
      exit (-1);
    }

  ifstream ifs;

  // Read the file
  ifs.open(FileIn);
  if (!ifs)
    {
      cerr << program_name << ": cannot read input file!" << endl;
      exit (-1);
    }
  
  OBMol mol;
  OBFormat *canSMIFormat = conv.FindFormat("can");
  OBFormat *inchiFormat = conv.FindFormat("inchi");


  ////////////////////////////////////////////////////////////////////////////
  // List of properties
  // Name
  // Molecular weight (Standard molar mass given by IUPAC atomic masses)
  // Number of rings : the size of the smallest set of smallest rings (SSSR)
  
  //.....ADD YOURS HERE.....
  
  for (c = 1;; ++c)
    {
      mol.Clear();
      conv.Read(&mol, &ifs);
      if (mol.Empty())
        break;
      
      if (!mol.HasHydrogensAdded())
        mol.AddHydrogens();
      // Print the properties
      if (strlen(mol.GetTitle()) != 0)
        cout << "name             " << mol.GetTitle() << endl;
      else 
        cout << "name             " << FileIn << " " << c << endl;

      cout << "formula          " << mol.GetFormula() << endl;
      cout << "mol_weight       " << mol.GetMolWt() << endl;
      cout << "exact_mass       " << mol.GetExactMass() << endl;

      string smilesString = "-";
      if (canSMIFormat) {
        conv.SetOutFormat(canSMIFormat);
        smilesString = conv.WriteString(&mol);
        if ( smilesString.length() == 0 )
        {
          smilesString = "-";
        }
      }
      cout << "canonical_SMILES " << smilesString << endl;

      string inchiString = "-";
      if (inchiFormat) {
        conv.SetOutFormat(inchiFormat);
        inchiString = conv.WriteString(&mol);
        if ( inchiString.length() == 0 )
        {
          inchiString = "-";
        }
      }
      cout << "InChI            " << inchiString << endl;

      cout << "num_atoms        " << mol.NumAtoms() << endl;
      cout << "num_bonds        " << mol.NumBonds() << endl;
      cout << "num_residues     " << mol.NumResidues() << endl;
      cout << "num_rotors       " << mol.NumRotors() << endl;
      if (mol.NumResidues() > 0)
        cout << "sequence         " << sequence(mol) << endl;
      else
        cout << "sequence         " << "-" << endl;

      cout << "num_rings        " << nrings(mol) << endl;

      OBDescriptor* pDesc;
      pDesc= OBDescriptor::FindType("logP");
      if(pDesc)
        cout << "logP             " << pDesc->Predict(&mol) << endl;

      pDesc = OBDescriptor::FindType("TPSA");
      if(pDesc)
        cout << "PSA              " << pDesc->Predict(&mol) << endl;

      pDesc = OBDescriptor::FindType("MR");
      if(pDesc)
        cout << "MR               " << pDesc->Predict(&mol) << endl;

      cout << "$$$$" << endl; // SDF like end of compound descriptor list
      
      //Other OBDescriptors could be output here, even ones that were rarely
      // used. Since these are plugin classes, they may not be loaded, but
      // then with code like the above they are just ignored.
    } // end for loop
  
  return(0);
}