bool FastSearchFormat::WriteChemObject(OBConversion* pConv)
  {
    //Prepares or updates an index file. Called for each molecule indexed
    bool update = pConv->IsOption("u")!=NULL;

    static ostream* pOs;
    static bool NewOstreamUsed;
    if(fsi==NULL)
      {
        //First pass sets up FastSearchIndexer object
        pOs = pConv->GetOutStream();// with named index it is already open
        NewOstreamUsed=false;
        string mes("prepare an");
        if(update)
          mes = "update the";
        clog << "This will " << mes << " index of " << pConv->GetInFilename()
             <<  " and may take some time..." << flush;

        if(!pConv->IsLastFile())
        {
          obErrorLog.ThrowError(__FUNCTION__,
            "There should not be multiple input files. A .fs file is an index of a single datafile.",
            obError);
          return false;
        }

        std::string auditMsg = "OpenBabel::Write fastsearch index ";
        std::string description(Description());
        auditMsg += description.substr( 0, description.find('\n') );
        obErrorLog.ThrowError(__FUNCTION__,auditMsg,obAuditMsg);

        FptIndex* pidx=NULL; //used with update

        //if(pOs==&cout) did not work with GUI
        if(!dynamic_cast<ofstream*>(pOs))
          {
            //No index filename specified
            //Derive index name from datafile name
            string indexname=pConv->GetInFilename();
            string::size_type pos=indexname.find_last_of('.');
            if(pos!=string::npos)
              indexname.erase(pos);
            indexname += ".fs";

            bool idxok=true;
            if(update)
              {
                LastSeekpos = 0;

                //Read in existing index
                idxok=false;
                ifstream ifs(indexname.c_str(),ifstream::binary);
                if(ifs.good())
                  {
                    pidx = new FptIndex;
                    idxok = pidx->Read(&ifs);
                  }
              }//ifs closed here

            pOs = new ofstream(indexname.c_str(),ofstream::binary);

            if(!pOs->good() || !idxok)
              {
                stringstream errorMsg;
                errorMsg << "Trouble opening or reading " << indexname << endl;
                obErrorLog.ThrowError(__FUNCTION__, errorMsg.str(), obError);
                static_cast<ofstream *>(pOs)->close(); // close the file before quitting
                delete pOs;
                delete pidx; // remove possible memory leak
                return false;
              }
            NewOstreamUsed=true;
          }
        else // not cout
          {
            if(update)
              {
                obErrorLog.ThrowError(__FUNCTION__,
                  "Currently, updating is only done on index files that"
                  "have the same name as the datafile.\n"
                  "Do not specify an output file; use the form:\n"
	                "   babel datafile.xxx -ofs -xu", obError);
                return false;
              }
          }

        int nbits = 0;
        const char* p = pConv->IsOption("N");
        if(p)
          nbits = atoi(p);

        string fpid; //fingerprint type
        p=pConv->IsOption("f");
        if(p)
          fpid=p;

        //Prepare name without path
        string datafilename = pConv->GetInFilename();
        if(datafilename.empty())
          {
            obErrorLog.ThrowError(__FUNCTION__, "No datafile!", obError);
            return false;
          }
        string::size_type pos = datafilename.find_last_of("/\\");
        if(pos!=string::npos)
          datafilename=datafilename.substr(pos+1);

        nmols = pConv->NumInputObjects();
        if(nmols>0)
          clog << "\nIt contains " << nmols << " molecules" << flush;
        if(nmols>500000)
        {
          istream* is = pConv->GetInStream();
          streampos origpos = is->tellg();
          is->seekg(0,ios_base::end);
          long long filesize = is->tellg();
          if(filesize > 4294967295u)
          {
            obErrorLog.ThrowError(__FUNCTION__, "The datafile must not be larger than 4GB", obError);
            return false;
          }
          is->seekg(origpos);
        }
        sw.Start();

        if(update)
          {
            fsi = new FastSearchIndexer(pidx, pOs, nmols);//using existing index

            //Seek to position in datafile of last of old objects
            LastSeekpos = *(pidx->seekdata.end()-1);
            pConv->GetInStream()->seekg(LastSeekpos);
          }
        else
          fsi = new FastSearchIndexer(datafilename, pOs, fpid, nbits, nmols);

        obErrorLog.StopLogging();
      }

    //All passes provide an object for indexing
    OBBase* pOb = pConv->GetChemObject();
    OBMol* pmol = dynamic_cast<OBMol*> (pOb);
    if(pmol)
      pmol->ConvertDativeBonds();//use standard form for dative bonds

    streampos seekpos = pConv->GetInPos();
    if(!update || seekpos>LastSeekpos)
    {
      fsi->Add(pOb, seekpos );
      if(pConv->GetOutputIndex()==400 && nmols>1000)
      {
        clog << " Estimated completion time ";
        double secs = sw.Elapsed() * nmols / 400; //
        streamsize op = clog.precision(0);
        if(secs>150)
          clog << secs/60 << " minutes" << endl;
    else
          clog << secs << " seconds" << endl;
        clog.precision(op);
      }
    }
    else
      //Don't index old objects during update. Don't increment pConv->Index.
      pConv->SetOutputIndex(pConv->GetOutputIndex()-1);

    if(pConv->IsLast())
      {
        //Last pass
        delete fsi; //saves index file
        if(NewOstreamUsed)
          delete pOs;

        //return to starting conditions
        fsi=NULL;

        obErrorLog.StartLogging();

        double secs = sw.Elapsed();
        if(secs>150)
          clog << "\n It took " << secs/60 << " minutes" << endl;
        else
          clog << "\n It took " << secs << " seconds" << endl;
      }
    delete pOb;
    return true;
  }
Exemple #2
0
int main(int argc,char **argv)
{
  char *program_name= argv[0];
  int c;
  int steps = 2500;
  double crit = 1e-6;
  bool sd = false;
  bool cut = false;
  bool newton = false;
  bool hydrogens = false;
  double rvdw = 6.0;
  double rele = 10.0;
  int freq = 10;
  string basename, filename = "", option, option2, ff = "MMFF94";
  char *oext;
  OBConversion conv;
  OBFormat *format_out = conv.FindFormat("pdb"); // default output format

  if (argc < 2) {
    cout << "Usage: obminimize [options] <filename>" << endl;
    cout << endl;
    cout << "options:      description:" << endl;
    cout << endl;
    cout << "  -c crit     set convergence criteria (default=1e-6)" << endl;
    cout << endl;
    cout << "  -cg         use conjugate gradients algorithm (default)" << endl;
    cout << endl;
    cout << "  -sd         use steepest descent algorithm" << endl;
    cout << endl;
    cout << "  -newton     use Newton2Num linesearch (default=Simple)" << endl;
    cout << endl;
    cout << "  -ff ffid    select a forcefield:" << endl;
    cout << endl;
    cout << "  -h          add hydrogen atoms" << endl;
    cout << endl;
    cout << "  -n steps    specify the maximum numer of steps (default=2500)" << endl;
    cout << endl;
    cout << "  -cut        use cut-off (default=don't use cut-off)" << endl;
    cout << endl;
    cout << "  -rvdw rvdw  specify the VDW cut-off distance (default=6.0)" << endl;
    cout << endl;
    cout << "  -rele rele  specify the Electrostatic cut-off distance (default=10.0)" << endl;
    cout << endl;
    cout << "  -pf freq    specify the frequency to update the non-bonded pairs (default=10)" << endl;
    cout << endl;
    OBPlugin::List("forcefields", "verbose");
    exit(-1);
  } else {
    int ifile = 1;
    for (int i = 1; i < argc; i++) {
      option = argv[i];

      // steps
      if ((option == "-n") && (argc > (i+1))) {
        steps = atoi(argv[i+1]);
        ifile += 2;
      }
      // vdw cut-off
      if ((option == "-rvdw") && (argc > (i+1))) {
        rvdw = atof(argv[i+1]);
        ifile += 2;
      }
      // ele cut-off
      if ((option == "-rele") && (argc > (i+1))) {
        rele = atof(argv[i+1]);
        ifile += 2;
      }
      // pair update frequency
      if ((option == "-pf") && (argc > (i+1))) {
        freq = atoi(argv[i+1]);
        ifile += 2;
      }
      // steepest descent
      if (option == "-sd") {
        sd = true;
        ifile++;
      }
      // enable cut-off
      if (option == "-cut") {
        cut = true;
        ifile++;
      }
      // enable Newton2Num
      if (option == "-newton") {
        newton = true;
        ifile++;
      }

      if (strncmp(option.c_str(), "-o", 2) == 0) {
        oext = argv[i] + 2;
        if(!*oext) {
          oext = argv[++i]; //space left after -o: use next argument
          ifile++;
        }

        format_out = conv.FindFormat(oext);
        ifile++;
      }

      if (option == "-h") {
        hydrogens = true;
        ifile++;
      }

      if (option == "-cg") {
        sd = false;
        ifile++;
      }

      if ((option == "-c") && (argc > (i+1))) {
        crit = atof(argv[i+1]);
        ifile += 2;
      }

      if ((option == "-ff") && (argc > (i+1))) {
        ff = argv[i+1];
        ifile += 2;
      }
    }

    basename = filename = argv[ifile];
    size_t extPos = filename.rfind('.');

    if (extPos!= string::npos) {
      basename = filename.substr(0, extPos);
    }
  }

  // Find Input filetype
  OBFormat *format_in = conv.FormatFromExt(filename.c_str());

  if (!format_in || !format_out || !conv.SetInAndOutFormats(format_in, format_out)) {
    cerr << program_name << ": cannot read input/output format!" << endl;
    exit (-1);
  }

  ifstream ifs;
  ofstream ofs;

  // Read the file
  ifs.open(filename.c_str());
  if (!ifs) {
    cerr << program_name << ": cannot read input file!" << endl;
    exit (-1);
  }

  OBForceField* pFF = OBForceField::FindForceField(ff);
  if (!pFF) {
    cerr << program_name << ": could not find forcefield '" << ff << "'." <<endl;
    exit (-1);
  }

  // set some force field variables
  pFF->SetLogFile(&cerr);
  pFF->SetLogLevel(OBFF_LOGLVL_LOW);
  pFF->SetVDWCutOff(rvdw);
  pFF->SetElectrostaticCutOff(rele);
  pFF->SetUpdateFrequency(freq);
  pFF->EnableCutOff(cut);
  if (newton)
    pFF->SetLineSearchType(LineSearchType::Newton2Num);

  OBMol mol;

  for (c=1;;c++) {
    mol.Clear();
    if (!conv.Read(&mol, &ifs))
      break;
    if (mol.Empty())
      break;

    if (hydrogens)
      mol.AddHydrogens();

    if (!pFF->Setup(mol)) {
      cerr << program_name << ": could not setup force field." << endl;
      exit (-1);
    }

    bool done = true;
    OBStopwatch timer;
    timer.Start();
    if (sd) {
      pFF->SteepestDescentInitialize(steps, crit);
    } else {
      pFF->ConjugateGradientsInitialize(steps, crit);
    }

    unsigned int totalSteps = 1;
    while (done) {
      if (sd)
        done = pFF->SteepestDescentTakeNSteps(1);
      else
        done = pFF->ConjugateGradientsTakeNSteps(1);
      totalSteps++;

      if (pFF->DetectExplosion()) {
        cerr << "explosion has occured!" << endl;
        conv.Write(&mol, &cout);
        return(1);
      } else
        pFF->GetCoordinates(mol);
    }
    double timeElapsed = timer.Elapsed();

    pFF->GetCoordinates(mol);

    conv.Write(&mol, &cout);
    cerr << "Time: " << timeElapsed << "seconds. Iterations per second: " <<  double(totalSteps) / timeElapsed << endl;
  } // end for loop

  return(0);
}