bool PDBFormat::WriteMolecule(OBBase* pOb, OBConversion* pConv) { OBMol* pmol = dynamic_cast<OBMol*>(pOb); if(pmol==NULL) return false; //Define some references so we can use the old parameter names ostream &ofs = *pConv->GetOutStream(); OBMol &mol = *pmol; unsigned int i; char buffer[BUFF_SIZE]; char type_name[10], padded_name[10]; char the_res[10]; char the_chain = ' '; const char *element_name; int res_num; bool het=true; int model_num = 0; if (!pConv->IsLast() || pConv->GetOutputIndex() > 1) { // More than one molecule record model_num = pConv->GetOutputIndex(); // MODEL 1-based index snprintf(buffer, BUFF_SIZE, "MODEL %8d", model_num); ofs << buffer << endl; } // write back all fields (REMARKS, HELIX, SHEET, SITE, ...) bool compndWritten = false; bool authorWritten = false; std::vector<OBGenericData*> pairData = mol.GetAllData(OBGenericDataType::PairData); for (std::vector<OBGenericData*>::iterator data = pairData.begin(); data != pairData.end(); ++data) { OBPairData *pd = static_cast<OBPairData*>(*data); string attr = pd->GetAttribute(); // filter to make sure we are writing pdb fields only if (attr != "HEADER" && attr != "OBSLTE" && attr != "TITLE" && attr != "SPLIT" && attr != "CAVEAT" && attr != "COMPND" && attr != "SOURCE" && attr != "KEYWDS" && attr != "EXPDTA" && attr != "NUMMDL" && attr != "MDLTYP" && attr != "AUTHOR" && attr != "REVDAT" && attr != "SPRSDE" && attr != "JRNL" && attr != "REMARK" && attr != "DBREF" && attr != "DBREF1" && attr != "DBREF2" && attr != "SEQADV" && attr != "SEQRES" && attr != "MODRES" && attr != "HET" && attr != "HETNAM" && attr != "HETSYN" && attr != "FORMUL" && attr != "HELIX" && attr != "SHEET" && attr != "SSBOND" && attr != "LINK" && attr != "CISPEP" && attr != "SITE" && attr != "ORIGX1" && attr != "ORIGX2" && attr != "ORIGX3" && attr != "SCALE1" && attr != "SCALE2" && attr != "SCALE3" && attr != "MATRIX1" && attr != "MATRIX2" && attr != "MATRIX3" && attr != "MODEL") continue; if (attr == "COMPND") compndWritten = true; 
if (attr == "AUTHOR") authorWritten = true; // compute spacing needed. HELIX, SITE, HET, ... are trimmed when reading int nSpacing = 6 - attr.size(); for (int i = 0; i < nSpacing; ++i) attr += " "; std::string lines = pd->GetValue(); string::size_type last = 0; string::size_type pos = lines.find('\n'); while (last != string::npos) { string line = lines.substr(last, pos - last); if (pos == string::npos) last = string::npos; else last = pos + 1; pos = lines.find('\n', last); ofs << attr << line << endl; } } if (!compndWritten) { if (strlen(mol.GetTitle()) > 0) snprintf(buffer, BUFF_SIZE, "COMPND %s ",mol.GetTitle()); else snprintf(buffer, BUFF_SIZE, "COMPND UNNAMED"); ofs << buffer << endl; } if (!authorWritten) { snprintf(buffer, BUFF_SIZE, "AUTHOR GENERATED BY OPEN BABEL %s",BABEL_VERSION); ofs << buffer << endl; } // Write CRYST1 record, containing unit cell parameters, space group // and Z value (supposed to be 1) if (pmol->HasData(OBGenericDataType::UnitCell)) { OBUnitCell *pUC = (OBUnitCell*)pmol->GetData(OBGenericDataType::UnitCell); if(pUC->GetSpaceGroup()){ string tmpHM=pUC->GetSpaceGroup()->GetHMName(); // Do we have an extended HM symbol, with origin choice as ":1" or ":2" ? If so, remove it. 
size_t n=tmpHM.find(":"); if(n!=string::npos) tmpHM=tmpHM.substr(0,n); snprintf(buffer, BUFF_SIZE, "CRYST1%9.3f%9.3f%9.3f%7.2f%7.2f%7.2f %-11s 1", pUC->GetA(), pUC->GetB(), pUC->GetC(), pUC->GetAlpha(), pUC->GetBeta(), pUC->GetGamma(), tmpHM.c_str()); } else snprintf(buffer, BUFF_SIZE, "CRYST1%9.3f%9.3f%9.3f%7.2f%7.2f%7.2f %-11s 1", pUC->GetA(), pUC->GetB(), pUC->GetC(), pUC->GetAlpha(), pUC->GetBeta(), pUC->GetGamma(), "P1"); ofs << buffer << endl; } // before we write any records, we should check to see if any coord < -1000 // which will cause errors in the formatting double minX, minY, minZ; minX = minY = minZ = -999.0f; FOR_ATOMS_OF_MOL(a, mol) { if (a->GetX() < minX) minX = a->GetX(); if (a->GetY() < minY) minY = a->GetY(); if (a->GetZ() < minZ) minZ = a->GetZ(); } vector3 transV = VZero; if (minX < -999.0) transV.SetX(-1.0*minX - 900.0); if (minY < -999.0) transV.SetY(-1.0*minY - 900.0); if (minZ < -999.0) transV.SetZ(-1.0*minZ - 900.0); // if minX, minY, or minZ was never changed, shift will be 0.0f // otherwise, move enough so that smallest coord is > -999.0f mol.Translate(transV); OBAtom *atom; OBResidue *res; for (i = 1; i <= mol.NumAtoms(); i++) { atom = mol.GetAtom(i); strncpy(type_name, etab.GetSymbol(atom->GetAtomicNum()), sizeof(type_name)); type_name[sizeof(type_name) - 1] = '\0'; //two char. elements are on position 13 and 14 one char. start at 14 if (strlen(type_name) > 1) type_name[1] = toupper(type_name[1]); else { char tmp[10]; strncpy(tmp, type_name, 10); snprintf(type_name, sizeof(type_name), " %-3s", tmp); } if ( (res = atom->GetResidue()) != 0 ) { het = res->IsHetAtom(atom); snprintf(the_res,4,"%s",(char*)res->GetName().c_str()); the_res[4] = '\0'; snprintf(type_name,5,"%s",(char*)res->GetAtomID(atom).c_str()); the_chain = res->GetChain(); //two char. elements are on position 13 and 14 one char. 
start at 14 if (strlen(etab.GetSymbol(atom->GetAtomicNum())) == 1) { if (strlen(type_name) < 4) { char tmp[10]; strncpy(tmp, type_name, 10); snprintf(padded_name, sizeof(padded_name), " %-3s", tmp); strncpy(type_name,padded_name,4); type_name[4] = '\0'; } else { /* type_name[4] = type_name[3]; type_name[3] = type_name[2]; type_name[2] = type_name[1]; type_name[1] = type_name[0]; type_name[0] = type_name[4]; */ type_name[4] = '\0'; } } res_num = res->GetNum(); } else { strcpy(the_res,"UNK"); the_res[3] = '\0'; snprintf(padded_name,sizeof(padded_name), "%s",type_name); strncpy(type_name,padded_name,4); type_name[4] = '\0'; res_num = 1; } element_name = etab.GetSymbol(atom->GetAtomicNum()); int charge = atom->GetFormalCharge(); char scharge[3] = { ' ', ' ', '\0' }; if(0 != charge) { snprintf(scharge, 3, "%+d", charge); char tmp = scharge[1]; scharge[1] = scharge[0]; scharge[0] = tmp; } snprintf(buffer, BUFF_SIZE, "%s%5d %-4s %-3s %c%4d %8.3f%8.3f%8.3f 1.00 0.00 %2s%2s\n", het?"HETATM":"ATOM ", i, type_name, the_res, the_chain, res_num, atom->GetX(), atom->GetY(), atom->GetZ(), element_name, scharge); ofs << buffer; } OBAtom *nbr; vector<OBBond*>::iterator k; for (i = 1; i <= mol.NumAtoms(); i ++) { atom = mol.GetAtom(i); if (atom->GetValence() == 0) continue; // no need to write a CONECT record -- no bonds snprintf(buffer, BUFF_SIZE, "CONECT%5d", i); ofs << buffer; // Write out up to 4 real bonds per line PR#1711154 int currentValence = 0; for (nbr = atom->BeginNbrAtom(k);nbr;nbr = atom->NextNbrAtom(k)) { snprintf(buffer, BUFF_SIZE, "%5d", nbr->GetIdx()); ofs << buffer; if (++currentValence % 4 == 0) { // Add the trailing space to finish this record ofs << " \n"; // write the start of a new CONECT record snprintf(buffer, BUFF_SIZE, "CONECT%5d", i); ofs << buffer; } } // Add trailing spaces int remainingValence = atom->GetValence() % 4; for (int count = 0; count < (4 - remainingValence); count++) { snprintf(buffer, BUFF_SIZE, " "); ofs << buffer; } ofs << " \n"; } 
snprintf(buffer, BUFF_SIZE, "MASTER 0 0 0 0 0 0 0 0 "); ofs << buffer; snprintf(buffer, BUFF_SIZE, "%4d 0 %4d 0\n",mol.NumAtoms(),mol.NumAtoms()); ofs << buffer; ofs << "END\n"; if (model_num) { ofs << "ENDMDL" << endl; } return(true); }
// obenergy driver: parses the command line, opens the input file, looks up the
// requested force field, opens three statistics files (append mode) in the
// current working directory, then reads molecules one at a time and writes one
// line per molecule to each statistics file (energy/timings, per-term timings,
// per-term memory figures).
int main(int argc,char **argv)
{
  char *program_name= argv[0];
  int c;
  int verbose = 0;
  bool hydrogens = false;
  string basename, filename = "", option, option2, ff = "";

  OBConversion conv;

  if (argc < 2) {
    cout << "Usage: obenergy [options] <filename>" << endl << endl;
    cout << "options: description:" << endl << endl;
    cout << " -v verbose: print out indivual energy interactions" << endl << endl;
    cout << " -h add hydrogens before calculating energy" << endl << endl;
    cout << " -ff ffid select a forcefield" << endl << endl;
    cout << " available forcefields:" << endl << endl;
    OBPlugin::List("forcefields", "verbose");
    exit(-1);
  } else {
    // ifile tracks the argv index expected to hold the file name: it starts
    // at 1 and is advanced once per recognised flag (twice for "-ff <id>").
    int ifile = 1;
    for (int i = 1; i < argc; i++) {
      option = argv[i];

      // NOTE(review): the break stops option scanning, so flags given after
      // -v are not processed -- confirm this is intended.
      if (option == "-v") {
        verbose = 1;
        ifile++;
        break;
      }

      if (option == "-h") {
        hydrogens = true;
        ifile++;
      }

      if ((option == "-ff") && (argc > (i+1))) {
        ff = argv[i+1];
        ifile += 2;
      }
    }

    // NOTE(review): ifile is not checked against argc here; when only flags
    // are given argv[ifile] is not a valid file name argument.
    basename = filename = argv[ifile];
    size_t extPos = filename.rfind('.');
    size_t startPos = 0;

    // Strip the extension (everything from the last '.') to get the base name.
    if (extPos!= string::npos) {
      basename = filename.substr(startPos, extPos);
    }
  }

  // Find Input filetype
  OBFormat *format_in = conv.FormatFromExt(filename.c_str());

  if (!format_in || !conv.SetInFormat(format_in)) {
    cerr << program_name << ": cannot read input format!" << endl;
    exit (-1);
  }

  ifstream ifs;
  ofstream ofs;

  // Read the file
  ifs.open(filename.c_str());
  if (!ifs) {
    cerr << program_name << ": cannot read input file!" << endl;
    exit (-1);
  }

  OBForceField* pFF = OBForceField::FindForceField(ff);
  if (!pFF) {
    cerr << program_name << ": could not find forcefield '" << ff << "'." <<endl;
    exit (-1);
  }

  pFF->SetLogFile(&cout);
  pFF->SetLogLevel(OBFF_LOGLVL_NONE);
  // if (verbose)
  //   pFF->SetLogLevel(OBFF_LOGLVL_HIGH);
  // else
  //   pFF->SetLogLevel(OBFF_LOGLVL_MEDIUM);

  Timer totalTimer, readTimer, setupTimer, computeTimer;
  double totalTime, readTime, setupTime, computeTime;
  int nthreads = 1; // single core is just one thread
  unsigned int totalSteps = 1; // total steps is only used when doing minimization

#ifdef _OPENMP // START OPENMP DEBUG
#pragma omp parallel default(none) shared(nthreads)
  {
    nthreads = omp_get_num_threads(); // determine the number of threads
  } // parallel region completes
#endif // END OPENMP DEBUG

  // This code only deals with creating a file to store the timings for
  // the execution of the program along with other interesting statistics
  char cwd[1024]; // a reasonably large current working directory (cwd)
  char filepath[1100]; // a reasonably large file name plus cwd
  const char * statsfile = "obenergy_runstats";
  std::ofstream output;

  if (getcwd(cwd, sizeof (cwd)) != NULL) {
    std::cout << "Current working dir: " << cwd << std::endl;
  } else {
    std::cout << "The getcwd() method failed. Aborting." << std::endl;
    exit(1);
  }//if(getcwd()

  // NOTE(review): sprintf is unbounded and ff comes straight from argv, so a
  // long force field id can overrun filepath (1100 bytes, cwd may use 1024).
  // The same applies to filepath2 and filepath3 below.
  sprintf(filepath, "%s/%s_%s_t%d.mat", cwd, statsfile, ff.c_str(), nthreads);
  std::cout << "Writing output file to: " << filepath << std::endl;
  output.open(filepath, ios::out | ios::app ); // The file is open in append mode

  // 1 2 3 4 5 6 7 8 9 10 11 12
  output << "#METHOD: " << ff << " DATASET: " << filename.c_str() << std::endl;
  output << "#THREADS ENERGY MOL_MASS NUM_ATOMS NUM_ROTORS NUM_CONF TOT_TIME TIME_READ TIME_SETUP TIME_COMPUTE STEPS #MOL_NAME " << std::endl;

  // A second file is created to store information on the timings of specific parts
  // from the MMFF94 calculation routines. breakdown of time spent in calculations
  char filepath2[1100];
  std::ofstream output2;
  sprintf(filepath2, "%s/%s_%s_t%d_compute.mat", cwd, statsfile, ff.c_str(), nthreads);
  std::cout << "Writing method breakdown detail file to: " << filepath2 << std::endl;
  output2.open(filepath2, ios::out | ios::app ); // The file is open in append mode

  // 1 2 3 4 5 6 7 8 9 10 11 12
  output2 << "#METHOD: " << ff << " THREADS: " << nthreads << " DATASET: " << filename.c_str() << std::endl;
  output2 << "#E_BOND E_ANGLE E_STRBND E_TORSION E_OOP E_VDW E_ELEC N_ATOMS PAIRS_VDW PAIRS_ELEC MEM_VDW MEM_ELEC " << std::endl;

  // A third file is created to store information on the memory allocation of data structures
  // from the MMFF94 calculation routines. breakdown of memory allocated for each calculation type
  char filepath3[1100];
  std::ofstream output3;
  sprintf(filepath3, "%s/%s_%s_t%d_malloc.mat", cwd, statsfile, ff.c_str(), nthreads);
  std::cout << "Writing memory allocation breakdown detail file to: " << filepath3 << std::endl;
  output3.open(filepath3, ios::out | ios::app ); // The file is open in append mode

  // 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
  output3 << "#METHOD: " << ff << " THREADS: " << nthreads << " DATASET: " << filename.c_str() << std::endl;
  output3 << "#ATOMS M_BOND M_ANGLE M_STRBND M_TORSION M_OOP M_VDW M_ELEC C_BOND C_ANGLE C_STRBND C_TORSION C_OOP C_VDW C_ELEC" << std::endl;

  double bondCalcTime, angleCalcTime, strbndCalcTime, torsionCalcTime, oopCalcTime, vdwCalcTime, electrostaticCalcTime;
  int numPairsVDW, numPairsElectrostatic;

  OBMol mol;
  double energy;

  // One iteration per molecule in the input; the loop ends when a read fails
  // or yields an empty molecule.
  for (c=1;;c++) {
    mol.Clear();

    totalTimer.start();
    readTimer.start();

    if (!conv.Read(&mol, &ifs))
      break;
    if (mol.Empty())
      break;

    if (hydrogens)
      mol.AddHydrogens();

    readTime = readTimer.get();
    setupTimer.start();

    if (!pFF->Setup(mol)) {
      cerr << program_name << ": could not setup force field." << endl;
      exit (-1);
    }

    setupTime = setupTimer.get();
    computeTimer.start();

    energy = pFF->Energy(false);

    computeTime = computeTimer.get();
    totalTime = totalTimer.get();

    // THREADS ENERGY MOL_MASS NUM_ATOMS NUM_ROTORS NUM_CONF TOT_TIME TIME_READ TIME_SETUP TIME_COMPUTE STEPS #MOL_NAME
    output << nthreads << " " << energy << " " << mol.GetExactMass() << " " << mol.NumAtoms()
           << " " << mol.NumRotors() << " " << mol.NumConformers() << " " << totalTime
           << " " << readTime << " " << " " << setupTime << " " << computeTime << " " << totalSteps
           << " #" << mol.GetTitle() // comment added to avoid errors when reading matrix in Octave
           << std::endl;

    map<string, double> timings = pFF->getTimings();
    map<string, size_t> memalloc = pFF->getAllocatedMemory();
    MapKeys mk;

    // 1 2 3 4 5 6 7 8 9 10 11 12
    // E_BOND E_ANGLE E_STRBND E_TORSION E_OOP E_VDW E_ELEC N_ATOMS PAIRS_VDW PAIRS_ELEC MEM_VDW MEM_ELEC
    output2 << timings[mk.TIME_BOND_CALCULATIONS] << " " // 1
            << timings[mk.TIME_ANGLE_CALCULATIONS] << " " // 2
            << timings[mk.TIME_STRBND_CALCULATIONS] << " " // 3
            << timings[mk.TIME_TORSION_CALCULATIONS] << " " // 4
            << timings[mk.TIME_OOP_CALCULATIONS] << " " // 5
            << timings[mk.TIME_VDW_CALCULATIONS] << " " // 6
            << timings[mk.TIME_ELECTROSTATIC_CALCULATIONS] << " " // 7
            << mol.NumAtoms() << " " // 8
            << timings[mk.TOTAL_VDW_CALCULATIONS] << " " // 9
            << timings[mk.TOTAL_ELECTROSTATIC_CALCULATIONS] << " " // 10
            << memalloc[mk.MEM_VDW_CALCULATIONS] << " " // 11
            << memalloc[mk.MEM_ELECTROSTATIC_CALCULATIONS] << std::endl; // 12

    // 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
    // ATOMS M_BOND M_ANGLE M_STRBND M_TORSION M_OOP M_VDW M_ELEC C_BOND C_ANGLE C_STRBND C_TORSION C_OOP C_VDW C_ELEC
    output3 << mol.NumAtoms() << " " // 1
            << memalloc[mk.MEM_BOND_CALCULATIONS] << " " // 2
            << memalloc[mk.MEM_ANGLE_CALCULATIONS] << " " // 3
            << memalloc[mk.MEM_STRBND_CALCULATIONS] << " " // 4
            << memalloc[mk.MEM_TORSION_CALCULATIONS] << " " // 5
            << memalloc[mk.MEM_OOP_CALCULATIONS] << " " // 6
            << memalloc[mk.MEM_VDW_CALCULATIONS] << " " // 7
            << memalloc[mk.MEM_ELECTROSTATIC_CALCULATIONS] << " " // 8
            << timings[mk.TOTAL_BOND_CALCULATIONS] << " " // 9
            << timings[mk.TOTAL_ANGLE_CALCULATIONS] << " " // 10
            << timings[mk.TOTAL_STRBND_CALCULATIONS] << " " // 11
            << timings[mk.TOTAL_TORSION_CALCULATIONS] << " " // 12
            << timings[mk.TOTAL_OOP_CALCULATIONS] << " " // 13
            << timings[mk.TOTAL_VDW_CALCULATIONS] << " " // 14
            << timings[mk.TOTAL_ELECTROSTATIC_CALCULATIONS] << " " // 15
            << std::endl;

    // A non-finite energy is reported on stderr together with the title and
    // every atom position of the offending molecule.
    if (!isfinite(energy)) {
      cerr << " Title: " << mol.GetTitle() << endl;
      FOR_ATOMS_OF_MOL(atom, mol) {
        cerr << " x: " << atom->x() << " y: " << atom->y() << " z: " << atom->z() << endl;
      }
    }

  } // end for loop
bool OpAlign::Do(OBBase* pOb, const char* OptionText, OpMap* pmap, OBConversion* pConv) { OBMol* pmol = dynamic_cast<OBMol*>(pOb); if(!pmol) return false; map<string,string>::const_iterator itr; // Is there an -s option? if(pConv->IsFirstInput()) { _pOpIsoM = NULL; //assume no -s option itr = pmap->find("s"); if(itr!=pmap->end()) { //There is an -s option; check it is ok _pOpIsoM = static_cast<OpNewS*>(OBOp::FindType("s")); _stext = itr->second; //get its parameter(s) if(!_pOpIsoM || _stext.empty()) { obErrorLog.ThrowError(__FUNCTION__, "No parameter on -s option, or its OBOp version is not loaded", obError); pConv->SetOneObjectOnly(); //to finish return false; } } } // If the output format is a 2D depiction format, then we should align // on the 2D coordinates and not the 3D coordinates (if present). This //means we need to generate the 2D coordinates at this point. if(pmol->GetDimension()==3 && (pConv->GetOutFormat()->Flags() & DEPICTION2D)) { OBOp* pgen = OBOp::FindType("gen2D"); if(pgen) pgen->Do(pmol); } // All molecules must have coordinates, so add them if 0D // They may be added again later when gen2D or gen3D is called, but they will be the same. // It would be better if this op was called after them, which would happen // if its name was alphabetically after "gen" (and before "s"). if(pmol->GetDimension()==0) { //Will the coordinates be 2D or 3D? itr = pmap->find("gen3D"); OBOp* pgen = (itr==pmap->end()) ? OBOp::FindType("gen2D") : OBOp::FindType("gen3D"); if(pgen) pgen->Do(pmol); } //Do the alignment in 2D if the output format is svg, png etc. and there is no -xn option if(pmol->GetDimension()==3 && pConv && !pConv->IsOption("n")) { OBFormat* pOutFormat = pConv->GetOutFormat(); if(pOutFormat->Flags() & DEPICTION2D) { OBOp* pgen = OBOp::FindType("gen2D"); if(pgen) pgen->Do(pmol); } } if(pConv->IsFirstInput() || _refMol.NumAtoms()==0) { _refvec.clear(); // Reference molecule is basically the first molecule _refMol = *pmol; if(!_pOpIsoM) //no -s option. 
Use a molecule reference. _align.SetRefMol(_refMol); else { //If there is a -s option, reference molecule has only those atoms that are matched //Call the -s option from here bool ret = _pOpIsoM->Do(pmol, _stext.c_str(), pmap, pConv); // Get the atoms that were matched vector<int> ats = _pOpIsoM->GetMatchAtoms(); if(!ats.empty()) { // Make a vector of the matching atom coordinates... for(vector<int>::iterator iter=ats.begin(); iter!=ats.end(); ++iter) _refvec.push_back((pmol->GetAtom(*iter))->GetVector()); // ...and use a vector reference _align.SetRef(_refvec); } // Stop -s option being called normally, although it will still be called once // in the DoOps loop already started for the current (first) molecule. pConv->RemoveOption("s",OBConversion::GENOPTIONS); if(!ret) { // the first molecule did not match the -s option so a reference molecule // could not be made. Keep trying. _refMol.Clear(); //obErrorLog.ThrowError(__FUNCTION__, "The first molecule did not match the -s option\n" // "so the reference structure was not derived from it", obWarning, onceOnly); return false; //not matched } } } //All molecules if(pmol->GetDimension()!= _refMol.GetDimension()) { stringstream ss; ss << "The molecule" << pmol->GetTitle() << " does not have the same dimensions as the reference molecule " << _refMol.GetTitle() << " and is ignored."; obErrorLog.ThrowError(__FUNCTION__, ss.str().c_str(), obError); return false; } if(_pOpIsoM) //Using -s option { //Ignore mol if it does not pass -s option if(!_pOpIsoM->Do(pmol, "", pmap, pConv)) // "" means will use existing parameters return false; // Get the atoms equivalent to those in ref molecule vector<int> ats = _pOpIsoM->GetMatchAtoms(); // Make a vector of their coordinates and get the centroid vector<vector3> vec; vector3 centroid; for(vector<int>::iterator iter=ats.begin(); iter!=ats.end(); ++iter) { vector3 v = pmol->GetAtom(*iter)->GetVector(); centroid += v; vec.push_back(v); } centroid /= vec.size(); // Do the alignment 
_align.SetTarget(vec); if(!_align.Align()) return false; // Get the centroid of the reference atoms vector3 ref_centroid; for(vector<vector3>::iterator iter=_refvec.begin(); iter!=_refvec.end(); ++iter) ref_centroid += *iter; ref_centroid /= _refvec.size(); //subtract the centroid, rotate the target molecule, then add the centroid matrix3x3 rotmatrix = _align.GetRotMatrix(); for (unsigned int i = 1; i <= pmol->NumAtoms(); ++i) { vector3 tmpvec = pmol->GetAtom(i)->GetVector(); tmpvec -= centroid; tmpvec *= rotmatrix; //apply the rotation tmpvec += ref_centroid; pmol->GetAtom(i)->SetVector(tmpvec); } } else //Not using -s option) { _align.SetTargetMol(*pmol); if(!_align.Align()) return false; _align.UpdateCoords(pmol); } //Save rmsd as a property OBPairData* dp = new OBPairData; dp->SetAttribute("rmsd"); double val = _align.GetRMSD(); if(val<1e-12) val = 0.0; dp->SetValue(toString(val)); dp->SetOrigin(local); pmol->SetData(dp); return true; }
// Populate the vibration table from the vibration data attached to the given
// molecule. Rows are listed in order of increasing frequency; m_indexMap is
// rebuilt so that m_indexMap->at(row) is the index of that row's mode in the
// unsorted data (m_frequencies / m_intensities). Passing 0, or a molecule
// without vibration data, empties the table and hides its header.
void VibrationWidget::setMolecule(Molecule *molecule)
{
  // update table
  ui.vibrationTable->clearContents();
  if (molecule == 0){
    ui.vibrationTable->setRowCount(0);
    ui.vibrationTable->horizontalHeader()->hide();
    return;
  }
  m_molecule = molecule;

  // NOTE(review): obmol is a local copy and m_vibrations is obtained from it;
  // confirm the pointed-to data outlives this function before using
  // m_vibrations elsewhere.
  OBMol obmol = molecule->OBMol();
  m_vibrations = static_cast<OBVibrationData*>(obmol.GetData(OBGenericDataType::VibrationData));
  if (!m_vibrations) {
    ui.vibrationTable->setRowCount(0);
    ui.vibrationTable->horizontalHeader()->hide();
    //ui.exportButton->setEnabled(false);
    return;
  }

  ui.vibrationTable->horizontalHeader()->show();
  ui.vibrationTable->horizontalHeader()->setResizeMode(QHeaderView::Stretch);

  // OK, we have valid vibrations, so add them to the table
  vector<double> frequencies = m_vibrations->GetFrequencies();
  vector<double> intensities = m_vibrations->GetIntensities();
  m_frequencies = frequencies;
  m_intensities = intensities;
  vector<double> raman_activities = m_vibrations->GetRamanActivities();

  if (raman_activities.size() == 0) {
    //resize(274, height());
    ui.vibrationTable->setColumnCount(2);
    if(parentWidget())
      parentWidget()->setMinimumWidth(274);
  }
  else {
    //resize(310, height());
    ui.vibrationTable->setColumnCount(3);
    ui.vibrationTable->setHorizontalHeaderItem(2, new QTableWidgetItem("Activity"));
    if(parentWidget())
      parentWidget()->setMinimumWidth(310);
  }

  // Generate an index vector to map sorted indicies to the old indices
  m_indexMap->clear();
  for (uint i = 0; i < frequencies.size(); i++)
    m_indexMap->push_back(i);

  // Setup progress dialog, just in case it takes longer than 2 seconds
  QProgressDialog prog(tr("Sorting %1 vibrations by frequency...")
                       .arg(frequencies.size()), "", 0, frequencies.size());
  prog.setWindowModality(Qt::WindowModal);
  prog.setMinimumDuration(2000);
  prog.setCancelButton(0);

  // Simple selection sort of the index map only. The data vectors are left
  // untouched and are read through the map afterwards, so columns of
  // different lengths (missing intensities or activities) cannot be indexed
  // out of range and every column stays attached to its own mode.
  int tmp_int;
  for (uint i = 0; i < frequencies.size(); i++) {
    for (uint j = i + 1; j < frequencies.size(); j++) {
      if (frequencies.at(m_indexMap->at(j)) < frequencies.at(m_indexMap->at(i))) {
        tmp_int = m_indexMap->at(j);
        m_indexMap->at(j) = m_indexMap->at(i);
        m_indexMap->at(i) = tmp_int;
      }
    }
    // Update progress bar
    prog.setValue(i);
  }

  ui.vibrationTable->setRowCount(frequencies.size());
  QString format("%1");

  for (unsigned int row = 0; row < frequencies.size(); ++row) {
    // Index of this row's mode in the unsorted data
    const unsigned int mode = m_indexMap->at(row);

    QTableWidgetItem *newFreq = new QTableWidgetItem(format.arg(frequencies[mode], 0, 'f', 2));
    newFreq->setTextAlignment(Qt::AlignRight|Qt::AlignVCenter);

    // Some codes don't provide intensity data. Display "-" in place of intensities.
    QTableWidgetItem *newInten;
    if (mode >= intensities.size()) {
      newInten = new QTableWidgetItem("-");
    }
    else {
      newInten = new QTableWidgetItem(format.arg(intensities[mode], 0, 'f', 3));
    }
    newInten->setTextAlignment(Qt::AlignRight|Qt::AlignVCenter);

    ui.vibrationTable->setItem(row, 0, newFreq);
    ui.vibrationTable->setItem(row, 1, newInten);

    if (raman_activities.size() != 0) {
      QTableWidgetItem *newRaman;
      if (mode >= raman_activities.size()) {
        newRaman = new QTableWidgetItem("-");
      }
      else {
        newRaman = new QTableWidgetItem(format.arg(raman_activities[mode], 0, 'f', 3));
      }
      newRaman->setTextAlignment(Qt::AlignRight|Qt::AlignVCenter);
      ui.vibrationTable->setItem(row, 2, newRaman);
    }
  }

  // enable export button
  //ui.exportButton->setEnabled(true);
}
// Write a molecule as a Culgi object file: the fixed header with the
// molecule/atom/bond key declarations, an optional formal-charge key section,
// the "molecule" line, and one "atom" line per atom.
bool COFFormat::WriteMolecule(OBBase* pOb, OBConversion* pConv)
{
  OBMol* pmol = dynamic_cast<OBMol*>(pOb);
  if(pmol==NULL)
    return false;

  stringstream errorMsg;
  ostream& ofs = *(pConv->GetOutStream());
  OBMol &mol = *pmol;

  // Fixed file header followed by the key declarations for molecules,
  // atoms and bonds.
  ofs << "@CulgiVersion: 10.0.0\n"
      << "# Culgi Object File\n"
      << "# Generated by Open Babel\n\n"
      << "moleculekeys\n"
      << "\tname\n"
      << "\tid\n"
      << "end_moleculekeys\n\n"
      << "atomkeys\n"
      << "\tindex\n"
      << "\tname\n"
      << "\telement_type\n"
      << "\tforce_field_type\n"
      << "\tcharge\n"
      << "\tfixed\n"
      << "\tx\n"
      << "\ty\n"
      << "\tz\n"
      << "\tvelocity_x\n"
      << "\tvelocity_y\n"
      << "\tvelocity_z\n"
      << "\tvelocity_const_x\n"
      << "\tvelocity_const_y\n"
      << "\tvelocity_const_z\n"
      << "end_atomkeys\n\n"
      << "bondkeys\n"
      << "\tatom1_index\n"
      << "\tatom2_index\n"
      << "\tbond_order\n"
      << "end_bondkeys\n\n";

  // The formal-charge key section is declared once, and only if at least one
  // atom carries a non-zero formal charge.
  FOR_ATOMS_OF_MOL(atom, mol)
  {
    if(atom->GetFormalCharge()!=0)
    {
      ofs << "formalqkeys\n"
          << "\tatom_index\n"
          << "\tformal_charge\n"
          << "end_formalqkeys\n\n";
      break;
    }
  }

  // Get molecule file name. Some formats add file path
  // etc to the name, so we try to clean that up here
  const char *tit = pmol->GetTitle();
  std::string molname(tit);
  if(molname.empty())
    molname = pConv->GetTitle();
  // Cut everything from the last '.' onwards, then keep only the part after
  // the last '\' and after the last '/'.
  size_t nPos = molname.find_last_of(".");
  if(nPos != std::string::npos)
    molname = molname.substr(0, nPos);
  nPos = molname.find_last_of("\\");
  if(nPos != std::string::npos)
    molname = molname.substr(nPos+1, molname.size());
  nPos = molname.find_last_of("/");
  if(nPos != std::string::npos)
    molname = molname.substr(nPos+1, molname.size());
  if(molname.empty())
    molname = "mol";

  ofs << "molecule\t" << molname << "\t0" << endl;

  // Atom names are the element symbol followed by a per-element running
  // count (C1, C2, H1, ...). elems and nelem are parallel vectors holding the
  // symbols seen so far and how many atoms of each have been named.
  int i = 0;
  vector<string> names;
  vector<string> elems;
  vector<int> nelem;
  ostringstream sstream;
  FOR_ATOMS_OF_MOL(atom, mol)
  {
    sstream.str("");
    i++;
    string elem = OBElements::GetSymbol(atom->GetAtomicNum());
    // Culgi does not recognize atom type 'Xx' but does know 'X'.
    if(elem == "Xx")
      elem = "X";

    bool found = false;
    string ename;
    // Element already seen: bump its counter and build the next name.
    for( int j=0; j<elems.size(); j++)
    {
      if(elem == elems[j])
      {
        found = true;
        nelem[j]++;
        sstream.str("");
        sstream << elem << nelem[j];
        ename = sstream.str();
        names.push_back(ename);
        sstream.str("");
        break;
      }
    }

    // First atom of this element: register it with count 1.
    if (!found)
    {
      elems.push_back(elem);
      nelem.push_back(1);
      sstream << elem << "1";
      ename = sstream.str();
      names.push_back(ename);
      sstream.str("");
    }

    // One tab-separated atom record; the index written is zero-based (i - 1)
    // and the force field type, fixed flag and velocity fields are constants.
    ofs << "atom" << "\t" << i - 1 << "\t" << ename << "\t" << elem << "\t"
        << "X" << "\t" << atom->GetPartialCharge() << "\t" << "0" << "\t"
        << atom->GetX() << "\t" << atom->GetY() << "\t" << atom->GetZ() << "\t"
        << "0.00000" << "\t" << "0.00000" << "\t" << "0.00000" << "\t"
        << "0" << "\t" << "0" << "\t" << "0" << endl;
  }
int main(int argc,char *argv[]) { // turn off slow sync with C-style output (we don't use it anyway). std::ios::sync_with_stdio(false); // Define location of file formats for testing #ifdef FORMATDIR char env[BUFF_SIZE]; snprintf(env, BUFF_SIZE, "BABEL_LIBDIR=%s", FORMATDIR); putenv(env); #endif if (argc != 1) { if (strncmp(argv[1], "-g", 2)) { cout << "Usage: charge-mmff94" << endl; return 0; } else { GenerateCharges(); return 0; } } cout << "# Testing MMFF94 Charge Model..." << endl; std::ifstream mifs; if (!SafeOpen(mifs, molecules_file.c_str())) { cout << "Bail out! Cannot read file " << molecules_file << endl; return -1; // test failed } std::ifstream rifs; if (!SafeOpen(rifs, results_file.c_str())) { cout << "Bail out! Cannot read file " << results_file << endl; return -1; // test failed } std::ifstream difs; if (!SafeOpen(difs, dipole_file.c_str())) { cout << "Bail out! Cannot read file " << dipole_file << endl; return -1; // test failed } char buffer[BUFF_SIZE]; vector<string> vs; OBMol mol; OBConversion conv(&mifs, &cout); unsigned int currentTest = 0; vector3 dipoleMoment, result; std::vector<double> partialCharges; if(! conv.SetInAndOutFormats("SDF","SDF")) { cout << "Bail out! SDF format is not loaded" << endl; return -1; // test failed } OBChargeModel *pCM = OBChargeModel::FindType("mmff94"); if (pCM == NULL) { cerr << "Bail out! Cannot load charge model!" << endl; return -1; // test failed } while(mifs) { mol.Clear(); conv.Read(&mol); if (mol.Empty()) continue; if (!difs.getline(buffer,BUFF_SIZE)) { cout << "Bail out! error reading reference data" << endl; return -1; // test failed } if (!pCM->ComputeCharges(mol)) { cout << "Bail out! 
could not compute charges on " << mol.GetTitle() << endl; return -1; // test failed } partialCharges = pCM->GetPartialCharges(); // compare the calculated energy to our reference data tokenize(vs, buffer); if (vs.size() < 3) return -1; dipoleMoment.SetX(atof(vs[0].c_str())); dipoleMoment.SetY(atof(vs[1].c_str())); dipoleMoment.SetZ(atof(vs[2].c_str())); result = pCM->GetDipoleMoment(mol) - dipoleMoment; if ( fabs(result.length_2()) > 1.0e-4) { cout << "not ok " << ++currentTest << " # calculated dipole incorrect " << " for molecule " << mol.GetTitle() << '\n'; } else cout << "ok " << ++currentTest << " # dipole\n"; FOR_ATOMS_OF_MOL(atom, mol) { if (!rifs.getline(buffer,BUFF_SIZE)) { cout << "Bail out! Cannot read reference data\n"; return -1; // test failed } if ( fabs(atom->GetPartialCharge() - atof(buffer)) > 1.0e-3 ) { cout << "not ok " << ++currentTest << " # calculated charge incorrect " << " for molecule " << mol.GetTitle() << '\n'; cout << "# atom " << atom->GetIdx() << " expected " << buffer << " got " << atom->GetPartialCharge() << '\n'; } else { cout << "ok " << ++currentTest << " # charge\n"; } } } // return number of tests run cout << "1.." << currentTest << endl; // Passed tests return 0; }
bool OBResidueData::AssignBonds(OBMol &mol,OBBitVec &bv) { if (!_init) Init(); OBAtom *a1,*a2; OBResidue *r1,*r2; vector<OBAtom*>::iterator i,j; vector3 v; int bo; string skipres = ""; // Residue Number to skip string rname = ""; //assign residue bonds for (a1 = mol.BeginAtom(i);a1;a1 = mol.NextAtom(i)) { r1 = a1->GetResidue(); if (r1 == NULL) // atoms may not have residues continue; if (skipres.length() && r1->GetNumString() == skipres) continue; if (r1->GetName() != rname) { skipres = SetResName(r1->GetName()) ? "" : r1->GetNumString(); rname = r1->GetName(); } //assign bonds for each atom for (j=i,a2 = mol.NextAtom(j);a2;a2 = mol.NextAtom(j)) { r2 = a2->GetResidue(); if (r2 == NULL) // atoms may not have residues continue; if (r1->GetNumString() != r2->GetNumString()) break; if (r1->GetName() != r2->GetName()) break; if (r1->GetChain() != r2->GetChain()) break; // Fixes PR#2889763 - Fabian if ((bo = LookupBO(r1->GetAtomID(a1),r2->GetAtomID(a2)))) { // Suggested by Liu Zhiguo 2007-08-13 // for predefined residues, don't perceive connection // by distance // v = a1->GetVector() - a2->GetVector(); // if (v.length_2() < 3.5) //check by distance mol.AddBond(a1->GetIdx(),a2->GetIdx(),bo); } } } int hyb; string type; //types and hybridization rname = ""; // name of current residue skipres = ""; // don't skip any residues right now for (a1 = mol.BeginAtom(i);a1;a1 = mol.NextAtom(i)) { if (a1->GetAtomicNum() == OBElements::Oxygen && !a1->GetValence()) { a1->SetType("O3"); continue; } if (a1->GetAtomicNum() == OBElements::Hydrogen) { a1->SetType("H"); continue; } //***valence rule for O- if (a1->GetAtomicNum() == OBElements::Oxygen && a1->GetValence() == 1) { OBBond *bond; bond = (OBBond*)*(a1->BeginBonds()); if (bond->GetBO() == 2) { a1->SetType("O2"); a1->SetHyb(2); } else if (bond->GetBO() == 1) { // Leave the protonation/deprotonation to phmodel.txt a1->SetType("O3"); a1->SetHyb(3); // PR#3203039 -- Fix from Magnus Lundborg // a1->SetFormalCharge(0); } continue; } r1 
= a1->GetResidue(); if (r1 == NULL) continue; // atoms may not have residues if (skipres.length() && r1->GetNumString() == skipres) continue; if (r1->GetName() != rname) { // if SetResName fails, skip this residue skipres = SetResName(r1->GetName()) ? "" : r1->GetNumString(); rname = r1->GetName(); } if (LookupType(r1->GetAtomID(a1),type,hyb)) { a1->SetType(type); a1->SetHyb(hyb); } else // try to figure it out by bond order ??? {} } return(true); }
/////////////////////////////////////////////////////////////////////////////// //! \brief Set a tortional bond to a given angle int main(int argc,char **argv) { const char *Pattern=NULL; unsigned int i, t, errflg = 0; int c; char flags[255]; string err; bool graphOutput=false; // parse the command line -- optional -a flag to change all matching torsions if (argc < 3 || argc > 4) { errflg++; } else { FileIn = argv[1]; Pattern = "[!$(*#*)&!D1]-!@[!$(*#*)&!D1]"; // Read the atom position c = sscanf(argv[2], "%d", &angleSum); angle = 360./angleSum; if (argc == 4) { c = sscanf(argv[3], "%s", flags); int flagid=1; while (flags[flagid]!=0) switch (flags[flagid++]) { case 'g': graphOutput=true; case 'e': forceField=OBForceField::FindForceField("MMFF94"); isEnergyCalcing=true; break; } } } if (errflg) { cerr << "Usage: rkrotate <filename> <angle> [options]" << endl; exit(-1); } // create pattern OBSmartsPattern sp; sp.Init(Pattern); OBFormat* format = conv.FormatFromExt(FileIn); if(!(format && conv.SetInAndOutFormats(format, format))) { //in and out formats same cerr << "obrotate: cannot read and/or write this file format!" << endl; exit (-1); } //...NF //Open the molecule file ifstream ifs; // Read the file ifs.open(FileIn); if (!ifs) { cerr << "obrotate: cannot read input file!" << endl; exit (-1); } OBMol mol; vector< vector <int> > maplist; // list of matched atoms // vector< vector <int> >::iterator m; // and its iterators // int tindex; // Set the angles for (;;) { mol.Clear(); //NF ifs >> mol; // Read molecule conv.Read(&mol,&ifs); //NF if (mol.Empty()) break; if (sp.Match(mol)) { // if match perform rotation maplist = sp.GetUMapList(); // get unique matches if (maplist.size() > 1) cerr << "obrotate: Found " << maplist.size() << " matches." 
<< endl; energySheet=new MultiVector<double>(degrees=maplist.size(),angleSum); indexSheet=new int[maplist.size()]; for (int EXO=0;EXO<maplist.size();++EXO) totalSum*=angleSum+EXO; // look at all the mapping atom but save only the last one. turnMol(mol,maplist,maplist.size()-1); if (graphOutput) { ofstream ofs("energyGraph.mlog"); int ind[degrees]; for (int i=0;i<degrees;++i) ind[i]=0; do { for (int i=0;i<degrees;++i) ofs<<ind[i]<<'\t'; ofs<<energySheet->getVectorValue(ind)<<endl; } while(energySheet->incressIndex(ind)); } if (isEnergyCalcing) { std::vector<int*> lowEnergySheet; totalSum=energySheet->getMinValues(lowEnergySheet); if (totalSum) outputMol(lowEnergySheet,mol,maplist,maplist.size()-1); else cerr << "rkrotate: No low energy conformation found." << endl; } cout << sum; } else { cerr << "obrotate: Found 0 matches for the SMARTS pattern." << endl; exit(-1); } //NF cout << mol; } return(0); }
void GenerateSmartsReference() { std::ifstream ifs; if (!SafeOpen(ifs,smarts_file.c_str())) return; char buffer[BUFF_SIZE]; vector<OBSmartsPattern*> vsp; for (;ifs.getline(buffer,BUFF_SIZE);) { if (buffer[0] == '#') // skip comment line continue; OBSmartsPattern *sp = new OBSmartsPattern; if (sp->Init(buffer)) vsp.push_back(sp); else delete sp; } std::ofstream ofs; if (!SafeOpen(ofs, results_file.c_str())) return; ofs << vsp.size() << " patterns" << endl; std::ifstream mifs; if (!SafeOpen(mifs, smilestypes_file.c_str())) return; vector<int> vm; vector<vector<int> > mlist; vector<vector<int> >::iterator j; vector<OBSmartsPattern*>::iterator i; OBMol mol; OBConversion conv(&mifs, &cout); if(! conv.SetInAndOutFormats("SMI","SMI")) { cerr << "SMILES format is not loaded" << endl; return; } for (;mifs;) { mol.Clear(); conv.Read(&mol); if (mol.Empty()) continue; for (i = vsp.begin();i != vsp.end();i++) { (*i)->Match(mol); mlist = (*i)->GetMapList(); for (j = mlist.begin();j != mlist.end();j++) { sprintf(buffer,"%3d",*(j->begin())); ofs << buffer; } ofs << endl; } } cerr << " SMARTS test results written successfully" << endl; return; }
//! Compute EEM partial charges for \p mol.
//!
//! Builds the (N+1)x(N+1) system ETA * q = CHI from the per-atom parameters
//! in _parameters (loaded on first use), solves it with _solveMatrix() and
//! stores the first N solution values as the atoms' partial charges.
//! \return false, after logging an error, if an atom has no matching
//!         parameter entry; true otherwise
bool EEMCharges::ComputeCharges(OBMol &mol)
{
  mol.SetPartialChargesPerceived();

  if(_parameters.empty())
    _loadParameters();

  // Copied from spectrophore.cpp

  // CHI and ETA
  const unsigned int _nAtoms = mol.NumAtoms();
  const unsigned int dim(_nAtoms + 1);
  std::vector<double> CHI(dim);

  // The matrix lives in one vector and ETA holds a pointer to each row, so
  // the storage is released on every return path (the early "no parameters"
  // return used to leak the rows allocated with new[]).
  std::vector<double> etaStorage(static_cast<std::vector<double>::size_type>(dim) * dim, 0.0);
  std::vector<double*> ETA(dim);
  for (unsigned int row = 0; row < dim; ++row)
    ETA[row] = &etaStorage[static_cast<std::vector<double>::size_type>(row) * dim];

  double totalCharge(0.0);
  unsigned int i(0);
  double hardness = 0.0;
  double electronegativity = 0.0;
  for (OpenBabel::OBMolAtomIter atom(mol); atom; atom++, i++)
    {
      int n = atom->GetAtomicNum();
      int b = atom->HighestBondOrder();

      // Search for parameters for a particular atom type. An entry matches
      // on element and bond order, on element with bond order -1, or when
      // both of its fields are -1; the first matching entry wins.
      bool found = false;
      for(unsigned int j = 0; j < _parameters.size(); j++)
        {
          if((_parameters[j].Z == n && _parameters[j].bond_order == b) ||
             (_parameters[j].Z == n && _parameters[j].bond_order == - 1) ||
             (_parameters[j].Z == -1 && _parameters[j].bond_order == -1))
            {
              electronegativity = _parameters[j].A;
              hardness = _parameters[j].B;
              found = true;
              break;
            }
        }

      if(!found)
        {
          std::stringstream ss;
          ss << "No parameters found for: " << etab.GetSymbol(n) << " " << b
             << ". EEM charges were not calculated for the molecule." << std::endl;
          obErrorLog.ThrowError(__FUNCTION__, ss.str(), obError);
          return false;
        }

      CHI[i] = -electronegativity;
      ETA[i][i] = hardness;

      // Adjust the total molecular charge
      totalCharge += atom->GetFormalCharge();
    }

  // Complete CHI
  CHI[_nAtoms] = totalCharge;

  // Complete ETA
  OBAtom *rAtom, *cAtom;
  for (unsigned int r = 0; r < _nAtoms; ++r)
    {
      rAtom = mol.GetAtom(r+1); // Atom index
      for (unsigned int c = r + 1; c < _nAtoms; ++c)
        {
          cAtom = mol.GetAtom(c+1); // Atom index
          ETA[r][c] = _kappa / cAtom->GetDistance(rAtom);
          ETA[c][r] = ETA[r][c];
        }
    }
  for (unsigned int k = 0; k < dim; ++k)
    {
      ETA[k][_nAtoms] = -1.0;
      ETA[_nAtoms][k] = +1.0;
    }
  ETA[_nAtoms][_nAtoms] = 0.0;

  // Solve the matrix equation
  _solveMatrix(&ETA[0], &(CHI[0]), dim); // CHI will contain the values

  OBAtom *atom;
  for (unsigned int k = 0; k < _nAtoms; ++k)
    {
      atom = mol.GetAtom(k+1); // atom index issue
      atom->SetPartialCharge(CHI[k]);
    }

  OBChargeModel::FillChargeVectors(mol);

  return true;
}
//! \return whether partial charges were successfully assigned to this molecule bool EQEqCharges::ComputeCharges(OBMol &mol) { int i, j, a, c, N = mol.NumAtoms(); double cellVolume; VectorXf chi(N), J(N), b(N), x(N); MatrixXf J_ij(N, N), A(N, N); OBUnitCell *obuc; matrix3x3 unitcell, fourier; vector3 dx; int numNeighbors[3]; OBAtom *atom; // If parameters have not yet been loaded, do that if (!_paramFileLoaded) { if (ParseParamFile()) { _paramFileLoaded = true; } else { return false; } } // Calculate atomic properties based around their ionic charge for (i = 0; i < N; i++) { atom = mol.GetAtom(i + 1); a = atom->GetAtomicNum(); c = _chargeCenter[a]; // Fail if ionization data is missing for any atom in the molecule if (_ionizations[a][c + 1] == -1 || _ionizations[a][c] == -1 || a > TABLE_OF_ELEMENTS_SIZE) { obErrorLog.ThrowError(__FUNCTION__, "Insufficient ionization data for atoms in the given molecule. Update `data/eqeqIonizations.txt` with missing information and re-run this function.", obError); return false; } J(i) = _ionizations[a][c + 1] - _ionizations[a][c]; chi(i) = 0.5 * (_ionizations[a][c + 1] + _ionizations[a][c]) - (a == 1? 
0 : c * J(i)); } // If a unit cell is defined, use the periodic Ewald calculation if (mol.HasData(OBGenericDataType::UnitCell)) { // Get unit cell and calculate its Fourier transform + volume obuc = (OBUnitCell *) mol.GetData(OBGenericDataType::UnitCell); unitcell = obuc->GetCellMatrix(); fourier = (2 * PI * unitcell.inverse()).transpose(); cellVolume = obuc->GetCellVolume(); // Get the number of radial unit cells to use in x, y, and z numNeighbors[0] = int(ceil(minCellLength / (2.0 * (obuc->GetA())))) - 1; numNeighbors[1] = int(ceil(minCellLength / (2.0 * (obuc->GetB())))) - 1; numNeighbors[2] = int(ceil(minCellLength / (2.0 * (obuc->GetC())))) - 1; for (i = 0; i < N; i++) { atom = mol.GetAtom(i + 1); for (j = 0; j < N; j++) { dx = atom->GetVector() - (mol.GetAtom(j + 1))->GetVector(); J_ij(i, j) = GetPeriodicEwaldJij(J(i), J(j), dx, (i == j), unitcell, fourier, cellVolume, numNeighbors); } } // If no unit cell, use the simplified nonperiodic calculation } else { for (i = 0; i < N; i++) { atom = mol.GetAtom(i + 1); for (j = 0; j < N; j++) { J_ij(i, j) = GetNonperiodicJij(J(i), J(j), atom->GetDistance(j + 1), (i == j)); } return false; } } // Formulate problem as A x = b, where x is the calculated partial charges // First equation is a simple overall balance: sum(Q) = 0 A.row(0) = VectorXf::Ones(N); b(0) = 0; // Remaining equations are based off of the fact that, at equilibrium, the // energy of the system changes equally for a change in any charge: // dE/dQ_1 = dE/dQ_2 = ... 
= dE/dQ_N A.block(1, 0, N - 1, N) = J_ij.block(0, 0, N - 1, N) - J_ij.block(1, 0, N - 1, N); b.tail(N - 1) = chi.tail(N - 1) - chi.head(N - 1); // The solution is a list of charges in the system x = A.colPivHouseholderQr().solve(b); // Now we are done calculating, pass all this back to OpenBabel molecule mol.SetPartialChargesPerceived(); OBPairData *dp = new OBPairData; dp->SetAttribute("PartialCharges"); dp->SetValue("EQEq"); dp->SetOrigin(perceived); mol.SetData(dp); m_partialCharges.clear(); m_partialCharges.reserve(N); m_formalCharges.clear(); m_formalCharges.reserve(N); for (i = 0; i < N; i ++) { atom = mol.GetAtom(i + 1); atom->SetPartialCharge(x(i)); m_partialCharges.push_back(x(i)); m_formalCharges.push_back(atom->GetFormalCharge()); } obErrorLog.ThrowError(__FUNCTION__, "EQEq charges successfully assigned.", obInfo); return true; }
bool FastSearchFormat::ObtainTarget(OBConversion* pConv, vector<OBMol>& patternMols, const string& indexname) { //Obtains an OBMol from: // the filename in the -s option or // the SMARTS string in the -s option or // by converting the file in the -S or -aS options (deprecated). // If there is no -s -S or -aS option, information on the index file is displayed. OBMol patternMol; patternMol.SetIsPatternStructure(); const char* p = pConv->IsOption("s",OBConversion::GENOPTIONS); bool OldSOption=false; //If no -s option, make OBMol from file in -S option or -aS option (both deprecated) if(!p) { p = pConv->IsOption("S",OBConversion::GENOPTIONS); if(!p) p = pConv->IsOption("S",OBConversion::INOPTIONS);//for GUI mainly OldSOption = true; } if(p) { vector<string> vec; tokenize(vec, p); //ignore leading ~ (not relevant to fastsearch) if(vec[0][0]=='~') vec[0].erase(0,1); if(vec.size()>1 && vec[1]=="exact") pConv->AddOption("e", OBConversion::INOPTIONS); OBConversion patternConv; OBFormat* pFormat; //Interpret as a filename if possible string& txt =vec [0]; if( txt.empty() || txt.find('.')==string::npos || !(pFormat = patternConv.FormatFromExt(txt.c_str())) || !patternConv.SetInFormat(pFormat) || !patternConv.ReadFile(&patternMol, txt) || patternMol.NumAtoms()==0) //if false, have a valid patternMol from a file { //is SMARTS/SMILES //Replace e.g. [#6] in SMARTS by C so that it can be converted as SMILES //for the fingerprint phase, but allow more generality in the SMARTS phase. 
for(;;) { string::size_type pos1, pos2; pos1 = txt.find("[#"); if(pos1==string::npos) break; pos2 = txt.find(']'); int atno; if(pos2!=string::npos && (atno = atoi(txt.substr(pos1+2, pos2-pos1-2).c_str())) && atno>0) txt.replace(pos1, pos2-pos1+1, etab.GetSymbol(atno)); else { obErrorLog.ThrowError(__FUNCTION__,"Ill-formed [#n] atom in SMARTS", obError); return false; } } bool hasTildeBond; if( (hasTildeBond = (txt.find('~')!=string::npos)) ) // extra parens to indicate truth value { //Find ~ bonds and make versions of query molecule with a single and aromatic bonds //To avoid having to parse the SMILES here, replace ~ by $ (quadruple bond) //and then replace this in patternMol. Check first that there are no $ already //Sadly, isocynanides may have $ bonds. if(txt.find('$')!=string::npos) { obErrorLog.ThrowError(__FUNCTION__, "Cannot use ~ bonds in patterns with $ (quadruple) bonds.)", obError); return false; } replace(txt.begin(),txt.end(), '~' , '$'); } //read as standard SMILES patternConv.SetInFormat("smi"); if(!patternConv.ReadString(&patternMol, vec[0])) { obErrorLog.ThrowError(__FUNCTION__,"Cannot read the SMILES string",obError); return false; } if(hasTildeBond) { AddPattern(patternMols, patternMol, 0); //recursively add all combinations of tilde bond values return true; } } } if(OldSOption) //only when using deprecated -S and -aS options { //make -s option for later SMARTS test OBConversion conv; if(conv.SetOutFormat("smi")) { string optiontext = conv.WriteString(&patternMol, true); pConv->AddOption("s", OBConversion::GENOPTIONS, optiontext.c_str()); } } if(!p) { //neither -s or -S options provided. Output info rather than doing search const FptIndexHeader& header = fs.GetIndexHeader(); string id(header.fpid); if(id.empty()) id = "default"; clog << indexname << " is an index of\n " << header.datafilename << ".\n It contains " << header.nEntries << " molecules. 
The fingerprint type is " << id << " with " << OBFingerprint::Getbitsperint() * header.words << " bits.\n" << "Typical usage for a substructure search:\n" << "babel indexfile.fs -osmi -sSMILES\n" << "(-s option in GUI is 'Convert only molecules matching SMARTS')" << endl; return false; } patternMols.push_back(patternMol); return true; }
bool OpNewS::Do(OBBase* pOb, const char* OptionText, OpMap* pmap, OBConversion* pConv) { OBMol* pmol = dynamic_cast<OBMol*>(pOb); if(!pmol) return false; // The SMARTS and any other parameters are extracted on the first molecule // and stored in the static variables vec, inv. The parameter is cleared so that: // (a) the original -s option in transform.cpp is inactive, and // (b) the parsing does not have to be done again for multi-molecule files string txt(pmap->find(GetID())->second); // ID can be "s" or "v" static vector<string> vec; static bool inv; static int nPatternAtoms; //non-zero for exact matches static OBQuery* query; static vector<OBQuery*> queries; vector<OBQuery*>::iterator qiter; if(!txt.empty()) { //Set up on first call tokenize(vec, txt); inv = GetID()[0]=='v'; if(vec[0][0]=='~') { inv = true; vec[0].erase(0,1); } //Interpret as a filename if possible MakeQueriesFromMolInFile(queries, vec[0], &nPatternAtoms); if(vec.size()>1 && vec[1]=="exact") { if(queries.empty()) { //Convert SMARTS to SMILES to count number of atoms OBConversion conv; OBMol patmol; if(!conv.SetInFormat("smi") || !conv.ReadString(&patmol, vec[0])) { obErrorLog.ThrowError(__FUNCTION__, "Cannot read the parameter of -s option, " "which has to be valid SMILES when the exact option is used.", obError, onceOnly); delete pmol; pConv->SetOneObjectOnly(); //stop conversion return false; } nPatternAtoms = patmol.NumHvyAtoms(); } } else nPatternAtoms = 0; //disable old versions pConv->AddOption(GetID(), OBConversion::GENOPTIONS, ""); } bool match; //These are a vector of each mapping, each containing atom indxs. 
vector<vector<int> > vecatomvec; vector<vector<int> >* pMappedAtoms = NULL; OBSmartsPattern sp; if(nPatternAtoms) if(pmol->NumHvyAtoms() != nPatternAtoms) return false; int imol=0; //index of mol in pattern file if(!queries.empty()) //filename supplied { //match is set true if any of the structures match - OR behaviour for(qiter=queries.begin();qiter!=queries.end();++qiter, ++imol) { OBIsomorphismMapper* mapper = OBIsomorphismMapper::GetInstance(*qiter); OBIsomorphismMapper::Mappings mappings; mapper->MapUnique(pmol, mappings); if( (match = !mappings.empty()) ) // extra parens to indicate truth value { OBIsomorphismMapper::Mappings::iterator ita; OBIsomorphismMapper::Mapping::iterator itb; for(ita=mappings.begin(); ita!=mappings.end();++ita)//each mapping { vector<int> atomvec; for(itb=ita->begin(); itb!=ita->end();++itb)//each atom index atomvec.push_back(itb->second+1); vecatomvec.push_back(atomvec); atomvec.clear(); } pMappedAtoms = &vecatomvec; break; } } } else //SMARTS supplied { if(!sp.Init(vec[0])) { string msg = vec[0] + " cannot be interpreted as either valid SMARTS " "or the name of a file with an extension known to OpenBabel " "that contains one or more pattern molecules."; obErrorLog.ThrowError(__FUNCTION__, msg, obError, onceOnly); delete pmol; pmol = NULL; pConv->SetOneObjectOnly(); //stop conversion return false; } if( (match = sp.Match(*pmol)) ) // extra parens to indicate truth value pMappedAtoms = &sp.GetMapList(); } if((!match && !inv) || (match && inv)) { //delete a non-matching mol delete pmol; pmol = NULL; return false; } if(!inv && vec.size()>=2 && !vec[1].empty() && !nPatternAtoms) { vector<vector<int> >::iterator iter; if(vec[1]=="extract") { //Delete all unmatched atoms. 
Use only the first match ExtractSubstruct(pmol, *pMappedAtoms->begin()); return true; } // color the substructure if there is a second parameter which is not "exact" or "extract" // with multiple color parameters use the one corresponding to the query molecule, or the last if(imol>vec.size()-2) imol = vec.size()-2; for(iter=pMappedAtoms->begin();iter!=pMappedAtoms->end();++iter)//each match AddDataToSubstruct(pmol, *iter, "color", vec[imol+1]); return true; } if(pConv && pConv->IsLast()) { for(qiter=queries.begin();qiter!=queries.end();++qiter) delete *qiter; queries.clear(); } return true; }
static bool parseAtomRecord(char *buffer, OBMol &mol,int /*chainNum*/) /* ATOMFORMAT "(i5,1x,a4,a1,a3,1x,a1,i4,a1,3x,3f8.3,2f6.2,a2,a2)" */ { string sbuf = &buffer[6]; if (sbuf.size() < 48) return(false); bool hetatm = (EQn(buffer,"HETATM",6)) ? true : false; bool elementFound = false; // true if correct element found in col 77-78 /* serial number */ string serno = sbuf.substr(0,5); /* atom name */ string atmid = sbuf.substr(6,4); /* chain */ char chain = sbuf.substr(15,1)[0]; /* element */ string element = " "; if (sbuf.size() > 71) { element = sbuf.substr(70,2); if (isalpha(element[1])) { if (element[0] == ' ') { element.erase(0, 1); elementFound = true; } else if (isalpha(element[0])) { elementFound = true; } } } if (!elementFound) { stringstream errorMsg; errorMsg << "WARNING: Problems reading a PDB file\n" << " Problems reading a HETATM or ATOM record.\n" << " According to the PDB specification,\n" << " columns 77-78 should contain the element symbol of an atom.\n" << " but OpenBabel found '" << element << "' (atom " << mol.NumAtoms()+1 << ")"; obErrorLog.ThrowError(__FUNCTION__, errorMsg.str(), obWarning); } // charge - optional string scharge; if (sbuf.size() > 73) { scharge = sbuf.substr(72,2); } //trim spaces on the right and left sides while (!atmid.empty() && atmid[0] == ' ') atmid = atmid.erase(0, 1); while (!atmid.empty() && atmid[atmid.size()-1] == ' ') atmid = atmid.substr(0,atmid.size()-1); /* residue name */ string resname = sbuf.substr(11,3); if (resname == " ") resname = "UNK"; else { while (!resname.empty() && resname[0] == ' ') resname = resname.substr(1,resname.size()-1); while (!resname.empty() && resname[resname.size()-1] == ' ') resname = resname.substr(0,resname.size()-1); } string type; if (!elementFound) { // OK, we have to fall back to determining the element from the atom type // This is unreliable, but there's no other choice if (EQn(buffer,"ATOM",4)) { type = atmid.substr(0,2); if (isdigit(type[0])) { // sometimes non-standard files 
have, e.g 11HH if (!isdigit(type[1])) type = atmid.substr(1,1); else type = atmid.substr(2,1); } else if ((sbuf[6] == ' ' && strncasecmp(type.c_str(), "Zn", 2) != 0 && strncasecmp(type.c_str(), "Fe", 2) != 0) || isdigit(type[1])) //type[1] is digit in Platon type = atmid.substr(0,1); // one-character element if (resname.substr(0,2) == "AS" || resname[0] == 'N') { if (atmid == "AD1") type = "O"; if (atmid == "AD2") type = "N"; } if (resname.substr(0,3) == "HIS" || resname[0] == 'H') { if (atmid == "AD1" || atmid == "AE2") type = "N"; if (atmid == "AE1" || atmid == "AD2") type = "C"; } if (resname.substr(0,2) == "GL" || resname[0] == 'Q') { if (atmid == "AE1") type = "O"; if (atmid == "AE2") type = "N"; } // fix: #2002557 if (atmid[0] == 'H' && (atmid[1] == 'D' || atmid[1] == 'E' || atmid[1] == 'G' || atmid[1] == 'H')) // HD, HE, HG, HH, .. type = "H"; } else { //must be hetatm record if (isalpha(element[1]) && (isalpha(element[0]) || (element[0] == ' '))) { if (isalpha(element[0])) type = element.substr(0,2); else type = element.substr(1,1); if (type.size() == 2) type[1] = tolower(type[1]); } else { // no element column to use if (isalpha(atmid[0])) { if (atmid.size() > 2 && (atmid[2] == '\0' || atmid[2] == ' ')) type = atmid.substr(0,2); else if (atmid[0] == 'A') // alpha prefix type = atmid.substr(1, atmid.size() - 1); else type = atmid.substr(0,1); } else if (atmid[0] == ' ') type = atmid.substr(1,1); // one char element else type = atmid.substr(1,2); // Some cleanup steps if (atmid == resname) { type = atmid; if (type.size() == 2) type[1] = tolower(type[1]); } else if (resname == "ADR" || resname == "COA" || resname == "FAD" || resname == "GPG" || resname == "NAD" || resname == "NAL" || resname == "NDP" || resname == "ABA") { if (type.size() > 1) type = type.substr(0,1); //type.erase(1,type.size()-1); } else // other residues if (isdigit(type[0])){ type = type.substr(1,1); } else if (type.size() > 1 && isdigit(type[1])) type = type.substr(0,1); else if 
(type.size() > 1 && isalpha(type[1])) { if (type[0] == 'O' && type[1] == 'H') type = type.substr(0,1); // no "Oh" element (e.g. 1MBN) else if(isupper(type[1])) { type[1] = tolower(type[1]); } } } } // HETATM records } // no element column to use OBAtom atom; /* X, Y, Z */ string xstr = sbuf.substr(24,8); string ystr = sbuf.substr(32,8); string zstr = sbuf.substr(40,8); vector3 v(atof(xstr.c_str()),atof(ystr.c_str()),atof(zstr.c_str())); atom.SetVector(v); atom.ForceImplH(); // useful for debugging unknown atom types (e.g., PR#1577238) // cout << mol.NumAtoms() + 1 << " : '" << element << "'" << " " << etab.GetAtomicNum(element.c_str()) << endl; if (elementFound) atom.SetAtomicNum(etab.GetAtomicNum(element.c_str())); else // use our old-style guess from athe atom type atom.SetAtomicNum(etab.GetAtomicNum(type.c_str())); if ( (! scharge.empty()) && " " != scharge ) { if ( isdigit(scharge[0]) && ('+' == scharge[1] || '-' == scharge[1]) ) { const char reorderCharge[3] = { scharge[1], scharge[0], '\0' }; const int charge = atoi(reorderCharge); atom.SetFormalCharge(charge); } else { stringstream errorMsg; errorMsg << "WARNING: Problems reading a PDB file\n" << " Problems reading a HETATM or ATOM record.\n" << " According to the PDB specification,\n" << " columns 79-80 should contain charge of the atom\n" << " but OpenBabel found '" << scharge << "' (atom " << mol.NumAtoms()+1 << ")."; obErrorLog.ThrowError(__FUNCTION__, errorMsg.str(), obWarning); } } else { atom.SetFormalCharge(0); } /* residue sequence number */ string resnum = sbuf.substr(16,4); OBResidue *res = (mol.NumResidues() > 0) ? 
mol.GetResidue(mol.NumResidues()-1) : NULL; if (res == NULL || res->GetName() != resname || res->GetNumString() != resnum || res->GetChain() != chain) { vector<OBResidue*>::iterator ri; for (res = mol.BeginResidue(ri) ; res ; res = mol.NextResidue(ri)) if (res->GetName() == resname && res->GetNumString() == resnum && static_cast<int>(res->GetChain()) == chain) break; if (res == NULL) { res = mol.NewResidue(); res->SetChain(chain); res->SetName(resname); res->SetNum(resnum); } } if (!mol.AddAtom(atom)) return(false); else { OBAtom *atom = mol.GetAtom(mol.NumAtoms()); res->AddAtom(atom); res->SetSerialNum(atom, atoi(serno.c_str())); res->SetAtomID(atom, sbuf.substr(6,4)); res->SetHetAtom(atom, hetatm); return(true); } } // end reading atom records
//! Read a Carine ACR file into an OBMol.
//!
//! Recognises the "General Scale=", "Number of Atoms in Crystal=" and
//! "Number of Links in Crystal=" header lines; the rows after each of the
//! two count lines are read as atoms or bonds. Coordinates are divided by
//! the scale, which stays at 1 when no scale line is present.
//! \return false if a table row is too short, a bond cannot be added or the
//!         counts read differ from the counts announced; true otherwise
bool ACRFormat::ReadMolecule(OBBase* pOb, OBConversion* pConv)
{
  OBMol* pmol = pOb->CastAndClear<OBMol>();
  if(pmol==NULL)
    return false;

  istream& ifs = *pConv->GetInStream();

  pmol->BeginModify();

  /** Parse the input stream and use the OpenBabel API to populate the OBMol **/
  char buf[BUFF_SIZE];
  // Initialised so that a file without the header lines cannot lead to
  // reads of indeterminate values.
  int atoms = 0, bonds = 0, tmp = 0;
  float scale = 1.0f, dtmp = 0.0f;
  bool atom_input = false, bond_input = false;
  string type;
  double X,Y,Z;
  vector<string> vs;

  // read in one at a time
  /* WARNING: Atom id starts from zero in Carine; not so in openbabel.
   * Solution: increment atom id's */

  // Looping on the getline() result ends on any stream failure (the old
  // eof() test never ended on an over-long line) and no longer drops a last
  // line that lacks a newline.
  while (ifs.getline(buf, BUFF_SIZE))
    {
      // sscanf() returns EOF (-1) for an empty line, so each result is
      // compared with 1 rather than used as a truth value.
      if (sscanf(buf, "General Scale=%f\n", &dtmp) == 1)
        {
          scale = dtmp;
          continue;
        }
      else if (sscanf(buf, "Number of Atoms in Crystal=%d\n", &tmp) == 1)
        {
          atoms = tmp;
          atom_input = true;

          // read table column names
          ifs.getline(buf, BUFF_SIZE);
          continue;
        }
      else if (sscanf(buf, "Number of Links in Crystal=%d\n", &tmp) == 1)
        {
          atom_input = false;
          bond_input = true;
          bonds = tmp;

          // read table column names
          ifs.getline(buf, BUFF_SIZE);
          continue;
        }
      else if ( '\0' == buf[0] || '#' == buf[0] || '\r' == buf[0] || '\n' == buf[0] )
        {
          // between sections, in both windows and unix.
          // Empty lines are skipped here explicitly; they used to be skipped
          // only through the EOF result mentioned above.
          continue;
        }

      tokenize(vs, buf, " \t\r\n");

      if (atom_input)
        {
          if (vs.size() < 9)
            return false; // timvdm 18/06/2008

          type = vs[1];
          X = atof((char*)vs[6].c_str())/scale;
          Y = atof((char*)vs[7].c_str())/scale;
          Z = atof((char*)vs[8].c_str())/scale;

          OBAtom* a = pmol->NewAtom();
          if (*(type.c_str()) != '*')
            a->SetAtomicNum(etab.GetAtomicNum(type.c_str()));
          a->SetVector(X,Y,Z);
        }
      else if (bond_input)
        {
          if (vs.size() < 2)
            return false; // timvdm 18/06/2008

          // add to pmol; +1 because of the zero-based ids (see warning above)
          if (!pmol->AddBond(atoi((char*)vs[0].c_str()) + 1, atoi((char*)vs[1].c_str()) + 1,
                             1 /* bond order not specified in Carine, use PerceiveBondOrder later */))
            {
              obErrorLog.ThrowError(__FUNCTION__, "addition of bond between " + vs[0] + " and " + vs[1] + " failed", obError);
              return false;
            }
        }
    }

  /* got sanity? */
  if ( static_cast<int>(pmol->NumBonds()) != bonds )
    {
      // then we read a different number of bonds than those promised.
      obErrorLog.ThrowError(__FUNCTION__, "Number of bonds read does not match the number promised", obError);
      return false;
    }
  else if ( static_cast<int>(pmol->NumAtoms()) != atoms )
    {
      obErrorLog.ThrowError(__FUNCTION__, "Number of atoms read does not match the number promised", obError);
      return false;
    }

  pmol->PerceiveBondOrders();

  pmol->EndModify();

  return true;
}
int main(int argc,char *argv[]) { // turn off slow sync with C-style output (we don't use it anyway). std::ios::sync_with_stdio(false); // Define location of file formats for testing #ifdef FORMATDIR char env[BUFF_SIZE]; snprintf(env, BUFF_SIZE, "BABEL_LIBDIR=%s", FORMATDIR); putenv(env); #endif if (argc != 1) { if (strncmp(argv[1], "-g", 2)) { cout << "Usage: smartstest\n"; cout << " Tests Open Babel SMILES/SMARTS pattern matching." << endl; return 0; } else { GenerateSmartsReference(); return 0; } } cout << endl << "# Testing SMARTS... \n"; std::ifstream ifs; if (!SafeOpen(ifs, smarts_file.c_str())) { cout << "Bail out! Cannot read " << smarts_file << endl; return -1; // test failed } //read in the SMARTS test patterns char buffer[BUFF_SIZE]; vector<OBSmartsPattern*> vsp; for (;ifs.getline(buffer,BUFF_SIZE);) { if (buffer[0] == '#') // skip comment line continue; OBSmartsPattern *sp = new OBSmartsPattern; if (sp->Init(buffer)) vsp.push_back(sp); else delete sp; } ifs.close(); std::ifstream rifs; if (!SafeOpen(rifs, results_file.c_str())) { cout << "Bail out! Cannot read in results file " << results_file << endl; return -1; // test failed } unsigned int npats; rifs.getline(buffer,BUFF_SIZE); sscanf(buffer,"%d %*s",&npats); //make sure the number of SMARTS patterns is the same as in the //reference data if (npats != vsp.size()) { cout << "Bail out! Correct number of patterns not read in" << "Read in " << vsp.size() << " expected " << npats << endl; return -1; // test failed } std::ifstream mifs; if (!SafeOpen(mifs, smilestypes_file.c_str())) { cout << "Bail out! Cannot read atom types " << smilestypes_file << endl; return -1; // test failed } unsigned int k; unsigned int res_line = 0; OBMol mol; vector<string> vs; vector<OBSmartsPattern*>::iterator i; vector<vector<int> > mlist; unsigned int currentMol = 0; // each molecule is a separate test bool molPassed = true; OBConversion conv(&mifs, &cout); if (! conv.SetInAndOutFormats("SMI","SMI")) { cout << "Bail out! 
SMILES format is not loaded" << endl; return -1; } //read in molecules, match SMARTS, and compare results to reference data for (;mifs;) { mol.Clear(); conv.Read(&mol); if (mol.Empty()) continue; currentMol++; molPassed = true; for (i = vsp.begin();i != vsp.end();i++) { if (!rifs.getline(buffer,BUFF_SIZE)) { cout << "Bail out! Error reading reference data" << endl; return -1; // test failed } res_line++; tokenize(vs,buffer); (*i)->Match(mol); mlist = (*i)->GetMapList(); if (mlist.size() != vs.size()) { cout << "not ok " << currentMol << " # number of matches different than reference\n"; cout << "# Expected " << vs.size() << " matches, found " << mlist.size() << "\n"; cout << "# Error with molecule " << mol.GetTitle(); cout << "# on pattern " << (*i)->GetSMARTS() << "\n"; if (mlist.size()) cout << "# First match: atom #" << mlist[0][0] << "\n"; molPassed = false; break; } if (mlist.size()) { for (k = 0;k < vs.size();k++) { if (atoi(vs[k].c_str()) != mlist[k][0]) { cout << "not ok " << currentMol << "# matching atom numbers different than reference\n"; cout << "# Expected " << vs[k] << " but found " << mlist[k][0] << "\n"; cout << "# Molecule: " << mol.GetTitle() << "\n"; cout << "# Pattern: " << (*i)->GetSMARTS() << "\n"; molPassed = false; break; } } if (k != vs.size()) { molPassed = false; break; } } } if (molPassed) cout << "ok " << currentMol << " # molecule passed tests\n"; } // output the number of tests run cout << "1.." << currentMol << endl; // Passed Test return 0; }
bool SVGFormat::WriteMolecule(OBBase* pOb, OBConversion* pConv) { OBMol* pmol = dynamic_cast<OBMol*>(pOb); if(!pmol) return false; ostream &ofs = *pConv->GetOutStream(); //Check for option for single mol in fixed size image const char* fixedpx = pConv->IsOption("P"); if(!fixedpx) fixedpx= pConv->IsOption("px", OBConversion::GENOPTIONS); //If WriteMolecule called directly, e.g. from OBConversion::Write() //the default mode is a fixed image size of 200px square if(!fixedpx && !pConv->IsOption("svgwritechemobject")) fixedpx = "200"; if(fixedpx) { _nmax = _nrows = _ncols = 1; pConv->AddOption("j"); pConv->SetLast(true); pConv->SetOutputIndex(1); } //*** Coordinate generation *** //Generate coordinates only if no existing 2D coordinates if( (pConv->IsOption("y") || !pmol->Has2D(true)) && !pConv->IsOption("n") ) { OBOp* pOp = OBOp::FindType("gen2D"); if(!pOp) { obErrorLog.ThrowError("SVGFormat", "gen2D not found", obError, onceOnly); return false; } if(!pOp->Do(pmol)) { obErrorLog.ThrowError("SVGFormat", string(pmol->GetTitle()) + "- Coordinate generation unsuccessful", obError); return false; } } if(!pmol->Has2D() && pmol->NumAtoms()>1)//allows 3D coordinates (if passed by -xn above) { string mes("Molecule "); mes += pmol->GetTitle(); mes += " needs 2D coordinates to display in SVGformat"; obErrorLog.ThrowError("SVGFormat", mes, obError); return false; } bool hasTable = (_nrows || _ncols); bool transparent=false; string background, bondcolor; const char* bg = pConv->IsOption("b"); background = bg ? "black" : "white"; bondcolor = bg ? "white" : "black"; if(bg && (!strcmp(bg, "none") || bg[0]=='0')) { transparent = true; bondcolor = "gray"; } const char* bcol = pConv->IsOption("B"); if(bcol && *bcol) bondcolor = bcol; if(bg && *bg) background = bg; if(pConv->GetOutputIndex()==1 || fixedpx) { //For the first molecule... 
if(hasTable) { //multiple molecules - use a table //Outer svg has viewbox for 0 0 100 100 or adjusted for table shape, //and no width or height - it uses the whole of its containing element. //Inner svg with width, height, x, y of table cell, //and viewbox to match molecule min and max x and y if(!pConv->IsOption("x")) ofs << "<?xml version=\"1.0\"?>\n"; ofs << "<svg version=\"1.1\" id=\"topsvg\"\n" "xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\"\n" "xmlns:cml=\"http://www.xml-cml.org/schema\" "; //*** Outer viewbox *** double vbwidth=100, vbheight=100; if (_nrows>_ncols) vbwidth = (100*_ncols)/_nrows; else if(_ncols>_nrows) vbheight = (100*_nrows)/_ncols; if(fixedpx)//fixed size image ofs << "x=\"0\" y=\"0\" width=\"" << fixedpx << "px\" height=\"" << fixedpx <<"px\" "; else ofs << "x=\"0\" y=\"0\" width=\"100%\" height=\"100%\" "; ofs << "viewBox=\"0 0 " << vbwidth << ' ' << vbheight << "\">\n"; ofs << "<title>OBDepict</title>\n"; // Draw the background unless transparent if(!transparent) ofs << "<rect x=\"0\" y=\"0\" width=\"" << vbwidth << "\" height=\"" << vbheight << "\" fill=\"" << background << "\"/>\n"; } } //All mols double cellsize; if(hasTable) { //*** Parameter for inner svg *** int nc = _ncols ? _ncols : 1; int nr = (_nrows ? _nrows : 1); cellsize = 100. / std::max(nc, nr); int indx = pConv->GetOutputIndex() - 1; double innerX = (indx % nc) * cellsize; double innerY = (indx / nc) * cellsize; // Change the background in this cell if the condition in the first // parameter of the -xh option is met. Use a default color if // the highlight color is not specified in the second parameter. const char* htxt = pConv->IsOption("h"); if(htxt) { vector<string> vec; tokenize(vec, htxt); string highlight(vec.size()>1 ? 
vec[1] : "#f4f0ff"); std::istringstream conditionText(vec[0]); if(OBDescriptor::FilterCompare(pOb, conditionText, false)) //Still in outer <svg>, unfortunately ofs << "<rect x=\"" << innerX << "\" y=\"" << innerY << "\" width=\"" << cellsize << "\" height=\"" << cellsize << "\" fill=\"" << highlight << "\"/>\n"; } //*** Write molecule name *** if(!pConv->IsOption("d")) ofs << "<text text-anchor=\"middle\" font-size=\"" << 0.06*cellsize << "\"" << " fill =\"" << bondcolor << "\" font-family=\"sans-serif\"\n" << "x=\"" << innerX + cellsize * 0.5 << "\" y=\"" << innerY + cellsize - 2.0/nr << "\" >" << pmol->GetTitle() << "</text>\n"; SVGPainter painter(*pConv->GetOutStream(), true, cellsize,cellsize,innerX,innerY); OBDepict depictor(&painter); if(!pConv->IsOption("C")) depictor.SetOption(OBDepict::drawTermC);// on by default if(pConv->IsOption("a")) depictor.SetOption(OBDepict::drawAllC); if(pConv->IsOption("A")) { AliasData::RevertToAliasForm(*pmol); depictor.SetAliasMode(); } painter.SetFontFamily("sans-serif"); painter.SetPenColor(OBColor(bondcolor)); depictor.SetBondColor(bondcolor); if(pConv->IsOption("t")) painter.SetPenWidth(4); else painter.SetPenWidth(2); //No element-specific atom coloring if requested if(pConv->IsOption("u")) depictor.SetOption(OBDepict::bwAtoms); if(!pConv->IsOption("U")) depictor.SetOption(OBDepict::internalColor); if(pConv->IsOption("s")) depictor.SetOption(OBDepict::asymmetricDoubleBond); depictor.DrawMolecule(pmol); //Draw atom indices if requested if(pConv->IsOption("i")) depictor.AddAtomLabels(OBDepict::AtomIndex); //Embed CML of molecule if requested if(pConv->IsOption("e")) EmbedCML(pmol,pConv); } else //single molecule { //Nothing written until DrawMolecule call //Final </svg> written at the end of this block (painter destructor) //This leads to some code duplication. double factor = 1.0; SVGPainter painter(*pConv->GetOutStream(), false); OBDepict depictor(&painter); //Scale image by specifying the average bond length in pixels. 
const char* ppx = pConv->IsOption("p"); if(!ppx) ppx= pConv->IsOption("px", OBConversion::GENOPTIONS); if(ppx) { double oldblen = depictor.GetBondLength(); double newblen = atof(ppx); depictor.SetBondLength(newblen); factor = newblen / oldblen; //Scale bondspacing and font size by same factor depictor.SetBondSpacing(depictor.GetBondSpacing() * factor); depictor.SetFontSize((int)(depictor.GetFontSize() * factor)); } if(pConv->IsOption("W")) depictor.SetOption(OBDepict::noWedgeHashGen); if(!pConv->IsOption("C")) depictor.SetOption(OBDepict::drawTermC);// on by default if(pConv->IsOption("a")) depictor.SetOption(OBDepict::drawAllC); if(pConv->IsOption("A")) { AliasData::RevertToAliasForm(*pmol); depictor.SetAliasMode(); } painter.SetFontFamily("sans-serif"); painter.SetPenColor(OBColor(bondcolor)); depictor.SetBondColor(bondcolor); painter.SetFillColor(OBColor(background)); if(pConv->IsOption("t")) painter.SetPenWidth(4); else painter.SetPenWidth(1); //No element-specific atom coloring if requested if(pConv->IsOption("u")) depictor.SetOption(OBDepict::bwAtoms); if(!pConv->IsOption("U")) depictor.SetOption(OBDepict::internalColor); if(pConv->IsOption("s")) depictor.SetOption(OBDepict::asymmetricDoubleBond); depictor.DrawMolecule(pmol); //Draw atom indices if requested if(pConv->IsOption("i")) depictor.AddAtomLabels(OBDepict::AtomIndex); //*** Write molecule name *** if(!pConv->IsOption("d")) ofs << "<text font-size=\"" << 18 * factor << "\"" << " fill =\"" << bondcolor << "\" font-family=\"sans-serif\"\n" << "x=\"" << 10 * factor << "\" y=\"" << 20 * factor << "\" >" << pmol->GetTitle() << "</text>\n"; //*** Write page title name *** ofs << "<title>" << pmol->GetTitle() << " - OBDepict</title>\n"; //Embed CML of molecule if requested if(pConv->IsOption("e")) EmbedCML(pmol,pConv); } if(hasTable && pConv->IsLast()) { //Draw grid lines if(_nrows && _ncols && pConv->IsOption("l")) { for(int i=1; i<_nrows; ++i) ofs << " <line stroke=\"gray\" stroke-width=\"0.1\" x1=\"0\" 
x2=\"100\"" << " y1=\"" << i*cellsize << "\" y2=\"" << i*cellsize << "\"/>\n"; for(int i=1; i<_ncols; ++i) ofs << " <line stroke=\"gray\" stroke-width=\"0.1\" y1=\"0\" y2=\"100\"" << " x1=\"" << i*cellsize << "\" x2=\"" << i*cellsize << "\"/>\n"; } //Insert javascript for zooming and panning if(!pConv->IsOption("j")) EmbedScript(ofs); ofs << "</svg>\n" << endl;//Outer svg } return !fixedpx; // return false with fixed size image because only 1 mol }
/// Searches a fastsearch index for structural matches and converts the hits.
///
/// Called only once per search. The index name is the input filename of
/// @p pConv with its extension replaced by ".fs" (when it has an extension).
/// The datafile named in the index is opened, installed as the input stream
/// of @p pConv, and each hit is converted individually by seeking to its
/// stored position.
///
/// Input options read here: -as (doSubset), -ae (exact match), -at (similarity
/// search: a value containing '.' is a Tanimoto threshold, otherwise a hit
/// count), -aa (append the Tanimoto coefficient to the title), -al (maximum
/// number of candidates, default 4000).
///
/// @param pConv conversion object providing filenames, options and streams
/// @return always false; the caller has nothing further to do with this input
bool FastSearchFormat::ReadChemObject(OBConversion* pConv)
{
  // Audit trail entry: fixed prefix plus the first line of the format description
  const std::string formatDescription(Description());
  std::string auditMsg = "OpenBabel::Read fastsearch index ";
  auditMsg += formatDescription.substr(0, formatDescription.find('\n'));
  obErrorLog.ThrowError(__FUNCTION__, auditMsg, obAuditMsg);

  //Derive index name
  string indexname = pConv->GetInFilename();
  const string::size_type dotPos = indexname.find_last_of('.');
  if(dotPos != string::npos)
  {
    indexname.erase(dotPos);
    indexname += ".fs";
  }

  //Have to open input stream again because needs to be in binary mode
  stringstream errorMsg;
  ifstream ifs;
  if(!indexname.empty())
    ifs.open(indexname.c_str(), ios::binary);
  // NOTE: a stream that was never opened does not test as failed here;
  // that case is caught by the ReadIndex check below.
  if(!ifs)
  {
    errorMsg << "Couldn't open " << indexname << endl;
    obErrorLog.ThrowError(__FUNCTION__, errorMsg.str(), obError);
    return false;
  }

  const string datafilename = fs.ReadIndex(&ifs);
  if(datafilename.empty())
  {
    errorMsg << "Difficulty reading from index " << indexname << endl;
    obErrorLog.ThrowError(__FUNCTION__, errorMsg.str(), obError);
    return false;
  }

  OBMol patternMol;
  const bool doSubset   = pConv->IsOption("s", OBConversion::INOPTIONS) != NULL; // -as option
  const bool exactmatch = pConv->IsOption("e", OBConversion::INOPTIONS) != NULL; // -ae option
  //Similarity or substructure searches need a target molecule
  if(!doSubset && !ObtainTarget(pConv, patternMol, indexname))
    return false;

  //Open the datafile and put it in pConv.
  //The datafile name derived from the index file probably won't have a file
  //path but indexname may, so borrow the directory part of indexname.
  const string::size_type sepPos = indexname.find_last_of("/\\");
  const string path = (sepPos == string::npos)
                        ? datafilename
                        : indexname.substr(0, sepPos + 1) + datafilename;

  ifstream datastream(path.c_str());
  if(!datastream)
  {
    errorMsg << "Difficulty opening " << path << endl;
    obErrorLog.ThrowError(__FUNCTION__, errorMsg.str(), obError);
    return false;
  }
  pConv->SetInStream(&datastream);

  //Input format is currently fs; set it appropriately
  OBFormat* pDataFormat = pConv->FormatFromExt(datafilename.c_str());
  if(!pConv->SetInAndOutFormats(pDataFormat, pConv->GetOutFormat()))
    return false;
  pConv->AddOption("b", OBConversion::GENOPTIONS);

  //Now do searching
  const char* similarityTxt = pConv->IsOption("t", OBConversion::INOPTIONS);
  if(similarityTxt)
  {
    //Do a similarity search
    multimap<double, unsigned int> SeekposMap;
    const string txt = similarityTxt;
    if(txt.find('.') == string::npos)
    {
      //Finds n molecules with largest Tanimoto
      int n = atoi(similarityTxt);
      fs.FindSimilar(&patternMol, SeekposMap, n);
    }
    else
    {
      //Finds molecules with Tanimoto > MinTani
      double MinTani = atof(txt.c_str());
      fs.FindSimilar(&patternMol, SeekposMap, MinTani);
    }

    //Don't want to filter through SMARTS filter
    pConv->RemoveOption("s", OBConversion::GENOPTIONS);

    //Walk the hits from the largest key downwards
    typedef multimap<double, unsigned int>::reverse_iterator HitIter;
    for(HitIter hit = SeekposMap.rbegin(); hit != SeekposMap.rend(); ++hit)
    {
      datastream.seekg(hit->second);

      if(pConv->IsOption("a", OBConversion::INOPTIONS))
      {
        //Adds Tanimoto coeff to title
        //First remove any previous value
        pConv->RemoveOption("addtotitle", OBConversion::GENOPTIONS);
        stringstream ss;
        ss << " " << hit->first;
        pConv->AddOption("addtotitle", OBConversion::GENOPTIONS, ss.str().c_str());
      }

      pConv->SetOneObjectOnly();
      HitIter following = hit;
      ++following;
      if(following != SeekposMap.rend())
        pConv->SetMoreFilesToCome();//so that not seen as last on output
      pConv->Convert(NULL, NULL);
    }
  }
  else
  {
    //Structure search
    int MaxCandidates = 4000;
    const char* limitTxt = pConv->IsOption("l", OBConversion::INOPTIONS);
    if(limitTxt)
    {
      const int requested = atoi(limitTxt);
      if(requested)
        MaxCandidates = requested;
    }

    vector<unsigned int> SeekPositions;
    if(exactmatch)
    {
      //Find mols where all fingerprint bits are the same as the target
      fs.FindMatch(&patternMol, SeekPositions, MaxCandidates);
      // ensure that SMARTS filter in transform.cpp looks only for an exact match
      // by setting an option with the number of heavy atoms in the pattern mol included.
      stringstream ss;
      ss << patternMol.NumHvyAtoms();
      pConv->AddOption("exactmatch", OBConversion::GENOPTIONS, ss.str().c_str());
    }
    else
    {
      //Do a substructure search
      fs.Find(&patternMol, SeekPositions, MaxCandidates);
      clog << SeekPositions.size() << " candidates from fingerprint search phase" << endl;
    }

    //Output the candidate molecules
    //filtering through s filter, unless the fingerprint type does not require it
    if(fs.GetFingerprint()->Flags() & OBFingerprint::FPT_UNIQUEBITS)
      pConv->RemoveOption("s", OBConversion::GENOPTIONS);

    const vector<unsigned int>::size_type nCandidates = SeekPositions.size();
    for(vector<unsigned int>::size_type k = 0; k < nCandidates; ++k)
    {
      const unsigned int seekpos = SeekPositions[k];
      datastream.seekg(seekpos);

      //debugging kludge to output all candidates directly
      if(pConv->IsOption("c", OBConversion::GENOPTIONS))
      {
        string ln;
        getline(datastream, ln);
        datastream.seekg(seekpos);
        *pConv->GetOutStream() << "** " << ln << endl;
      }
      pConv->SetOneObjectOnly();
      pConv->SetLast(k + 1 == nCandidates);
      pConv->Convert(NULL, NULL);
    }
  }
  return false; //To finish
}
/// True when @p pos is a usable position in the match vector @p match.
static inline bool IsValidMatchPos(const std::vector<int> &match, int pos)
{
  return pos >= 0 && pos < static_cast<int>(match.size());
}

/// Applies this transformation to every unique match of the start pattern.
///
/// For each match returned by _bgn the following edits are made, in order:
/// formal charges (_vchrg), bond orders (_vbond), element changes (_vele) and
/// atom deletions (_vadel). Implicit hydrogen counts are adjusted by the
/// charge / bond-order delta and clamped at zero.
///
/// Entries that refer to a position outside the match, or to an atom or bond
/// that cannot be found in @p mol, are skipped instead of being dereferenced.
///
/// @param mol molecule to modify in place
/// @return false when the start pattern does not match (mol is untouched),
///         true otherwise
bool OBChemTsfm::Apply(OBMol &mol)
{
  if (!_bgn.Match(mol))
    return(false);
  mol.BeginModify();
  vector<vector<int> > mlist = _bgn.GetUMapList();

  obErrorLog.ThrowError(__FUNCTION__,
                        "Ran OpenBabel::OBChemTransform", obAuditMsg);

  if (!_vchrg.empty()) //modify charges
    {
      vector<vector<int> >::iterator i;
      vector<pair<int,int> >::iterator j;

      for (i = mlist.begin();i != mlist.end();++i)
        for (j = _vchrg.begin();j != _vchrg.end();++j)
          {
            if (!IsValidMatchPos(*i, j->first)) //goof proofing
              continue;
            OBAtom *atom = mol.GetAtom((*i)[j->first]);
            if (!atom)
              continue;
            int old_charge = atom->GetFormalCharge();
            atom->SetFormalCharge(j->second);
            // signed arithmetic so that a negative delta cannot wrap around
            int new_hcount = static_cast<int>(atom->GetImplicitHCount())
                             + (j->second - old_charge);
            if (new_hcount < 0)
              new_hcount = 0;
            atom->SetImplicitHCount(new_hcount);
          }
    }

  if (!_vbond.empty()) //modify bond orders
    {
      OBBond *bond;
      vector<vector<int> >::iterator i;
      vector<pair<pair<int,int>,int> >::iterator j;
      for (i = mlist.begin();i != mlist.end();++i)
        for (j = _vbond.begin();j != _vbond.end();++j)
          {
            if (!IsValidMatchPos(*i, j->first.first) ||
                !IsValidMatchPos(*i, j->first.second))
              continue;
            bond = mol.GetBond((*i)[j->first.first],(*i)[j->first.second]);
            if (!bond)
              {
                obErrorLog.ThrowError(__FUNCTION__, "unable to find bond", obDebug);
                continue;
              }
            int old_bond_order = static_cast<int>(bond->GetBondOrder());
            bond->SetBondOrder(j->second);
            // Raising the bond order removes implicit hydrogens from both ends
            for (int k = 0; k < 2; ++k)
              {
                OBAtom* atom = k == 0 ? bond->GetBeginAtom() : bond->GetEndAtom();
                if (!atom)
                  continue;
                int new_hcount = static_cast<int>(atom->GetImplicitHCount())
                                 - (j->second - old_bond_order);
                if (new_hcount < 0)
                  new_hcount = 0;
                atom->SetImplicitHCount(new_hcount);
              }
          }
    }

  if (!_vadel.empty() || !_vele.empty()) //delete atoms and change elements
    {
      vector<int>::iterator j;
      vector<vector<int> >::iterator i;

      if (!_vele.empty())
        {
          vector<pair<int,int> >::iterator k;
          for (i = mlist.begin();i != mlist.end();++i)
            for (k = _vele.begin();k != _vele.end();++k)
              {
                if (!IsValidMatchPos(*i, k->first))
                  continue;
                OBAtom *atom = mol.GetAtom((*i)[k->first]);
                if (atom)
                  atom->SetAtomicNum(k->second);
              }
        }

      //make sure same atom isn't deleted twice
      vector<bool> vda;
      vector<OBAtom*> vdel;
      vda.resize(mol.NumAtoms()+1,false);
      for (i = mlist.begin();i != mlist.end();++i)
        for (j = _vadel.begin();j != _vadel.end();++j)
          {
            if (!IsValidMatchPos(*i, *j))
              continue;
            const int idx = (*i)[*j];
            if (idx < 0 || idx >= static_cast<int>(vda.size()) || vda[idx])
              continue;
            OBAtom *atom = mol.GetAtom(idx);
            if (!atom)
              continue;
            vda[idx] = true;
            vdel.push_back(atom);
          }

      vector<OBAtom*>::iterator k;
      for (k = vdel.begin();k != vdel.end();++k)
        mol.DeleteAtom(*k);
    }

  mol.EndModify();
  return(true);
}
bool FastSearchFormat::WriteChemObject(OBConversion* pConv) { //Prepares or updates an index file. Called for each molecule indexed bool update = pConv->IsOption("u")!=NULL; static ostream* pOs; static bool NewOstreamUsed; if(fsi==NULL) { //First pass sets up FastSearchIndexer object pOs = pConv->GetOutStream();// with named index it is already open NewOstreamUsed=false; string mes("prepare an"); if(update) mes = "update the"; clog << "This will " << mes << " index of " << pConv->GetInFilename() << " and may take some time..." << flush; if(!pConv->IsLastFile()) { obErrorLog.ThrowError(__FUNCTION__, "There should not be multiple input files. A .fs file is an index of a single datafile.", obError); return false; } std::string auditMsg = "OpenBabel::Write fastsearch index "; std::string description(Description()); auditMsg += description.substr( 0, description.find('\n') ); obErrorLog.ThrowError(__FUNCTION__,auditMsg,obAuditMsg); sw.Start(); FptIndex* pidx; //used with update //if(pOs==&cout) did not work with GUI if(!dynamic_cast<ofstream*>(pOs)) { //No index filename specified //Derive index name from datafile name string indexname=pConv->GetInFilename(); string::size_type pos=indexname.find_last_of('.'); if(pos!=string::npos) indexname.erase(pos); indexname += ".fs"; bool idxok=true; if(update) { LastSeekpos = 0; //Read in existing index idxok=false; ifstream ifs(indexname.c_str(),ifstream::binary); if(ifs.good()) { pidx = new FptIndex; idxok = pidx->Read(&ifs); } }//ifs closed here pOs = new ofstream(indexname.c_str(),ofstream::binary); if(!pOs->good() || !idxok) { stringstream errorMsg; errorMsg << "Trouble opening or reading " << indexname << endl; obErrorLog.ThrowError(__FUNCTION__, errorMsg.str(), obError); return false; } NewOstreamUsed=true; } else // not cout { if(update) { obErrorLog.ThrowError(__FUNCTION__, "Currently, updating is only done on index files that" "have the same name as the datafile.\n" "Do not specify an output file; use the form:\n" " 
babel datafile.xxx -ofs -xu", obError); return false; } } int nbits = 0; const char* p = pConv->IsOption("N"); if(p) nbits = atoi(p); string fpid; //fingerprint type p=pConv->IsOption("f"); if(p) fpid=p; //Prepare name without path string datafilename = pConv->GetInFilename(); if(datafilename.empty()) { obErrorLog.ThrowError(__FUNCTION__, "No datafile!", obError); return false; } string::size_type pos = datafilename.find_last_of("/\\"); if(pos!=string::npos) datafilename=datafilename.substr(pos+1); int nmols = pConv->NumInputObjects(); if(nmols>0) clog << "\nIt contains " << nmols << " molecules" << flush; if(update) { fsi = new FastSearchIndexer(pidx, pOs, nmols);//using existing index //Seek to position in datafile of last of old objects LastSeekpos = *(pidx->seekdata.end()-1); pConv->GetInStream()->seekg(LastSeekpos); } else fsi = new FastSearchIndexer(datafilename, pOs, fpid, nbits, nmols); obErrorLog.StopLogging(); } //All passes provide an object for indexing OBBase* pOb = pConv->GetChemObject(); OBMol* pmol = dynamic_cast<OBMol*> (pOb); if(pmol) pmol->ConvertDativeBonds();//use standard form for dative bonds streampos seekpos = pConv->GetInPos(); if(!update || seekpos>LastSeekpos) fsi->Add(pOb, seekpos ); else //Don't index old objects during update. Don't increment pConv->Index. pConv->SetOutputIndex(pConv->GetOutputIndex()-1); if(pConv->IsLast()) { //Last pass delete fsi; //saves index file if(NewOstreamUsed) delete pOs; //return to starting conditions fsi=NULL; obErrorLog.StartLogging(); double secs = sw.Elapsed(); if(secs>150) clog << "\n It took " << secs/60 << " minutes" << endl; else clog << "\n It took " << secs << " seconds" << endl; } delete pOb; return true; }
int main(int argc,char **argv) { char *program_name= argv[0]; int c; int steps = 2500; double crit = 1e-6; bool sd = false; bool cut = false; bool newton = false; bool hydrogens = false; double rvdw = 6.0; double rele = 10.0; int freq = 10; string basename, filename = "", option, option2, ff = "MMFF94"; char *oext; OBConversion conv; OBFormat *format_out = conv.FindFormat("pdb"); // default output format if (argc < 2) { cout << "Usage: obminimize [options] <filename>" << endl; cout << endl; cout << "options: description:" << endl; cout << endl; cout << " -c crit set convergence criteria (default=1e-6)" << endl; cout << endl; cout << " -cg use conjugate gradients algorithm (default)" << endl; cout << endl; cout << " -sd use steepest descent algorithm" << endl; cout << endl; cout << " -newton use Newton2Num linesearch (default=Simple)" << endl; cout << endl; cout << " -ff ffid select a forcefield:" << endl; cout << endl; cout << " -h add hydrogen atoms" << endl; cout << endl; cout << " -n steps specify the maximum numer of steps (default=2500)" << endl; cout << endl; cout << " -cut use cut-off (default=don't use cut-off)" << endl; cout << endl; cout << " -rvdw rvdw specify the VDW cut-off distance (default=6.0)" << endl; cout << endl; cout << " -rele rele specify the Electrostatic cut-off distance (default=10.0)" << endl; cout << endl; cout << " -pf freq specify the frequency to update the non-bonded pairs (default=10)" << endl; cout << endl; OBPlugin::List("forcefields", "verbose"); exit(-1); } else { int ifile = 1; for (int i = 1; i < argc; i++) { option = argv[i]; // steps if ((option == "-n") && (argc > (i+1))) { steps = atoi(argv[i+1]); ifile += 2; } // vdw cut-off if ((option == "-rvdw") && (argc > (i+1))) { rvdw = atof(argv[i+1]); ifile += 2; } // ele cut-off if ((option == "-rele") && (argc > (i+1))) { rele = atof(argv[i+1]); ifile += 2; } // pair update frequency if ((option == "-pf") && (argc > (i+1))) { freq = atoi(argv[i+1]); ifile += 2; } // steepest 
descent if (option == "-sd") { sd = true; ifile++; } // enable cut-off if (option == "-cut") { cut = true; ifile++; } // enable Newton2Num if (option == "-newton") { newton = true; ifile++; } if (strncmp(option.c_str(), "-o", 2) == 0) { oext = argv[i] + 2; if(!*oext) { oext = argv[++i]; //space left after -o: use next argument ifile++; } format_out = conv.FindFormat(oext); ifile++; } if (option == "-h") { hydrogens = true; ifile++; } if (option == "-cg") { sd = false; ifile++; } if ((option == "-c") && (argc > (i+1))) { crit = atof(argv[i+1]); ifile += 2; } if ((option == "-ff") && (argc > (i+1))) { ff = argv[i+1]; ifile += 2; } } basename = filename = argv[ifile]; size_t extPos = filename.rfind('.'); if (extPos!= string::npos) { basename = filename.substr(0, extPos); } } // Find Input filetype OBFormat *format_in = conv.FormatFromExt(filename.c_str()); if (!format_in || !format_out || !conv.SetInAndOutFormats(format_in, format_out)) { cerr << program_name << ": cannot read input/output format!" << endl; exit (-1); } ifstream ifs; ofstream ofs; // Read the file ifs.open(filename.c_str()); if (!ifs) { cerr << program_name << ": cannot read input file!" << endl; exit (-1); } OBForceField* pFF = OBForceField::FindForceField(ff); if (!pFF) { cerr << program_name << ": could not find forcefield '" << ff << "'." <<endl; exit (-1); } // set some force field variables pFF->SetLogFile(&cerr); pFF->SetLogLevel(OBFF_LOGLVL_LOW); pFF->SetVDWCutOff(rvdw); pFF->SetElectrostaticCutOff(rele); pFF->SetUpdateFrequency(freq); pFF->EnableCutOff(cut); if (newton) pFF->SetLineSearchType(LineSearchType::Newton2Num); OBMol mol; for (c=1;;c++) { mol.Clear(); if (!conv.Read(&mol, &ifs)) break; if (mol.Empty()) break; if (hydrogens) mol.AddHydrogens(); if (!pFF->Setup(mol)) { cerr << program_name << ": could not setup force field." 
<< endl; exit (-1); } bool done = true; OBStopwatch timer; timer.Start(); if (sd) { pFF->SteepestDescentInitialize(steps, crit); } else { pFF->ConjugateGradientsInitialize(steps, crit); } unsigned int totalSteps = 1; while (done) { if (sd) done = pFF->SteepestDescentTakeNSteps(1); else done = pFF->ConjugateGradientsTakeNSteps(1); totalSteps++; if (pFF->DetectExplosion()) { cerr << "explosion has occured!" << endl; conv.Write(&mol, &cout); return(1); } else pFF->GetCoordinates(mol); } double timeElapsed = timer.Elapsed(); pFF->GetCoordinates(mol); conv.Write(&mol, &cout); cerr << "Time: " << timeElapsed << "seconds. Iterations per second: " << double(totalSteps) / timeElapsed << endl; } // end for loop return(0); }
bool FastSearchFormat::ObtainTarget(OBConversion* pConv, OBMol& patternMol, const string& indexname) { //Obtains an OBMol // either from the SMARTS string in the -s option // or by converting the file in the -S option //or, if neither option is provided, displays information on the index file. stringstream smiles(stringstream::out); ifstream patternstream; OBConversion PatternConv(&patternstream,&smiles); const char* p = pConv->IsOption("s",OBConversion::GENOPTIONS); string txt; if(p) { // Use the -s option txt=p; stringstream smarts(txt, stringstream::in); OBConversion Convsm(&smarts); if(!Convsm.SetInFormat("smi")) return false; Convsm.Read(&patternMol); //erase -s option in GeneralOptions since it will be rewritten pConv->RemoveOption("s",OBConversion::GENOPTIONS); if(patternMol.Empty()) { obErrorLog.ThrowError(__FUNCTION__, "Could not make a molecule from " + smarts.str() + "\nThis needs to be valid SMILES when using fastsearch." "You can use the more versatile SMARTS in a normal substructure search." , obError); return false; } } else { // or Make OBMol from file in -S option or -aS option p = pConv->IsOption("S",OBConversion::GENOPTIONS); if(!p) p = pConv->IsOption("S",OBConversion::INOPTIONS);//for GUI mainly } if(!p) { //neither -s or -S options provided. Output info rather than doing search const FptIndexHeader& header = fs.GetIndexHeader(); string id(header.fpid); if(id.empty()) id = "default"; clog << indexname << " is an index of\n " << header.datafilename << ".\n It contains " << header.nEntries << " molecules. 
The fingerprint type is " << id << " with " << OBFingerprint::Getbitsperint() * header.words << " bits.\n" << "Typical usage for a substructure search:\n" << "babel indexfile.fs -osmi -sSMILES" << endl; return false; } if(p && patternMol.Empty()) { txt=p; string::size_type pos = txt.find_last_of('.'); if(pos==string::npos) { obErrorLog.ThrowError(__FUNCTION__, "Filename of pattern molecule in -S option must have an extension", obError); return false; } patternstream.open(txt.c_str()); if(!patternstream) { stringstream errorMsg; errorMsg << "Cannot open " << txt << endl; obErrorLog.ThrowError(__FUNCTION__, errorMsg.str(), obError); return false; } PatternConv.SetOneObjectOnly(); if(PatternConv.SetInFormat(txt.substr(pos+1).c_str())) PatternConv.Read(&patternMol); } if(patternMol.Empty()) { obErrorLog.ThrowError(__FUNCTION__, "Cannot derive a molecule from the -s or -S options", obWarning); return false; } patternMol.ConvertDativeBonds();//use standard form for dative bonds //Convert to SMILES and generate a -s option for use in the final filtering if(!PatternConv.SetOutFormat("smi")) return false; PatternConv.Write(&patternMol); //remove name to leave smiles string string smilesstr(smiles.str()); string::size_type pos = smilesstr.find_first_of(" \t\r\n"); if(pos!=string::npos) smilesstr = smilesstr.substr(0,pos); pConv->AddOption("s", OBConversion::GENOPTIONS, smilesstr.c_str()); return true; }
int main(int argc,char *argv[]) { // turn off slow sync with C-style output (we don't use it anyway). std::ios::sync_with_stdio(false); OBConversion conv; OBFormat *inFormat, *canFormat; OBMol mol; ifstream ifs; vector<OBMol> fragments; unsigned int fragmentCount = 0; // track how many in library -- give a running count map<string, int> index; // index of cansmi string currentCAN; unsigned int size; OBAtom *atom; OBBond *bond; bool nonRingAtoms, nonRingBonds; char buffer[BUFF_SIZE]; canFormat = conv.FindFormat("can"); conv.SetOutFormat(canFormat); if (argc < 2) { cout << "Usage: obfragment <file>" << endl; return(-1); } for (int i = 1; i < argc; i++) { cerr << " Reading file " << argv[i] << endl; inFormat = conv.FormatFromExt(argv[i]); if(inFormat==NULL || !conv.SetInFormat(inFormat)) { cerr << " Cannot read file format for " << argv[i] << endl; continue; // try next file } ifs.open(argv[i]); if (!ifs) { cerr << "Cannot read input file: " << argv[i] << endl; continue; } while(ifs.peek() != EOF && ifs.good()) { conv.Read(&mol, &ifs); if (!mol.Has3D()) continue; // invalid coordinates! mol.DeleteHydrogens(); // remove these before we do anything else do { nonRingAtoms = false; size = mol.NumAtoms(); for (unsigned int i = 1; i <= size; ++i) { atom = mol.GetAtom(i); if (!atom->IsInRing()) { mol.DeleteAtom(atom); nonRingAtoms = true; break; // don't know how many atoms there are } // Previously, we changed atoms to carbon here. 
// Now we perform this alchemy in terms of string-rewriting // once the canonical SMILES is generated } } while (nonRingAtoms); if (mol.NumAtoms() < 3) continue; if (mol.NumBonds() == 0) continue; do { nonRingBonds = false; size = mol.NumBonds(); for (unsigned int i = 0; i < size; ++i) { bond = mol.GetBond(i); if (!bond->IsInRing()) { mol.DeleteBond(bond); nonRingBonds = true; break; // don't know how many bonds there are } } } while (nonRingBonds); fragments = mol.Separate(); for (unsigned int i = 0; i < fragments.size(); ++i) { if (fragments[i].NumAtoms() < 3) // too small to care continue; currentCAN = conv.WriteString(&fragments[i], true); currentCAN = RewriteSMILES(currentCAN); // change elements to "a/A" for compression if (index.find(currentCAN) != index.end()) { // already got this index[currentCAN] += 1; // add to the count for bookkeeping continue; } index[currentCAN] = 1; // don't ever write this ring fragment again // OK, now retrieve the canonical ordering for the fragment vector<string> canonical_order; if (fragments[i].HasData("Canonical Atom Order")) { OBPairData *data = (OBPairData*)fragments[i].GetData("Canonical Atom Order"); tokenize(canonical_order, data->GetValue().c_str()); } // Write out an XYZ-style file with the CANSMI as the title cout << fragments[i].NumAtoms() << '\n'; cout << currentCAN << '\n'; // endl causes a flush vector<string>::iterator can_iter; unsigned int order; OBAtom *atom; fragments[i].Center(); fragments[i].ToInertialFrame(); for (unsigned int index = 0; index < canonical_order.size(); ++index) { order = atoi(canonical_order[index].c_str()); atom = fragments[i].GetAtom(order); snprintf(buffer, BUFF_SIZE, "C%8.3f%8.3f%8.3f\n", atom->x(), atom->y(), atom->z()); cout << buffer; } } fragments.clear(); if (index.size() > fragmentCount) { fragmentCount = index.size(); cerr << " Fragments: " << fragmentCount << endl; } } // while reading molecules (in this file) ifs.close(); ifs.clear(); } // while reading files // loop through 
the map and output frequencies map<string, int>::const_iterator indexItr; for (indexItr = index.begin(); indexItr != index.end(); ++indexItr) { cerr << (*indexItr).second << " INDEX " << (*indexItr).first << "\n"; } return(0); }
int main(int argc,char **argv) { OBForceField* pFF = OBForceField::FindForceField("Ghemical"); pFF->SetLogFile(&cout); pFF->SetLogLevel(OBFF_LOGLVL_LOW); OBMol mol; mol.Clear(); char commandline[100]; vector<string> vs; cout << endl; cout << "openbabel " << endl; cout << "M O L E C U L A R M E C H A N I C S" << endl; cout << " program" << endl; cout << " v 0.1 " << endl << endl; while (1) { cout << "command > "; cin.getline(commandline, 100); // // commands with no parameters // if (EQn(commandline, "quit", 4) || cin.eof()) { cout << "bye." << endl; exit(0); } if (EQn(commandline, "help", 4) || cin.eof()) { cout << endl; cout << "commands: description:" << endl; cout << "load <filename> load a molecule from filename" << endl; cout << "save <filename> save currently loaded molecule to filename" << endl; cout << "ff <forcefield> select the force field" << endl; cout << "forcefields print the available forcefields" << endl; cout << endl; cout << "energy calculate the energy" << endl; cout << "ebond calculate the bond stretching energy" << endl; cout << "eangle calculate the angle bending energy" << endl; cout << "estrbnd calculate the stretch-bending enregy" << endl; cout << "eoop calculate the out-of-plane bending energy" << endl; cout << "etorsion calculate the torsional energy" << endl; cout << "evdw calculate the Van der Waals energy" << endl; cout << "eeq calculate the electrostatic energy" << endl; cout << endl; cout << "sd <n> steepest descent energy minimization for n steps" << endl; cout << "cg <n> conjugate gradients energy minimization for n steps" << endl; cout << "" << endl; cout << "addH add hydrogens" << endl; cout << "delH delete hydrogens" << endl; cout << endl; cout << "gen generate/minimize a (random) structure" << endl; cout << "rs rotate around all rotatable bonds" << endl; cout << "nconf print the number of conformers" << endl; cout << "conf <n> select conformer n" << endl; cout << endl; cout << "quit quit" << endl; cout << endl; continue; } // 
calculate the energy if (EQn(commandline, "energy", 6)) { if (mol.Empty()) { cout << "no molecule loaded." << endl; continue; } cout << endl << " total energy = " << pFF->Energy() << " " << pFF->GetUnit() << endl << endl; continue; } if (EQn(commandline, "ebond", 5)) { if (mol.Empty()) { cout << "no molecule loaded." << endl; continue; } cout << endl << " bond stretching energy = " << pFF->E_Bond() << " " << pFF->GetUnit() << endl << endl; continue; } if (EQn(commandline, "eangle", 6)) { if (mol.Empty()) { cout << "no molecule loaded." << endl; continue; } cout << endl << " angle bending energy = " << pFF->E_Angle() << " " << pFF->GetUnit() << endl << endl; continue; } if (EQn(commandline, "estrbnd", 7)) { if (mol.Empty()) { cout << "no molecule loaded." << endl; continue; } cout << endl << " stretch-bending energy = " << pFF->E_StrBnd() << " " << pFF->GetUnit() << endl << endl; continue; } if (EQn(commandline, "eoop", 4)) { if (mol.Empty()) { cout << "no molecule loaded." << endl; continue; } cout << endl << " out-of-plane bending energy = " << pFF->E_OOP() << " " << pFF->GetUnit() << endl << endl; continue; } if (EQn(commandline, "etorsion", 8)) { if (mol.Empty()) { cout << "no molecule loaded." << endl; continue; } cout << endl << " torsional energy = " << pFF->E_Torsion() << " " << pFF->GetUnit() << endl << endl; continue; } if (EQn(commandline, "evdw", 4)) { if (mol.Empty()) { cout << "no molecule loaded." << endl; continue; } cout << endl << " Van der Waals energy = " << pFF->E_VDW() << " " << pFF->GetUnit() << endl << endl; continue; } if (EQn(commandline, "eeq", 3)) { if (mol.Empty()) { cout << "no molecule loaded." << endl; continue; } cout << endl << " electrostatic energy = " << pFF->E_Electrostatic() << " " << pFF->GetUnit() << endl << endl; continue; } if (EQn(commandline, "addH", 4)) { int num1, num2; num1 = mol.NumAtoms(); mol.AddHydrogens(false, true); num2 = mol.NumAtoms(); cout << (num2 - num1) << " hydrogens added." 
<< endl; if (!pFF->Setup(mol)) { cout << "error while initializing the force field for this molecule." <<endl; continue; } continue; } if (EQn(commandline, "delH", 4)) { int num1, num2; num1 = mol.NumAtoms(); mol.DeleteHydrogens(); num2 = mol.NumAtoms(); cout << (num1 - num2) << " hydrogens deleted." << endl; if (!pFF->Setup(mol)) { cout << "error while initializing the force field for this molecule." <<endl; continue; } continue; } if (EQn(commandline, "gen", 3)) { //pFF->GenerateCoordinates(); pFF->UpdateCoordinates(mol); continue; } if (EQn(commandline, "rs", 2)) { pFF->SystematicRotorSearch(); pFF->UpdateCoordinates(mol); continue; } if (EQn(commandline, "nconf", 5)) { cout << endl << " number of conformers = " << mol.NumConformers() << endl << endl; continue; } // // commands with parameters // tokenize(vs, commandline); // select forcefield if (EQn(commandline, "ff", 2)) { if (vs.size() < 2) { cout << "no <forcefield> specified." << endl; continue; } pFF = OBForceField::FindForceField(vs[1]); if (!mol.Empty()) if (!pFF->Setup(mol)) cout << "error while initializing the force field (" << vs[1] << ") for this molecule." <<endl; continue; } // load <filename> if (EQn(commandline, "load", 4)) { if (vs.size() < 2) { cout << "no <filename> specified." << endl; continue; } ifstream ifs; OBConversion conv; OBFormat *format_in = conv.FormatFromExt(vs[1].c_str()); if (!format_in || !conv.SetInFormat(format_in)) { cout << "could not detect format." << endl; continue; } ifs.open(vs[1].c_str()); if (!ifs) { cout << "could not open '" << vs[1] << "'." <<endl; continue; } mol.Clear(); if (!conv.Read(&mol, &ifs)) { cout << "could not read a molecule from '" << vs[1] << "'." <<endl; continue; } if (mol.Empty()) { cout << "this molecule is empty." <<endl; continue; } if (!pFF->Setup(mol)) { cout << "error while initializing the force field for this molecule." <<endl; continue; } cout << "molecule succesfully loaded." 
<< endl; cout << " " << mol.NumAtoms() << " atoms" << endl; cout << " " << mol.NumBonds() << " bonds" << endl; ifs.close(); continue; } // save <filename> if (EQn(commandline, "save", 4)) { if (vs.size() < 2) { cout << "no <filename> specified." << endl; continue; } ofstream ofs; OBConversion conv; OBFormat *format_out = conv.FormatFromExt(vs[1].c_str()); if (!format_out || !conv.SetOutFormat(format_out)) { cout << "could not detect format." << endl; continue; } ofs.open(vs[1].c_str()); if (!ofs) { cout << "could not open '" << vs[1] << "'." <<endl; continue; } if (!conv.Write(&mol, &ofs)) { cout << "could not read a molecule from '" << vs[1] << "'." <<endl; continue; } cout << "molecule succesfully saved." << endl; cout << " " << mol.NumAtoms() << " atoms" << endl; cout << " " << mol.NumBonds() << " bonds" << endl; ofs.close(); continue; } // steepest descent if (EQn(commandline, "sd", 2)) { if (vs.size() < 2) { cout << "no <n> steps specified." << endl; continue; } pFF->SteepestDescent(atoi(vs[1].c_str()), OBFF_ANALYTICAL_GRADIENT); pFF->UpdateCoordinates(mol); continue; } // conjugate gradients if (EQn(commandline, "cg", 2)) { if (vs.size() < 2) { cout << "no <n> steps specified." << endl; continue; } pFF->ConjugateGradients(atoi(vs[1].c_str()), OBFF_ANALYTICAL_GRADIENT); pFF->UpdateCoordinates(mol); continue; } cout << "invalid command." << endl; } return(1); }
int main(int argc,char *argv[]) { // turn off slow sync with C-style output (we don't use it anyway). std::ios::sync_with_stdio(false); if (argc != 1) { cout << "Usage: conversion" << endl; cout << " Unit tests for OBConversion " << endl; return(-1); } cout << "# Unit tests for OBConversion \n"; // the number of tests for "prove" cout << "1..9\n"; cout << "ok 1\n"; // for loading tests OBMol obMol; OBConversion obConversion; obConversion.SetInAndOutFormats("smi", "mdl"); cout << "ok 2\n"; obConversion.ReadString(&obMol, "C1=CC=CS1"); cout << "ok 3\n"; if (obMol.NumAtoms() == 5) { cout << "ok 4\n"; } else { cout << "not ok 4\n"; } obMol.AddHydrogens(); if (obMol.NumAtoms() == 9) { cout << "ok 5\n"; } else { cout << "not ok 5\n"; } if ( (obConversion.WriteString(&obMol)).length() > 0) cout << "ok 6\n"; else cout << "not ok 6\n"; // PR#1474265 obConversion.WriteFile(&obMol, "test.mdl"); ifstream ifs("test.mdl"); if (ifs.good()) cout << "ok 7\n"; else cout << "not ok 7\n"; // PR#143577 obConversion.SetInFormat("mdl"); obConversion.ReadFile(&obMol, "test.mdl"); if ( remove("test.mdl") != -1) cout << "ok 8\n"; else cout << "not ok 8\n"; // gzip input // gzip output // multi-molecule reading // PR#1465586 // aromatics.smi // attype.00.smi //ReadFile() //Read() //WriteString() // GetOutputIndex() // IsLast //ReadString() //IsFirstInput //Read() // splitting // splitting using gzip-input // PR#1357705 // size 0 input // PR#1250900 // RegisterFormat // FindFormat // FormatFromExt // FormatFromMIME // GetNextFormat // GetDefaultFormat // BatchFileName // IncrementedFileName // option handling // AddOption // IsOption // RemoveOption // IsOption // SetOptions // IsOption // RegisterOptionParam // GetOptionParams // GetInStream // GetOutStream // SetInStream // SetOutStream // nasty tests obConversion.ReadString(&obMol, ""); obConversion.Read(&obMol); cout << "ok 9\n"; return(0); }
/**
 * Writes the lower-triangular matrix of interatomic distances of \p mol to
 * \p ofs as text.
 *
 * The matrix is printed in vertical strips of six atom columns each. Every
 * strip starts with a header row of element symbols and atom indices and a
 * dashed rule, followed by one row per atom i >= the strip's first column
 * holding the distances to the column atoms j <= i. Distances are computed
 * from the atoms' x/y/z coordinates and printed with four decimals.
 *
 * @param ofs  Output stream receiving the table.
 * @param mol  Molecule whose atoms (1-based indices) are tabulated.
 */
void WriteDistanceMatrix(ostream &ofs,OBMol &mol)
{
  const unsigned int columns = 7;
  const unsigned int natoms = mol.NumAtoms();
  char buffer[BUFF_SIZE];

  // [min, max) is the range of atom indices shown as columns in this strip.
  unsigned int min = 1;
  unsigned int max = columns;

  while (max <= natoms + columns)
    {
      ofs << endl;
      if (min > natoms)
        break;

      // Header row: the first label is wider so it clears the row labels.
      OBAtom *atom = mol.GetAtom(min);
      snprintf(buffer, BUFF_SIZE, "%15s%4u",
               etab.GetSymbol(atom->GetAtomicNum()), min);
      ofs << buffer;
      for (unsigned int i = min + 1; (i < max) && (i <= natoms); ++i)
        {
          atom = mol.GetAtom(i);
          snprintf(buffer, BUFF_SIZE, "%7s%4u",
                   etab.GetSymbol(atom->GetAtomicNum()), i);
          ofs << buffer;
        }
      ofs << endl;

      // Dashed rule, one segment per column actually present.
      snprintf(buffer, BUFF_SIZE, "%14s", "");
      ofs << buffer;
      for (unsigned int i = min; (i < max) && (i <= natoms); ++i)
        {
          snprintf(buffer, BUFF_SIZE, "%11s", "-----------");
          ofs << buffer;
        }
      ofs << endl;

      // One row per atom from the strip's first column downwards.
      for (unsigned int i = min; i <= natoms; ++i)
        {
          atom = mol.GetAtom(i);
          snprintf(buffer, BUFF_SIZE, "%4s%4u",
                   etab.GetSymbol(atom->GetAtomicNum()), i);
          ofs << buffer;

          // Only the lower triangle (j <= i) is printed.
          for (unsigned int j = min; (j < max) && (j <= i); ++j)
            {
              OBAtom *atom2 = mol.GetAtom(j);
              double dst = SQUARE(atom->GetX() - atom2->GetX());
              dst += SQUARE(atom->GetY() - atom2->GetY());
              dst += SQUARE(atom->GetZ() - atom2->GetZ());
              dst = sqrt(dst);
              snprintf(buffer, BUFF_SIZE, "%10.4f ", dst);
              ofs << buffer;
            }
          ofs << endl;
        }

      // Advance by columns-1 so each strip shows six atom columns.
      max += columns - 1;
      min += columns - 1;
    }
  ofs << endl;
}
// Entry point of the obenergy tool: parses the command line, opens the input
// file, selects a force field and evaluates the energy of every molecule read
// from the file. Only the part of main() up to the end of the per-molecule
// loop is visible in this excerpt; the remainder of the function is not shown.
int main(int argc,char **argv)
{
  char *program_name= argv[0];
  int c;                    // loop counter of the per-molecule loop below
  int verbose = 0;          // set by -v; selects the high log level
  bool hydrogens = false;   // set by -h; add hydrogens before the energy call
  // NOTE(review): option2 is not used anywhere in the visible part of main().
  string basename, filename = "", option, option2, ff = "";

  if (argc < 2) {
    // No arguments: print usage plus the list of force-field plugins and exit.
    cout << "Usage: obenergy [options] <filename>" << endl;
    cout << endl;
    cout << "options: description:" << endl;
    cout << endl;
    cout << " -v verbose: print out indivual energy interactions" << endl;
    cout << endl;
    cout << " -h add hydrogens before calculating energy" << endl;
    cout << endl;
    cout << " -ff ffid select a forcefield" << endl;
    cout << endl;
    cout << " available forcefields:" << endl;
    cout << endl;
    OBPlugin::List("forcefields", "verbose");
    exit(-1);
  } else {
    // ifile is the argv index taken as the input file name; it is advanced
    // past every option (and option argument) that is recognised.
    int ifile = 1;
    for (int i = 1; i < argc; i++) {
      option = argv[i];

      // NOTE(review): the break below ends option parsing as soon as -v is
      // seen, so a -h or -ff that follows -v is not processed here.
      if (option == "-v") {
        verbose = 1;
        ifile++;
        break;
      }

      if (option == "-h") {
        hydrogens = true;
        ifile++;
      }

      // -ff is honoured only when another argument follows it.
      if ((option == "-ff") && (argc > (i+1))) {
        ff = argv[i+1];
        ifile += 2;
      }
    }

    // NOTE(review): ifile is not checked against argc before indexing argv;
    // a command line consisting only of options reads past the file
    // arguments -- confirm and guard if needed.
    basename = filename = argv[ifile];
    // basename is the file name with its last ".ext" removed (if any).
    size_t extPos = filename.rfind('.');

    if (extPos!= string::npos) {
      basename = filename.substr(0, extPos);
    }
  }

  // Find Input filetype
  OBConversion conv;
  OBFormat *format_in = conv.FormatFromExt(filename.c_str());

  if (!format_in || !conv.SetInFormat(format_in)) {
    cerr << program_name << ": cannot read input format!" << endl;
    exit (-1);
  }

  ifstream ifs;
  ofstream ofs;   // NOTE(review): not used in the visible part of main().

  // Read the file
  ifs.open(filename.c_str());
  if (!ifs) {
    cerr << program_name << ": cannot read input file!" << endl;
    exit (-1);
  }

  // Look up the force field named by -ff (ff is the empty string when the
  // option was not given).
  OBForceField* pFF = OBForceField::FindForceField(ff);
  if (!pFF) {
    cerr << program_name << ": could not find forcefield '" << ff << "'." <<endl;
    exit (-1);
  }

  // Force-field log output goes to standard output; -v raises the log level.
  pFF->SetLogFile(&cout);
  if (verbose)
    pFF->SetLogLevel(OBFF_LOGLVL_HIGH);
  else
    pFF->SetLogLevel(OBFF_LOGLVL_MEDIUM);

  OBMol mol;
  double energy;
  // Process molecules one at a time until a read fails or yields an empty
  // molecule.
  for (c=1;;c++) {
    mol.Clear();
    if (!conv.Read(&mol, &ifs))
      break;
    if (mol.Empty())
      break;

    if (hydrogens)
      mol.AddHydrogens();

    if (!pFF->Setup(mol)) {
      cerr << program_name << ": could not setup force field." << endl;
      exit (-1);
    }

    energy = pFF->Energy(false);
    // A non-finite energy is reported on stderr together with the molecule's
    // title and all atom coordinates.
    if (!isfinite(energy)) {
      cerr << " Title: " << mol.GetTitle() << endl;
      FOR_ATOMS_OF_MOL(atom, mol) {
        cerr << " x: " << atom->x() << " y: " << atom->y() << " z: " << atom->z() << endl;
      }
    }
  } // end for loop
bool ReadViewMol(istream &ifs,OBMol &mol,const char *title) { char buffer[BUFF_SIZE]; OBAtom *atom; double x,y,z, border; double factor = 1.0; int bgn, end, order; vector<string> vs; bool foundTitle = false; bool foundBonds = false; mol.BeginModify(); while (ifs.getline(buffer,BUFF_SIZE)) { if (strstr(buffer,"$title") != NULL) { if (!ifs.getline(buffer,BUFF_SIZE)) return (false); mol.SetTitle(buffer); foundTitle = true; } else if (strstr(buffer,"$coord") != NULL) { tokenize(vs,buffer); if (vs.size() == 2) factor = atof((char*)vs[1].c_str()); // conversion to angstrom while (ifs.getline(buffer,BUFF_SIZE)) { if (buffer[0] == '$') break; tokenize(vs,buffer); if (vs.size() != 4) return(false); atom = mol.NewAtom(); x = atof((char*)vs[0].c_str()) * factor; y = atof((char*)vs[1].c_str()) * factor; z = atof((char*)vs[2].c_str()) * factor; atom->SetVector(x,y,z); //set coordinates atom->SetAtomicNum(etab.GetAtomicNum(vs[3].c_str())); } } else if (strstr(buffer,"$bonds") != NULL) { foundBonds = true; while (ifs.getline(buffer,BUFF_SIZE)) { if (buffer[0] == '$') break; sscanf(buffer,"%d %d %lf",&bgn,&end, &border); if (border > 1.0) order = int(border); else order = 1; mol.AddBond(bgn+1,end+1,order); } } else if (strstr(buffer,"$end") != NULL) break; } // while mol.EndModify(); if (!foundTitle) mol.SetTitle(title); if (!foundBonds) { mol.ConnectTheDots(); mol.PerceiveBondOrders(); } return(true); }
/**
 * Writes one conformer of \p mol to \p ofs as a sequence of binary records.
 *
 * Every record is bracketed by its byte count, written with WriteSize()
 * before and after the payload:
 *  1. a header holding a conformer number (always 1), a fixed energy value
 *     and an 80-character label "<title>:<MolCount>" padded by PadString();
 *  2. the x coordinates of all atoms as doubles;
 *  3. the y coordinates;
 *  4. the z coordinates.
 *
 * @param ofs  Binary output stream.
 * @param mol  Molecule whose title and atom coordinates are written.
 */
void CSRFormat::WriteCSRCoords(ostream &ofs,OBMol &mol)
{
  const int headerBytes = sizeof(int) + sizeof(double) + (80 * sizeof(char));
  int confNumber = 1;
  double confEnergy = -2.584565;

  // Build the label and pad it to the fixed 80-character field.
  char label[100];
  snprintf(label, 80, "%s:%d",mol.GetTitle(),MolCount);
  char *paddedLabel = PadString(label,80);

  // Header record.
  WriteSize(headerBytes,ofs);
  ofs.write(reinterpret_cast<const char*>(&confNumber),sizeof(int));
  ofs.write(reinterpret_cast<const char*>(&confEnergy),sizeof(double));
  ofs.write(paddedLabel,80*sizeof(char));
  WriteSize(headerBytes,ofs);

  // One record per Cartesian axis, in the order x, y, z.
  for (int axis = 0; axis < 3; ++axis)
    {
      WriteSize(mol.NumAtoms()*sizeof(double),ofs);

      vector<OBAtom*>::iterator it;
      for (OBAtom *atom = mol.BeginAtom(it); atom; atom = mol.NextAtom(it))
        {
          double coord;
          switch (axis)
            {
            case 0:
              coord = atom->x();
              break;
            case 1:
              coord = atom->y();
              break;
            default:
              coord = atom->z();
              break;
            }
          ofs.write(reinterpret_cast<const char*>(&coord),sizeof(double));
        }

      WriteSize(mol.NumAtoms()*sizeof(double),ofs);
    }

  // The padded label is released here with delete[].
  delete [] paddedLabel;
}
bool parseConectRecord(char *buffer,OBMol &mol) { stringstream errorMsg; string clearError; // Setup strings and string buffers vector<string> vs; buffer[70] = '\0'; if (strlen(buffer) < 70) { errorMsg << "WARNING: Problems reading a PDB file\n" << " Problems reading a CONECT record.\n" << " According to the PDB specification,\n" << " the record should have 70 columns, but OpenBabel found " << strlen(buffer) << " columns." << endl; obErrorLog.ThrowError(__FUNCTION__, errorMsg.str() , obInfo); errorMsg.str(clearError); } // Serial number of the first atom, read from column 7-11 of the // connect record, to which the other atoms connect to. long int startAtomSerialNumber; // A pointer to the first atom. OBAtom *firstAtom = NULL; // Serial numbers of the atoms which bind to firstAtom, read from // columns 12-16, 17-21, 22-27 and 27-31 of the connect record. Note // that we reserve space for 5 integers, but read only four of // them. This is to simplify the determination of the bond order; // see below. 
long int boundedAtomsSerialNumbers[5] = {0,0,0,0,0}; // Bools which tell us which of the serial numbers in // boundedAtomsSerialNumbers are read from the file, and which are // invalid bool boundedAtomsSerialNumbersValid[5] = {false, false, false, false, false}; // Pragmatic approach -- too many non-standard PDB files out there // (including some old ones from us) // So if we have a small number of atoms, then try to break by spaces // Otherwise (i.e., NumAtoms() > 9,999 we need to go by position) // We'll switch back and forth a few times to save duplicating common code if (mol.NumAtoms() <= 9999) { // make sure we don't look at salt bridges or whatever, so cut the buffer short buffer[32] = '\0'; tokenize(vs,buffer); if( vs.empty() || vs.size() < 2) return false; vs.erase(vs.begin()); // remove "CONECT" startAtomSerialNumber = atoi(vs[0].c_str()); } else { if (readIntegerFromRecord(buffer, 7, &startAtomSerialNumber) == false) { errorMsg << "WARNING: Problems reading a PDB file\n" << " Problems reading a CONECT record.\n" << " According to the PDB specification,\n" << " columns 7-11 should contain the serial number of an atom.\n" << " THIS CONECT RECORD WILL BE IGNORED." << endl; obErrorLog.ThrowError(__FUNCTION__, errorMsg.str() , obWarning); return(false); } } vector<OBAtom*>::iterator i; for (OBAtom *a1 = mol.BeginAtom(i);a1;a1 = mol.NextAtom(i)) { // atoms may not have residue information, but if they do, // check serial numbers if (a1->GetResidue() != NULL && static_cast<long int>(a1->GetResidue()-> GetSerialNum(a1)) == startAtomSerialNumber) { firstAtom = a1; break; } } if (firstAtom == NULL) { errorMsg << "WARNING: Problems reading a PDB file:\n" << " Problems reading a CONECT record.\n" << " According to the PDB specification,\n" << " columns 7-11 should contain the serial number of an atom.\n" << " No atom was found with this serial number.\n" << " THIS CONECT RECORD WILL BE IGNORED." 
<< endl; obErrorLog.ThrowError(__FUNCTION__, errorMsg.str() , obWarning); return(false); } if (mol.NumAtoms() < 9999) { if (vs.size() > 1) boundedAtomsSerialNumbers[0] = atoi(vs[1].c_str()); if (vs.size() > 2) boundedAtomsSerialNumbers[1] = atoi(vs[2].c_str()); if (vs.size() > 3) boundedAtomsSerialNumbers[2] = atoi(vs[3].c_str()); if (vs.size() > 4) boundedAtomsSerialNumbers[3] = atoi(vs[4].c_str()); unsigned int limit = 4; if (vs.size() <= 4) limit = vs.size() - 1; for (unsigned int s = 0; s < limit; ++s) boundedAtomsSerialNumbersValid[s] = true; } else { // Now read the serial numbers. If the first serial number is not // present, this connect record probably contains only hydrogen // bonds and salt bridges, which we ignore. In that case, we just // exit gracefully. boundedAtomsSerialNumbersValid[0] = readIntegerFromRecord(buffer, 12, boundedAtomsSerialNumbers+0); if (boundedAtomsSerialNumbersValid[0] == false) return(true); boundedAtomsSerialNumbersValid[1] = readIntegerFromRecord(buffer, 17, boundedAtomsSerialNumbers+1); boundedAtomsSerialNumbersValid[2] = readIntegerFromRecord(buffer, 22, boundedAtomsSerialNumbers+2); boundedAtomsSerialNumbersValid[3] = readIntegerFromRecord(buffer, 27, boundedAtomsSerialNumbers+3); } // Now iterate over the VALID boundedAtomsSerialNumbers and connect // the atoms. 
for(unsigned int k=0; boundedAtomsSerialNumbersValid[k]; k++) { // Find atom that is connected to, write an error message OBAtom *connectedAtom = 0L; for (OBAtom *a1 = mol.BeginAtom(i);a1;a1 = mol.NextAtom(i)) { // again, atoms may not have residues, but if they do, check serials if (a1->GetResidue() != NULL && static_cast<long int>(a1->GetResidue()-> GetSerialNum(a1)) == boundedAtomsSerialNumbers[k]) { connectedAtom = a1; break; } } if (connectedAtom == 0L) { errorMsg << "WARNING: Problems reading a PDB file:\n" << " Problems reading a CONECT record.\n" << " According to the PDB specification,\n" << " Atoms with serial #" << startAtomSerialNumber << " and #" << boundedAtomsSerialNumbers[k] << " should be connected\n" << " However, an atom with serial #" << boundedAtomsSerialNumbers[k] << " was not found.\n" << " THIS CONECT RECORD WILL BE IGNORED." << endl; obErrorLog.ThrowError(__FUNCTION__, errorMsg.str() , obWarning); return(false); } // Figure the bond order unsigned char order = 0; while(boundedAtomsSerialNumbersValid[k+order+1] && (boundedAtomsSerialNumbers[k+order] == boundedAtomsSerialNumbers[k+order+1])) order++; k += order; // Generate the bond if (firstAtom->GetIdx() < connectedAtom->GetIdx()) { // record the bond 'in one direction' only OBBond *bond = mol.GetBond(firstAtom, connectedAtom); if (!bond) mol.AddBond(firstAtom->GetIdx(), connectedAtom->GetIdx(), order+1); else // An additional CONECT record with the same firstAtom that references // a bond created in the previous CONECT record. // For example, the 1136->1138 double bond in the following: // CONECT 1136 1128 1137 1137 1138 // CONECT 1136 1138 1139 bond->SetBondOrder(bond->GetBondOrder() + order+1); } } return(true); }