bool QEqCharges::ComputeCharges(OBMol &mol) { /////////////////////////////////////////////////////////////////////////////// //Some OpenBabel bookkeeping that I copied from the Gasteiger scheme mol.SetPartialChargesPerceived(); OBPairData *dp = new OBPairData; dp->SetAttribute("PartialCharges"); dp->SetValue("QEq"); dp->SetOrigin(perceived); mol.SetData(dp); /////////////////////////////////////////////////////////////////////////////// //Read in atomic information from OpenBabel molecule and parameterize //Read in total number of atoms int i, N = mol.NumAtoms(); Hardness = MatrixXd::Zero(N+1, N+1); Voltage = VectorXd::Zero(N+1); Electronegativity = VectorXd::Zero(N); VectorXd BasisSet = VectorXd::Zero(N); Vector3d Parameters; FOR_ATOMS_OF_MOL(atom, mol) { Parameters = GetParameters(atom->GetAtomicNum(), atom->GetFormalCharge()); i = atom->GetIdx() - 1; if (Parameters[0] == 0.) { stringstream msg; msg << "Some QEq Parameters not found!" << endl << "Parameters not found for atom no. " << i+1 << endl << "Atom will be ignored in the charge computation."; obErrorLog.ThrowError(__FUNCTION__, msg.str(), obError); } Electronegativity[i] = Parameters[0]; Hardness(i,i) = Parameters[1]; BasisSet[i] = Parameters[2]; }
bool OutputFormat::ReadMolecule(OBBase* pOb, OBConversion* pConv) { // so we want to read through the file until we can figure out // what program actually created it // if we get to the end, emit a warning istream &ifs = *pConv->GetInStream(); char buffer[BUFF_SIZE]; OBFormat *pFormat = NULL; std::string formatName; // the detection strings are from the Chemical MIME project // http://chemical-mime.sourceforge.net/chemical-mime-data.html while (ifs.getline(buffer,BUFF_SIZE)) { if ((strstr(buffer,"GAMESS execution script") != NULL) || (strstr(buffer,"PC GAMESS") != NULL) || (strstr(buffer,"GAMESS VERSION") != NULL)) { // GAMESS output formatName = "gamout"; break; } else if (strstr(buffer,"=== G A M E S S - U K === ") != NULL) { // GAMESS-UK output formatName = "gukout"; break; } else if (strstr(buffer,"Gaussian, Inc") != NULL) { // Gaussian output formatName = "g03"; break; } else if (strstr(buffer,"GENERAL UTILITY LATTICE PROGRAM") != NULL) { // GULP output -- not currently supported break; } else if (strstr(buffer,"MOPAC") != NULL) { // MOPAC output formatName = "mopout"; break; } else if (strstr(buffer,"Program PWSCF") != NULL) { // PWSCF formatName = "pwscf"; break; } else if (strstr(buffer,"Welcome to Q-Chem") != NULL) { // Q-Chem output formatName = "qcout"; break; } else if (strstr(buffer,"Amsterdam Density Functional") != NULL) { // ADF output // Determine the kind of ADF output while (ifs.getline(buffer, BUFF_SIZE)) { if (strstr(buffer, "| A D F |") != NULL) { formatName = "adfout"; break; } else if (strstr(buffer, "| B A N D |") != NULL) { formatName = "adfband"; break; } else if (strstr(buffer, "| D F T B |") != NULL) { formatName = "adfdftb"; break; } else if (strstr(buffer, "DFTB Engine") != NULL) { // "| D F T B |" is no longer printed in ADF 2018 // Hopefully, "DFTB Engine" will work fine... 
formatName = "adfdftb"; break; } } break; } else if (strstr(buffer,"Northwest Computational Chemistry") != NULL) { // NWChem output formatName = "nwo"; break; } else if (strstr(buffer,"MPQC: Massively Parallel Quantum Chemistry") != NULL) { // MPQC output formatName = "mpqc"; break; } else if (strstr(buffer,"PROGRAM SYSTEM MOLPRO") != NULL) { // MOLPRO output formatName = "mpo"; break; } else if ((strstr(buffer,"Schrodinger, Inc.") != NULL) && (strstr(buffer,"Jaguar") != NULL)) { // Jaguar formatName = "jout"; break; } else if (strstr(buffer, "ABINIT") != NULL) { // Abinit formatName = "abinit"; break; } else if (strstr(buffer, "ACES2") != NULL) { // ACESII formatName = "acesout"; break; } else if (strstr(buffer, "CRYSTAL06") != NULL || strstr(buffer, "CRYSTAL09") != NULL) { // CRYSTAL09 formatName = "c09out"; break; } else if (strstr(buffer, "* O R C A *") != NULL) { // ORCA formatName = "orca"; break; } else if (strstr(buffer, "WELCOME TO SIESTA") != NULL) { // SIESTA formatName = "siesta"; break; } } // if we assigned something above, let's try to find it if (formatName.length()) pFormat = pConv->FindFormat(formatName); if (pFormat) { ifs.seekg (0, ios::beg); // reset the stream to the beginning ifs.clear(); bool success = pFormat->ReadMolecule(pOb, pConv); // Tag the molecule with the format (e.g., if a program wants to know the kind of "out" or "log" file) // We have to do this *after* ReadMolecule returns, or the data might be cleared if (pOb) { OBPairData *dp = new OBPairData; dp->SetAttribute("File Format"); dp->SetValue(formatName); dp->SetOrigin(fileformatInput); pOb->SetData(dp); } return success; } obErrorLog.ThrowError(__FUNCTION__, "Problems reading an output file: Could not determine the format of this file. Please report it to the openbabel-discuss @ lists.sourceforge.net mailing list.", obError); return(false); // we couldn't figure out the format }
bool RXNFormat::ReadMolecule(OBBase* pOb, OBConversion* pConv) { OBMol* pmol = pOb->CastAndClear<OBMol>(); if (pmol == NULL) return false; OBFormat* pMolFormat = pConv->FindFormat("MOL"); if (pMolFormat==NULL) return false; istream &ifs = *pConv->GetInStream(); string ln; // When MDLFormat reads the last product it may also read and discard // the line with $RXN for the next reaction. But it then sets $RXNread option. if(pConv->IsOption("$RXNread")) pConv->RemoveOption("$RXNread", OBConversion::OUTOPTIONS); else { if (!getline(ifs,ln)) return(false); if(Trim(ln).find("$RXN")!=0) return false; //Has to start with $RXN } if (!getline(ifs,ln)) return false; //reaction title pmol->SetTitle(Trim(ln)); if (!getline(ifs,ln)) return false; //creator if (!getline(ifs, ln)) return false; //comment // Originally the comment was added to the reaction via: // pmol->SetComment(Trim(ln)); if (!getline(ifs, ln)) return false; // num reactants, products, and optionally agents unsigned int nReactants = 0, nProducts = 0, nAgents = 0; bool ok = ParseComponent(ln.c_str() + 0, &nReactants); if (!ok) return false; ok = ParseComponent(ln.c_str() + 3, &nProducts); if (!ok) return false; if (ln[6] != '\0') { // optional agents ok = ParseComponent(ln.c_str() + 6, &nAgents); if (!ok) return false; } if(nReactants + nProducts + nAgents) { //Read the first $MOL. The others are read at the end of the previous MOL if(!getline(ifs, ln)) return false; if(Trim(ln).find("$MOL")==string::npos) return false; } OBReactionFacade rxnfacade(pmol); // Note: If we supported it, we could read each of the rxn components directly // into the returned OBMol instead of having to do a copy. Unfortunately, // this isn't possible at the moment (MOL format will need some work first). 
// Here is some example code to do it: // //unsigned int old_numatoms = 0; //unsigned int compid = 1; //for (int i = 0; i<nReactants; i++) //{ // //Read a MOL file using the same OBConversion object but with a different format // if (!pMolFormat->ReadMolecule(pmol, pConv)) // obErrorLog.ThrowError(__FUNCTION__, "Failed to read a reactant", obWarning); // unsigned int numatoms = pmol->NumAtoms(); // for (unsigned int idx = old_numatoms + 1; idx <= numatoms; ++idx) { // OBAtom* atom = pmol->GetAtom(idx); // rxnfacade.SetRole(atom, REACTANT); // rxnfacade.SetComponentId(atom, compid); // } // old_numatoms = numatoms; // compid++; //} const char* type[3] = {"a reactant", "a product", "an agent"}; OBReactionRole role; unsigned int num_components; for(unsigned int N=0; N<3; N++) { switch(N) { case 0: role = REACTANT; num_components = nReactants; break; case 1: role = PRODUCT; num_components = nProducts; break; case 2: role = AGENT; num_components = nAgents; break; } for (int i=0; i<num_components; i++) { //Read a MOL file using the same OBConversion object but with a different format OBMol mol; if (!pMolFormat->ReadMolecule(&mol, pConv)) { std::string error = "Failed to read "; error += type[N]; obErrorLog.ThrowError(__FUNCTION__, error, obWarning); continue; } if (mol.NumAtoms() == 0) { OBAtom* dummy = mol.NewAtom(); // Treat the empty OBMol as having a single dummy atom OBPairData *pd = new OBPairData(); pd->SetAttribute("rxndummy"); pd->SetValue(""); pd->SetOrigin(fileformatInput); dummy->SetData(pd); } rxnfacade.AddComponent(&mol, role); } } pmol->SetIsReaction(); return true; }
//! \return whether partial charges were successfully assigned to this molecule bool EQEqCharges::ComputeCharges(OBMol &mol) { int i, j, a, c, N = mol.NumAtoms(); double cellVolume; VectorXf chi(N), J(N), b(N), x(N); MatrixXf J_ij(N, N), A(N, N); OBUnitCell *obuc; matrix3x3 unitcell, fourier; vector3 dx; int numNeighbors[3]; OBAtom *atom; // If parameters have not yet been loaded, do that if (!_paramFileLoaded) { if (ParseParamFile()) { _paramFileLoaded = true; } else { return false; } } // Calculate atomic properties based around their ionic charge for (i = 0; i < N; i++) { atom = mol.GetAtom(i + 1); a = atom->GetAtomicNum(); c = _chargeCenter[a]; // Fail if ionization data is missing for any atom in the molecule if (_ionizations[a][c + 1] == -1 || _ionizations[a][c] == -1 || a > TABLE_OF_ELEMENTS_SIZE) { obErrorLog.ThrowError(__FUNCTION__, "Insufficient ionization data for atoms in the given molecule. Update `data/eqeqIonizations.txt` with missing information and re-run this function.", obError); return false; } J(i) = _ionizations[a][c + 1] - _ionizations[a][c]; chi(i) = 0.5 * (_ionizations[a][c + 1] + _ionizations[a][c]) - (a == 1? 
0 : c * J(i)); } // If a unit cell is defined, use the periodic Ewald calculation if (mol.HasData(OBGenericDataType::UnitCell)) { // Get unit cell and calculate its Fourier transform + volume obuc = (OBUnitCell *) mol.GetData(OBGenericDataType::UnitCell); unitcell = obuc->GetCellMatrix(); fourier = (2 * PI * unitcell.inverse()).transpose(); cellVolume = obuc->GetCellVolume(); // Get the number of radial unit cells to use in x, y, and z numNeighbors[0] = int(ceil(minCellLength / (2.0 * (obuc->GetA())))) - 1; numNeighbors[1] = int(ceil(minCellLength / (2.0 * (obuc->GetB())))) - 1; numNeighbors[2] = int(ceil(minCellLength / (2.0 * (obuc->GetC())))) - 1; for (i = 0; i < N; i++) { atom = mol.GetAtom(i + 1); for (j = 0; j < N; j++) { dx = atom->GetVector() - (mol.GetAtom(j + 1))->GetVector(); J_ij(i, j) = GetPeriodicEwaldJij(J(i), J(j), dx, (i == j), unitcell, fourier, cellVolume, numNeighbors); } } // If no unit cell, use the simplified nonperiodic calculation } else { for (i = 0; i < N; i++) { atom = mol.GetAtom(i + 1); for (j = 0; j < N; j++) { J_ij(i, j) = GetNonperiodicJij(J(i), J(j), atom->GetDistance(j + 1), (i == j)); } return false; } } // Formulate problem as A x = b, where x is the calculated partial charges // First equation is a simple overall balance: sum(Q) = 0 A.row(0) = VectorXf::Ones(N); b(0) = 0; // Remaining equations are based off of the fact that, at equilibrium, the // energy of the system changes equally for a change in any charge: // dE/dQ_1 = dE/dQ_2 = ... 
= dE/dQ_N A.block(1, 0, N - 1, N) = J_ij.block(0, 0, N - 1, N) - J_ij.block(1, 0, N - 1, N); b.tail(N - 1) = chi.tail(N - 1) - chi.head(N - 1); // The solution is a list of charges in the system x = A.colPivHouseholderQr().solve(b); // Now we are done calculating, pass all this back to OpenBabel molecule mol.SetPartialChargesPerceived(); OBPairData *dp = new OBPairData; dp->SetAttribute("PartialCharges"); dp->SetValue("EQEq"); dp->SetOrigin(perceived); mol.SetData(dp); m_partialCharges.clear(); m_partialCharges.reserve(N); m_formalCharges.clear(); m_formalCharges.reserve(N); for (i = 0; i < N; i ++) { atom = mol.GetAtom(i + 1); atom->SetPartialCharge(x(i)); m_partialCharges.push_back(x(i)); m_formalCharges.push_back(atom->GetFormalCharge()); } obErrorLog.ThrowError(__FUNCTION__, "EQEq charges successfully assigned.", obInfo); return true; }
bool OpAlign::Do(OBBase* pOb, const char* OptionText, OpMap* pmap, OBConversion* pConv) { OBMol* pmol = dynamic_cast<OBMol*>(pOb); if(!pmol) return false; map<string,string>::const_iterator itr; // Is there an -s option? if(pConv->IsFirstInput()) { _pOpIsoM = NULL; //assume no -s option itr = pmap->find("s"); if(itr!=pmap->end()) { //There is an -s option; check it is ok _pOpIsoM = static_cast<OpNewS*>(OBOp::FindType("s")); _stext = itr->second; //get its parameter(s) if(!_pOpIsoM || _stext.empty()) { obErrorLog.ThrowError(__FUNCTION__, "No parameter on -s option, or its OBOp version is not loaded", obError); pConv->SetOneObjectOnly(); //to finish return false; } } } // If the output format is a 2D depiction format, then we should align // on the 2D coordinates and not the 3D coordinates (if present). This //means we need to generate the 2D coordinates at this point. if(pmol->GetDimension()==3 && (pConv->GetOutFormat()->Flags() & DEPICTION2D)) { OBOp* pgen = OBOp::FindType("gen2D"); if(pgen) pgen->Do(pmol); } // All molecules must have coordinates, so add them if 0D // They may be added again later when gen2D or gen3D is called, but they will be the same. // It would be better if this op was called after them, which would happen // if its name was alphabetically after "gen" (and before "s"). if(pmol->GetDimension()==0) { //Will the coordinates be 2D or 3D? itr = pmap->find("gen3D"); OBOp* pgen = (itr==pmap->end()) ? OBOp::FindType("gen2D") : OBOp::FindType("gen3D"); if(pgen) pgen->Do(pmol); } //Do the alignment in 2D if the output format is svg, png etc. and there is no -xn option if(pmol->GetDimension()==3 && pConv && !pConv->IsOption("n")) { OBFormat* pOutFormat = pConv->GetOutFormat(); if(pOutFormat->Flags() & DEPICTION2D) { OBOp* pgen = OBOp::FindType("gen2D"); if(pgen) pgen->Do(pmol); } } if(pConv->IsFirstInput() || _refMol.NumAtoms()==0) { _refvec.clear(); // Reference molecule is basically the first molecule _refMol = *pmol; if(!_pOpIsoM) //no -s option. 
Use a molecule reference. _align.SetRefMol(_refMol); else { //If there is a -s option, reference molecule has only those atoms that are matched //Call the -s option from here bool ret = _pOpIsoM->Do(pmol, _stext.c_str(), pmap, pConv); // Get the atoms that were matched vector<int> ats = _pOpIsoM->GetMatchAtoms(); if(!ats.empty()) { // Make a vector of the matching atom coordinates... for(vector<int>::iterator iter=ats.begin(); iter!=ats.end(); ++iter) _refvec.push_back((pmol->GetAtom(*iter))->GetVector()); // ...and use a vector reference _align.SetRef(_refvec); } // Stop -s option being called normally, although it will still be called once // in the DoOps loop already started for the current (first) molecule. pConv->RemoveOption("s",OBConversion::GENOPTIONS); if(!ret) { // the first molecule did not match the -s option so a reference molecule // could not be made. Keep trying. _refMol.Clear(); //obErrorLog.ThrowError(__FUNCTION__, "The first molecule did not match the -s option\n" // "so the reference structure was not derived from it", obWarning, onceOnly); return false; //not matched } } } //All molecules if(pmol->GetDimension()!= _refMol.GetDimension()) { stringstream ss; ss << "The molecule" << pmol->GetTitle() << " does not have the same dimensions as the reference molecule " << _refMol.GetTitle() << " and is ignored."; obErrorLog.ThrowError(__FUNCTION__, ss.str().c_str(), obError); return false; } if(_pOpIsoM) //Using -s option { //Ignore mol if it does not pass -s option if(!_pOpIsoM->Do(pmol, "", pmap, pConv)) // "" means will use existing parameters return false; // Get the atoms equivalent to those in ref molecule vector<int> ats = _pOpIsoM->GetMatchAtoms(); // Make a vector of their coordinates and get the centroid vector<vector3> vec; vector3 centroid; for(vector<int>::iterator iter=ats.begin(); iter!=ats.end(); ++iter) { vector3 v = pmol->GetAtom(*iter)->GetVector(); centroid += v; vec.push_back(v); } centroid /= vec.size(); // Do the alignment 
_align.SetTarget(vec); if(!_align.Align()) return false; // Get the centroid of the reference atoms vector3 ref_centroid; for(vector<vector3>::iterator iter=_refvec.begin(); iter!=_refvec.end(); ++iter) ref_centroid += *iter; ref_centroid /= _refvec.size(); //subtract the centroid, rotate the target molecule, then add the centroid matrix3x3 rotmatrix = _align.GetRotMatrix(); for (unsigned int i = 1; i <= pmol->NumAtoms(); ++i) { vector3 tmpvec = pmol->GetAtom(i)->GetVector(); tmpvec -= centroid; tmpvec *= rotmatrix; //apply the rotation tmpvec += ref_centroid; pmol->GetAtom(i)->SetVector(tmpvec); } } else //Not using -s option) { _align.SetTargetMol(*pmol); if(!_align.Align()) return false; _align.UpdateCoords(pmol); } //Save rmsd as a property OBPairData* dp = new OBPairData; dp->SetAttribute("rmsd"); double val = _align.GetRMSD(); if(val<1e-12) val = 0.0; dp->SetValue(toString(val)); dp->SetOrigin(local); pmol->SetData(dp); return true; }
//! Perform any requested transformations on this OBMol.
//!
//! The input map has option letters or names as the key and any associated
//! text as the value. Plugin ops are run first through OBOp::DoOps(); the
//! built-in options handled here are b, d, h, r, p, c, title, addtotitle,
//! property, add, delete, append and the filters filter, v and s (with
//! exactmatch). The filter options s and v allow an obgrep facility; used
//! together they must both be true to allow a molecule through.
//!
//! This is a virtual function declared in mol.h. The OBBase version just
//! returns the object pointer.
//!
//! \param pOptions general options map; must not be NULL
//! \param pConv    conversion context forwarded to OBOp::DoOps()
//! \return this if all transformations succeeded and every filter matched.
//!         NULL otherwise: when a filter does not match or a transformation
//!         reports an error, this object is deleted by this function before
//!         NULL is returned; when OBOp::DoOps() returns false, NULL is
//!         returned without deleting here (a filtering op is expected to have
//!         deleted the molecule itself).
OBBase* OBMol::DoTransformations(const std::map<std::string, std::string>* pOptions, OBConversion* pConv)
{
  //Parse GeneralOptions
  if(pOptions->empty())
    return this;

  // DoOps calls Do() for each of the plugin options in the map
  // It normally returns true, even if there are no options but
  // can return false if one of the options decides that the
  // molecule should not be output. If it is a filtering op, it
  // should delete the molecule itself (unlike the -s, --filter options,
  // which delete it in this function).
  if(!OBOp::DoOps(this, pOptions, pConv))
    return (OBBase *)NULL;

  bool ret=true; // becomes false as soon as any transformation reports failure

  map<string,string>::const_iterator itr, itr2;

  if(pOptions->find("b")!=pOptions->end())
    if(!ConvertDativeBonds())
      ret=false;

  if(pOptions->find("d")!=pOptions->end())
    if(!DeleteHydrogens())
      ret=false;

  if(pOptions->find("h")!=pOptions->end())
    if(!AddHydrogens(false, false))
      ret=false;

  // The result of StripSalts() is deliberately not treated as a failure (as in
  // the original). The original additionally reset ret to true here, which
  // silently discarded failures recorded by the b/d/h options above.
  if(pOptions->find("r")!=pOptions->end())
    StripSalts();

  itr = pOptions->find("p");
  if(itr!=pOptions->end())
  {
    double pH = strtod(itr->second.c_str(), 0);
    if(!AddHydrogens(false, true, pH))
      ret=false;
  }

  if(pOptions->find("c")!=pOptions->end())
    Center();

  itr = pOptions->find("title"); //Replaces title
  if(itr!=pOptions->end())
    SetTitle(itr->second.c_str());

  itr = pOptions->find("addtotitle"); //Appends text to title
  if(itr!=pOptions->end())
  {
    string title(GetTitle());
    title += itr->second;
    SetTitle(title.c_str());
  }

  /*
  itr = pOptions->find("addformula"); //Appends tab + formula to title
  if(itr!=pOptions->end())
  {
    string title(GetTitle());
    title += '\t' + GetSpacedFormula(1,"");//actually unspaced
    SetTitle(title.c_str());
  }
  */

  //Add an extra property to the molecule.
  //Parameter has attribute and value separated by a space
  itr = pOptions->find("property");
  if(itr!=pOptions->end())
  {
    string txt(itr->second);
    string::size_type pos = txt.find(' ');
    if(pos==string::npos)
    {
      obErrorLog.ThrowError(__FUNCTION__, "Missing property value", obError);
      ret=false;
    }
    else
    {
      string attr(txt.substr(0,pos)), val(txt.substr(pos+1));
      //Update value if it already exists
      OBPairData* dp = dynamic_cast<OBPairData*>(GetData(attr));
      if(dp)
      {
        dp->SetValue(val);
        dp->SetOrigin(userInput);
      }
      else
      {
        // Pair did not exist; make new one
        dp = new OBPairData;
        dp->SetAttribute(attr);
        dp->SetValue(val);
        dp->SetOrigin(userInput);
        SetData(dp);
      }
    }
  }

  itr = pOptions->find("add"); //adds new properties from descriptors in list
  if(itr!=pOptions->end())
    OBDescriptor::AddProperties(this, itr->second);

  itr = pOptions->find("delete"); //deletes the specified properties
  if(itr!=pOptions->end())
    OBDescriptor::DeleteProperties(this, itr->second);

  itr = pOptions->find("append"); //Appends values of descriptors or properties to title
  if(itr!=pOptions->end())
  {
    string title(GetTitle());
    title += OBDescriptor::GetValues(this, itr->second);
    // A leading punct char is used only as a separator, not at start.
    // Guard against an empty title, and pass an unsigned char to ispunct():
    // its behaviour is undefined for negative char values.
    if(!title.empty() && ispunct(static_cast<unsigned char>(title[0])))
      title[0]=' ';
    SetTitle(Trim(title).c_str());
  }

  //Filter using OBDescriptor comparison and (older) SMARTS tests
  //Continue only if previous test was true.
  bool fmatch = true;
  itr = pOptions->find("filter");
  if(itr!=pOptions->end())
  {
    std::istringstream optionText(itr->second);
    fmatch = OBDescriptor::FilterCompare(this, optionText, false);
  }

  if(fmatch)
  {
    itr = pOptions->find("v");
    if(itr!=pOptions->end() && !itr->second.empty())
    {
      //inverse match quoted SMARTS string which follows
      OBSmartsPattern sp;
      sp.Init(itr->second);
      fmatch = !sp.Match(*this);
    }
  }

  if(fmatch)
  {
    itr = pOptions->find("s");
    if(itr!=pOptions->end() && !itr->second.empty())
    {
      //SMARTS filter
      //If exactmatch option set (probably in fastsearchformat) the
      //number of atoms in the pattern (passed as a string in the option text)
      //has to be the same as in the molecule.
      itr2 = pOptions->find("exactmatch");
      if(itr2!=pOptions->end() && NumHvyAtoms()!=atoi(itr2->second.c_str()))
        fmatch=false;
      else
      {
        //match quoted SMARTS string which follows
        OBSmartsPattern sp;
        sp.Init(itr->second.c_str());
        fmatch = sp.Match(*this);
      }
    }
  }

  if(!fmatch)
  {
    //filter failed: delete OBMol and return NULL
    delete this;
    return NULL;
  }

  if(!ret)
  {
    obErrorLog.ThrowError(__FUNCTION__, "Error executing an option", obError);
    delete this; //added 9March2006
    return NULL;
  }

  return this;
}
// Reading Gaussian output has been tested for G98 and G03 to some degree // If you have problems (or examples of older output), please contact // the [email protected] mailing list and/or post a bug bool GaussianOutputFormat::ReadMolecule(OBBase* pOb, OBConversion* pConv) { OBMol* pmol = pOb->CastAndClear<OBMol>(); if(pmol==NULL) return false; //Define some references so we can use the old parameter names istream &ifs = *pConv->GetInStream(); OBMol &mol = *pmol; const char* title = pConv->GetTitle(); char buffer[BUFF_SIZE]; string str,str1,str2,thermo_method; double x,y,z; OBAtom *atom; vector<string> vs,vs2; int total_charge = 0; unsigned int spin_multiplicity = 1; bool hasPartialCharges = false; string chargeModel; // descriptor for charges (e.g. "Mulliken") // Variable for G2/G3/G4 etc. calculations double ezpe,Hcorr,Gcorr,E0,CV; bool ezpe_set=false,Hcorr_set=false,Gcorr_set=false,E0_set=false,CV_set=false; double temperature = 0; /* Kelvin */ std::vector<double> Scomponents; // Electrostatic potential OBFreeGrid *esp = NULL; // coordinates of all steps // Set conformers to all coordinates we adopted std::vector<double*> vconf; // index of all frames/conformers std::vector<double> coordinates; // coordinates in each frame int natoms = 0; // number of atoms -- ensure we don't go to a new job with a different molecule // OBConformerData stores information about multiple steps // we can change attribute later if needed (e.g., IRC) OBConformerData *confData = new OBConformerData(); confData->SetOrigin(fileformatInput); std::vector<unsigned short> confDimensions = confData->GetDimension(); // to be fair, set these all to 3D std::vector<double> confEnergies = confData->GetEnergies(); std::vector< std::vector< vector3 > > confForces = confData->GetForces(); //Vibrational data std::vector< std::vector< vector3 > > Lx; std::vector<double> Frequencies, Intensities; //Rotational data std::vector<double> RotConsts(3); int RotSymNum=1; OBRotationData::RType RotorType = 
OBRotationData::UNKNOWN; // Translation vectors (if present) vector3 translationVectors[3]; int numTranslationVectors = 0; //Electronic Excitation data std::vector<double> Forces, Wavelengths, EDipole, RotatoryStrengthsVelocity, RotatoryStrengthsLength; // Orbital data std::vector<double> orbitals; std::vector<std::string> symmetries; int aHOMO, bHOMO, betaStart; aHOMO = bHOMO = betaStart = -1; int i=0; bool no_symmetry=false; char coords_type[25]; //Prescan file to find second instance of "orientation:" //This will be the kind of coords used in the chk/fchk file //Unless the "nosym" keyword has been requested while (ifs.getline(buffer,BUFF_SIZE)) { if (strstr(buffer,"Symmetry turned off by external request.") != NULL) { // The "nosym" keyword has been requested no_symmetry = true; } if (strstr(buffer,"orientation:") !=NULL) { i++; tokenize (vs, buffer); // gotta check what types of orientation are present strncpy (coords_type, vs[0].c_str(), 24); strcat (coords_type, " orientation:"); } if ((no_symmetry && i==1) || i==2) break; } // Reset end-of-file pointers etc. 
ifs.clear(); ifs.seekg(0); //rewind mol.BeginModify(); while (ifs.getline(buffer,BUFF_SIZE)) { if(strstr(buffer, "Entering Gaussian") != NULL) { //Put some metadata into OBCommentData string comment("Gaussian "); if(NULL != strchr(buffer,'=')) { comment += strchr(buffer,'=')+2; comment += ""; for(unsigned i=0; i<115 && ifs; ++i) { ifs.getline(buffer,BUFF_SIZE); if(strstr(buffer,"Revision") != NULL) { if (buffer[strlen(buffer)-1] == ',') { buffer[strlen(buffer)-1] = '\0'; } add_unique_pairdata_to_mol(&mol,"program",buffer,0); } else if(buffer[1]=='#') { //the line describing the method comment += buffer; OBCommentData *cd = new OBCommentData; cd->SetData(comment); cd->SetOrigin(fileformatInput); mol.SetData(cd); tokenize(vs,buffer); if (vs.size() > 1) { char *str = strdup(vs[1].c_str()); char *ptr = strchr(str,'/'); if (NULL != ptr) { *ptr = ' '; add_unique_pairdata_to_mol(&mol,"basis",ptr,0); *ptr = '\0'; add_unique_pairdata_to_mol(&mol,"method",str,0); } } break; } } } } else if (strstr(buffer,"Multiplicity") != NULL) { tokenize(vs, buffer, " \t\n"); if (vs.size() == 6) { total_charge = atoi(vs[2].c_str()); spin_multiplicity = atoi(vs[5].c_str()); } ifs.getline(buffer,BUFF_SIZE); } else if (strstr(buffer, coords_type) != NULL) { numTranslationVectors = 0; // ignore old translationVectors ifs.getline(buffer,BUFF_SIZE); // --------------- ifs.getline(buffer,BUFF_SIZE); // column headings ifs.getline(buffer,BUFF_SIZE); // column headings ifs.getline(buffer,BUFF_SIZE); // --------------- ifs.getline(buffer,BUFF_SIZE); tokenize(vs,buffer); while (vs.size()>4) { int corr = vs.size()==5 ? 
-1 : 0; //g94; later versions have an extra column x = atof((char*)vs[3+corr].c_str()); y = atof((char*)vs[4+corr].c_str()); z = atof((char*)vs[5+corr].c_str()); int atomicNum = atoi((char*)vs[1].c_str()); if (atomicNum > 0) // translation vectors are "-2" { if (natoms == 0) { // first time reading the molecule, create each atom atom = mol.NewAtom(); atom->SetAtomicNum(atoi((char*)vs[1].c_str())); } coordinates.push_back(x); coordinates.push_back(y); coordinates.push_back(z); } else { translationVectors[numTranslationVectors++].Set(x, y, z); } if (!ifs.getline(buffer,BUFF_SIZE)) { break; } tokenize(vs,buffer); } // done with reading atoms natoms = mol.NumAtoms(); if(natoms==0) return false; // malloc / memcpy double *tmpCoords = new double [(natoms)*3]; memcpy(tmpCoords, &coordinates[0], sizeof(double)*natoms*3); vconf.push_back(tmpCoords); coordinates.clear(); confDimensions.push_back(3); // always 3D -- OBConformerData allows mixing 2D and 3D structures } else if(strstr(buffer,"Dipole moment") != NULL) { ifs.getline(buffer,BUFF_SIZE); // actual components X ### Y #### Z ### tokenize(vs,buffer); if (vs.size() >= 6) { OBVectorData *dipoleMoment = new OBVectorData; dipoleMoment->SetAttribute("Dipole Moment"); double x, y, z; x = atof(vs[1].c_str()); y = atof(vs[3].c_str()); z = atof(vs[5].c_str()); dipoleMoment->SetData(x, y, z); dipoleMoment->SetOrigin(fileformatInput); mol.SetData(dipoleMoment); } if (!ifs.getline(buffer,BUFF_SIZE)) break; } else if(strstr(buffer,"Traceless Quadrupole moment") != NULL) { ifs.getline(buffer,BUFF_SIZE); // actual components XX ### YY #### ZZ ### tokenize(vs,buffer); ifs.getline(buffer,BUFF_SIZE); // actual components XY ### XZ #### YZ ### tokenize(vs2,buffer); if ((vs.size() >= 6) && (vs2.size() >= 6)) { double Q[3][3]; OpenBabel::OBMatrixData *quadrupoleMoment = new OpenBabel::OBMatrixData; Q[0][0] = atof(vs[1].c_str()); Q[1][1] = atof(vs[3].c_str()); Q[2][2] = atof(vs[5].c_str()); Q[1][0] = Q[0][1] = atof(vs2[1].c_str()); Q[2][0] 
= Q[0][2] = atof(vs2[3].c_str()); Q[2][1] = Q[1][2] = atof(vs2[5].c_str()); matrix3x3 quad(Q); quadrupoleMoment->SetAttribute("Traceless Quadrupole Moment"); quadrupoleMoment->SetData(quad); quadrupoleMoment->SetOrigin(fileformatInput); mol.SetData(quadrupoleMoment); } if (!ifs.getline(buffer,BUFF_SIZE)) break; } else if(strstr(buffer,"Exact polarizability") != NULL) { // actual components XX, YX, YY, XZ, YZ, ZZ tokenize(vs,buffer); if (vs.size() >= 8) { double Q[3][3]; OpenBabel::OBMatrixData *pol_tensor = new OpenBabel::OBMatrixData; Q[0][0] = atof(vs[2].c_str()); Q[1][1] = atof(vs[4].c_str()); Q[2][2] = atof(vs[7].c_str()); Q[1][0] = Q[0][1] = atof(vs[3].c_str()); Q[2][0] = Q[0][2] = atof(vs[5].c_str()); Q[2][1] = Q[1][2] = atof(vs[6].c_str()); matrix3x3 pol(Q); pol_tensor->SetAttribute("Exact polarizability"); pol_tensor->SetData(pol); pol_tensor->SetOrigin(fileformatInput); mol.SetData(pol_tensor); } if (!ifs.getline(buffer,BUFF_SIZE)) break; } else if(strstr(buffer,"Total atomic charges") != NULL || strstr(buffer,"Mulliken atomic charges") != NULL) { hasPartialCharges = true; chargeModel = "Mulliken"; ifs.getline(buffer,BUFF_SIZE); // column headings ifs.getline(buffer,BUFF_SIZE); tokenize(vs,buffer); while (vs.size() >= 3 && strstr(buffer,"Sum of ") == NULL) { atom = mol.GetAtom(atoi(vs[0].c_str())); if (!atom) break; atom->SetPartialCharge(atof(vs[2].c_str())); if (!ifs.getline(buffer,BUFF_SIZE)) break; tokenize(vs,buffer); } } else if (strstr(buffer, "Atomic Center") != NULL) { // Data points for ESP calculation tokenize(vs,buffer); if (NULL == esp) esp = new OpenBabel::OBFreeGrid(); if (vs.size() == 8) { esp->AddPoint(atof(vs[5].c_str()),atof(vs[6].c_str()), atof(vs[7].c_str()),0); } else if (vs.size() > 5) { double x,y,z; if (3 == sscanf(buffer+32,"%10lf%10lf%10lf",&x,&y,&z)) { esp->AddPoint(x,y,z,0); } } } else if (strstr(buffer, "ESP Fit Center") != NULL) { // Data points for ESP calculation tokenize(vs,buffer); if (NULL == esp) esp = new 
OpenBabel::OBFreeGrid(); if (vs.size() == 9) { esp->AddPoint(atof(vs[6].c_str()),atof(vs[7].c_str()), atof(vs[8].c_str()),0); } else if (vs.size() > 6) { double x,y,z; if (3 == sscanf(buffer+32,"%10lf%10lf%10lf",&x,&y,&z)) { esp->AddPoint(x,y,z,0); } } } else if (strstr(buffer, "Electrostatic Properties (Atomic Units)") != NULL) { int i,np; OpenBabel::OBFreeGridPoint *fgp; OpenBabel::OBFreeGridPointIterator fgpi; for(i=0; (i<5); i++) { ifs.getline(buffer,BUFF_SIZE); // skip line } // Assume file is correct and that potentials are present // where they should. np = esp->NumPoints(); fgpi = esp->BeginPoints(); i = 0; for(fgp = esp->BeginPoint(fgpi); (NULL != fgp); fgp = esp->NextPoint(fgpi)) { ifs.getline(buffer,BUFF_SIZE); tokenize(vs,buffer); if (vs.size() >= 2) { fgp->SetV(atof(vs[2].c_str())); i++; } } if (i == np) { esp->SetAttribute("Electrostatic Potential"); mol.SetData(esp); } else { cout << "Read " << esp->NumPoints() << " ESP points i = " << i << "\n"; } } else if (strstr(buffer, "Charges from ESP fit") != NULL) { hasPartialCharges = true; chargeModel = "ESP"; ifs.getline(buffer,BUFF_SIZE); // Charge / dipole line ifs.getline(buffer,BUFF_SIZE); // column header ifs.getline(buffer,BUFF_SIZE); // real charges tokenize(vs,buffer); while (vs.size() >= 3 && strstr(buffer,"-----") == NULL) { atom = mol.GetAtom(atoi(vs[0].c_str())); if (!atom) break; atom->SetPartialCharge(atof(vs[2].c_str())); if (!ifs.getline(buffer,BUFF_SIZE)) break; tokenize(vs,buffer); } } else if(strstr(buffer,"Natural Population") != NULL) { hasPartialCharges = true; chargeModel = "NBO"; ifs.getline(buffer,BUFF_SIZE); // column headings ifs.getline(buffer,BUFF_SIZE); // again ifs.getline(buffer,BUFF_SIZE); // again (-----) ifs.getline(buffer,BUFF_SIZE); // real data tokenize(vs,buffer); while (vs.size() >= 3 && strstr(buffer,"=====") == NULL) { atom = mol.GetAtom(atoi(vs[1].c_str())); if (!atom) break; atom->SetPartialCharge(atof(vs[2].c_str())); if (!ifs.getline(buffer,BUFF_SIZE)) break; 
tokenize(vs,buffer); } } else if(strstr(buffer, " Frequencies -- ")) //vibrational frequencies { //The info should appear only once as several blocks starting with this line tokenize(vs, buffer); for(unsigned int i=2; i<vs.size(); ++i) Frequencies.push_back(atof(vs[i].c_str())); ifs.getline(buffer,BUFF_SIZE); //Red. masses ifs.getline(buffer,BUFF_SIZE); //Frc consts ifs.getline(buffer,BUFF_SIZE); //IR Inten tokenize(vs, buffer); for(unsigned int i=3; i<vs.size(); ++i) Intensities.push_back(atof(vs[i].c_str())); ifs.getline(buffer, BUFF_SIZE); // column labels or Raman intensity if(strstr(buffer, "Raman Activ")) { ifs.getline(buffer, BUFF_SIZE); // Depolar (P) ifs.getline(buffer, BUFF_SIZE); // Depolar (U) ifs.getline(buffer, BUFF_SIZE); // column labels } ifs.getline(buffer, BUFF_SIZE); // actual displacement data tokenize(vs, buffer); vector<vector3> vib1, vib2, vib3; double x, y, z; while(vs.size() >= 5) { for (unsigned int i = 2; i < vs.size()-2; i += 3) { x = atof(vs[i].c_str()); y = atof(vs[i+1].c_str()); z = atof(vs[i+2].c_str()); if (i == 2) vib1.push_back(vector3(x, y, z)); else if (i == 5) vib2.push_back(vector3(x, y, z)); else if (i == 8) vib3.push_back(vector3(x, y, z)); } if (!ifs.getline(buffer, BUFF_SIZE)) break; tokenize(vs,buffer); } Lx.push_back(vib1); if (vib2.size()) Lx.push_back(vib2); if (vib3.size()) Lx.push_back(vib3); } else if(strstr(buffer, " This molecule is "))//rotational data { if(strstr(buffer, "asymmetric")) RotorType = OBRotationData::ASYMMETRIC; else if(strstr(buffer, "symmetric")) RotorType = OBRotationData::SYMMETRIC; else if(strstr(buffer, "linear")) RotorType = OBRotationData::LINEAR; else RotorType = OBRotationData::UNKNOWN; ifs.getline(buffer,BUFF_SIZE); //symmetry number tokenize(vs, buffer); RotSymNum = atoi(vs[3].c_str()); } else if(strstr(buffer, "Rotational constant")) { tokenize(vs, buffer); RotConsts.clear(); for (unsigned int i=3; i<vs.size(); ++i) RotConsts.push_back(atof(vs[i].c_str())); } else if(strstr(buffer, 
"alpha electrons")) // # of electrons / orbital { tokenize(vs, buffer); if (vs.size() == 6) { // # alpha electrons # beta electrons aHOMO = atoi(vs[0].c_str()); bHOMO = atoi(vs[3].c_str()); } } else if(strstr(buffer, "rbital symmetries")) // orbital symmetries { symmetries.clear(); std::string label; // used as a temporary to remove "(" and ")" from labels int iii,offset = 0; bool bDoneSymm; // Extract both Alpha and Beta symmetries ifs.getline(buffer, BUFF_SIZE); // skip the current line for(iii=0; (iii<2); iii++) { if (strstr(buffer, "electronic state")) break; // We've gone too far! while (!ifs.eof() && ((NULL != strstr(buffer,"Alpha")) || (NULL != strstr(buffer,"Beta")))) { // skip the Alpha: and Beta: title lines ifs.getline(buffer, BUFF_SIZE); } do { bDoneSymm = (NULL == strstr(buffer, "(")); if (!bDoneSymm) { tokenize(vs, buffer); if ((NULL != strstr(buffer, "Occupied")) || (NULL != strstr(buffer, "Virtual"))) { offset = 1; // skip first token } else { offset = 0; } for (unsigned int i = offset; i < vs.size(); ++i) { label = vs[i].substr(1, vs[i].length() - 2); symmetries.push_back(label); } ifs.getline(buffer, BUFF_SIZE); // get a new line if we've been reading symmetries } // don't read a new line if we're done with symmetries } while (!ifs.eof() && !bDoneSymm); } // end alpha/beta section } else if (strstr(buffer, "Alpha") && strstr(buffer, ". eigenvalues --")) { orbitals.clear(); betaStart = 0; while (strstr(buffer, ". 
eigenvalues --")) { tokenize(vs, buffer); if (vs.size() < 4) break; if (vs[0].find("Beta") !=string::npos && betaStart == 0) // mark where we switch from alpha to beta betaStart = orbitals.size(); for (unsigned int i = 4; i < vs.size(); ++i) { orbitals.push_back(atof(vs[i].c_str())); } ifs.getline(buffer, BUFF_SIZE); } } else if(strstr(buffer, " Excited State")) // Force and wavelength data { // The above line appears for each state, so just append the info to the vectors tokenize(vs, buffer); if (vs.size() >= 9) { double wavelength = atof(vs[6].c_str()); double force = atof(vs[8].substr(2).c_str()); // remove the "f=" part Forces.push_back(force); Wavelengths.push_back(wavelength); } } else if(strstr(buffer, " Ground to excited state Transition electric dipole moments (Au):")) // Electronic dipole moments { ifs.getline(buffer, BUFF_SIZE); // Headings ifs.getline(buffer, BUFF_SIZE); // First entry tokenize(vs, buffer); while (vs.size() == 5) { double s = atof(vs[4].c_str()); EDipole.push_back(s); ifs.getline(buffer, BUFF_SIZE); tokenize(vs, buffer); } } else if(strstr(buffer, " state X Y Z R(velocity)")) { // Rotatory Strengths ifs.getline(buffer, BUFF_SIZE); // First entry tokenize(vs, buffer); while (vs.size() == 5) { double s = atof(vs[4].c_str()); RotatoryStrengthsVelocity.push_back(s); ifs.getline(buffer, BUFF_SIZE); tokenize(vs, buffer); } } else if(strstr(buffer, " state X Y Z R(length)")) { // Rotatory Strengths ifs.getline(buffer, BUFF_SIZE); // First entry tokenize(vs, buffer); while (vs.size() == 5) { double s = atof(vs[4].c_str()); RotatoryStrengthsLength.push_back(s); ifs.getline(buffer, BUFF_SIZE); tokenize(vs, buffer); } } else if (strstr(buffer, "Forces (Hartrees/Bohr)")) { ifs.getline(buffer, BUFF_SIZE); // column headers ifs.getline(buffer, BUFF_SIZE); // ------ ifs.getline(buffer, BUFF_SIZE); // real data } else if (strstr(buffer, "Isotropic = ")) // NMR shifts { tokenize(vs, buffer); if (vs.size() >= 4) { atom = mol.GetAtom(atoi(vs[0].c_str())); 
OBPairData *nmrShift = new OBPairData(); nmrShift->SetAttribute("NMR Isotropic Shift"); string shift = vs[4].c_str(); nmrShift->SetValue(shift); atom->SetData(nmrShift); } } else if(strstr(buffer,"SCF Done:") != NULL) { tokenize(vs,buffer); mol.SetEnergy(atof(vs[4].c_str()) * HARTEE_TO_KCALPERMOL); confEnergies.push_back(mol.GetEnergy()); } /* Temporarily commented out until the handling of energy in OBMol is sorted out // MP2 energies also use a different syntax // PM3 energies use a different syntax else if(strstr(buffer,"E (Thermal)") != NULL) { ifs.getline(buffer,BUFF_SIZE); //Headers ifs.getline(buffer,BUFF_SIZE); //Total energy; what we want tokenize(vs,buffer); mol.SetEnergy(atof(vs[1].c_str())); confEnergies.push_back(mol.GetEnergy()); } */ else if(strstr(buffer,"Standard basis:") != NULL) { add_unique_pairdata_to_mol(&mol,"basis",buffer,2); } else if(strstr(buffer,"Zero-point correction=") != NULL) { tokenize(vs,buffer); ezpe = atof(vs[2].c_str()); ezpe_set = true; } else if(strstr(buffer,"Thermal correction to Enthalpy=") != NULL) { tokenize(vs,buffer); Hcorr = atof(vs[4].c_str()); Hcorr_set = true; } else if(strstr(buffer,"Thermal correction to Gibbs Free Energy=") != NULL) { tokenize(vs,buffer); Gcorr = atof(vs[6].c_str()); Gcorr_set = true; } else if (strstr(buffer,"CV") != NULL) { ifs.getline(buffer,BUFF_SIZE); //Headers ifs.getline(buffer,BUFF_SIZE); //Total heat capacity tokenize(vs,buffer); if (vs.size() == 4) { if (vs[0].compare("Total") == 0) { CV = atof(vs[2].c_str()); CV_set = true; } } ifs.getline(buffer,BUFF_SIZE); //Electronic ifs.getline(buffer,BUFF_SIZE); //Translational tokenize(vs,buffer); if ((vs.size() == 4) && (vs[0].compare("Translational") == 0) ) { Scomponents.push_back(atof(vs[3].c_str())); } ifs.getline(buffer,BUFF_SIZE); //Rotational tokenize(vs,buffer); if ((vs.size() == 4) && (vs[0].compare("Rotational") == 0)) { Scomponents.push_back(atof(vs[3].c_str())); } ifs.getline(buffer,BUFF_SIZE); //Vibrational tokenize(vs,buffer); if 
((vs.size() == 4) && (vs[0].compare("Vibrational") == 0)) { Scomponents.push_back(atof(vs[3].c_str())); } } else if ((strstr(buffer,"Temperature=") != NULL) && (strstr(buffer,"Pressure=") != NULL)) { tokenize(vs,buffer); temperature = atof(vs[1].c_str()); } else if (strstr(buffer, "(0 K)") != NULL) { /* This must be the last else */ int i,nsearch; const char *search[] = { "CBS-QB3 (0 K)", "G2(0 K)", "G3(0 K)", "G4(0 K)", "W1BD (0 K)", "W1U (0 K)" }; const char *mymeth[] = { "CBS-QB3", "G2", "G3", "G4", "W1BD", "W1U" }; const int myindex[] = { 3, 2, 2, 2, 3, 3 }; nsearch = sizeof(search)/sizeof(search[0]); for(i=0; (i<nsearch); i++) { if(strstr(buffer,search[i]) != NULL) { tokenize(vs,buffer); E0 = atof(vs[myindex[i]].c_str()); E0_set = 1; thermo_method = mymeth[i]; break; } } } } // end while if (mol.NumAtoms() == 0) { // e.g., if we're at the end of a file PR#1737209 mol.EndModify(); return false; } mol.EndModify(); // Set conformers to all coordinates we adopted // but remove last geometry -- it's a duplicate if (vconf.size() > 1) vconf.pop_back(); mol.SetConformers(vconf); mol.SetConformer(mol.NumConformers() - 1); // Copy the conformer data too confData->SetDimension(confDimensions); confData->SetEnergies(confEnergies); confData->SetForces(confForces); mol.SetData(confData); // Check whether we have data to extract heat of formation. 
if (ezpe_set && Hcorr_set && Gcorr_set && E0_set && CV_set && (thermo_method.size() > 0)) { extract_thermo(&mol,thermo_method,temperature,ezpe, Hcorr,Gcorr,E0,CV,RotSymNum,Scomponents); } // Attach orbital data, if there is any if (orbitals.size() > 0) { OBOrbitalData *od = new OBOrbitalData; if (aHOMO == bHOMO) { od->LoadClosedShellOrbitals(orbitals, symmetries, aHOMO); } else { // we have to separate the alpha and beta vectors std::vector<double> betaOrbitals; std::vector<std::string> betaSymmetries; unsigned int initialSize = orbitals.size(); unsigned int symmSize = symmetries.size(); if (initialSize != symmSize || betaStart == -1) { cerr << "Inconsistency: orbitals have " << initialSize << " elements while symmetries have " << symmSize << endl; } else { for (unsigned int i = betaStart; i < initialSize; ++i) { betaOrbitals.push_back(orbitals[i]); if (symmetries.size() > 0) betaSymmetries.push_back(symmetries[i]); } // ok, now erase the end elements of orbitals and symmetries for (unsigned int i = betaStart; i < initialSize; ++i) { orbitals.pop_back(); if (symmetries.size() > 0) symmetries.pop_back(); } // and load the alphas and betas od->LoadAlphaOrbitals(orbitals, symmetries, aHOMO); od->LoadBetaOrbitals(betaOrbitals, betaSymmetries, bHOMO); } } od->SetOrigin(fileformatInput); mol.SetData(od); } //Attach vibrational data, if there is any, to molecule if(Frequencies.size()>0) { OBVibrationData* vd = new OBVibrationData; vd->SetData(Lx, Frequencies, Intensities); vd->SetOrigin(fileformatInput); mol.SetData(vd); } //Attach rotational data, if there is any, to molecule if(RotConsts[0]!=0.0) { OBRotationData* rd = new OBRotationData; rd->SetData(RotorType, RotConsts, RotSymNum); rd->SetOrigin(fileformatInput); mol.SetData(rd); } // Attach unit cell translation vectors if found if (numTranslationVectors > 0) { OBUnitCell* uc = new OBUnitCell; uc->SetData(translationVectors[0], translationVectors[1], translationVectors[2]); uc->SetOrigin(fileformatInput); 
mol.SetData(uc); } //Attach electronic transition data, if there is any, to molecule if(Forces.size() > 0 && Forces.size() == Wavelengths.size()) { OBElectronicTransitionData* etd = new OBElectronicTransitionData; etd->SetData(Wavelengths, Forces); if (EDipole.size() == Forces.size()) etd->SetEDipole(EDipole); if (RotatoryStrengthsLength.size() == Forces.size()) etd->SetRotatoryStrengthsLength(RotatoryStrengthsLength); if (RotatoryStrengthsVelocity.size() == Forces.size()) etd->SetRotatoryStrengthsVelocity(RotatoryStrengthsVelocity); etd->SetOrigin(fileformatInput); mol.SetData(etd); } if (!pConv->IsOption("b",OBConversion::INOPTIONS)) mol.ConnectTheDots(); if (!pConv->IsOption("s",OBConversion::INOPTIONS) && !pConv->IsOption("b",OBConversion::INOPTIONS)) mol.PerceiveBondOrders(); if (hasPartialCharges) { mol.SetPartialChargesPerceived(); // Annotate that partial charges come from Mulliken OBPairData *dp = new OBPairData; dp->SetAttribute("PartialCharges"); dp->SetValue(chargeModel); // Mulliken, ESP, etc. dp->SetOrigin(fileformatInput); mol.SetData(dp); } mol.SetTotalCharge(total_charge); mol.SetTotalSpinMultiplicity(spin_multiplicity); mol.SetTitle(title); return(true); }
// Reading Gaussian output has been tested for G98 and G03 to some degree // If you have problems (or examples of older output), please contact // the [email protected] mailing list and/or post a bug bool GaussianOutputFormat::ReadMolecule(OBBase* pOb, OBConversion* pConv) { OBMol* pmol = pOb->CastAndClear<OBMol>(); if(pmol==NULL) return false; //Define some references so we can use the old parameter names istream &ifs = *pConv->GetInStream(); OBMol &mol = *pmol; const char* title = pConv->GetTitle(); char buffer[BUFF_SIZE]; string str,str1; double x,y,z; OBAtom *atom; vector<string> vs; int charge = 0; unsigned int spin = 1; bool hasPartialCharges = false; string chargeModel; // descriptor for charges (e.g. "Mulliken") // coordinates of all steps // Set conformers to all coordinates we adopted std::vector<double*> vconf; // index of all frames/conformers std::vector<double> coordinates; // coordinates in each frame int natoms = 0; // number of atoms -- ensure we don't go to a new job with a different molecule // OBConformerData stores information about multiple steps // we can change attribute later if needed (e.g., IRC) OBConformerData *confData = new OBConformerData(); confData->SetOrigin(fileformatInput); std::vector<unsigned short> confDimensions = confData->GetDimension(); // to be fair, set these all to 3D std::vector<double> confEnergies = confData->GetEnergies(); std::vector< std::vector< vector3 > > confForces = confData->GetForces(); //Vibrational data std::vector< std::vector< vector3 > > Lx; std::vector<double> Frequencies, Intensities; //Rotational data std::vector<double> RotConsts(3); int RotSymNum=1; OBRotationData::RType RotorType; // Translation vectors (if present) vector3 translationVectors[3]; int numTranslationVectors = 0; //Electronic Excitation data std::vector<double> Forces, Wavelengths, EDipole, RotatoryStrengthsVelocity, RotatoryStrengthsLength; // Orbital data std::vector<double> orbitals; std::vector<std::string> symmetries; int 
aHOMO, bHOMO, betaStart; //Put some metadata into OBCommentData string comment("Gaussian "); ifs.getline(buffer,BUFF_SIZE); if(*buffer) { comment += strchr(buffer,'=')+2; comment += ""; for(unsigned i=0; i<115, ifs; ++i) { ifs.getline(buffer,BUFF_SIZE); if(buffer[1]=='#') { //the line describing the method comment += buffer; OBCommentData *cd = new OBCommentData; cd->SetData(comment); cd->SetOrigin(fileformatInput); mol.SetData(cd); break; } } } int i=0; bool no_symmetry=false; char coords_type[25]; //Prescan file to find second instance of "orientation:" //This will be the kind of coords used in the chk/fchk file //Unless the "nosym" keyword has been requested while (ifs.getline(buffer,BUFF_SIZE)) { if (strstr(buffer,"Symmetry turned off by external request.") != NULL) { // The "nosym" keyword has been requested no_symmetry = true; } if (strstr(buffer,"orientation:") !=NULL) { i++; tokenize (vs, buffer); strcpy (coords_type, vs[0].c_str()); strcat (coords_type, " orientation:"); } if ((no_symmetry && i==1) || i==2) break; // Check for the last line of normal output and exit loop, otherwise, // the rewind below will no longer work. 
if (strstr(buffer,"Normal termination of Gaussian") != NULL) break; } ifs.seekg(0); //rewind mol.BeginModify(); while (ifs.getline(buffer,BUFF_SIZE)) { if (strstr(buffer,"Multiplicity") != NULL) { tokenize(vs, buffer, " \t\n"); if (vs.size() == 6) { charge = atoi(vs[2].c_str()); spin = atoi(vs[5].c_str()); } ifs.getline(buffer,BUFF_SIZE); } else if (strstr(buffer, coords_type) != NULL) { numTranslationVectors = 0; // ignore old translationVectors ifs.getline(buffer,BUFF_SIZE); // --------------- ifs.getline(buffer,BUFF_SIZE); // column headings ifs.getline(buffer,BUFF_SIZE); // column headings ifs.getline(buffer,BUFF_SIZE); // --------------- ifs.getline(buffer,BUFF_SIZE); tokenize(vs,buffer); while (vs.size() == 6) { x = atof((char*)vs[3].c_str()); y = atof((char*)vs[4].c_str()); z = atof((char*)vs[5].c_str()); int atomicNum = atoi((char*)vs[1].c_str()); if (atomicNum > 0) // translation vectors are "-2" { if (natoms == 0) { // first time reading the molecule, create each atom atom = mol.NewAtom(); atom->SetAtomicNum(atoi((char*)vs[1].c_str())); } coordinates.push_back(x); coordinates.push_back(y); coordinates.push_back(z); } else { translationVectors[numTranslationVectors++].Set(x, y, z); } if (!ifs.getline(buffer,BUFF_SIZE)) { break; } tokenize(vs,buffer); } // done with reading atoms natoms = mol.NumAtoms(); // malloc / memcpy double *tmpCoords = new double [(natoms)*3]; memcpy(tmpCoords, &coordinates[0], sizeof(double)*natoms*3); vconf.push_back(tmpCoords); coordinates.clear(); confDimensions.push_back(3); // always 3D -- OBConformerData allows mixing 2D and 3D structures } else if(strstr(buffer,"Dipole moment") != NULL) { ifs.getline(buffer,BUFF_SIZE); // actual components X ### Y #### Z ### tokenize(vs,buffer); if (vs.size() >= 6) { OBVectorData *dipoleMoment = new OBVectorData; dipoleMoment->SetAttribute("Dipole Moment"); double x, y, z; x = atof(vs[1].c_str()); y = atof(vs[3].c_str()); z = atof(vs[5].c_str()); dipoleMoment->SetData(x, y, z); 
dipoleMoment->SetOrigin(fileformatInput); mol.SetData(dipoleMoment); } if (!ifs.getline(buffer,BUFF_SIZE)) break; } else if(strstr(buffer,"Total atomic charges") != NULL || strstr(buffer,"Mulliken atomic charges") != NULL) { hasPartialCharges = true; chargeModel = "Mulliken"; ifs.getline(buffer,BUFF_SIZE); // column headings ifs.getline(buffer,BUFF_SIZE); tokenize(vs,buffer); while (vs.size() >= 3 && strstr(buffer,"Sum of ") == NULL) { atom = mol.GetAtom(atoi(vs[0].c_str())); if (!atom) break; atom->SetPartialCharge(atof(vs[2].c_str())); if (!ifs.getline(buffer,BUFF_SIZE)) break; tokenize(vs,buffer); } } else if (strstr(buffer, "Charges from ESP fit") != NULL) { hasPartialCharges = true; chargeModel = "ESP"; ifs.getline(buffer,BUFF_SIZE); // Charge / dipole line ifs.getline(buffer,BUFF_SIZE); // column header ifs.getline(buffer,BUFF_SIZE); // real charges tokenize(vs,buffer); while (vs.size() >= 3 && strstr(buffer,"-----") == NULL) { atom = mol.GetAtom(atoi(vs[0].c_str())); if (!atom) break; atom->SetPartialCharge(atof(vs[2].c_str())); if (!ifs.getline(buffer,BUFF_SIZE)) break; tokenize(vs,buffer); } } else if(strstr(buffer,"Natural Population") != NULL) { hasPartialCharges = true; chargeModel = "NBO"; ifs.getline(buffer,BUFF_SIZE); // column headings ifs.getline(buffer,BUFF_SIZE); // again ifs.getline(buffer,BUFF_SIZE); // again (-----) ifs.getline(buffer,BUFF_SIZE); // real data tokenize(vs,buffer); while (vs.size() >= 3 && strstr(buffer,"=====") == NULL) { atom = mol.GetAtom(atoi(vs[1].c_str())); if (!atom) break; atom->SetPartialCharge(atof(vs[2].c_str())); if (!ifs.getline(buffer,BUFF_SIZE)) break; tokenize(vs,buffer); } } else if(strstr(buffer, " Frequencies -- ")) //vibrational frequencies { //The info should appear only once as several blocks starting with this line tokenize(vs, buffer); for(unsigned int i=2; i<vs.size(); ++i) Frequencies.push_back(atof(vs[i].c_str())); ifs.getline(buffer,BUFF_SIZE); //Red. 
masses ifs.getline(buffer,BUFF_SIZE); //Frc consts ifs.getline(buffer,BUFF_SIZE); //IR Inten tokenize(vs, buffer); for(unsigned int i=3; i<vs.size(); ++i) Intensities.push_back(atof(vs[i].c_str())); ifs.getline(buffer, BUFF_SIZE); // column labels or Raman intensity if(strstr(buffer, "Raman Activ")) { ifs.getline(buffer, BUFF_SIZE); // Depolar (P) ifs.getline(buffer, BUFF_SIZE); // Depolar (U) ifs.getline(buffer, BUFF_SIZE); // column labels } ifs.getline(buffer, BUFF_SIZE); // actual displacement data tokenize(vs, buffer); vector<vector3> vib1, vib2, vib3; double x, y, z; while(vs.size() > 5) { for (unsigned int i = 2; i < vs.size()-2; i += 3) { x = atof(vs[i].c_str()); y = atof(vs[i+1].c_str()); z = atof(vs[i+2].c_str()); if (i == 2) vib1.push_back(vector3(x, y, z)); else if (i == 5) vib2.push_back(vector3(x, y, z)); else if (i == 8) vib3.push_back(vector3(x, y, z)); } if (!ifs.getline(buffer, BUFF_SIZE)) break; tokenize(vs,buffer); } Lx.push_back(vib1); if (vib2.size()) Lx.push_back(vib2); if (vib3.size()) Lx.push_back(vib3); } else if(strstr(buffer, " This molecule is "))//rotational data { if(strstr(buffer, "asymmetric")) RotorType = OBRotationData::ASYMMETRIC; else if(strstr(buffer, "symmetric")) RotorType = OBRotationData::SYMMETRIC; else if(strstr(buffer, "linear")) RotorType = OBRotationData::LINEAR; else RotorType = OBRotationData::UNKNOWN; ifs.getline(buffer,BUFF_SIZE); //symmetry number tokenize(vs, buffer); RotSymNum = atoi(vs[3].c_str()); } else if(strstr(buffer, "Rotational constant")) { tokenize(vs, buffer); RotConsts.clear(); for (unsigned int i=3; i<vs.size(); ++i) RotConsts.push_back(atof(vs[i].c_str())); } else if(strstr(buffer, "alpha electrons")) // # of electrons / orbital { tokenize(vs, buffer); if (vs.size() == 6) { // # alpha electrons # beta electrons aHOMO = atoi(vs[0].c_str()); bHOMO = atoi(vs[3].c_str()); } } else if(strstr(buffer, "rbital symmetries")) // orbital symmetries { symmetries.clear(); std::string label; // used as a 
temporary to remove "(" and ")" from labels int offset = 0; while(true) { ifs.getline(buffer, BUFF_SIZE); tokenize(vs, buffer); // parse first line "Occupied" ... for (unsigned int i = 1; i < vs.size(); ++i) { label = vs[i].substr(1, vs[i].length() - 2); symmetries.push_back(label); } ifs.getline(buffer, BUFF_SIZE); // Parse remaining lines while (strstr(buffer, "(")) { tokenize(vs, buffer); if (strstr(buffer, "Virtual")) { offset = 1; // skip first token } else { offset = 0; } for (unsigned int i = offset; i < vs.size(); ++i) { label = vs[i].substr(1, vs[i].length() - 2); symmetries.push_back(label); } ifs.getline(buffer, BUFF_SIZE); // get next line } // end parsing symmetry labels if (!strstr(buffer, "Beta")) // no beta orbitals break; } // end alpha/beta section } else if (strstr(buffer, "Alpha") && strstr(buffer, ". eigenvalues --")) { orbitals.clear(); betaStart = 0; while (strstr(buffer, ". eigenvalues --")) { tokenize(vs, buffer); if (vs.size() < 4) break; if (vs[0].find("Beta") !=string::npos && betaStart == 0) // mark where we switch from alpha to beta betaStart = orbitals.size(); for (unsigned int i = 4; i < vs.size(); ++i) { orbitals.push_back(atof(vs[i].c_str())); } ifs.getline(buffer, BUFF_SIZE); } } else if(strstr(buffer, " Excited State")) // Force and wavelength data { // The above line appears for each state, so just append the info to the vectors tokenize(vs, buffer); if (vs.size() == 9) { double wavelength = atof(vs[6].c_str()); double force = atof(vs[8].substr(2).c_str()); Forces.push_back(force); Wavelengths.push_back(wavelength); } } else if(strstr(buffer, " Ground to excited state Transition electric dipole moments (Au):")) // Electronic dipole moments { ifs.getline(buffer, BUFF_SIZE); // Headings ifs.getline(buffer, BUFF_SIZE); // First entry tokenize(vs, buffer); while (vs.size() == 5) { double s = atof(vs[4].c_str()); EDipole.push_back(s); ifs.getline(buffer, BUFF_SIZE); tokenize(vs, buffer); } } else if(strstr(buffer, " state X Y Z 
R(velocity)")) { // Rotatory Strengths ifs.getline(buffer, BUFF_SIZE); // First entry tokenize(vs, buffer); while (vs.size() == 5) { double s = atof(vs[4].c_str()); RotatoryStrengthsVelocity.push_back(s); ifs.getline(buffer, BUFF_SIZE); tokenize(vs, buffer); } } else if(strstr(buffer, " state X Y Z R(length)")) { // Rotatory Strengths ifs.getline(buffer, BUFF_SIZE); // First entry tokenize(vs, buffer); while (vs.size() == 5) { double s = atof(vs[4].c_str()); RotatoryStrengthsLength.push_back(s); ifs.getline(buffer, BUFF_SIZE); tokenize(vs, buffer); } } else if (strstr(buffer, "Forces (Hartrees/Bohr)")) { ifs.getline(buffer, BUFF_SIZE); // column headers ifs.getline(buffer, BUFF_SIZE); // ------ ifs.getline(buffer, BUFF_SIZE); // real data } else if (strstr(buffer, "Isotropic = ")) // NMR shifts { tokenize(vs, buffer); if (vs.size() >= 4) { atom = mol.GetAtom(atoi(vs[0].c_str())); OBPairData *nmrShift = new OBPairData(); nmrShift->SetAttribute("NMR Isotropic Shift"); string shift = vs[4].c_str(); nmrShift->SetValue(shift); atom->SetData(nmrShift); } } else if(strstr(buffer,"SCF Done:") != NULL) { #define HARTREE_TO_KCAL 627.509469 tokenize(vs,buffer); mol.SetEnergy(atof(vs[4].c_str()) * HARTREE_TO_KCAL); confEnergies.push_back(mol.GetEnergy()); } /* Temporarily commented out until the handling of energy in OBMol is sorted out // MP2 energies also use a different syntax // PM3 energies use a different syntax else if(strstr(buffer,"E (Thermal)") != NULL) { ifs.getline(buffer,BUFF_SIZE); //Headers ifs.getline(buffer,BUFF_SIZE); //Total energy; what we want tokenize(vs,buffer); mol.SetEnergy(atof(vs[1].c_str())); confEnergies.push_back(mol.GetEnergy()); } */ } // end while if (mol.NumAtoms() == 0) { // e.g., if we're at the end of a file PR#1737209 mol.EndModify(); return false; } mol.EndModify(); // Set conformers to all coordinates we adopted // but remove last geometry -- it's a duplicate if (vconf.size() > 1) vconf.pop_back(); mol.SetConformers(vconf); 
mol.SetConformer(mol.NumConformers() - 1); // Copy the conformer data too confData->SetDimension(confDimensions); confData->SetEnergies(confEnergies); confData->SetForces(confForces); mol.SetData(confData); // Attach orbital data, if there is any if (orbitals.size() > 0) { OBOrbitalData *od = new OBOrbitalData; if (aHOMO == bHOMO) { od->LoadClosedShellOrbitals(orbitals, symmetries, aHOMO); } else { // we have to separate the alpha and beta vectors std::vector<double> betaOrbitals; std::vector<std::string> betaSymmetries; unsigned int initialSize = orbitals.size(); for (unsigned int i = betaStart; i < initialSize; ++i) { betaOrbitals.push_back(orbitals[i]); if (symmetries.size() > 0) betaSymmetries.push_back(symmetries[i]); } // ok, now erase the end elements of orbitals and symmetries for (unsigned int i = betaStart; i < initialSize; ++i) { orbitals.pop_back(); if (symmetries.size() > 0) symmetries.pop_back(); } // and load the alphas and betas od->LoadAlphaOrbitals(orbitals, symmetries, aHOMO); od->LoadBetaOrbitals(betaOrbitals, betaSymmetries, bHOMO); } od->SetOrigin(fileformatInput); mol.SetData(od); } //Attach vibrational data, if there is any, to molecule if(Frequencies.size()>0) { OBVibrationData* vd = new OBVibrationData; vd->SetData(Lx, Frequencies, Intensities); vd->SetOrigin(fileformatInput); mol.SetData(vd); } //Attach rotational data, if there is any, to molecule if(RotConsts[0]!=0.0) { OBRotationData* rd = new OBRotationData; rd->SetData(RotorType, RotConsts, RotSymNum); rd->SetOrigin(fileformatInput); mol.SetData(rd); } // Attach unit cell translation vectors if found if (numTranslationVectors > 0) { OBUnitCell* uc = new OBUnitCell; uc->SetData(translationVectors[0], translationVectors[1], translationVectors[2]); uc->SetOrigin(fileformatInput); mol.SetData(uc); } //Attach electronic transition data, if there is any, to molecule if(Forces.size() > 0 && Forces.size() == Wavelengths.size()) { OBElectronicTransitionData* etd = new 
OBElectronicTransitionData; etd->SetData(Wavelengths, Forces); if (EDipole.size() == Forces.size()) etd->SetEDipole(EDipole); if (RotatoryStrengthsLength.size() == Forces.size()) etd->SetRotatoryStrengthsLength(RotatoryStrengthsLength); if (RotatoryStrengthsVelocity.size() == Forces.size()) etd->SetRotatoryStrengthsVelocity(RotatoryStrengthsVelocity); etd->SetOrigin(fileformatInput); mol.SetData(etd); } if (!pConv->IsOption("b",OBConversion::INOPTIONS)) mol.ConnectTheDots(); if (!pConv->IsOption("s",OBConversion::INOPTIONS) && !pConv->IsOption("b",OBConversion::INOPTIONS)) mol.PerceiveBondOrders(); if (hasPartialCharges) { mol.SetPartialChargesPerceived(); // Annotate that partial charges come from Mulliken OBPairData *dp = new OBPairData; dp->SetAttribute("PartialCharges"); dp->SetValue(chargeModel); // Mulliken, ESP, etc. dp->SetOrigin(fileformatInput); mol.SetData(dp); } mol.SetTotalCharge(charge); mol.SetTotalSpinMultiplicity(spin); mol.SetTitle(title); return(true); }
bool MOL2Format::ReadMolecule(OBBase* pOb, OBConversion* pConv) { OBMol* pmol = pOb->CastAndClear<OBMol>(); if(pmol==NULL) return false; //Define some references so we can use the old parameter names istream &ifs = *pConv->GetInStream(); OBMol &mol = *pmol; //Old code follows... bool foundAtomLine = false; char buffer[BUFF_SIZE]; char *comment = NULL; string str,str1; vector<string> vstr; int len; mol.BeginModify(); for (;;) { if (!ifs.getline(buffer,BUFF_SIZE)) return(false); if (EQn(buffer,"@<TRIPOS>MOLECULE",17)) break; } // OK, just read MOLECULE line int lcount; int natoms,nbonds; bool hasPartialCharges = true; for (lcount=0;;lcount++) { if (!ifs.getline(buffer,BUFF_SIZE)) return(false); if (EQn(buffer,"@<TRIPOS>ATOM",13)) { foundAtomLine = true; break; } if (lcount == 0) { tokenize(vstr,buffer); if (!vstr.empty()) mol.SetTitle(buffer); } else if (lcount == 1) sscanf(buffer,"%d%d",&natoms,&nbonds); else if (lcount == 3) // charge descriptions { // Annotate origin of partial charges OBPairData *dp = new OBPairData; dp->SetAttribute("PartialCharges"); dp->SetValue(buffer); dp->SetOrigin(fileformatInput); mol.SetData(dp); if (strncasecmp(buffer, "NO_CHARGES", 10) == 0) hasPartialCharges = false; } else if (lcount == 4) //energy (?) { tokenize(vstr,buffer); if (!vstr.empty() && vstr.size() == 3) if (vstr[0] == "Energy") mol.SetEnergy(atof(vstr[2].c_str())); } else if (lcount == 5) //comment { if ( buffer[0] ) { len = (int) strlen(buffer)+1; //! @todo allow better multi-line comments // which don't allow ill-formed data to consume memory // Thanks to Andrew Dalke for the pointer if (comment != NULL) delete [] comment; comment = new char [len]; memcpy(comment,buffer,len); } } } if (!foundAtomLine) { mol.EndModify(); mol.Clear(); obErrorLog.ThrowError(__FUNCTION__, "Unable to read Mol2 format file. 
No atoms found.", obWarning); return(false); } mol.ReserveAtoms(natoms); int i; vector3 v; OBAtom atom; double x,y,z,pcharge; char temp_type[BUFF_SIZE], resname[BUFF_SIZE], atmid[BUFF_SIZE]; int elemno, resnum = -1; ttab.SetFromType("SYB"); for (i = 0;i < natoms;i++) { if (!ifs.getline(buffer,BUFF_SIZE)) return(false); sscanf(buffer," %*s %1024s %lf %lf %lf %1024s %d %1024s %lf", atmid, &x,&y,&z, temp_type, &resnum, resname, &pcharge); atom.SetVector(x, y, z); // Handle "CL" and "BR" and other mis-typed atoms str = temp_type; if (strncmp(temp_type, "CL", 2) == 0) { str = "Cl"; } else if (strncmp(temp_type,"BR",2) == 0) { str = "Br"; } else if (strncmp(temp_type,"S.o2", 4) == 02) { str = "S.O2"; } else if (strncmp(temp_type,"S.o", 3) == 0) { str = "S.O"; } else if (strncmp(temp_type,"SI", 2) == 0) { str = "Si"; // The following cases are entries which are not in openbabel/data/types.txt // and should probably be added there } else if (strncmp(temp_type,"S.1", 3) == 0) { str = "S.2"; // no idea what the best type might be here } else if (strncmp(temp_type,"P.", 2) == 0) { str = "P.3"; } else if (strncasecmp(temp_type,"Ti.", 3) == 0) { // e.g. Ti.th str = "Ti"; } else if (strncasecmp(temp_type,"Ru.", 3) == 0) { // e.g. Ru.oh str = "Ru"; } ttab.SetToType("ATN"); ttab.Translate(str1,str); elemno = atoi(str1.c_str()); ttab.SetToType("IDX"); // We might have missed some SI or FE type things above, so here's // another check if( !elemno && isupper(temp_type[1]) ) { temp_type[1] = (char)tolower(temp_type[1]); str = temp_type; ttab.Translate(str1,str); elemno = atoi(str1.c_str()); } // One last check if there isn't a period in the type, // it's a malformed atom type, but it may be the element symbol // GaussView does this (PR#1739905) if ( !elemno ) { obErrorLog.ThrowError(__FUNCTION__, "This Mol2 file is non-standard. 
Cannot interpret atom types correctly, instead attempting to interpret as elements instead.", obWarning); string::size_type dotPos = str.find('.'); if (dotPos == string::npos) { elemno = etab.GetAtomicNum(str.c_str()); } } atom.SetAtomicNum(elemno); ttab.SetToType("INT"); ttab.Translate(str1,str); atom.SetType(str1); atom.SetPartialCharge(pcharge); if (!mol.AddAtom(atom)) return(false); if (!IsNearZero(pcharge)) hasPartialCharges = true; // Add residue information if it exists if (resnum != -1 && resnum != 0 && strlen(resname) != 0 && strncmp(resname,"<1>", 3) != 0) { OBResidue *res = (mol.NumResidues() > 0) ? mol.GetResidue(mol.NumResidues()-1) : NULL; if (res == NULL || res->GetName() != resname || static_cast<int>(res->GetNum()) != resnum) { vector<OBResidue*>::iterator ri; for (res = mol.BeginResidue(ri) ; res ; res = mol.NextResidue(ri)) if (res->GetName() == resname && static_cast<int>(res->GetNum()) == resnum) break; if (res == NULL) { res = mol.NewResidue(); res->SetName(resname); res->SetNum(resnum); } } OBAtom *atomPtr = mol.GetAtom(mol.NumAtoms()); res->AddAtom(atomPtr); res->SetAtomID(atomPtr, atmid); } // end adding residue info } for (;;) { if (!ifs.getline(buffer,BUFF_SIZE)) return(false); str = buffer; if (!strncmp(buffer,"@<TRIPOS>BOND",13)) break; } int start,end,order; for (i = 0; i < nbonds; i++) { if (!ifs.getline(buffer,BUFF_SIZE)) return(false); sscanf(buffer,"%*d %d %d %1024s",&start,&end,temp_type); str = temp_type; order = 1; if (str == "ar" || str == "AR" || str == "Ar") order = 5; else if (str == "AM" || str == "am" || str == "Am") order = 1; else order = atoi(str.c_str()); mol.AddBond(start,end,order); } // Suggestion by Liu Zhiguo 2008-01-26 // Mol2 files define atom types -- there is no need to re-perceive mol.SetAtomTypesPerceived(); mol.EndModify(); //must add generic data after end modify - otherwise it will be blown away if (comment) { OBCommentData *cd = new OBCommentData; cd->SetData(comment); cd->SetOrigin(fileformatInput); 
mol.SetData(cd); delete [] comment; comment = NULL; } if (hasPartialCharges) mol.SetPartialChargesPerceived(); /* Disabled due to PR#3048758 -- seekg is very slow with gzipped mol2 // continue untill EOF or untill next molecule record streampos pos; for(;;) { pos = ifs.tellg(); if (!ifs.getline(buffer,BUFF_SIZE)) break; if (EQn(buffer,"@<TRIPOS>MOLECULE",17)) break; } ifs.seekg(pos); // go back to the end of the molecule */ return(true); }
bool MacroModFormat::ReadMolecule(OBBase* pOb, OBConversion* pConv) { OBMol* pmol = pOb->CastAndClear<OBMol>(); if(pmol==NULL) return false; //Define some references so we can use the old parameter names istream &ifs = *pConv->GetInStream(); OBMol &mol = *pmol; const char* defaultTitle = pConv->GetTitle(); // Get Title char buffer[BUFF_SIZE]; int natoms; vector<vector<pair<int,int> > > connections; if (ifs.getline(buffer,BUFF_SIZE)) { vector<string> vs; tokenize(vs,buffer," \n"); if ( !vs.empty() && vs.size() > 0) sscanf(buffer,"%i%*s",&natoms); if (natoms == 0) return false; if ( !vs.empty() && vs.size() > 1) mol.SetTitle(vs[1]); else { string s = defaultTitle; mol.SetTitle(defaultTitle); } } else return(false); mol.BeginModify(); mol.ReserveAtoms(natoms); connections.resize(natoms+1); /***********************************************************************/ // Get Type Bonds, BondOrder, X, Y, Z double x,y,z; vector3 v; char temp_type[10]; int i,j; double charge; OBAtom atom; ttab.SetFromType("MMD"); for (i = 1; i <= natoms; i++) { if (!ifs.getline(buffer,BUFF_SIZE)) break; int end[6], order[6]; sscanf(buffer,"%9s%d%d%d%d%d%d%d%d%d%d%d%d%lf%lf%lf", temp_type,&end[0],&order[0],&end[1],&order[1],&end[2],&order[2], &end[3], &order[3], &end[4], &order[4], &end[5], &order[5], &x, &y, &z); pair<int,int> tmp; for ( j = 0 ; j <=5 ; j++ ) { if ( end[j] > 0 && end[j] > i) { tmp.first = end[j]; tmp.second = order[j]; connections[i].push_back(tmp); } } v.SetX(x); v.SetY(y); v.SetZ(z); atom.SetVector(v); string str = temp_type,str1; ttab.SetToType("ATN"); ttab.Translate(str1,str); atom.SetAtomicNum(atoi(str1.c_str())); ttab.SetToType("INT"); ttab.Translate(str1,str); atom.SetType(str1); // stuff for optional fields buffer[109]='\0'; sscanf(&buffer[101],"%lf", &charge); atom.SetPartialCharge(charge); mol.AddAtom(atom); } for (i = 1; i <= natoms; i++) for (j = 0; j < (signed)connections[i].size(); j++) mol.AddBond(i, connections[i][j].first, connections[i][j].second); 
mol.EndModify(); mol.SetPartialChargesPerceived(); // Annotate origin of partial charges OBPairData *dp = new OBPairData; dp->SetAttribute("PartialCharges"); dp->SetValue("MACROMODEL"); dp->SetOrigin(fileformatInput); mol.SetData(dp); OBBond *bond; vector<OBBond*>::iterator bi; for (bond = mol.BeginBond(bi);bond;bond = mol.NextBond(bi)) if (bond->GetBondOrder() == 5 && !bond->IsInRing()) bond->SetBondOrder(1); if ( natoms != (signed)mol.NumAtoms() ) return(false); // clean out remaining blank lines std::streampos ipos; do { ipos = ifs.tellg(); ifs.getline(buffer,BUFF_SIZE); } while(strlen(buffer) == 0 && !ifs.eof() ); ifs.seekg(ipos); return(true); }