/** Method reads partial charges from input stream (ifs) and writes them to supplied OBMol object (molecule) Input stream must be set to begining of charges table in nwo file. (Line after "Mulliken analysis of the total density") Stream will be set at next line after charges table. If reading charges failed or "molecule" contains data incompatible with read charges then "molecule" wont be changed. */ void NWChemOutputFormat::ReadPartialCharges(istream* ifs, OBMol* molecule) { if ((molecule == NULL) || (ifs == NULL)) return; vector<string> vs; char buffer[BUFF_SIZE]; bool from_scratch = false; vector<int> charges; vector<double> partial_charges; unsigned int natoms = molecule->NumAtoms(); if (natoms == 0) from_scratch = true; ifs->getline(buffer,BUFF_SIZE); // ---- ----- ---- ifs->getline(buffer,BUFF_SIZE); // blank ifs->getline(buffer,BUFF_SIZE); // column headings ifs->getline(buffer,BUFF_SIZE); // ---- ----- ---- ifs->getline(buffer,BUFF_SIZE); tokenize(vs, buffer); // N Symbol Charge PartialCharge+Charge ShellCharges // 0 1 2 3 4,etc unsigned int i = 1; while (vs.size() >= 4) { int charge = atoi(vs[2].c_str()); if (!from_scratch) { if (i > natoms) return; if (molecule->GetAtom(i++)->GetAtomicNum() != charge) return; } else charges.push_back(charge); partial_charges.push_back(atof(vs[3].c_str()) - charge); ifs->getline(buffer,BUFF_SIZE); tokenize(vs, buffer); } if (from_scratch) molecule->ReserveAtoms(partial_charges.size()); else if (partial_charges.size() != natoms) return; for(unsigned int j=0;j<partial_charges.size();j++) { OBAtom* atom; if (from_scratch) { atom = molecule->NewAtom(); atom->SetAtomicNum(charges[j]); } else { atom = molecule->GetAtom(j+1); } atom->SetPartialCharge(partial_charges[j]); } }
bool MOL2Format::ReadMolecule(OBBase* pOb, OBConversion* pConv) { OBMol* pmol = pOb->CastAndClear<OBMol>(); if(pmol==NULL) return false; //Define some references so we can use the old parameter names istream &ifs = *pConv->GetInStream(); OBMol &mol = *pmol; //Old code follows... bool foundAtomLine = false; char buffer[BUFF_SIZE]; char *comment = NULL; string str,str1; vector<string> vstr; int len; mol.BeginModify(); for (;;) { if (!ifs.getline(buffer,BUFF_SIZE)) return(false); if (EQn(buffer,"@<TRIPOS>MOLECULE",17)) break; } int lcount; int natoms,nbonds; for (lcount=0;;lcount++) { if (!ifs.getline(buffer,BUFF_SIZE)) return(false); if (EQn(buffer,"@<TRIPOS>ATOM",13)) { foundAtomLine = true; break; } if (lcount == 0) { tokenize(vstr,buffer); if (!vstr.empty()) mol.SetTitle(buffer); } else if (lcount == 1) sscanf(buffer,"%d%d",&natoms,&nbonds); else if (lcount == 4) //energy { tokenize(vstr,buffer); if (!vstr.empty() && vstr.size() == 3) if (vstr[0] == "Energy") mol.SetEnergy(atof(vstr[2].c_str())); } else if (lcount == 5) //comment { if ( buffer[0] ) { len = (int) strlen(buffer)+1; // TODO allow better multi-line comments // which don't allow ill-formed data to consume memory // Thanks to Andrew Dalke for the pointer if (comment != NULL) delete [] comment; comment = new char [len]; memcpy(comment,buffer,len); } } } if (!foundAtomLine) { mol.EndModify(); mol.Clear(); obErrorLog.ThrowError(__FUNCTION__, "Unable to read Mol2 format file. 
No atoms found.", obWarning); return(false); } mol.ReserveAtoms(natoms); int i; vector3 v; OBAtom atom; bool hasPartialCharges=false; double x,y,z,pcharge; char temp_type[BUFF_SIZE], resname[BUFF_SIZE], atmid[BUFF_SIZE]; int elemno, resnum = -1; ttab.SetFromType("SYB"); for (i = 0;i < natoms;i++) { if (!ifs.getline(buffer,BUFF_SIZE)) return(false); sscanf(buffer," %*s %1024s %lf %lf %lf %1024s %d %1024s %lf", atmid, &x,&y,&z, temp_type, &resnum, resname, &pcharge); atom.SetVector(x, y, z); // Handle "CL" and "BR" and other mis-typed atoms str = temp_type; if (strncmp(temp_type, "CL", 2) == 0) { str = "Cl"; } else if (strncmp(temp_type,"BR",2) == 0) { str = "Br"; } else if (strncmp(temp_type,"S.o2", 4) == 02) { str = "S.O2"; } else if (strncmp(temp_type,"S.o", 3) == 0) { str = "S.O"; } else if (strncmp(temp_type,"SI", 2) == 0) { str = "Si"; // The following cases are entries which are not in openbabel/data/types.txt // and should probably be added there } else if (strncmp(temp_type,"S.1", 3) == 0) { str = "S.2"; // no idea what the best type might be here } else if (strncmp(temp_type,"P.", 2) == 0) { str = "P.3"; } else if (strncasecmp(temp_type,"Ti.", 3) == 0) { // e.g. Ti.th str = "Ti"; } else if (strncasecmp(temp_type,"Ru.", 3) == 0) { // e.g. Ru.oh str = "Ru"; } ttab.SetToType("ATN"); ttab.Translate(str1,str); elemno = atoi(str1.c_str()); ttab.SetToType("IDX"); // We might have missed some SI or FE type things above, so here's // another check if( !elemno && isupper(temp_type[1]) ) { temp_type[1] = (char)tolower(temp_type[1]); str = temp_type; ttab.Translate(str1,str); elemno = atoi(str1.c_str()); } // One last check if there isn't a period in the type, // it's a malformed atom type, but it may be the element symbol // GaussView does this (PR#1739905) if ( !elemno ) { obErrorLog.ThrowError(__FUNCTION__, "This Mol2 file is non-standard. 
Cannot interpret atom types correctly, instead attempting to interpret as elements instead.", obWarning); string::size_type dotPos = str.find('.'); if (dotPos == string::npos) { elemno = etab.GetAtomicNum(str.c_str()); } } atom.SetAtomicNum(elemno); ttab.SetToType("INT"); ttab.Translate(str1,str); atom.SetType(str1); atom.SetPartialCharge(pcharge); if (!mol.AddAtom(atom)) return(false); if (!IsNearZero(pcharge)) hasPartialCharges = true; // Add residue information if it exists if (resnum != -1 && resnum != 0 && strlen(resname) != 0 && strncmp(resname,"<1>", 3) != 0) { OBResidue *res = (mol.NumResidues() > 0) ? mol.GetResidue(mol.NumResidues()-1) : NULL; if (res == NULL || res->GetName() != resname || static_cast<int>(res->GetNum()) != resnum) { vector<OBResidue*>::iterator ri; for (res = mol.BeginResidue(ri) ; res ; res = mol.NextResidue(ri)) if (res->GetName() == resname && static_cast<int>(res->GetNum()) == resnum) break; if (res == NULL) { res = mol.NewResidue(); res->SetName(resname); res->SetNum(resnum); } } OBAtom *atomPtr = mol.GetAtom(mol.NumAtoms()); res->AddAtom(atomPtr); res->SetAtomID(atomPtr, atmid); } // end adding residue info } for (;;) { if (!ifs.getline(buffer,BUFF_SIZE)) return(false); str = buffer; if (!strncmp(buffer,"@<TRIPOS>BOND",13)) break; } int start,end,order; for (i = 0; i < nbonds; i++) { if (!ifs.getline(buffer,BUFF_SIZE)) return(false); sscanf(buffer,"%*d %d %d %1024s",&start,&end,temp_type); str = temp_type; order = 1; if (str == "ar" || str == "AR" || str == "Ar") order = 5; else if (str == "AM" || str == "am" || str == "Am") order = 1; else order = atoi(str.c_str()); mol.AddBond(start,end,order); } // update neighbour bonds information for each atom. 
vector<OBAtom*>::iterator apos; vector<OBBond*>::iterator bpos; OBAtom* patom; OBBond* pbond; for (patom = mol.BeginAtom(apos); patom; patom = mol.NextAtom(apos)) { patom->ClearBond(); for (pbond = mol.BeginBond(bpos); pbond; pbond = mol.NextBond(bpos)) { if (patom == pbond->GetBeginAtom() || patom == pbond->GetEndAtom()) { patom->AddBond(pbond); } } } // Suggestion by Liu Zhiguo 2008-01-26 // Mol2 files define atom types -- there is no need to re-perceive mol.SetAtomTypesPerceived(); mol.EndModify(); //must add generic data after end modify - otherwise it will be blown away if (comment) { OBCommentData *cd = new OBCommentData; cd->SetData(comment); cd->SetOrigin(fileformatInput); mol.SetData(cd); delete [] comment; comment = NULL; } if (hasPartialCharges) mol.SetPartialChargesPerceived(); // continue untill EOF or untill next molecule record streampos pos; for(;;) { pos = ifs.tellg(); if (!ifs.getline(buffer,BUFF_SIZE)) break; if (EQn(buffer,"@<TRIPOS>MOLECULE",17)) break; } ifs.seekg(pos); // go back to the end of the molecule return(true); }
bool MacroModFormat::ReadMolecule(OBBase* pOb, OBConversion* pConv) { OBMol* pmol = pOb->CastAndClear<OBMol>(); if(pmol==NULL) return false; //Define some references so we can use the old parameter names istream &ifs = *pConv->GetInStream(); OBMol &mol = *pmol; const char* defaultTitle = pConv->GetTitle(); // Get Title char buffer[BUFF_SIZE]; int natoms; vector<vector<pair<int,int> > > connections; if (ifs.getline(buffer,BUFF_SIZE)) { vector<string> vs; tokenize(vs,buffer," \n"); if ( !vs.empty() && vs.size() > 0) sscanf(buffer,"%i%*s",&natoms); if (natoms == 0) return false; if ( !vs.empty() && vs.size() > 1) mol.SetTitle(vs[1]); else { string s = defaultTitle; mol.SetTitle(defaultTitle); } } else return(false); mol.BeginModify(); mol.ReserveAtoms(natoms); connections.resize(natoms+1); /***********************************************************************/ // Get Type Bonds, BondOrder, X, Y, Z double x,y,z; vector3 v; char temp_type[10]; int i,j; double charge; OBAtom atom; ttab.SetFromType("MMD"); for (i = 1; i <= natoms; i++) { if (!ifs.getline(buffer,BUFF_SIZE)) break; int end[6], order[6]; sscanf(buffer,"%9s%d%d%d%d%d%d%d%d%d%d%d%d%lf%lf%lf", temp_type,&end[0],&order[0],&end[1],&order[1],&end[2],&order[2], &end[3], &order[3], &end[4], &order[4], &end[5], &order[5], &x, &y, &z); pair<int,int> tmp; for ( j = 0 ; j <=5 ; j++ ) { if ( end[j] > 0 && end[j] > i) { tmp.first = end[j]; tmp.second = order[j]; connections[i].push_back(tmp); } } v.SetX(x); v.SetY(y); v.SetZ(z); atom.SetVector(v); string str = temp_type,str1; ttab.SetToType("ATN"); ttab.Translate(str1,str); atom.SetAtomicNum(atoi(str1.c_str())); ttab.SetToType("INT"); ttab.Translate(str1,str); atom.SetType(str1); // stuff for optional fields buffer[109]='\0'; sscanf(&buffer[101],"%lf", &charge); atom.SetPartialCharge(charge); mol.AddAtom(atom); } for (i = 1; i <= natoms; i++) for (j = 0; j < (signed)connections[i].size(); j++) mol.AddBond(i, connections[i][j].first, connections[i][j].second); 
mol.EndModify(); OBBond *bond; vector<OBBond*>::iterator bi; for (bond = mol.BeginBond(bi);bond;bond = mol.NextBond(bi)) if (bond->GetBO() == 5 && !bond->IsInRing()) bond->SetBO(1); if ( natoms != (signed)mol.NumAtoms() ) return(false); // clean out remaining blank lines while(ifs.peek() != EOF && ifs.good() && (ifs.peek() == '\n' || ifs.peek() == '\r')) ifs.getline(buffer,BUFF_SIZE); return(true); }
bool CARFormat::ReadMolecule(OBBase* pOb, OBConversion* pConv) { OBMol* pmol = pOb->CastAndClear<OBMol>(); if(pmol==NULL) return false; //Define some references so we can use the old parameter names istream &ifs = *pConv->GetInStream(); OBMol &mol = *pmol; const char* title = pConv->GetTitle(); bool hasPartialCharges = false; char buffer[BUFF_SIZE]; string str; double x,y,z; OBAtom *atom; vector<string> vs; mol.BeginModify(); while (ifs.getline(buffer,BUFF_SIZE)) { if(strstr(buffer,"end") != NULL) { if (mol.NumAtoms() > 0) // we've already read in a molecule, so exit break; // else, we hit the end of the previous molecular system // (in a multimolecule file) ifs.getline(buffer,BUFF_SIZE); // title ifs.getline(buffer,BUFF_SIZE); // DATE } if (strncmp(buffer, "!BIOSYM", 7) == 0) { continue; } if(strstr(buffer,"PBC") != NULL) { if(strstr(buffer,"ON") != NULL) { ifs.getline(buffer,BUFF_SIZE); // title ifs.getline(buffer,BUFF_SIZE); // DATE ifs.getline(buffer,BUFF_SIZE); // PBC a b c alpha beta gamma SG // parse cell parameters tokenize(vs,buffer); if (vs.size() == 8) { //parse cell values double A,B,C,Alpha,Beta,Gamma; A = atof((char*)vs[1].c_str()); B = atof((char*)vs[2].c_str()); C = atof((char*)vs[3].c_str()); Alpha = atof((char*)vs[4].c_str()); Beta = atof((char*)vs[5].c_str()); Gamma = atof((char*)vs[6].c_str()); OBUnitCell *uc = new OBUnitCell; uc->SetOrigin(fileformatInput); uc->SetData(A, B, C, Alpha, Beta, Gamma); uc->SetSpaceGroup(vs[7]); mol.SetData(uc); } } else // PBC=OFF { ifs.getline(buffer,BUFF_SIZE); // title ifs.getline(buffer,BUFF_SIZE); // !DATE } continue; } // PBC // reading real data! 
tokenize(vs,buffer); if (vs.size() < 8) { break; } atom = mol.NewAtom(); atom->SetAtomicNum(etab.GetAtomicNum(vs[7].c_str())); x = atof((char*)vs[1].c_str()); y = atof((char*)vs[2].c_str()); z = atof((char*)vs[3].c_str()); atom->SetVector(x,y,z); // vs[0] contains atom label // vs[4] contains "type of residue containing atom" // vs[5] contains "residue sequence name" // vs[6] contains "potential type of atom" if (vs.size() == 9) { atom->SetPartialCharge(atof((char*)vs[8].c_str())); hasPartialCharges = true; } } if (!pConv->IsOption("b",OBConversion::INOPTIONS)) mol.ConnectTheDots(); if (!pConv->IsOption("s",OBConversion::INOPTIONS) && !pConv->IsOption("b",OBConversion::INOPTIONS)) mol.PerceiveBondOrders(); mol.EndModify(); if (hasPartialCharges) mol.SetPartialChargesPerceived(); mol.SetTitle(title); return(true); }
//! \return whether partial charges were successfully assigned to this molecule bool EQEqCharges::ComputeCharges(OBMol &mol) { int i, j, a, c, N = mol.NumAtoms(); double cellVolume; VectorXf chi(N), J(N), b(N), x(N); MatrixXf J_ij(N, N), A(N, N); OBUnitCell *obuc; matrix3x3 unitcell, fourier; vector3 dx; int numNeighbors[3]; OBAtom *atom; // If parameters have not yet been loaded, do that if (!_paramFileLoaded) { if (ParseParamFile()) { _paramFileLoaded = true; } else { return false; } } // Calculate atomic properties based around their ionic charge for (i = 0; i < N; i++) { atom = mol.GetAtom(i + 1); a = atom->GetAtomicNum(); c = _chargeCenter[a]; // Fail if ionization data is missing for any atom in the molecule if (_ionizations[a][c + 1] == -1 || _ionizations[a][c] == -1 || a > TABLE_OF_ELEMENTS_SIZE) { obErrorLog.ThrowError(__FUNCTION__, "Insufficient ionization data for atoms in the given molecule. Update `data/eqeqIonizations.txt` with missing information and re-run this function.", obError); return false; } J(i) = _ionizations[a][c + 1] - _ionizations[a][c]; chi(i) = 0.5 * (_ionizations[a][c + 1] + _ionizations[a][c]) - (a == 1? 
0 : c * J(i)); } // If a unit cell is defined, use the periodic Ewald calculation if (mol.HasData(OBGenericDataType::UnitCell)) { // Get unit cell and calculate its Fourier transform + volume obuc = (OBUnitCell *) mol.GetData(OBGenericDataType::UnitCell); unitcell = obuc->GetCellMatrix(); fourier = (2 * PI * unitcell.inverse()).transpose(); cellVolume = obuc->GetCellVolume(); // Get the number of radial unit cells to use in x, y, and z numNeighbors[0] = int(ceil(minCellLength / (2.0 * (obuc->GetA())))) - 1; numNeighbors[1] = int(ceil(minCellLength / (2.0 * (obuc->GetB())))) - 1; numNeighbors[2] = int(ceil(minCellLength / (2.0 * (obuc->GetC())))) - 1; for (i = 0; i < N; i++) { atom = mol.GetAtom(i + 1); for (j = 0; j < N; j++) { dx = atom->GetVector() - (mol.GetAtom(j + 1))->GetVector(); J_ij(i, j) = GetPeriodicEwaldJij(J(i), J(j), dx, (i == j), unitcell, fourier, cellVolume, numNeighbors); } } // If no unit cell, use the simplified nonperiodic calculation } else { for (i = 0; i < N; i++) { atom = mol.GetAtom(i + 1); for (j = 0; j < N; j++) { J_ij(i, j) = GetNonperiodicJij(J(i), J(j), atom->GetDistance(j + 1), (i == j)); } return false; } } // Formulate problem as A x = b, where x is the calculated partial charges // First equation is a simple overall balance: sum(Q) = 0 A.row(0) = VectorXf::Ones(N); b(0) = 0; // Remaining equations are based off of the fact that, at equilibrium, the // energy of the system changes equally for a change in any charge: // dE/dQ_1 = dE/dQ_2 = ... 
= dE/dQ_N A.block(1, 0, N - 1, N) = J_ij.block(0, 0, N - 1, N) - J_ij.block(1, 0, N - 1, N); b.tail(N - 1) = chi.tail(N - 1) - chi.head(N - 1); // The solution is a list of charges in the system x = A.colPivHouseholderQr().solve(b); // Now we are done calculating, pass all this back to OpenBabel molecule mol.SetPartialChargesPerceived(); OBPairData *dp = new OBPairData; dp->SetAttribute("PartialCharges"); dp->SetValue("EQEq"); dp->SetOrigin(perceived); mol.SetData(dp); m_partialCharges.clear(); m_partialCharges.reserve(N); m_formalCharges.clear(); m_formalCharges.reserve(N); for (i = 0; i < N; i ++) { atom = mol.GetAtom(i + 1); atom->SetPartialCharge(x(i)); m_partialCharges.push_back(x(i)); m_formalCharges.push_back(atom->GetFormalCharge()); } obErrorLog.ThrowError(__FUNCTION__, "EQEq charges successfully assigned.", obInfo); return true; }
//! Assigns EEM partial charges to every atom of \p mol.
//!
//! For each atom a parameter set is looked up by (atomic number, highest
//! bond order), falling back to entries whose bond order and/or atomic
//! number is -1. The (N+1)x(N+1) matrix ETA and right-hand side CHI are
//! assembled and passed to _solveMatrix(); afterwards the first N entries
//! of CHI are stored as the atoms' partial charges.
//!
//! \param mol molecule to charge
//! \return true on success; false when no parameter set matches some atom
bool EEMCharges::ComputeCharges(OBMol &mol)
{
  mol.SetPartialChargesPerceived();

  if (_parameters.empty())
    _loadParameters();

  // Copied from spectrophore.cpp
  // CHI and ETA
  unsigned int _nAtoms = mol.NumAtoms();
  unsigned int dim(_nAtoms + 1);
  std::vector<double> CHI(dim);

  // The matrix is owned by vectors so that every return path, including the
  // "no parameters" failure below, releases it. _solveMatrix() takes a
  // double**, hence the separate table of row pointers.
  std::vector<std::vector<double> > etaRows(dim, std::vector<double>(dim, 0.0));
  std::vector<double*> ETA(dim);
  for (unsigned int k = 0; k < dim; ++k)
    ETA[k] = &etaRows[k][0];

  double totalCharge(0.0);
  unsigned int i(0);
  double hardness = 0.0;
  double electronegativity = 0.0;
  for (OpenBabel::OBMolAtomIter atom(mol); atom; atom++, i++)
    {
      int n = atom->GetAtomicNum();
      int b = atom->HighestBondOrder();

      // Search for parameters for a particular atom type
      bool found = false;
      for (unsigned int j = 0; j < _parameters.size(); j++)
        {
          if ((_parameters[j].Z == n && _parameters[j].bond_order == b) ||
              (_parameters[j].Z == n && _parameters[j].bond_order == -1) ||
              (_parameters[j].Z == -1 && _parameters[j].bond_order == -1))
            {
              electronegativity = _parameters[j].A;
              hardness = _parameters[j].B;
              found = true;
              break;
            }
        }

      if (!found)
        {
          std::stringstream ss;
          ss << "No parameters found for: " << etab.GetSymbol(n) << " " << b
             << ". EEM charges were not calculated for the molecule." << std::endl;
          obErrorLog.ThrowError(__FUNCTION__, ss.str(), obError);
          return false;
        }

      CHI[i] = -electronegativity;
      ETA[i][i] = hardness;

      // Adjust the total molecular charge
      totalCharge += atom->GetFormalCharge();
    }

  // Complete CHI
  CHI[_nAtoms] = totalCharge;

  // Complete ETA
  OBAtom *rAtom, *cAtom;
  for (unsigned int r = 0; r < _nAtoms; ++r)
    {
      rAtom = mol.GetAtom(r + 1); // Atom index
      for (unsigned int c = r + 1; c < _nAtoms; ++c)
        {
          cAtom = mol.GetAtom(c + 1); // Atom index
          ETA[r][c] = _kappa / cAtom->GetDistance(rAtom);
          ETA[c][r] = ETA[r][c];
        }
    }
  // Last column / last row couple every atom to the total-charge constraint.
  for (unsigned int k = 0; k < dim; ++k)
    {
      ETA[k][_nAtoms] = -1.0;
      ETA[_nAtoms][k] = +1.0;
    }
  ETA[_nAtoms][_nAtoms] = 0.0;

  // Solve the matrix equation
  _solveMatrix(&ETA[0], &(CHI[0]), dim);    // CHI will contain the values

  OBAtom *atom;
  for (unsigned int k = 0; k < _nAtoms; ++k)
    {
      atom = mol.GetAtom(k + 1); // atom index issue
      atom->SetPartialCharge(CHI[k]);
    }

  OBChargeModel::FillChargeVectors(mol);

  return true;
}
bool HINFormat::ReadMolecule(OBBase* pOb, OBConversion* pConv) { OBMol* pmol = pOb->CastAndClear<OBMol>(); if(pmol==NULL) return false; //Define some references so we can use the old parameter names istream &ifs = *pConv->GetInStream(); OBMol &mol = *pmol; const char* title = pConv->GetTitle(); // Right now only read in the first molecule int i; int max, bo; char buffer[BUFF_SIZE]; string str,str1; double x,y,z; OBAtom *atom; vector<string> vs; ifs.getline(buffer, BUFF_SIZE); while (ifs.good() && (strstr(buffer,"mol") == NULL || buffer[0]==';') ) //The "mol" in comment line should be ignored. { ifs.getline(buffer, BUFF_SIZE); if (ifs.peek() == EOF || !ifs.good()) return false; } ifs.getline(buffer, BUFF_SIZE); if (!ifs.good()) return false; // ended early mol.BeginModify(); while (ifs.good() && strstr(buffer,"endmol") == NULL) { if(buffer[0]==';'){ ifs.getline(buffer, BUFF_SIZE); continue; //The comment Line in HIN should be ignored. } tokenize(vs,buffer); // Don't really know how long it'll be if (vs.size() < 11) { ifs.getline(buffer, BUFF_SIZE); continue; } atom = mol.NewAtom(); atom->SetAtomicNum(etab.GetAtomicNum(vs[3].c_str())); atom->SetPartialCharge(atof(vs[6].c_str())); x = atof((char*)vs[7].c_str()); y = atof((char*)vs[8].c_str()); z = atof((char*)vs[9].c_str()); atom->SetVector(x,y,z); max = 11 + 2 * atoi((char *)vs[10].c_str()); for (i = 11; i < max; i+=2) { switch(((char*)vs[i+1].c_str())[0]) // First char in next token { case 's': bo = 1; break; case 'd': bo = 2; break; case 't': bo = 3; break; case 'a': bo = 5; break; default : bo = 1; break; } mol.AddBond(mol.NumAtoms(), atoi((char *)vs[i].c_str()), bo); } ifs.getline(buffer, BUFF_SIZE); } // clean out remaining blank lines while(ifs.peek() != EOF && ifs.good() && (ifs.peek() == '\n' || ifs.peek() == '\r')) ifs.getline(buffer,BUFF_SIZE); mol.EndModify(); mol.SetTitle(title); mol.SetPartialChargesPerceived(); return(true); }
// Reading Gaussian output has been tested for G98 and G03 to some degree // If you have problems (or examples of older output), please contact // the [email protected] mailing list and/or post a bug bool GaussianOutputFormat::ReadMolecule(OBBase* pOb, OBConversion* pConv) { OBMol* pmol = pOb->CastAndClear<OBMol>(); if(pmol==NULL) return false; //Define some references so we can use the old parameter names istream &ifs = *pConv->GetInStream(); OBMol &mol = *pmol; const char* title = pConv->GetTitle(); char buffer[BUFF_SIZE]; string str,str1,str2,thermo_method; double x,y,z; OBAtom *atom; vector<string> vs,vs2; int total_charge = 0; unsigned int spin_multiplicity = 1; bool hasPartialCharges = false; string chargeModel; // descriptor for charges (e.g. "Mulliken") // Variable for G2/G3/G4 etc. calculations double ezpe,Hcorr,Gcorr,E0,CV; bool ezpe_set=false,Hcorr_set=false,Gcorr_set=false,E0_set=false,CV_set=false; double temperature = 0; /* Kelvin */ std::vector<double> Scomponents; // Electrostatic potential OBFreeGrid *esp = NULL; // coordinates of all steps // Set conformers to all coordinates we adopted std::vector<double*> vconf; // index of all frames/conformers std::vector<double> coordinates; // coordinates in each frame int natoms = 0; // number of atoms -- ensure we don't go to a new job with a different molecule // OBConformerData stores information about multiple steps // we can change attribute later if needed (e.g., IRC) OBConformerData *confData = new OBConformerData(); confData->SetOrigin(fileformatInput); std::vector<unsigned short> confDimensions = confData->GetDimension(); // to be fair, set these all to 3D std::vector<double> confEnergies = confData->GetEnergies(); std::vector< std::vector< vector3 > > confForces = confData->GetForces(); //Vibrational data std::vector< std::vector< vector3 > > Lx; std::vector<double> Frequencies, Intensities; //Rotational data std::vector<double> RotConsts(3); int RotSymNum=1; OBRotationData::RType RotorType = 
OBRotationData::UNKNOWN; // Translation vectors (if present) vector3 translationVectors[3]; int numTranslationVectors = 0; //Electronic Excitation data std::vector<double> Forces, Wavelengths, EDipole, RotatoryStrengthsVelocity, RotatoryStrengthsLength; // Orbital data std::vector<double> orbitals; std::vector<std::string> symmetries; int aHOMO, bHOMO, betaStart; aHOMO = bHOMO = betaStart = -1; int i=0; bool no_symmetry=false; char coords_type[25]; //Prescan file to find second instance of "orientation:" //This will be the kind of coords used in the chk/fchk file //Unless the "nosym" keyword has been requested while (ifs.getline(buffer,BUFF_SIZE)) { if (strstr(buffer,"Symmetry turned off by external request.") != NULL) { // The "nosym" keyword has been requested no_symmetry = true; } if (strstr(buffer,"orientation:") !=NULL) { i++; tokenize (vs, buffer); // gotta check what types of orientation are present strncpy (coords_type, vs[0].c_str(), 24); strcat (coords_type, " orientation:"); } if ((no_symmetry && i==1) || i==2) break; } // Reset end-of-file pointers etc. 
ifs.clear(); ifs.seekg(0); //rewind mol.BeginModify(); while (ifs.getline(buffer,BUFF_SIZE)) { if(strstr(buffer, "Entering Gaussian") != NULL) { //Put some metadata into OBCommentData string comment("Gaussian "); if(NULL != strchr(buffer,'=')) { comment += strchr(buffer,'=')+2; comment += ""; for(unsigned i=0; i<115 && ifs; ++i) { ifs.getline(buffer,BUFF_SIZE); if(strstr(buffer,"Revision") != NULL) { if (buffer[strlen(buffer)-1] == ',') { buffer[strlen(buffer)-1] = '\0'; } add_unique_pairdata_to_mol(&mol,"program",buffer,0); } else if(buffer[1]=='#') { //the line describing the method comment += buffer; OBCommentData *cd = new OBCommentData; cd->SetData(comment); cd->SetOrigin(fileformatInput); mol.SetData(cd); tokenize(vs,buffer); if (vs.size() > 1) { char *str = strdup(vs[1].c_str()); char *ptr = strchr(str,'/'); if (NULL != ptr) { *ptr = ' '; add_unique_pairdata_to_mol(&mol,"basis",ptr,0); *ptr = '\0'; add_unique_pairdata_to_mol(&mol,"method",str,0); } } break; } } } } else if (strstr(buffer,"Multiplicity") != NULL) { tokenize(vs, buffer, " \t\n"); if (vs.size() == 6) { total_charge = atoi(vs[2].c_str()); spin_multiplicity = atoi(vs[5].c_str()); } ifs.getline(buffer,BUFF_SIZE); } else if (strstr(buffer, coords_type) != NULL) { numTranslationVectors = 0; // ignore old translationVectors ifs.getline(buffer,BUFF_SIZE); // --------------- ifs.getline(buffer,BUFF_SIZE); // column headings ifs.getline(buffer,BUFF_SIZE); // column headings ifs.getline(buffer,BUFF_SIZE); // --------------- ifs.getline(buffer,BUFF_SIZE); tokenize(vs,buffer); while (vs.size()>4) { int corr = vs.size()==5 ? 
-1 : 0; //g94; later versions have an extra column x = atof((char*)vs[3+corr].c_str()); y = atof((char*)vs[4+corr].c_str()); z = atof((char*)vs[5+corr].c_str()); int atomicNum = atoi((char*)vs[1].c_str()); if (atomicNum > 0) // translation vectors are "-2" { if (natoms == 0) { // first time reading the molecule, create each atom atom = mol.NewAtom(); atom->SetAtomicNum(atoi((char*)vs[1].c_str())); } coordinates.push_back(x); coordinates.push_back(y); coordinates.push_back(z); } else { translationVectors[numTranslationVectors++].Set(x, y, z); } if (!ifs.getline(buffer,BUFF_SIZE)) { break; } tokenize(vs,buffer); } // done with reading atoms natoms = mol.NumAtoms(); if(natoms==0) return false; // malloc / memcpy double *tmpCoords = new double [(natoms)*3]; memcpy(tmpCoords, &coordinates[0], sizeof(double)*natoms*3); vconf.push_back(tmpCoords); coordinates.clear(); confDimensions.push_back(3); // always 3D -- OBConformerData allows mixing 2D and 3D structures } else if(strstr(buffer,"Dipole moment") != NULL) { ifs.getline(buffer,BUFF_SIZE); // actual components X ### Y #### Z ### tokenize(vs,buffer); if (vs.size() >= 6) { OBVectorData *dipoleMoment = new OBVectorData; dipoleMoment->SetAttribute("Dipole Moment"); double x, y, z; x = atof(vs[1].c_str()); y = atof(vs[3].c_str()); z = atof(vs[5].c_str()); dipoleMoment->SetData(x, y, z); dipoleMoment->SetOrigin(fileformatInput); mol.SetData(dipoleMoment); } if (!ifs.getline(buffer,BUFF_SIZE)) break; } else if(strstr(buffer,"Traceless Quadrupole moment") != NULL) { ifs.getline(buffer,BUFF_SIZE); // actual components XX ### YY #### ZZ ### tokenize(vs,buffer); ifs.getline(buffer,BUFF_SIZE); // actual components XY ### XZ #### YZ ### tokenize(vs2,buffer); if ((vs.size() >= 6) && (vs2.size() >= 6)) { double Q[3][3]; OpenBabel::OBMatrixData *quadrupoleMoment = new OpenBabel::OBMatrixData; Q[0][0] = atof(vs[1].c_str()); Q[1][1] = atof(vs[3].c_str()); Q[2][2] = atof(vs[5].c_str()); Q[1][0] = Q[0][1] = atof(vs2[1].c_str()); Q[2][0] 
= Q[0][2] = atof(vs2[3].c_str()); Q[2][1] = Q[1][2] = atof(vs2[5].c_str()); matrix3x3 quad(Q); quadrupoleMoment->SetAttribute("Traceless Quadrupole Moment"); quadrupoleMoment->SetData(quad); quadrupoleMoment->SetOrigin(fileformatInput); mol.SetData(quadrupoleMoment); } if (!ifs.getline(buffer,BUFF_SIZE)) break; } else if(strstr(buffer,"Exact polarizability") != NULL) { // actual components XX, YX, YY, XZ, YZ, ZZ tokenize(vs,buffer); if (vs.size() >= 8) { double Q[3][3]; OpenBabel::OBMatrixData *pol_tensor = new OpenBabel::OBMatrixData; Q[0][0] = atof(vs[2].c_str()); Q[1][1] = atof(vs[4].c_str()); Q[2][2] = atof(vs[7].c_str()); Q[1][0] = Q[0][1] = atof(vs[3].c_str()); Q[2][0] = Q[0][2] = atof(vs[5].c_str()); Q[2][1] = Q[1][2] = atof(vs[6].c_str()); matrix3x3 pol(Q); pol_tensor->SetAttribute("Exact polarizability"); pol_tensor->SetData(pol); pol_tensor->SetOrigin(fileformatInput); mol.SetData(pol_tensor); } if (!ifs.getline(buffer,BUFF_SIZE)) break; } else if(strstr(buffer,"Total atomic charges") != NULL || strstr(buffer,"Mulliken atomic charges") != NULL) { hasPartialCharges = true; chargeModel = "Mulliken"; ifs.getline(buffer,BUFF_SIZE); // column headings ifs.getline(buffer,BUFF_SIZE); tokenize(vs,buffer); while (vs.size() >= 3 && strstr(buffer,"Sum of ") == NULL) { atom = mol.GetAtom(atoi(vs[0].c_str())); if (!atom) break; atom->SetPartialCharge(atof(vs[2].c_str())); if (!ifs.getline(buffer,BUFF_SIZE)) break; tokenize(vs,buffer); } } else if (strstr(buffer, "Atomic Center") != NULL) { // Data points for ESP calculation tokenize(vs,buffer); if (NULL == esp) esp = new OpenBabel::OBFreeGrid(); if (vs.size() == 8) { esp->AddPoint(atof(vs[5].c_str()),atof(vs[6].c_str()), atof(vs[7].c_str()),0); } else if (vs.size() > 5) { double x,y,z; if (3 == sscanf(buffer+32,"%10lf%10lf%10lf",&x,&y,&z)) { esp->AddPoint(x,y,z,0); } } } else if (strstr(buffer, "ESP Fit Center") != NULL) { // Data points for ESP calculation tokenize(vs,buffer); if (NULL == esp) esp = new 
OpenBabel::OBFreeGrid(); if (vs.size() == 9) { esp->AddPoint(atof(vs[6].c_str()),atof(vs[7].c_str()), atof(vs[8].c_str()),0); } else if (vs.size() > 6) { double x,y,z; if (3 == sscanf(buffer+32,"%10lf%10lf%10lf",&x,&y,&z)) { esp->AddPoint(x,y,z,0); } } } else if (strstr(buffer, "Electrostatic Properties (Atomic Units)") != NULL) { int i,np; OpenBabel::OBFreeGridPoint *fgp; OpenBabel::OBFreeGridPointIterator fgpi; for(i=0; (i<5); i++) { ifs.getline(buffer,BUFF_SIZE); // skip line } // Assume file is correct and that potentials are present // where they should. np = esp->NumPoints(); fgpi = esp->BeginPoints(); i = 0; for(fgp = esp->BeginPoint(fgpi); (NULL != fgp); fgp = esp->NextPoint(fgpi)) { ifs.getline(buffer,BUFF_SIZE); tokenize(vs,buffer); if (vs.size() >= 2) { fgp->SetV(atof(vs[2].c_str())); i++; } } if (i == np) { esp->SetAttribute("Electrostatic Potential"); mol.SetData(esp); } else { cout << "Read " << esp->NumPoints() << " ESP points i = " << i << "\n"; } } else if (strstr(buffer, "Charges from ESP fit") != NULL) { hasPartialCharges = true; chargeModel = "ESP"; ifs.getline(buffer,BUFF_SIZE); // Charge / dipole line ifs.getline(buffer,BUFF_SIZE); // column header ifs.getline(buffer,BUFF_SIZE); // real charges tokenize(vs,buffer); while (vs.size() >= 3 && strstr(buffer,"-----") == NULL) { atom = mol.GetAtom(atoi(vs[0].c_str())); if (!atom) break; atom->SetPartialCharge(atof(vs[2].c_str())); if (!ifs.getline(buffer,BUFF_SIZE)) break; tokenize(vs,buffer); } } else if(strstr(buffer,"Natural Population") != NULL) { hasPartialCharges = true; chargeModel = "NBO"; ifs.getline(buffer,BUFF_SIZE); // column headings ifs.getline(buffer,BUFF_SIZE); // again ifs.getline(buffer,BUFF_SIZE); // again (-----) ifs.getline(buffer,BUFF_SIZE); // real data tokenize(vs,buffer); while (vs.size() >= 3 && strstr(buffer,"=====") == NULL) { atom = mol.GetAtom(atoi(vs[1].c_str())); if (!atom) break; atom->SetPartialCharge(atof(vs[2].c_str())); if (!ifs.getline(buffer,BUFF_SIZE)) break; 
tokenize(vs,buffer); } } else if(strstr(buffer, " Frequencies -- ")) //vibrational frequencies { //The info should appear only once as several blocks starting with this line tokenize(vs, buffer); for(unsigned int i=2; i<vs.size(); ++i) Frequencies.push_back(atof(vs[i].c_str())); ifs.getline(buffer,BUFF_SIZE); //Red. masses ifs.getline(buffer,BUFF_SIZE); //Frc consts ifs.getline(buffer,BUFF_SIZE); //IR Inten tokenize(vs, buffer); for(unsigned int i=3; i<vs.size(); ++i) Intensities.push_back(atof(vs[i].c_str())); ifs.getline(buffer, BUFF_SIZE); // column labels or Raman intensity if(strstr(buffer, "Raman Activ")) { ifs.getline(buffer, BUFF_SIZE); // Depolar (P) ifs.getline(buffer, BUFF_SIZE); // Depolar (U) ifs.getline(buffer, BUFF_SIZE); // column labels } ifs.getline(buffer, BUFF_SIZE); // actual displacement data tokenize(vs, buffer); vector<vector3> vib1, vib2, vib3; double x, y, z; while(vs.size() >= 5) { for (unsigned int i = 2; i < vs.size()-2; i += 3) { x = atof(vs[i].c_str()); y = atof(vs[i+1].c_str()); z = atof(vs[i+2].c_str()); if (i == 2) vib1.push_back(vector3(x, y, z)); else if (i == 5) vib2.push_back(vector3(x, y, z)); else if (i == 8) vib3.push_back(vector3(x, y, z)); } if (!ifs.getline(buffer, BUFF_SIZE)) break; tokenize(vs,buffer); } Lx.push_back(vib1); if (vib2.size()) Lx.push_back(vib2); if (vib3.size()) Lx.push_back(vib3); } else if(strstr(buffer, " This molecule is "))//rotational data { if(strstr(buffer, "asymmetric")) RotorType = OBRotationData::ASYMMETRIC; else if(strstr(buffer, "symmetric")) RotorType = OBRotationData::SYMMETRIC; else if(strstr(buffer, "linear")) RotorType = OBRotationData::LINEAR; else RotorType = OBRotationData::UNKNOWN; ifs.getline(buffer,BUFF_SIZE); //symmetry number tokenize(vs, buffer); RotSymNum = atoi(vs[3].c_str()); } else if(strstr(buffer, "Rotational constant")) { tokenize(vs, buffer); RotConsts.clear(); for (unsigned int i=3; i<vs.size(); ++i) RotConsts.push_back(atof(vs[i].c_str())); } else if(strstr(buffer, 
"alpha electrons")) // # of electrons / orbital { tokenize(vs, buffer); if (vs.size() == 6) { // # alpha electrons # beta electrons aHOMO = atoi(vs[0].c_str()); bHOMO = atoi(vs[3].c_str()); } } else if(strstr(buffer, "rbital symmetries")) // orbital symmetries { symmetries.clear(); std::string label; // used as a temporary to remove "(" and ")" from labels int iii,offset = 0; bool bDoneSymm; // Extract both Alpha and Beta symmetries ifs.getline(buffer, BUFF_SIZE); // skip the current line for(iii=0; (iii<2); iii++) { if (strstr(buffer, "electronic state")) break; // We've gone too far! while (!ifs.eof() && ((NULL != strstr(buffer,"Alpha")) || (NULL != strstr(buffer,"Beta")))) { // skip the Alpha: and Beta: title lines ifs.getline(buffer, BUFF_SIZE); } do { bDoneSymm = (NULL == strstr(buffer, "(")); if (!bDoneSymm) { tokenize(vs, buffer); if ((NULL != strstr(buffer, "Occupied")) || (NULL != strstr(buffer, "Virtual"))) { offset = 1; // skip first token } else { offset = 0; } for (unsigned int i = offset; i < vs.size(); ++i) { label = vs[i].substr(1, vs[i].length() - 2); symmetries.push_back(label); } ifs.getline(buffer, BUFF_SIZE); // get a new line if we've been reading symmetries } // don't read a new line if we're done with symmetries } while (!ifs.eof() && !bDoneSymm); } // end alpha/beta section } else if (strstr(buffer, "Alpha") && strstr(buffer, ". eigenvalues --")) { orbitals.clear(); betaStart = 0; while (strstr(buffer, ". 
eigenvalues --")) { tokenize(vs, buffer); if (vs.size() < 4) break; if (vs[0].find("Beta") !=string::npos && betaStart == 0) // mark where we switch from alpha to beta betaStart = orbitals.size(); for (unsigned int i = 4; i < vs.size(); ++i) { orbitals.push_back(atof(vs[i].c_str())); } ifs.getline(buffer, BUFF_SIZE); } } else if(strstr(buffer, " Excited State")) // Force and wavelength data { // The above line appears for each state, so just append the info to the vectors tokenize(vs, buffer); if (vs.size() >= 9) { double wavelength = atof(vs[6].c_str()); double force = atof(vs[8].substr(2).c_str()); // remove the "f=" part Forces.push_back(force); Wavelengths.push_back(wavelength); } } else if(strstr(buffer, " Ground to excited state Transition electric dipole moments (Au):")) // Electronic dipole moments { ifs.getline(buffer, BUFF_SIZE); // Headings ifs.getline(buffer, BUFF_SIZE); // First entry tokenize(vs, buffer); while (vs.size() == 5) { double s = atof(vs[4].c_str()); EDipole.push_back(s); ifs.getline(buffer, BUFF_SIZE); tokenize(vs, buffer); } } else if(strstr(buffer, " state X Y Z R(velocity)")) { // Rotatory Strengths ifs.getline(buffer, BUFF_SIZE); // First entry tokenize(vs, buffer); while (vs.size() == 5) { double s = atof(vs[4].c_str()); RotatoryStrengthsVelocity.push_back(s); ifs.getline(buffer, BUFF_SIZE); tokenize(vs, buffer); } } else if(strstr(buffer, " state X Y Z R(length)")) { // Rotatory Strengths ifs.getline(buffer, BUFF_SIZE); // First entry tokenize(vs, buffer); while (vs.size() == 5) { double s = atof(vs[4].c_str()); RotatoryStrengthsLength.push_back(s); ifs.getline(buffer, BUFF_SIZE); tokenize(vs, buffer); } } else if (strstr(buffer, "Forces (Hartrees/Bohr)")) { ifs.getline(buffer, BUFF_SIZE); // column headers ifs.getline(buffer, BUFF_SIZE); // ------ ifs.getline(buffer, BUFF_SIZE); // real data } else if (strstr(buffer, "Isotropic = ")) // NMR shifts { tokenize(vs, buffer); if (vs.size() >= 4) { atom = mol.GetAtom(atoi(vs[0].c_str())); 
OBPairData *nmrShift = new OBPairData(); nmrShift->SetAttribute("NMR Isotropic Shift"); string shift = vs[4].c_str(); nmrShift->SetValue(shift); atom->SetData(nmrShift); } } else if(strstr(buffer,"SCF Done:") != NULL) { tokenize(vs,buffer); mol.SetEnergy(atof(vs[4].c_str()) * HARTEE_TO_KCALPERMOL); confEnergies.push_back(mol.GetEnergy()); } /* Temporarily commented out until the handling of energy in OBMol is sorted out // MP2 energies also use a different syntax // PM3 energies use a different syntax else if(strstr(buffer,"E (Thermal)") != NULL) { ifs.getline(buffer,BUFF_SIZE); //Headers ifs.getline(buffer,BUFF_SIZE); //Total energy; what we want tokenize(vs,buffer); mol.SetEnergy(atof(vs[1].c_str())); confEnergies.push_back(mol.GetEnergy()); } */ else if(strstr(buffer,"Standard basis:") != NULL) { add_unique_pairdata_to_mol(&mol,"basis",buffer,2); } else if(strstr(buffer,"Zero-point correction=") != NULL) { tokenize(vs,buffer); ezpe = atof(vs[2].c_str()); ezpe_set = true; } else if(strstr(buffer,"Thermal correction to Enthalpy=") != NULL) { tokenize(vs,buffer); Hcorr = atof(vs[4].c_str()); Hcorr_set = true; } else if(strstr(buffer,"Thermal correction to Gibbs Free Energy=") != NULL) { tokenize(vs,buffer); Gcorr = atof(vs[6].c_str()); Gcorr_set = true; } else if (strstr(buffer,"CV") != NULL) { ifs.getline(buffer,BUFF_SIZE); //Headers ifs.getline(buffer,BUFF_SIZE); //Total heat capacity tokenize(vs,buffer); if (vs.size() == 4) { if (vs[0].compare("Total") == 0) { CV = atof(vs[2].c_str()); CV_set = true; } } ifs.getline(buffer,BUFF_SIZE); //Electronic ifs.getline(buffer,BUFF_SIZE); //Translational tokenize(vs,buffer); if ((vs.size() == 4) && (vs[0].compare("Translational") == 0) ) { Scomponents.push_back(atof(vs[3].c_str())); } ifs.getline(buffer,BUFF_SIZE); //Rotational tokenize(vs,buffer); if ((vs.size() == 4) && (vs[0].compare("Rotational") == 0)) { Scomponents.push_back(atof(vs[3].c_str())); } ifs.getline(buffer,BUFF_SIZE); //Vibrational tokenize(vs,buffer); if 
((vs.size() == 4) && (vs[0].compare("Vibrational") == 0)) { Scomponents.push_back(atof(vs[3].c_str())); } } else if ((strstr(buffer,"Temperature=") != NULL) && (strstr(buffer,"Pressure=") != NULL)) { tokenize(vs,buffer); temperature = atof(vs[1].c_str()); } else if (strstr(buffer, "(0 K)") != NULL) { /* This must be the last else */ int i,nsearch; const char *search[] = { "CBS-QB3 (0 K)", "G2(0 K)", "G3(0 K)", "G4(0 K)", "W1BD (0 K)", "W1U (0 K)" }; const char *mymeth[] = { "CBS-QB3", "G2", "G3", "G4", "W1BD", "W1U" }; const int myindex[] = { 3, 2, 2, 2, 3, 3 }; nsearch = sizeof(search)/sizeof(search[0]); for(i=0; (i<nsearch); i++) { if(strstr(buffer,search[i]) != NULL) { tokenize(vs,buffer); E0 = atof(vs[myindex[i]].c_str()); E0_set = 1; thermo_method = mymeth[i]; break; } } } } // end while if (mol.NumAtoms() == 0) { // e.g., if we're at the end of a file PR#1737209 mol.EndModify(); return false; } mol.EndModify(); // Set conformers to all coordinates we adopted // but remove last geometry -- it's a duplicate if (vconf.size() > 1) vconf.pop_back(); mol.SetConformers(vconf); mol.SetConformer(mol.NumConformers() - 1); // Copy the conformer data too confData->SetDimension(confDimensions); confData->SetEnergies(confEnergies); confData->SetForces(confForces); mol.SetData(confData); // Check whether we have data to extract heat of formation. 
if (ezpe_set && Hcorr_set && Gcorr_set && E0_set && CV_set && (thermo_method.size() > 0)) { extract_thermo(&mol,thermo_method,temperature,ezpe, Hcorr,Gcorr,E0,CV,RotSymNum,Scomponents); } // Attach orbital data, if there is any if (orbitals.size() > 0) { OBOrbitalData *od = new OBOrbitalData; if (aHOMO == bHOMO) { od->LoadClosedShellOrbitals(orbitals, symmetries, aHOMO); } else { // we have to separate the alpha and beta vectors std::vector<double> betaOrbitals; std::vector<std::string> betaSymmetries; unsigned int initialSize = orbitals.size(); unsigned int symmSize = symmetries.size(); if (initialSize != symmSize || betaStart == -1) { cerr << "Inconsistency: orbitals have " << initialSize << " elements while symmetries have " << symmSize << endl; } else { for (unsigned int i = betaStart; i < initialSize; ++i) { betaOrbitals.push_back(orbitals[i]); if (symmetries.size() > 0) betaSymmetries.push_back(symmetries[i]); } // ok, now erase the end elements of orbitals and symmetries for (unsigned int i = betaStart; i < initialSize; ++i) { orbitals.pop_back(); if (symmetries.size() > 0) symmetries.pop_back(); } // and load the alphas and betas od->LoadAlphaOrbitals(orbitals, symmetries, aHOMO); od->LoadBetaOrbitals(betaOrbitals, betaSymmetries, bHOMO); } } od->SetOrigin(fileformatInput); mol.SetData(od); } //Attach vibrational data, if there is any, to molecule if(Frequencies.size()>0) { OBVibrationData* vd = new OBVibrationData; vd->SetData(Lx, Frequencies, Intensities); vd->SetOrigin(fileformatInput); mol.SetData(vd); } //Attach rotational data, if there is any, to molecule if(RotConsts[0]!=0.0) { OBRotationData* rd = new OBRotationData; rd->SetData(RotorType, RotConsts, RotSymNum); rd->SetOrigin(fileformatInput); mol.SetData(rd); } // Attach unit cell translation vectors if found if (numTranslationVectors > 0) { OBUnitCell* uc = new OBUnitCell; uc->SetData(translationVectors[0], translationVectors[1], translationVectors[2]); uc->SetOrigin(fileformatInput); 
mol.SetData(uc); } //Attach electronic transition data, if there is any, to molecule if(Forces.size() > 0 && Forces.size() == Wavelengths.size()) { OBElectronicTransitionData* etd = new OBElectronicTransitionData; etd->SetData(Wavelengths, Forces); if (EDipole.size() == Forces.size()) etd->SetEDipole(EDipole); if (RotatoryStrengthsLength.size() == Forces.size()) etd->SetRotatoryStrengthsLength(RotatoryStrengthsLength); if (RotatoryStrengthsVelocity.size() == Forces.size()) etd->SetRotatoryStrengthsVelocity(RotatoryStrengthsVelocity); etd->SetOrigin(fileformatInput); mol.SetData(etd); } if (!pConv->IsOption("b",OBConversion::INOPTIONS)) mol.ConnectTheDots(); if (!pConv->IsOption("s",OBConversion::INOPTIONS) && !pConv->IsOption("b",OBConversion::INOPTIONS)) mol.PerceiveBondOrders(); if (hasPartialCharges) { mol.SetPartialChargesPerceived(); // Annotate that partial charges come from Mulliken OBPairData *dp = new OBPairData; dp->SetAttribute("PartialCharges"); dp->SetValue(chargeModel); // Mulliken, ESP, etc. dp->SetOrigin(fileformatInput); mol.SetData(dp); } mol.SetTotalCharge(total_charge); mol.SetTotalSpinMultiplicity(spin_multiplicity); mol.SetTitle(title); return(true); }
// Reading Gaussian output has been tested for G98 and G03 to some degree // If you have problems (or examples of older output), please contact // the [email protected] mailing list and/or post a bug bool GaussianOutputFormat::ReadMolecule(OBBase* pOb, OBConversion* pConv) { OBMol* pmol = pOb->CastAndClear<OBMol>(); if(pmol==NULL) return false; //Define some references so we can use the old parameter names istream &ifs = *pConv->GetInStream(); OBMol &mol = *pmol; const char* title = pConv->GetTitle(); char buffer[BUFF_SIZE]; string str,str1; double x,y,z; OBAtom *atom; vector<string> vs; int charge = 0; unsigned int spin = 1; bool hasPartialCharges = false; string chargeModel; // descriptor for charges (e.g. "Mulliken") // coordinates of all steps // Set conformers to all coordinates we adopted std::vector<double*> vconf; // index of all frames/conformers std::vector<double> coordinates; // coordinates in each frame int natoms = 0; // number of atoms -- ensure we don't go to a new job with a different molecule // OBConformerData stores information about multiple steps // we can change attribute later if needed (e.g., IRC) OBConformerData *confData = new OBConformerData(); confData->SetOrigin(fileformatInput); std::vector<unsigned short> confDimensions = confData->GetDimension(); // to be fair, set these all to 3D std::vector<double> confEnergies = confData->GetEnergies(); std::vector< std::vector< vector3 > > confForces = confData->GetForces(); //Vibrational data std::vector< std::vector< vector3 > > Lx; std::vector<double> Frequencies, Intensities; //Rotational data std::vector<double> RotConsts(3); int RotSymNum=1; OBRotationData::RType RotorType; // Translation vectors (if present) vector3 translationVectors[3]; int numTranslationVectors = 0; //Electronic Excitation data std::vector<double> Forces, Wavelengths, EDipole, RotatoryStrengthsVelocity, RotatoryStrengthsLength; // Orbital data std::vector<double> orbitals; std::vector<std::string> symmetries; int 
aHOMO, bHOMO, betaStart; //Put some metadata into OBCommentData string comment("Gaussian "); ifs.getline(buffer,BUFF_SIZE); if(*buffer) { comment += strchr(buffer,'=')+2; comment += ""; for(unsigned i=0; i<115, ifs; ++i) { ifs.getline(buffer,BUFF_SIZE); if(buffer[1]=='#') { //the line describing the method comment += buffer; OBCommentData *cd = new OBCommentData; cd->SetData(comment); cd->SetOrigin(fileformatInput); mol.SetData(cd); break; } } } int i=0; bool no_symmetry=false; char coords_type[25]; //Prescan file to find second instance of "orientation:" //This will be the kind of coords used in the chk/fchk file //Unless the "nosym" keyword has been requested while (ifs.getline(buffer,BUFF_SIZE)) { if (strstr(buffer,"Symmetry turned off by external request.") != NULL) { // The "nosym" keyword has been requested no_symmetry = true; } if (strstr(buffer,"orientation:") !=NULL) { i++; tokenize (vs, buffer); strcpy (coords_type, vs[0].c_str()); strcat (coords_type, " orientation:"); } if ((no_symmetry && i==1) || i==2) break; // Check for the last line of normal output and exit loop, otherwise, // the rewind below will no longer work. 
if (strstr(buffer,"Normal termination of Gaussian") != NULL) break; } ifs.seekg(0); //rewind mol.BeginModify(); while (ifs.getline(buffer,BUFF_SIZE)) { if (strstr(buffer,"Multiplicity") != NULL) { tokenize(vs, buffer, " \t\n"); if (vs.size() == 6) { charge = atoi(vs[2].c_str()); spin = atoi(vs[5].c_str()); } ifs.getline(buffer,BUFF_SIZE); } else if (strstr(buffer, coords_type) != NULL) { numTranslationVectors = 0; // ignore old translationVectors ifs.getline(buffer,BUFF_SIZE); // --------------- ifs.getline(buffer,BUFF_SIZE); // column headings ifs.getline(buffer,BUFF_SIZE); // column headings ifs.getline(buffer,BUFF_SIZE); // --------------- ifs.getline(buffer,BUFF_SIZE); tokenize(vs,buffer); while (vs.size() == 6) { x = atof((char*)vs[3].c_str()); y = atof((char*)vs[4].c_str()); z = atof((char*)vs[5].c_str()); int atomicNum = atoi((char*)vs[1].c_str()); if (atomicNum > 0) // translation vectors are "-2" { if (natoms == 0) { // first time reading the molecule, create each atom atom = mol.NewAtom(); atom->SetAtomicNum(atoi((char*)vs[1].c_str())); } coordinates.push_back(x); coordinates.push_back(y); coordinates.push_back(z); } else { translationVectors[numTranslationVectors++].Set(x, y, z); } if (!ifs.getline(buffer,BUFF_SIZE)) { break; } tokenize(vs,buffer); } // done with reading atoms natoms = mol.NumAtoms(); // malloc / memcpy double *tmpCoords = new double [(natoms)*3]; memcpy(tmpCoords, &coordinates[0], sizeof(double)*natoms*3); vconf.push_back(tmpCoords); coordinates.clear(); confDimensions.push_back(3); // always 3D -- OBConformerData allows mixing 2D and 3D structures } else if(strstr(buffer,"Dipole moment") != NULL) { ifs.getline(buffer,BUFF_SIZE); // actual components X ### Y #### Z ### tokenize(vs,buffer); if (vs.size() >= 6) { OBVectorData *dipoleMoment = new OBVectorData; dipoleMoment->SetAttribute("Dipole Moment"); double x, y, z; x = atof(vs[1].c_str()); y = atof(vs[3].c_str()); z = atof(vs[5].c_str()); dipoleMoment->SetData(x, y, z); 
dipoleMoment->SetOrigin(fileformatInput); mol.SetData(dipoleMoment); } if (!ifs.getline(buffer,BUFF_SIZE)) break; } else if(strstr(buffer,"Total atomic charges") != NULL || strstr(buffer,"Mulliken atomic charges") != NULL) { hasPartialCharges = true; chargeModel = "Mulliken"; ifs.getline(buffer,BUFF_SIZE); // column headings ifs.getline(buffer,BUFF_SIZE); tokenize(vs,buffer); while (vs.size() >= 3 && strstr(buffer,"Sum of ") == NULL) { atom = mol.GetAtom(atoi(vs[0].c_str())); if (!atom) break; atom->SetPartialCharge(atof(vs[2].c_str())); if (!ifs.getline(buffer,BUFF_SIZE)) break; tokenize(vs,buffer); } } else if (strstr(buffer, "Charges from ESP fit") != NULL) { hasPartialCharges = true; chargeModel = "ESP"; ifs.getline(buffer,BUFF_SIZE); // Charge / dipole line ifs.getline(buffer,BUFF_SIZE); // column header ifs.getline(buffer,BUFF_SIZE); // real charges tokenize(vs,buffer); while (vs.size() >= 3 && strstr(buffer,"-----") == NULL) { atom = mol.GetAtom(atoi(vs[0].c_str())); if (!atom) break; atom->SetPartialCharge(atof(vs[2].c_str())); if (!ifs.getline(buffer,BUFF_SIZE)) break; tokenize(vs,buffer); } } else if(strstr(buffer,"Natural Population") != NULL) { hasPartialCharges = true; chargeModel = "NBO"; ifs.getline(buffer,BUFF_SIZE); // column headings ifs.getline(buffer,BUFF_SIZE); // again ifs.getline(buffer,BUFF_SIZE); // again (-----) ifs.getline(buffer,BUFF_SIZE); // real data tokenize(vs,buffer); while (vs.size() >= 3 && strstr(buffer,"=====") == NULL) { atom = mol.GetAtom(atoi(vs[1].c_str())); if (!atom) break; atom->SetPartialCharge(atof(vs[2].c_str())); if (!ifs.getline(buffer,BUFF_SIZE)) break; tokenize(vs,buffer); } } else if(strstr(buffer, " Frequencies -- ")) //vibrational frequencies { //The info should appear only once as several blocks starting with this line tokenize(vs, buffer); for(unsigned int i=2; i<vs.size(); ++i) Frequencies.push_back(atof(vs[i].c_str())); ifs.getline(buffer,BUFF_SIZE); //Red. 
masses ifs.getline(buffer,BUFF_SIZE); //Frc consts ifs.getline(buffer,BUFF_SIZE); //IR Inten tokenize(vs, buffer); for(unsigned int i=3; i<vs.size(); ++i) Intensities.push_back(atof(vs[i].c_str())); ifs.getline(buffer, BUFF_SIZE); // column labels or Raman intensity if(strstr(buffer, "Raman Activ")) { ifs.getline(buffer, BUFF_SIZE); // Depolar (P) ifs.getline(buffer, BUFF_SIZE); // Depolar (U) ifs.getline(buffer, BUFF_SIZE); // column labels } ifs.getline(buffer, BUFF_SIZE); // actual displacement data tokenize(vs, buffer); vector<vector3> vib1, vib2, vib3; double x, y, z; while(vs.size() > 5) { for (unsigned int i = 2; i < vs.size()-2; i += 3) { x = atof(vs[i].c_str()); y = atof(vs[i+1].c_str()); z = atof(vs[i+2].c_str()); if (i == 2) vib1.push_back(vector3(x, y, z)); else if (i == 5) vib2.push_back(vector3(x, y, z)); else if (i == 8) vib3.push_back(vector3(x, y, z)); } if (!ifs.getline(buffer, BUFF_SIZE)) break; tokenize(vs,buffer); } Lx.push_back(vib1); if (vib2.size()) Lx.push_back(vib2); if (vib3.size()) Lx.push_back(vib3); } else if(strstr(buffer, " This molecule is "))//rotational data { if(strstr(buffer, "asymmetric")) RotorType = OBRotationData::ASYMMETRIC; else if(strstr(buffer, "symmetric")) RotorType = OBRotationData::SYMMETRIC; else if(strstr(buffer, "linear")) RotorType = OBRotationData::LINEAR; else RotorType = OBRotationData::UNKNOWN; ifs.getline(buffer,BUFF_SIZE); //symmetry number tokenize(vs, buffer); RotSymNum = atoi(vs[3].c_str()); } else if(strstr(buffer, "Rotational constant")) { tokenize(vs, buffer); RotConsts.clear(); for (unsigned int i=3; i<vs.size(); ++i) RotConsts.push_back(atof(vs[i].c_str())); } else if(strstr(buffer, "alpha electrons")) // # of electrons / orbital { tokenize(vs, buffer); if (vs.size() == 6) { // # alpha electrons # beta electrons aHOMO = atoi(vs[0].c_str()); bHOMO = atoi(vs[3].c_str()); } } else if(strstr(buffer, "rbital symmetries")) // orbital symmetries { symmetries.clear(); std::string label; // used as a 
temporary to remove "(" and ")" from labels int offset = 0; while(true) { ifs.getline(buffer, BUFF_SIZE); tokenize(vs, buffer); // parse first line "Occupied" ... for (unsigned int i = 1; i < vs.size(); ++i) { label = vs[i].substr(1, vs[i].length() - 2); symmetries.push_back(label); } ifs.getline(buffer, BUFF_SIZE); // Parse remaining lines while (strstr(buffer, "(")) { tokenize(vs, buffer); if (strstr(buffer, "Virtual")) { offset = 1; // skip first token } else { offset = 0; } for (unsigned int i = offset; i < vs.size(); ++i) { label = vs[i].substr(1, vs[i].length() - 2); symmetries.push_back(label); } ifs.getline(buffer, BUFF_SIZE); // get next line } // end parsing symmetry labels if (!strstr(buffer, "Beta")) // no beta orbitals break; } // end alpha/beta section } else if (strstr(buffer, "Alpha") && strstr(buffer, ". eigenvalues --")) { orbitals.clear(); betaStart = 0; while (strstr(buffer, ". eigenvalues --")) { tokenize(vs, buffer); if (vs.size() < 4) break; if (vs[0].find("Beta") !=string::npos && betaStart == 0) // mark where we switch from alpha to beta betaStart = orbitals.size(); for (unsigned int i = 4; i < vs.size(); ++i) { orbitals.push_back(atof(vs[i].c_str())); } ifs.getline(buffer, BUFF_SIZE); } } else if(strstr(buffer, " Excited State")) // Force and wavelength data { // The above line appears for each state, so just append the info to the vectors tokenize(vs, buffer); if (vs.size() == 9) { double wavelength = atof(vs[6].c_str()); double force = atof(vs[8].substr(2).c_str()); Forces.push_back(force); Wavelengths.push_back(wavelength); } } else if(strstr(buffer, " Ground to excited state Transition electric dipole moments (Au):")) // Electronic dipole moments { ifs.getline(buffer, BUFF_SIZE); // Headings ifs.getline(buffer, BUFF_SIZE); // First entry tokenize(vs, buffer); while (vs.size() == 5) { double s = atof(vs[4].c_str()); EDipole.push_back(s); ifs.getline(buffer, BUFF_SIZE); tokenize(vs, buffer); } } else if(strstr(buffer, " state X Y Z 
R(velocity)")) { // Rotatory Strengths ifs.getline(buffer, BUFF_SIZE); // First entry tokenize(vs, buffer); while (vs.size() == 5) { double s = atof(vs[4].c_str()); RotatoryStrengthsVelocity.push_back(s); ifs.getline(buffer, BUFF_SIZE); tokenize(vs, buffer); } } else if(strstr(buffer, " state X Y Z R(length)")) { // Rotatory Strengths ifs.getline(buffer, BUFF_SIZE); // First entry tokenize(vs, buffer); while (vs.size() == 5) { double s = atof(vs[4].c_str()); RotatoryStrengthsLength.push_back(s); ifs.getline(buffer, BUFF_SIZE); tokenize(vs, buffer); } } else if (strstr(buffer, "Forces (Hartrees/Bohr)")) { ifs.getline(buffer, BUFF_SIZE); // column headers ifs.getline(buffer, BUFF_SIZE); // ------ ifs.getline(buffer, BUFF_SIZE); // real data } else if (strstr(buffer, "Isotropic = ")) // NMR shifts { tokenize(vs, buffer); if (vs.size() >= 4) { atom = mol.GetAtom(atoi(vs[0].c_str())); OBPairData *nmrShift = new OBPairData(); nmrShift->SetAttribute("NMR Isotropic Shift"); string shift = vs[4].c_str(); nmrShift->SetValue(shift); atom->SetData(nmrShift); } } else if(strstr(buffer,"SCF Done:") != NULL) { #define HARTREE_TO_KCAL 627.509469 tokenize(vs,buffer); mol.SetEnergy(atof(vs[4].c_str()) * HARTREE_TO_KCAL); confEnergies.push_back(mol.GetEnergy()); } /* Temporarily commented out until the handling of energy in OBMol is sorted out // MP2 energies also use a different syntax // PM3 energies use a different syntax else if(strstr(buffer,"E (Thermal)") != NULL) { ifs.getline(buffer,BUFF_SIZE); //Headers ifs.getline(buffer,BUFF_SIZE); //Total energy; what we want tokenize(vs,buffer); mol.SetEnergy(atof(vs[1].c_str())); confEnergies.push_back(mol.GetEnergy()); } */ } // end while if (mol.NumAtoms() == 0) { // e.g., if we're at the end of a file PR#1737209 mol.EndModify(); return false; } mol.EndModify(); // Set conformers to all coordinates we adopted // but remove last geometry -- it's a duplicate if (vconf.size() > 1) vconf.pop_back(); mol.SetConformers(vconf); 
mol.SetConformer(mol.NumConformers() - 1); // Copy the conformer data too confData->SetDimension(confDimensions); confData->SetEnergies(confEnergies); confData->SetForces(confForces); mol.SetData(confData); // Attach orbital data, if there is any if (orbitals.size() > 0) { OBOrbitalData *od = new OBOrbitalData; if (aHOMO == bHOMO) { od->LoadClosedShellOrbitals(orbitals, symmetries, aHOMO); } else { // we have to separate the alpha and beta vectors std::vector<double> betaOrbitals; std::vector<std::string> betaSymmetries; unsigned int initialSize = orbitals.size(); for (unsigned int i = betaStart; i < initialSize; ++i) { betaOrbitals.push_back(orbitals[i]); if (symmetries.size() > 0) betaSymmetries.push_back(symmetries[i]); } // ok, now erase the end elements of orbitals and symmetries for (unsigned int i = betaStart; i < initialSize; ++i) { orbitals.pop_back(); if (symmetries.size() > 0) symmetries.pop_back(); } // and load the alphas and betas od->LoadAlphaOrbitals(orbitals, symmetries, aHOMO); od->LoadBetaOrbitals(betaOrbitals, betaSymmetries, bHOMO); } od->SetOrigin(fileformatInput); mol.SetData(od); } //Attach vibrational data, if there is any, to molecule if(Frequencies.size()>0) { OBVibrationData* vd = new OBVibrationData; vd->SetData(Lx, Frequencies, Intensities); vd->SetOrigin(fileformatInput); mol.SetData(vd); } //Attach rotational data, if there is any, to molecule if(RotConsts[0]!=0.0) { OBRotationData* rd = new OBRotationData; rd->SetData(RotorType, RotConsts, RotSymNum); rd->SetOrigin(fileformatInput); mol.SetData(rd); } // Attach unit cell translation vectors if found if (numTranslationVectors > 0) { OBUnitCell* uc = new OBUnitCell; uc->SetData(translationVectors[0], translationVectors[1], translationVectors[2]); uc->SetOrigin(fileformatInput); mol.SetData(uc); } //Attach electronic transition data, if there is any, to molecule if(Forces.size() > 0 && Forces.size() == Wavelengths.size()) { OBElectronicTransitionData* etd = new 
OBElectronicTransitionData; etd->SetData(Wavelengths, Forces); if (EDipole.size() == Forces.size()) etd->SetEDipole(EDipole); if (RotatoryStrengthsLength.size() == Forces.size()) etd->SetRotatoryStrengthsLength(RotatoryStrengthsLength); if (RotatoryStrengthsVelocity.size() == Forces.size()) etd->SetRotatoryStrengthsVelocity(RotatoryStrengthsVelocity); etd->SetOrigin(fileformatInput); mol.SetData(etd); } if (!pConv->IsOption("b",OBConversion::INOPTIONS)) mol.ConnectTheDots(); if (!pConv->IsOption("s",OBConversion::INOPTIONS) && !pConv->IsOption("b",OBConversion::INOPTIONS)) mol.PerceiveBondOrders(); if (hasPartialCharges) { mol.SetPartialChargesPerceived(); // Annotate that partial charges come from Mulliken OBPairData *dp = new OBPairData; dp->SetAttribute("PartialCharges"); dp->SetValue(chargeModel); // Mulliken, ESP, etc. dp->SetOrigin(fileformatInput); mol.SetData(dp); } mol.SetTotalCharge(charge); mol.SetTotalSpinMultiplicity(spin); mol.SetTitle(title); return(true); }