// Split a fstring into multiple substrings void FUStringConversion::ToFStringList(const fstring& value, FStringList& array) { const fchar* s = value.c_str(); // Skip beginning white spaces fchar c; while ((c = *s) != 0 && (c == ' ' || c == '\t' || c == '\r' || c == '\n')) { ++s; } size_t index = 0; while (*s != 0) { const fchar* word = s; // Find next white space while ((c = *s) != 0 && c != ' ' && c != '\t' && c != '\r' && c != '\n') { ++s; } if (index < array.size()) array[index++].append(word, s - word); else { array.push_back(fstring(word, s - word)); ++index; } // Skip all white spaces while ((c = *s) != 0 && (c == ' ' || c == '\t' || c == '\r' || c == '\n')) { ++s; } } array.resize(index); }
void FBasisSetLibraryImpl::ImportMolproLib(std::string const &FileName, std::ostream *pxout) { // how this files look: // Note: Lines with * are comments. // He s STO-3G STO3G : 3 1 1.3 // STO-3G // 6.3624214 1.158923 0.31364979 0.15432897 0.53532814 0.44463454 // Li s STO-3G STO3G : 6 2 1.3 4.6 // STO-3G // 16.119575 2.9362007 0.7946505 0.6362897 0.1478601 0.0480887 0.15432897 // 0.53532814 0.44463454 -0.09996723 0.39951283 0.70011547 // Li p STO-3G STO3G : 3 1 1.3 // STO-3G // 0.6362897 0.1478601 0.0480887 0.15591627 0.60768372 0.39195739 // B s cc-pVDZ VDZ : 9 3 1.9 1.9 9.9 // cc-pVDZ // 4570 685.9 156.5 44.47 14.48 5.131 1.898 0.3329 0.1043 0.000696 // 0.005353 0.027134 0.10138 0.272055 0.448403 0.290123 0.014322 -0.003486 // -0.000139 -0.001097 -0.005444 -0.021916 -0.059751 -0.138732 -0.131482 // 0.539526 0.580774 1 // interpretation: First Line: // ElementName OrbitalType N*[AlternativeNameI] : // #PrimOrbitals #OrbitalsTheyAreContractedTo N*[ContractFrom.ContractTo] // Any string, usually reference to be cited for the basis set // M*[PrimitiveOrbitalExponents] L*[PrimitiveOrbitalCoefficients] // for each contraction contraction length coefficients are supplied. // and data sets like this are also seen: // H P 6-311G2P : 2 0 // G92 6-311G polarization // .15000000D+01 .37500000D+00 // H P 6-311G3P : 3 0 // G92 6-311G polarization // .30000000D+01 .75000000D+00 .18750000D+00 // H S 6-31G** 6-31G* 6-31G : 4 1 1.3 // G92 6-31G** // .18731137D+02 .28253944D+01 .64012169D+00 .16127776D+00 .33494604D-01 // .23472695D+00 .81375733D+00 // all/some primitive orbitals are probably left uncontracted. Therefore no // contraction coefficients are supplied for these (only exponents). // Also I originally tought that the names of the basis set after the first // one were alternative Names of the set (like cc-pVDZ = VDZ). This is // however not how it works, at least not in all cases: It seems that names // which are supplied actually mean that the currently described elements // have to be inserted into the basis sets of ALL names provided, which // may or may not be identical basis sets (for example, for the s and p // orbitals of the 931G basis set, also the starred names are listed, which // means that these basis sets share these orbitals, altough in general they // are different). // so.. let's begin the mess. // read the entire file into a stringstream object (we need to modify it). TArray<char> pFileContent; if (!LoadFileIntoMemory(pFileContent, FileName)) throw std::runtime_error( "FBasisSetLibraryImpl: Failed to open file '"+FileName+"' for basis library import." ); // okay, now this is very bad. This FORTRAN stuff (sometimes) denotes // exponents in scientific notation not as "23423e-3" but as "23423D-03". We // do a lame attempt to convert that. This might break in some situations. // FIXME: correct this somehow. for (uint i = 0; i < pFileContent.size() - 2; ++i) { if (pFileContent[i]=='D' && (pFileContent[i+1]=='+' || pFileContent[i+1]=='-') && pFileContent[i+2]=='0') pFileContent[i] = 'E'; } FBasisNameSet AllBasisNames; std::stringstream str(&pFileContent[0], std::stringstream::in); // ^- NOTE: "in" means from stream, into local data, not // into stream (file naming convention) try { while(str.good()) { // clear exception mask, now stream will not throw() when something // unexpected happens. str.exceptions(std::ios::goodbit); std::string s; std::getline( str, s ); if (s.size() == 0 || s[0] == '*' || s[0] == '!') // empty or comment line, throw it away and go on with the next continue; if (!str.good()) // eof, bad etc. break; str.exceptions(std::ios::failbit); // ^- when something fails, throw an exception. this will happen // if the actual file format does not match the one I had in mind // when coding this. // expected format: ElementName[w]OrbitalType[w]AlternativeNames[w] : // using namespace std; // cout << "** read line: '" << s << "'" << endl; std::stringstream line(s, std::stringstream::in); line.exceptions(std::ios::badbit | std::ios::failbit); std::string Element, Type; line >> Element >> Type; if ( Type.size() != 1 ) throw std::runtime_error( "Parsing error, cannot interpret orbital type '" + Type + "'" ); int AngMom; std::vector<std::pair<int,int> > Cos; std::vector<double> Exps; char cAngMom = ::tolower(Type[0]); for (AngMom = 0; AngMom < 9; ++ AngMom) if (cAngMom == "spdfghikl"[AngMom]) break; if (AngMom == 9) throw std::runtime_error((format("Failed to understand angular momentum '%s'.") % cAngMom).str()); // std::cout << "Element " << Element << " Type " << Type << std::endl; FStringList BasisNames; // all names of basis sets in which the // current entry is to be inserted. for (line >> s; s != ":"; line >> s) { // std::cout << "Alternative Name:" << s << std::endl; BasisNames.push_back( tolower(stripwhitespace(s)) ); AllBasisNames.insert(stripwhitespace(s)); } // expected format: #prim orbitals #contractions (#contr.)*[a.b] // denoting indices of begin and end of a contraction with the // following exponents/contraction coefficients. int nExp, nCo, nCoeff(0), // total number of contraction coefficients to read (in all contractions). nHighestExpInCo(0); // 1-based index. line >> nExp >> nCo; // std::cout << "#Prim " << nExp << " #Co " << nCo << std::endl; Cos.reserve(nCo); for (int i = 0; i < nCo; ++ i){ std::pair<int,int> iCo; char Dot; line >> iCo.first >> Dot >> iCo.second; iCo.first -= 1; // convert to 0-based [begin,end). if (Dot != '.') throw std::runtime_error("GTO-Contraction read format error."); // std::cout << " Co: #" << iCo.first << "-#" << iCo.second << std::endl; if (iCo.second <= iCo.first || iCo.second > nExp) throw std::runtime_error("GTO-Contraction logical error."); nCoeff += iCo.second - iCo.first; nHighestExpInCo = std::max(nHighestExpInCo, iCo.second); Cos.push_back(iCo); } std::string EntryComment; do{ // read name, maybe skip comments. getline(str, EntryComment); } while (EntryComment.size() != 0 && EntryComment[0] == '*'); // cout << "Entry Comment: " << EntryComment << endl; // now read exponents and contraction coefficients; // (this will break if comments are present in between) Exps.resize(nExp); std::vector<double> Coeffs(nCoeff, 0); for ( int i = 0; i < nExp; ++ i ){ str >> Exps[i]; // cout << "Exp: " << Exps.back() << endl; } // read in contraction coefficients. if ( nCo != 0 ) for ( int i = 0; i < nCoeff; ++ i ){ double Coeff; str >> Coeff; // cout << "Coeff: " << Coeff << endl; Coeffs[i] = Coeff; } // copy over the contraction coefficients to the contractions. std::vector<double> CoMatrix(Exps.size() * nCo, 0.); int iCoeff = 0; for ( int i = 0; i < nCo; ++ i ){ for (int j = Cos[i].first; j != Cos[i].second; ++ j) CoMatrix[j + i*Exps.size()] = Coeffs[iCoeff + j - Cos[i].first]; iCoeff += (Cos[i].second - Cos[i].first); } // in some files some primitive orbitals are left uncontracted. // but these are not stored as 1-GTO contractions but the // coefficients for these are just not present in the file. // Make 1-GTO contractions for them. int nAdditionalCo = nExp - nHighestExpInCo; if (0 != nAdditionalCo) { // generate 1-GTO-each contractions manually. int nCoExplicit = nCo; nCo += nAdditionalCo; CoMatrix.resize(Exps.size() * nCo, 0.); int iStart = nHighestExpInCo; // ^- 0 based index, the rhs one is 1-based. for ( int i = 0; i < nAdditionalCo; ++ i ) { int iCo = i + nCoExplicit; CoMatrix[(i + iStart) + Exps.size() * iCo] = 1.; } } // import all names of the basis function FStringList::const_iterator itName; _for_each(itName, BasisNames) m_BasisNames.insert(*itName); // make the actual basis function and link it to all the names. FAtomShellPtr pBfn(new FAtomShell(AngMom, &Exps[0], Exps.size(), &CoMatrix[0], CoMatrix.size()/Exps.size())); int iElement = ElementNumberFromName(Element); _for_each(itName, BasisNames) m_BasisFns.insert( FBasisFnMap::value_type(MakeKey(*itName, iElement), pBfn) ); // chew the EOL marker if present, leave loop otherwise. str.exceptions(std::ios::goodbit); str.ignore(0xbad, '\n'); }; } catch (std::ios_base::failure &e){ // this is not exactly something i would usually // call "error handling" but i really hate this string- // fiddling stuff and we can't really do anything better // about it anyway. std::cerr << "PARSER EXCEPTION:" << e.what() << std::endl; throw std::runtime_error( "Parsing of LIBMOL file FAILED because the actual syntax did not match the expected one. Last entry successfully processed: "); } catch (std::exception &e){ std::cerr << "Exception during LibmolFile parsing: " << e.what() << std::endl; throw; }; // if provided, write some imporant looking comments about what // we loaded to the standard output. if (pxout) { std::ostream &xout = *pxout; std::size_t iDirSep = FileName.rfind('/'); if ( iDirSep == std::string::npos ) iDirSep = 0; else iDirSep += 1; xout << format(" LOADED %-25s") % FileName.substr(iDirSep); if ( 1 ) { xout << "["; FBasisNameSet::const_iterator itSet; uint nLen = 0; _for_each(itSet, AllBasisNames) { if ( nLen >= 40 ) { xout << ",..."; break; } if (itSet != AllBasisNames.begin()) xout << ", "; xout << *itSet; nLen += itSet->size(); } xout << "]"; } xout << std::endl; }