// Reads `test` into a molecule through `conv`, runs chainsparser.PerceiveChains
// on it and reports through Boost.Test whether exactly two residues came out.
//
//   conv       conversion object used to parse the string
//   test       textual molecule description handed to conv.ReadString
//   testCount  running test number, only used in the failure report
//
// On success both residues are fetched and checked to be non-null; on failure
// a TAP-style "not ok" report listing the perceived residue names is attached
// to a failing Boost check.
void CheckValidDipeptide(OBConversion &conv, const string &test, unsigned int testCount)
{
  OBMol mol;
  mol.Clear();
  conv.ReadString(&mol, test);
  chainsparser.PerceiveChains(mol);

  if (mol.NumResidues() == 2)
    {
      OBResidue *first = mol.GetResidue(0);
      BOOST_CHECK_MESSAGE( first, "Get first AA from dipeptide" );

      OBResidue *second = mol.GetResidue(1);
      BOOST_CHECK_MESSAGE( second, "Get second AA from dipeptide" );
      return;
    }

  // Wrong residue count: build the diagnostic and fail unconditionally.
  ostringstream report;
  report << "not ok " << testCount << " # expected 2 residues, but found "
         << mol.NumResidues() << '\n';
  report << "# ";
  FOR_RESIDUES_OF_MOL(residue, mol)
    report << residue->GetName() << " ";
  report << endl;

  BOOST_CHECK_MESSAGE( 0, report.str().c_str() );
}
// Reads `test` into a molecule through `conv`, runs chainsparser.PerceiveChains
// on it and prints a TAP-style line to stdout saying whether the input was
// (correctly) NOT recognised as a residue.
//
//   conv       conversion object used to parse the string
//   test       textual molecule description handed to conv.ReadString
//   testCount  running test number printed in the result line
//
// Outcomes:
//   - no residues perceived          -> "ok ... correctly rejected"
//   - first residue is named "LIG"   -> "ok ... found ligand, not residue"
//   - anything else                  -> "not ok" plus the offending name
void CheckInvalidResidue(OBConversion &conv, const string &test, unsigned int testCount)
{
  OBMol mol;
  mol.Clear();
  conv.ReadString(&mol, test);
  chainsparser.PerceiveChains(mol);

  if (mol.NumResidues() == 0)
    {
      cout << "ok " << testCount << " # correctly rejected " << test << '\n';
      return;
    }

  OBResidue *first = mol.GetResidue(0);
  const bool isLigand = (first->GetName() == "LIG");
  if (isLigand)
    {
      // ligand, not residue
      cout << "ok " << testCount << " # found ligand, not residue " << test << '\n';
      return;
    }

  cout << "not ok " << testCount << " # expected 0 residues, found "
       << mol.NumResidues() << '\n';
  cout << "# " << first->GetName() << endl;
}
// Reads `test` into a molecule through `conv`, runs chainsparser.PerceiveChains
// on it and reports through Boost.Test whether the input was (correctly) NOT
// recognised as a residue.
//
//   conv       conversion object used to parse the string
//   test       textual molecule description handed to conv.ReadString
//   testCount  running test number, only used in the failure report
//
// A passing check is recorded when no residue is perceived or when the first
// residue is named "LIG"; otherwise a failing check carrying a "not ok"
// report with the residue name is recorded.
void CheckInvalidResidue(OBConversion &conv, const string &test, unsigned int testCount)
{
  OBMol mol;
  mol.Clear();
  conv.ReadString(&mol, test);
  chainsparser.PerceiveChains(mol);

  if (mol.NumResidues() == 0)
    {
      BOOST_CHECK( 1 );
      return;
    }

  OBResidue *first = mol.GetResidue(0);
  if (first->GetName() == "LIG")
    {
      // ligand, not residue
      BOOST_CHECK( 1 );
      return;
    }

  ostringstream report;
  report << "not ok " << testCount << " # expected 0 residues, found "
         << mol.NumResidues() << '\n';
  report << "# " << first->GetName() << endl;
  BOOST_CHECK_MESSAGE( 0, report.str().c_str() );
}
// Reads `test` into a molecule through `conv`, runs chainsparser.PerceiveChains
// on it and prints a TAP-style line to stdout saying whether exactly two
// residues were perceived.
//
//   conv       conversion object used to parse the string
//   test       textual molecule description handed to conv.ReadString
//   testCount  running test number printed in the result line
//
// Success prints "ok <n> # <name0> <name1>"; failure prints "not ok" with the
// residue count followed by a comment line listing every perceived name.
void CheckValidDipeptide(OBConversion &conv, const string &test, unsigned int testCount)
{
  OBMol mol;
  mol.Clear();
  conv.ReadString(&mol, test);
  chainsparser.PerceiveChains(mol);

  if (mol.NumResidues() == 2)
    {
      OBResidue *first = mol.GetResidue(0);
      cout << "ok " << testCount << " # " << first->GetName();

      OBResidue *second = mol.GetResidue(1);
      cout << " " << second->GetName() << '\n';
      return;
    }

  cout << "not ok " << testCount << " # expected 2 residues, but found "
       << mol.NumResidues() << '\n';
  cout << "# ";
  FOR_RESIDUES_OF_MOL(residue, mol)
    cout << residue->GetName() << " ";
  cout << endl;
}
static bool parseAtomRecord(char *buffer, OBMol &mol,int /*chainNum*/) /* ATOMFORMAT "(i5,1x,a4,a1,a3,1x,a1,i4,a1,3x,3f8.3,2f6.2,a2,a2)" */ { string sbuf = &buffer[6]; if (sbuf.size() < 48) return(false); bool hetatm = (EQn(buffer,"HETATM",6)) ? true : false; bool elementFound = false; // true if correct element found in col 77-78 /* serial number */ string serno = sbuf.substr(0,5); /* atom name */ string atmid = sbuf.substr(6,4); /* chain */ char chain = sbuf.substr(15,1)[0]; /* element */ string element = " "; if (sbuf.size() > 71) { element = sbuf.substr(70,2); if (isalpha(element[1])) { if (element[0] == ' ') { element.erase(0, 1); elementFound = true; } else if (isalpha(element[0])) { elementFound = true; } } } if (!elementFound) { stringstream errorMsg; errorMsg << "WARNING: Problems reading a PDB file\n" << " Problems reading a HETATM or ATOM record.\n" << " According to the PDB specification,\n" << " columns 77-78 should contain the element symbol of an atom.\n" << " but OpenBabel found '" << element << "' (atom " << mol.NumAtoms()+1 << ")"; obErrorLog.ThrowError(__FUNCTION__, errorMsg.str(), obWarning); } // charge - optional string scharge; if (sbuf.size() > 73) { scharge = sbuf.substr(72,2); } //trim spaces on the right and left sides while (!atmid.empty() && atmid[0] == ' ') atmid = atmid.erase(0, 1); while (!atmid.empty() && atmid[atmid.size()-1] == ' ') atmid = atmid.substr(0,atmid.size()-1); /* residue name */ string resname = sbuf.substr(11,3); if (resname == " ") resname = "UNK"; else { while (!resname.empty() && resname[0] == ' ') resname = resname.substr(1,resname.size()-1); while (!resname.empty() && resname[resname.size()-1] == ' ') resname = resname.substr(0,resname.size()-1); } string type; if (!elementFound) { // OK, we have to fall back to determining the element from the atom type // This is unreliable, but there's no other choice if (EQn(buffer,"ATOM",4)) { type = atmid.substr(0,2); if (isdigit(type[0])) { // sometimes non-standard files 
have, e.g 11HH if (!isdigit(type[1])) type = atmid.substr(1,1); else type = atmid.substr(2,1); } else if ((sbuf[6] == ' ' && strncasecmp(type.c_str(), "Zn", 2) != 0 && strncasecmp(type.c_str(), "Fe", 2) != 0) || isdigit(type[1])) //type[1] is digit in Platon type = atmid.substr(0,1); // one-character element if (resname.substr(0,2) == "AS" || resname[0] == 'N') { if (atmid == "AD1") type = "O"; if (atmid == "AD2") type = "N"; } if (resname.substr(0,3) == "HIS" || resname[0] == 'H') { if (atmid == "AD1" || atmid == "AE2") type = "N"; if (atmid == "AE1" || atmid == "AD2") type = "C"; } if (resname.substr(0,2) == "GL" || resname[0] == 'Q') { if (atmid == "AE1") type = "O"; if (atmid == "AE2") type = "N"; } // fix: #2002557 if (atmid[0] == 'H' && (atmid[1] == 'D' || atmid[1] == 'E' || atmid[1] == 'G' || atmid[1] == 'H')) // HD, HE, HG, HH, .. type = "H"; } else { //must be hetatm record if (isalpha(element[1]) && (isalpha(element[0]) || (element[0] == ' '))) { if (isalpha(element[0])) type = element.substr(0,2); else type = element.substr(1,1); if (type.size() == 2) type[1] = tolower(type[1]); } else { // no element column to use if (isalpha(atmid[0])) { if (atmid.size() > 2 && (atmid[2] == '\0' || atmid[2] == ' ')) type = atmid.substr(0,2); else if (atmid[0] == 'A') // alpha prefix type = atmid.substr(1, atmid.size() - 1); else type = atmid.substr(0,1); } else if (atmid[0] == ' ') type = atmid.substr(1,1); // one char element else type = atmid.substr(1,2); // Some cleanup steps if (atmid == resname) { type = atmid; if (type.size() == 2) type[1] = tolower(type[1]); } else if (resname == "ADR" || resname == "COA" || resname == "FAD" || resname == "GPG" || resname == "NAD" || resname == "NAL" || resname == "NDP" || resname == "ABA") { if (type.size() > 1) type = type.substr(0,1); //type.erase(1,type.size()-1); } else // other residues if (isdigit(type[0])){ type = type.substr(1,1); } else if (type.size() > 1 && isdigit(type[1])) type = type.substr(0,1); else if 
(type.size() > 1 && isalpha(type[1])) { if (type[0] == 'O' && type[1] == 'H') type = type.substr(0,1); // no "Oh" element (e.g. 1MBN) else if(isupper(type[1])) { type[1] = tolower(type[1]); } } } } // HETATM records } // no element column to use OBAtom atom; /* X, Y, Z */ string xstr = sbuf.substr(24,8); string ystr = sbuf.substr(32,8); string zstr = sbuf.substr(40,8); vector3 v(atof(xstr.c_str()),atof(ystr.c_str()),atof(zstr.c_str())); atom.SetVector(v); atom.ForceImplH(); // useful for debugging unknown atom types (e.g., PR#1577238) // cout << mol.NumAtoms() + 1 << " : '" << element << "'" << " " << etab.GetAtomicNum(element.c_str()) << endl; if (elementFound) atom.SetAtomicNum(etab.GetAtomicNum(element.c_str())); else // use our old-style guess from athe atom type atom.SetAtomicNum(etab.GetAtomicNum(type.c_str())); if ( (! scharge.empty()) && " " != scharge ) { if ( isdigit(scharge[0]) && ('+' == scharge[1] || '-' == scharge[1]) ) { const char reorderCharge[3] = { scharge[1], scharge[0], '\0' }; const int charge = atoi(reorderCharge); atom.SetFormalCharge(charge); } else { stringstream errorMsg; errorMsg << "WARNING: Problems reading a PDB file\n" << " Problems reading a HETATM or ATOM record.\n" << " According to the PDB specification,\n" << " columns 79-80 should contain charge of the atom\n" << " but OpenBabel found '" << scharge << "' (atom " << mol.NumAtoms()+1 << ")."; obErrorLog.ThrowError(__FUNCTION__, errorMsg.str(), obWarning); } } else { atom.SetFormalCharge(0); } /* residue sequence number */ string resnum = sbuf.substr(16,4); OBResidue *res = (mol.NumResidues() > 0) ? 
mol.GetResidue(mol.NumResidues()-1) : NULL; if (res == NULL || res->GetName() != resname || res->GetNumString() != resnum || res->GetChain() != chain) { vector<OBResidue*>::iterator ri; for (res = mol.BeginResidue(ri) ; res ; res = mol.NextResidue(ri)) if (res->GetName() == resname && res->GetNumString() == resnum && static_cast<int>(res->GetChain()) == chain) break; if (res == NULL) { res = mol.NewResidue(); res->SetChain(chain); res->SetName(resname); res->SetNum(resnum); } } if (!mol.AddAtom(atom)) return(false); else { OBAtom *atom = mol.GetAtom(mol.NumAtoms()); res->AddAtom(atom); res->SetSerialNum(atom, atoi(serno.c_str())); res->SetAtomID(atom, sbuf.substr(6,4)); res->SetHetAtom(atom, hetatm); return(true); } } // end reading atom records
int main(int argc,char **argv) { char *program_name= argv[0]; int c; char *FileIn = NULL; if (argc != 2) { string err = "Usage: "; err += program_name; err += " <filename>\n" "Output format:\n" "name NAME\n" "formula FORMULA\n" "mol_weight MOLECULAR_WEIGHT\n" "exact_mass ISOTOPIC MASS\n" "canonical_SMILES STRING\n" "InChI STRING\n" "num_atoms NUM\n" "num_bonds NUM\n" "num_residues NUM\n" "num_rotors NUM\n" "sequence RESIDUE_SEQUENCE\n" "num_rings NUMBER_OF_RING_(SSSR)\n" "logP NUM\n" "PSA POLAR_SURFACE_AREA\n" "MR MOLAR REFRACTIVITY"; err += "$$$$"; // ThrowError(err); wasn't being output because error level too low cerr << err; //Why not do directly exit(-1); } else { FileIn = argv[1]; } // Find Input filetype OBConversion conv; OBFormat *format = conv.FormatFromExt(FileIn); if (!format || !conv.SetInFormat(format)) { cerr << program_name << ": cannot read input format!" << endl; exit (-1); } ifstream ifs; // Read the file ifs.open(FileIn); if (!ifs) { cerr << program_name << ": cannot read input file!" << endl; exit (-1); } OBMol mol; OBFormat *canSMIFormat = conv.FindFormat("can"); OBFormat *inchiFormat = conv.FindFormat("inchi"); //////////////////////////////////////////////////////////////////////////// // List of properties // Name // Molecular weight (Standard molar mass given by IUPAC atomic masses) // Number of rings : the size of the smallest set of smallest rings (SSSR) //.....ADD YOURS HERE..... 
for (c = 1;; ++c) { mol.Clear(); conv.Read(&mol, &ifs); if (mol.Empty()) break; if (!mol.HasHydrogensAdded()) mol.AddHydrogens(); // Print the properties if (strlen(mol.GetTitle()) != 0) cout << "name " << mol.GetTitle() << endl; else cout << "name " << FileIn << " " << c << endl; cout << "formula " << mol.GetFormula() << endl; cout << "mol_weight " << mol.GetMolWt() << endl; cout << "exact_mass " << mol.GetExactMass() << endl; string smilesString = "-"; if (canSMIFormat) { conv.SetOutFormat(canSMIFormat); smilesString = conv.WriteString(&mol); if ( smilesString.length() == 0 ) { smilesString = "-"; } } cout << "canonical_SMILES " << smilesString << endl; string inchiString = "-"; if (inchiFormat) { conv.SetOutFormat(inchiFormat); inchiString = conv.WriteString(&mol); if ( inchiString.length() == 0 ) { inchiString = "-"; } } cout << "InChI " << inchiString << endl; cout << "num_atoms " << mol.NumAtoms() << endl; cout << "num_bonds " << mol.NumBonds() << endl; cout << "num_residues " << mol.NumResidues() << endl; cout << "num_rotors " << mol.NumRotors() << endl; if (mol.NumResidues() > 0) cout << "sequence " << sequence(mol) << endl; else cout << "sequence " << "-" << endl; cout << "num_rings " << nrings(mol) << endl; OBDescriptor* pDesc; pDesc= OBDescriptor::FindType("logP"); if(pDesc) cout << "logP " << pDesc->Predict(&mol) << endl; pDesc = OBDescriptor::FindType("TPSA"); if(pDesc) cout << "PSA " << pDesc->Predict(&mol) << endl; pDesc = OBDescriptor::FindType("MR"); if(pDesc) cout << "MR " << pDesc->Predict(&mol) << endl; cout << "$$$$" << endl; // SDF like end of compound descriptor list //Other OBDescriptors could be output here, even ones that were rarely // used. Since these are plugin classes, they may not be loaded, but // then with code like the above they are just ignored. } // end for loop return(0); }