bool OBMoleculeFormat::ReadChemObjectImpl(OBConversion* pConv, OBFormat* pFormat) { std::istream &ifs = *pConv->GetInStream(); if (!ifs.good()) //Possible to omit? ifs.peek() == EOF || return false; OBMol* pmol = new OBMol; std::string auditMsg = "OpenBabel::Read molecule "; std::string description(pFormat->Description()); auditMsg += description.substr(0,description.find('\n')); obErrorLog.ThrowError(__FUNCTION__, auditMsg, obAuditMsg); if(pConv->IsOption("C",OBConversion::GENOPTIONS)) return DeferMolOutput(pmol, pConv, pFormat); bool ret; if(pConv->IsOption("separate",OBConversion::GENOPTIONS)) { //On first call, separate molecule and put fragments in MolArray. //On subsequent calls, remove a fragment from MolArray and send it for writing //Done this way so that each fragment can be written to its own file (with -m option) if(!StoredMolsReady) { ret = pFormat->ReadMolecule(pmol,pConv); if(ret && (pmol->NumAtoms() > 0 || (pFormat->Flags()&ZEROATOMSOK))) MolArray = pmol->Separate(); //use un-transformed molecule //Add an appropriate title to each fragment for(int i=0;i<MolArray.size();++i) { stringstream ss; ss << pmol->GetTitle() << '#' << i+1; string title = ss.str(); MolArray[i].SetTitle(title); } reverse(MolArray.begin(),MolArray.end()); StoredMolsReady = true; //Clear the flags of the input stream(which may have found eof) to ensure will //try to read anothe molecule and allow the stored ones to be sent for output. pConv->GetInStream()->clear(); } if(MolArray.empty()) //normal end of fragments ret =false; else { // Copying is needed because the OBMol passed to AddChemObject will be deleted. // The OBMol in the vector is deleted here. OBMol* pMolCopy = new OBMol( MolArray.back()); MolArray.pop_back(); ret = pConv->AddChemObject( pMolCopy->DoTransformations(pConv->GetOptions(OBConversion::GENOPTIONS)))!=0; } if(!ret) StoredMolsReady = false; delete pmol; return ret; } ret=pFormat->ReadMolecule(pmol,pConv); OBMol* ptmol = NULL; //Molecule is valid if it has some atoms //or the format allows zero-atom molecules and it has a title if(ret && (pmol->NumAtoms() > 0 || (pFormat->Flags()&ZEROATOMSOK && *pmol->GetTitle()))) { ptmol = static_cast<OBMol*>(pmol->DoTransformations(pConv->GetOptions(OBConversion::GENOPTIONS))); if(ptmol && (pConv->IsOption("j",OBConversion::GENOPTIONS) || pConv->IsOption("join",OBConversion::GENOPTIONS))) { //With j option, accumulate all mols in one stored in this class if(pConv->IsFirstInput()) _jmol = new OBMol; pConv->AddChemObject(_jmol); //will be discarded in WriteChemObjectImpl until the last input mol. This complication //is needed to allow joined molecules to be from different files. pOb1 in AddChem Object //is zeroed at the end of a file and _jmol is in danger of not being output. *_jmol += *ptmol; delete ptmol; return true; } } else delete pmol; // Normal operation - send molecule to be written ret = ret && (pConv->AddChemObject(ptmol)!=0); //success of both writing and reading return ret; }
int main(int argc,char *argv[]) { // turn off slow sync with C-style output (we don't use it anyway). std::ios::sync_with_stdio(false); OBConversion conv; OBFormat *inFormat, *canFormat; OBMol mol; ifstream ifs; vector<OBMol> fragments; unsigned int fragmentCount = 0; // track how many in library -- give a running count map<string, int> index; // index of cansmi string currentCAN; unsigned int size; OBAtom *atom; OBBond *bond; bool nonRingAtoms, nonRingBonds; char buffer[BUFF_SIZE]; canFormat = conv.FindFormat("can"); conv.SetOutFormat(canFormat); if (argc < 2) { cout << "Usage: obfragment <file>" << endl; return(-1); } for (int i = 1; i < argc; i++) { cerr << " Reading file " << argv[i] << endl; inFormat = conv.FormatFromExt(argv[i]); if(inFormat==NULL || !conv.SetInFormat(inFormat)) { cerr << " Cannot read file format for " << argv[i] << endl; continue; // try next file } ifs.open(argv[i]); if (!ifs) { cerr << "Cannot read input file: " << argv[i] << endl; continue; } while(ifs.peek() != EOF && ifs.good()) { conv.Read(&mol, &ifs); if (!mol.Has3D()) continue; // invalid coordinates! mol.DeleteHydrogens(); // remove these before we do anything else do { nonRingAtoms = false; size = mol.NumAtoms(); for (unsigned int i = 1; i <= size; ++i) { atom = mol.GetAtom(i); if (!atom->IsInRing()) { mol.DeleteAtom(atom); nonRingAtoms = true; break; // don't know how many atoms there are } // Previously, we changed atoms to carbon here. // Now we perform this alchemy in terms of string-rewriting // once the canonical SMILES is generated } } while (nonRingAtoms); if (mol.NumAtoms() < 3) continue; if (mol.NumBonds() == 0) continue; do { nonRingBonds = false; size = mol.NumBonds(); for (unsigned int i = 0; i < size; ++i) { bond = mol.GetBond(i); if (!bond->IsInRing()) { mol.DeleteBond(bond); nonRingBonds = true; break; // don't know how many bonds there are } } } while (nonRingBonds); fragments = mol.Separate(); for (unsigned int i = 0; i < fragments.size(); ++i) { if (fragments[i].NumAtoms() < 3) // too small to care continue; currentCAN = conv.WriteString(&fragments[i], true); currentCAN = RewriteSMILES(currentCAN); // change elements to "a/A" for compression if (index.find(currentCAN) != index.end()) { // already got this index[currentCAN] += 1; // add to the count for bookkeeping continue; } index[currentCAN] = 1; // don't ever write this ring fragment again // OK, now retrieve the canonical ordering for the fragment vector<string> canonical_order; if (fragments[i].HasData("Canonical Atom Order")) { OBPairData *data = (OBPairData*)fragments[i].GetData("Canonical Atom Order"); tokenize(canonical_order, data->GetValue().c_str()); } // Write out an XYZ-style file with the CANSMI as the title cout << fragments[i].NumAtoms() << '\n'; cout << currentCAN << '\n'; // endl causes a flush vector<string>::iterator can_iter; unsigned int order; OBAtom *atom; fragments[i].Center(); fragments[i].ToInertialFrame(); for (unsigned int index = 0; index < canonical_order.size(); ++index) { order = atoi(canonical_order[index].c_str()); atom = fragments[i].GetAtom(order); snprintf(buffer, BUFF_SIZE, "C%8.3f%8.3f%8.3f\n", atom->x(), atom->y(), atom->z()); cout << buffer; } } fragments.clear(); if (index.size() > fragmentCount) { fragmentCount = index.size(); cerr << " Fragments: " << fragmentCount << endl; } } // while reading molecules (in this file) ifs.close(); ifs.clear(); } // while reading files // loop through the map and output frequencies map<string, int>::const_iterator indexItr; for (indexItr = index.begin(); indexItr != index.end(); ++indexItr) { cerr << (*indexItr).second << " INDEX " << (*indexItr).first << "\n"; } return(0); }