void testAutomorphismMask() { // read file: 3 6-rings // // /\ /\ /\ // | | | | // \/ \/ \/ // cout << "testAutomorphismMask" << endl; OBMol mol; OBConversion conv; conv.SetInFormat("cml"); std::ifstream ifs(OBTestUtil::GetFilename("isomorphism1.cml").c_str()); OB_REQUIRE( ifs ); conv.Read(&mol, &ifs); OBIsomorphismMapper::Mappings maps; // First of all, how many automorphisms are there without any mask? // This takes about 20 seconds, so you may want to comment this out while debugging FindAutomorphisms(&mol, maps); cout << maps.size() << endl; OB_ASSERT( maps.size() == 4 ); // Now, let's remove the bridge (atomId 6) of the central ring. // // /\ /\ /\ // | | | | // \/ \/ // both rings can be flipped around exocyclic bond, the whole molecule can be mirrored // horizontally, this results in 2 x 2 x 2 = 8 automorphisms OBBitVec mask; mask.SetRangeOn(1, mol.NumAtoms()); mask.SetBitOff(6+1); FindAutomorphisms(&mol, maps, mask); cout << maps.size() << endl; for (unsigned int i = 0; i < maps.size(); ++i) { OBIsomorphismMapper::Mapping::const_iterator j; for (j = maps[i].begin(); j != maps[i].end(); ++j) cout << j->second << " "; cout << endl; } OB_ASSERT( maps.size() == 8 ); // Verify that atom Id 6 does not occur anywhere in the mappings OBIsomorphismMapper::Mappings::const_iterator a; OBIsomorphismMapper::Mapping::const_iterator b; for (a = maps.begin(); a != maps.end(); ++a) for (b = a->begin(); b!= a->end(); ++b) { OB_ASSERT( b->first != 6 ); OB_ASSERT( b->second != 6 ); } }
bool OpNewS::Do(OBBase* pOb, const char* OptionText, OpMap* pmap, OBConversion* pConv) { OBMol* pmol = dynamic_cast<OBMol*>(pOb); if(!pmol) return false; // The SMARTS and any other parameters are extracted on the first molecule // and stored in the member variables. The parameter is cleared so that // the original -s option in transform.cpp is inactive //string txt(pmap->find(GetID())->second); // ID can be "s" or "v" vector<OBQuery*>::iterator qiter; if(OptionText && *OptionText)//(!pConv || pConv->IsFirstInput()) { //Set up on first call queries.clear(); query=NULL; nPatternAtoms=0; inv=false; tokenize(vec, OptionText); inv = GetID()[0]=='v'; if(vec[0][0]=='~') { inv = true; vec[0].erase(0,1); } //Do not filter out any molecules if there is a parameter "showall"; //allows -s option to be used for highlighting substructures (--highlight also does this) vector<string>::iterator it = std::remove(vec.begin(), vec.end(),"showall"); showAll = it != vec.end(); if(showAll) vec.erase(it); //Store the number of matches required, if as a number in the second parameter, else 0. nmatches = 0; comparechar = '\0'; if(vec.size()>1) { comparechar = vec[1][0]; if(comparechar=='>' || comparechar=='<') vec[1].erase(0,1); else comparechar = '\0'; nmatches = atoi(vec[1].c_str()); if(nmatches) //remove this parameter to still allow coloring vec.erase(vec.begin()+1); } //Interpret as a filename if possible MakeQueriesFromMolInFile(queries, vec[0], &nPatternAtoms, strstr(OptionText,"noH")); vec.erase(remove(vec.begin(),vec.end(),"noH"),vec.end());//to prevent "noH2" being seen as a color if(queries.empty()) { //SMARTS supplied // Explicit H in SMARTS requires explicit H in the molecule. // Calling AddHydrogens() on a copy of the molecule is done in parsmart.cpp // only when SMARTS contains [H]. Doing more has complications with atom typing, // so AddHydrogens here on the molecule (not a copy) when #1 detected. addHydrogens = (vec[0].find("#1]")!=string::npos); // If extra target mols have been supplied, make a composite SMARTS // to test for any of the targets. if(ExtraMols.size()>0) { for(unsigned i=0;i<ExtraMols.size();++i) { OBConversion extraConv; extraConv.AddOption("h"); if(!extraConv.SetOutFormat("smi")) return false; // Add option which avoids implicit H being added to the SMARTS. // The parameter must be present but can be anything. extraConv.AddOption("h",OBConversion::OUTOPTIONS, "X"); xsmarts += ",$(" + extraConv.WriteString(ExtraMols[i], true) + ")"; } } string ysmarts = xsmarts.empty() ? vec[0] : "[$(" + vec[0] + ")" + xsmarts +"]"; xsmarts.clear(); if(!sp.Init(ysmarts)) { string msg = ysmarts + " cannot be interpreted as either valid SMARTS " "or the name of a file with an extension known to OpenBabel " "that contains one or more pattern molecules."; obErrorLog.ThrowError(__FUNCTION__, msg, obError, onceOnly); delete pmol; pmol = NULL; pConv->SetOneObjectOnly(); //stop conversion return false; } } else { // Target is in a file. Add extra targets if any supplied for(unsigned i=0;i<ExtraMols.size();++i) queries.push_back(CompileMoleculeQuery(static_cast<OBMol*>(ExtraMols[i]))); ExtraMols.clear(); } if(vec.size()>1 && vec[1]=="exact") { if(queries.empty()) { //Convert SMARTS to SMILES to count number of atoms OBConversion conv; OBMol patmol; if(!conv.SetInFormat("smi") || !conv.ReadString(&patmol, vec[0])) { obErrorLog.ThrowError(__FUNCTION__, "Cannot read the parameter of -s option, " "which has to be valid SMILES when the exact option is used.", obError, onceOnly); delete pmol; if(pConv) pConv->SetOneObjectOnly(); //stop conversion return false; } nPatternAtoms = patmol.NumHvyAtoms(); } } else nPatternAtoms = 0; //disable old versions if(pConv) pConv->AddOption(GetID(), OBConversion::GENOPTIONS, ""); } bool match = false; //These are a vector of each mapping, each containing atom indxs. vector<vector<int> > vecatomvec; vector<vector<int> >* pMappedAtoms = NULL; if(nPatternAtoms) if(pmol->NumHvyAtoms() != nPatternAtoms) return false; unsigned int imol=0; //index of mol in pattern file if(!queries.empty()) //filename supplied { //match is set true if any of the structures match - OR behaviour for(qiter=queries.begin();qiter!=queries.end();++qiter, ++imol) { OBIsomorphismMapper* mapper = OBIsomorphismMapper::GetInstance(*qiter); OBIsomorphismMapper::Mappings mappings; mapper->MapUnique(pmol, mappings); if( (match = !mappings.empty()) ) // extra parens to indicate truth value { OBIsomorphismMapper::Mappings::iterator ita; OBIsomorphismMapper::Mapping::iterator itb; for(ita=mappings.begin(); ita!=mappings.end();++ita)//each mapping { vector<int> atomvec; for(itb=ita->begin(); itb!=ita->end();++itb)//each atom index atomvec.push_back(itb->second+1); vecatomvec.push_back(atomvec); atomvec.clear(); } pMappedAtoms = &vecatomvec; break; } } } else //SMARTS supplied { if(addHydrogens) pmol->AddHydrogens(false,false); if( (match = sp.Match(*pmol)) ) // extra parens to indicate truth value { pMappedAtoms = &sp.GetMapList(); if(nmatches!=0) { int n = sp.GetUMapList().size(); if(comparechar=='>') match = (n > nmatches); else if(comparechar=='<') match = (n < nmatches); else match = (n == nmatches); } } } if((!showAll && (!match && !inv)) || (match && inv)) { //delete a non-matching mol delete pmol; pmol = NULL; return false; } if(match) //Copy the idxes of the first match to a member variable so that it can be retrieved from outside firstmatch.assign(pMappedAtoms->begin()->begin(), pMappedAtoms->begin()->end()); else firstmatch.clear(); if(match && !inv && vec.size()>=2 && !vec[1].empty() && !nPatternAtoms) { vector<vector<int> >::iterator iter; if (vec[1]=="extract" || (vec.size()>3 && vec[2]=="extract")) { //Delete all unmatched atoms. Use only the first match ExtractSubstruct(pmol, *pMappedAtoms->begin()); return true; } // color the substructure if there is a second parameter which is not "exact" or "extract" or "noH" // with multiple color parameters use the one corresponding to the query molecule, or the last if(imol>vec.size()-2) imol = vec.size()-2; for(iter=pMappedAtoms->begin();iter!=pMappedAtoms->end();++iter)//each match AddDataToSubstruct(pmol, *iter, "color", vec[imol+1]); return true; } if(pConv && pConv->IsLast()) { for(qiter=queries.begin();qiter!=queries.end();++qiter) delete *qiter; queries.clear(); } return true; }
bool OpNewS::Do(OBBase* pOb, const char* OptionText, OpMap* pmap, OBConversion* pConv) { OBMol* pmol = dynamic_cast<OBMol*>(pOb); if(!pmol) return false; // The SMARTS and any other parameters are extracted on the first molecule // and stored in the static variables vec, inv. The parameter is cleared so that: // (a) the original -s option in transform.cpp is inactive, and // (b) the parsing does not have to be done again for multi-molecule files string txt(pmap->find(GetID())->second); // ID can be "s" or "v" static vector<string> vec; static bool inv; static int nPatternAtoms; //non-zero for exact matches static OBQuery* query; static vector<OBQuery*> queries; vector<OBQuery*>::iterator qiter; if(!txt.empty()) { //Set up on first call tokenize(vec, txt); inv = GetID()[0]=='v'; if(vec[0][0]=='~') { inv = true; vec[0].erase(0,1); } //Interpret as a filename if possible MakeQueriesFromMolInFile(queries, vec[0], &nPatternAtoms); if(vec.size()>1 && vec[1]=="exact") { if(queries.empty()) { //Convert SMARTS to SMILES to count number of atoms OBConversion conv; OBMol patmol; if(!conv.SetInFormat("smi") || !conv.ReadString(&patmol, vec[0])) { obErrorLog.ThrowError(__FUNCTION__, "Cannot read the parameter of -s option, " "which has to be valid SMILES when the exact option is used.", obError, onceOnly); delete pmol; pConv->SetOneObjectOnly(); //stop conversion return false; } nPatternAtoms = patmol.NumHvyAtoms(); } } else nPatternAtoms = 0; //disable old versions pConv->AddOption(GetID(), OBConversion::GENOPTIONS, ""); } bool match; //These are a vector of each mapping, each containing atom indxs. vector<vector<int> > vecatomvec; vector<vector<int> >* pMappedAtoms = NULL; OBSmartsPattern sp; if(nPatternAtoms) if(pmol->NumHvyAtoms() != nPatternAtoms) return false; int imol=0; //index of mol in pattern file if(!queries.empty()) //filename supplied { //match is set true if any of the structures match - OR behaviour for(qiter=queries.begin();qiter!=queries.end();++qiter, ++imol) { OBIsomorphismMapper* mapper = OBIsomorphismMapper::GetInstance(*qiter); OBIsomorphismMapper::Mappings mappings; mapper->MapUnique(pmol, mappings); if( (match = !mappings.empty()) ) // extra parens to indicate truth value { OBIsomorphismMapper::Mappings::iterator ita; OBIsomorphismMapper::Mapping::iterator itb; for(ita=mappings.begin(); ita!=mappings.end();++ita)//each mapping { vector<int> atomvec; for(itb=ita->begin(); itb!=ita->end();++itb)//each atom index atomvec.push_back(itb->second+1); vecatomvec.push_back(atomvec); atomvec.clear(); } pMappedAtoms = &vecatomvec; break; } } } else //SMARTS supplied { if(!sp.Init(vec[0])) { string msg = vec[0] + " cannot be interpreted as either valid SMARTS " "or the name of a file with an extension known to OpenBabel " "that contains one or more pattern molecules."; obErrorLog.ThrowError(__FUNCTION__, msg, obError, onceOnly); delete pmol; pmol = NULL; pConv->SetOneObjectOnly(); //stop conversion return false; } if( (match = sp.Match(*pmol)) ) // extra parens to indicate truth value pMappedAtoms = &sp.GetMapList(); } if((!match && !inv) || (match && inv)) { //delete a non-matching mol delete pmol; pmol = NULL; return false; } if(!inv && vec.size()>=2 && !vec[1].empty() && !nPatternAtoms) { vector<vector<int> >::iterator iter; if(vec[1]=="extract") { //Delete all unmatched atoms. Use only the first match ExtractSubstruct(pmol, *pMappedAtoms->begin()); return true; } // color the substructure if there is a second parameter which is not "exact" or "extract" // with multiple color parameters use the one corresponding to the query molecule, or the last if(imol>vec.size()-2) imol = vec.size()-2; for(iter=pMappedAtoms->begin();iter!=pMappedAtoms->end();++iter)//each match AddDataToSubstruct(pmol, *iter, "color", vec[imol+1]); return true; } if(pConv && pConv->IsLast()) { for(qiter=queries.begin();qiter!=queries.end();++qiter) delete *qiter; queries.clear(); } return true; }