bool Chain::substituent_m ( const int& l1, const int& u1, const int& l2, const int& u2, const Chain* c, vector<cMatchType>& pm ) const { if (l1 > u1 && l2 > u2) return true; else if (l2 > u2) return false; else if (l1 > u1) return false; else { if (!pm.empty ()) pm.clear (); // // only handle one part and left recuisively // string type = c->listOfParts[l1]->getDbId (); if (type == "X") { bool rVal = false; for (int cnt =l2; cnt <= u2+1; cnt++) { vector< list< pair<int,int> > > m; bool mok = substituent_m (l1+1, u1, cnt, u2, c, m); if (mok) { for (int cnt2 =0; cnt2 < m.size (); cnt2++) { m[cnt2].push_front (make_pair (l2, cnt-1)); pm.push_back (m[cnt2]); } rVal = true; } else continue; } return rVal; } else if (type == "T") { vector< list< pair<int,int> > > m; bool mok = substituent_m (l1+1, u1, l2+1, u2, c, m); if (mok) { for (int cnt2 =0; cnt2 < m.size (); cnt2++) { m[cnt2].push_front (make_pair (l2, l2)); pm.push_back (m[cnt2]); } return true; } else return false; } else if (type == "Y") { vector< list< pair<int,int> > > m; bool mok = substituent_m (l1+1, u1, l2, u2, c, m); if (mok) { for (int cnt2 =0; cnt2 < m.size (); cnt2++) { m[cnt2].push_front (make_pair (l2, l2-1)); pm.push_back (m[cnt2]); } return true; } else return false; } else if (type == "Z") { bool rVal = false; for (int cnt =l2+1; cnt <= u2+1; cnt++) { vector< list< pair<int,int> > > m; bool mok = substituent_m (l1+1, u1, cnt, u2, c, m); if (mok) { for (int cnt2 =0; cnt2 < m.size (); cnt2++) { m[cnt2].push_front (make_pair(l2, cnt-1)); pm.push_back (m[cnt2]); } rVal = true; } else continue; } return rVal; } else { cout << "\nNo Such Substituent Type!" << endl; terminate (); } } }
bool Chain::substituent_m ( const int& l1, //start pos of c const int& u1, //end pos of c const int& l2, //start pos of this const int& u2, //end pos of this const Chain* c, vector<cMatchType>& result //result ) const { if (!result.empty ()) result.clear (); assert (l1 <= u1 + 1); assert (l2 <= u2 + 1); // take case! l1 = u1+1 and l2 = u2+1 has been ruled out previously if (l1 == u1 + 1) { if (l2 == u2 + 1) return true; else return false; } if (l2 == u2 + 1) { if (l1 == u1 + 1) return true; else { cMatchType res; for (int i=l1; i <= u1; i++) { string partRef = c->listOfParts[i]->getPartRef (); int keyValue = getKeywords (partRef); if (keyValue != -1) { switch (keyValue) { case 0: case 1: {res.push_back (make_pair(l2,u2));break;} default: return false; } } else { string errno ("Invalid Substituent Type: "); errno += partRef + "!"; throw CoreException (errno); } } result.push_back (res); return true; } } // // only handle one part and left recuisively // int startpos, endpos; assert (l1 < c->listOfParts.size ()); string partRef = c->listOfParts[l1]->getPartRef (); string partType = c->listOfParts[l1]->getPartType (); int keyValue = getKeywords (partRef); if (keyValue != -1) { switch (keyValue) { case 0: { startpos = l2; endpos = u2+1; if (!partType.empty ()) { for (int i=l2; i<=u2; i++) if (!type_match (listOfParts[i]->partType, partType)) endpos = i; break; } break; } case 1: { startpos = l2; endpos = u2+1; for (int i=l2; i<=u2; i++) { if (listOfParts[i]->isBinded) endpos = i; break; } if (!partType.empty ()) { for (int i=l2; i<= endpos-1; i++) { if (!type_match (listOfParts[i]->partType, partType)) endpos = i; break; } } break; } case 2: { startpos = l2+1; endpos = u2+1; if (!partType.empty ()) { for (int i=l2; i<=u2; i++) if (!type_match (listOfParts[i]->partType, partType)) endpos = i; break; } break; } case 3: { startpos = l2+1; endpos = u2+1; for (int i=l2; i<=u2; i++) { if (listOfParts[i]->isBinded) endpos = i; break; } if (!partType.empty ()) { for (int i=l2; i<= endpos-1; i++) { if (!type_match (listOfParts[i]->partType, partType)) endpos = i; break; } } break; } case 4: { startpos = endpos = l2+1; if (!type_match (listOfParts[l2]->partType, partType)) endpos = l2; break; } case 5: { startpos = endpos = l2+1; if (listOfParts[l2]->isBinded) endpos = l2; if (!type_match (listOfParts[l2]->partType, partType)) endpos = l2; break; } default: break; } } else { string errno ("Invalid Substituent Type: "); errno += partRef + "!"; throw CoreException (errno); } bool rVal = false; for (int i = startpos; i <= endpos; i++) { vector< list< pair<int,int> > > recursive; bool mok = substituent_m (l1+1, u1, i, u2, c, recursive); if (mok) { if (recursive.size () == 0) { // only push back in matchings of current part cMatchType only_one; only_one.push_back (make_pair (l2, i-1)); result.push_back (only_one); } else { for (int j=0; j < recursive.size (); j++) { recursive[j].push_front (make_pair (l2, i-1)); result.push_back (recursive[j]); } } rVal = true; } else continue; } return rVal; }
bool Chain::match ( const Chain* c, vector<cMatchType>& xyz ) const { // // find parts of non-substituent type // vector<markType> ns_t; vector<cMatchType> asmb; bool start = true; int numParts = c->listOfParts.size (); for (int cnt =0; cnt < listOfParts.size (); cnt++) { Part* p = listOfParts[cnt]; string ctg = p->getPartCategory (); if (ctg != "substituent") { if (start) { ns_t.push_back (make_pair (cnt, 0)); start = false; } } else { ns_t.back ().second = cnt-1; start = true; } } if (ns_t.size () > 0) { vector< markType >* m_pos = new vector< markType > [ns_t.size ()]; // // for each non-substituent part, find its possible // matching points in this chain // int permAll = 0; for (int cnt =0; cnt < ns_t.size (); cnt++) { int start = ns_t[cnt].first; int end = ns_t[cnt].second; int diff = end-start; Part* ps = listOfParts[start]; Part* pe = listOfParts[end]; string mkey = genUnicode (ps, pe); for (string::size_type pos = 0; (pos = unicode.find_first_of (mkey, pos)) != string::npos; pos++) { int spos = count (unicode.begin (), unicode.begin ()+pos, ']'); int epos = spos + diff; m_pos[cnt].push_back (make_pair (spos, epos)); } if (m_pos[cnt].size () == 0) { delete [] m_pos; return 0; } else permAll *= m_pos[cnt].size (); } // // find possible matching combinations // for (int i =0; i < permAll; i++) { int lastp = -1; cMatchType tmp; int divide = i; for (int j=0; j< ns_t.size (); j++) { markType p = m_pos[j][divide % m_pos[j].size ()]; if (p.first <= lastp) { tmp.clear (); break; } else { lastp = p.second; tmp.push_back (p); } divide /= m_pos[j].size (); } assert (tmp.size () == ns_t.size ()); if (!tmp.empty ()) asmb.push_back (tmp); } delete [] m_pos; } // // insert substituent pieces into asmb // for (int cnt =0; cnt < asmb.size (); cnt++) { int lastf1, lastf2; lastf1 = lastf2 = 0; bool found = true; // // *it ==> pair<int,int> // vector<cMatchType> pm; cMatchType::const_iterator it = asmb[cnt].begin (); vector<cMatchType> tmp; bool mok = substituent_m ( 0, ns_t[0].first-1, 0, it->first-1, c, tmp ); if (mok) { for (int i = 0; i < tmp.size (); i++) pm.push_back (tmp[i]); } else continue; // for (int cnt2 =0; cnt2 < ns_t.size (); cnt2++, it++) { int start1 = ns_t[cnt2].first; int end1 = ns_t[cnt2].second; int start2 = it->first; int end2 = it->second; // // add non-substituent type matching // for (int cnt3 =0; cnt3 < pm.size (); cnt3++) { for (int k =start2; k <= end2; k++) pm[cnt3].push_back (make_pair (k,k)); } // // add substituent type matching // bool mok = substituent_m ( lastf1, start1-1, lastf2, start2-1, c, tmp ); if (mok) { vector<cMatchType>::iterator iter = pm.begin (); int k = -1; int currsize = pm.size (); while (++k < currsize) { cMatchType tmp1 = pm[k]; pm.erase (iter + k); // tmp ==> vector<cMatchType> for (int i =0; i < tmp.size (); i++) { cMatchType tmp2 = tmp1; tmp2.insert (tmp2.end (), tmp[i].begin (), tmp[i].end ()); pm.push_back (tmp2); } } } else { found = false; break; } // // update // lastf1 = end1+1; lastf2 = end2+1; } if (!found) continue; else { for (int i =0; i < pm.size (); i++) xyz.push_back (pm[cnt]); } } if (xyz.empty ()) return false; return true; }
bool Chain::match (const Chain* c, cMatchsType& res ) const { // // find parts of non-substituent type // vector<markType> ns_t; vector<cMatchType> asmb; for (int i=0; i < c->listOfParts.size ();) { Part* p = c->listOfParts[i]; string ctg = p->getPartCtg (); int startpos = i; if (ctg != "substituent") { bool found = false; int j = i+1; for (; j < c->listOfParts.size (); j++) { Part* p1 = c->listOfParts[j]; string ctg1 = p1->getPartCtg (); if (ctg1 == "substituent") {found = true;break;} } if (j == c->listOfParts.size ()) found = true; if (found) { ns_t.push_back (make_pair(i, j-1)); i = j+1; } } else i++; } if (ns_t.size () > 0) { vector< markType >* m_pos = new vector< markType > [ns_t.size ()]; // // for each non-substituent part, find its possible // matching points in this chain // int permAll = 1; for (int cnt =0; cnt < ns_t.size (); cnt++) { int start = ns_t[cnt].first; int end = ns_t[cnt].second; int diff = end-start; string mkey = c->genUnicode (start, end, true); for (string::size_type pos = 0; (pos = unicode.find (mkey, pos)) != string::npos; pos++) { int spos = count (unicode.begin (), unicode.begin ()+pos, ']'); int epos = spos + diff; m_pos[cnt].push_back (make_pair (spos, epos)); } if (m_pos[cnt].size () == 0) { delete [] m_pos; return false; } else permAll *= m_pos[cnt].size (); } // // find possible matching combinations // for (int i =0; i < permAll; i++) { int lastp = -1; cMatchType tmp; int divide = i; for (int j=0; j< ns_t.size (); j++) { markType p = m_pos[j][divide % m_pos[j].size ()]; if (p.first <= lastp) { tmp.clear (); break; } else { lastp = p.second; tmp.push_back (p); } divide /= m_pos[j].size (); } assert (tmp.size () == ns_t.size ()); if (!tmp.empty ()) asmb.push_back (tmp); } delete [] m_pos; if (asmb.size () == 0) return false; } // cout << "\nasmb.size () == " << asmb.size () << endl; // insert substituent pieces into asmb // if there are only substituent-type parts if (ns_t.size () == 0) { int l1 = 0; int u1 = static_cast<int> (c->listOfParts.size ())-1; int l2 = 0; int u2 = listOfParts.size ()-1; assert (u1 >= l1); assert (u2 >= l2); bool mok = substituent_m (l1, u1, l2, u2, c, res); if (mok && res.size() > 0) return true; else return false; } else { assert (asmb.size () > 0); // if there are non-substituent-type part for (int i=0; i< asmb.size (); i++) { vector<cMatchType> AssembleMatch; bool found = true; // // *it ==> pair<int,int> // cMatchType::const_iterator it = asmb[i].begin (); // // find matchings of first block of substituent-type parts // if (ns_t[0].first != 0) { vector<cMatchType> tmp; bool mok = substituent_m ( 0, ns_t[0].first-1, 0, it->first-1, c, tmp ); if (mok && tmp.size () > 0) { for (int j = 0; j < tmp.size (); j++) AssembleMatch.push_back (tmp[j]); } else return false; } int l1, u1, l2, u2; l1 = u1 = l2 = u2 = 0; for (int j=0; j < ns_t.size (); j++) { // // add non-substituent type matching // if (j == 0 && AssembleMatch.size () == 0) { cMatchType initial; for (int l=it->first; l <= it->second; l++) initial.push_back (make_pair (l,l)); AssembleMatch.push_back (initial); } else { for (int k =0; k < AssembleMatch.size (); k++) for (int l=it->first; l <= it->second; l++) AssembleMatch[k].push_back ( make_pair (l,l) ); } // // add substituent type matching // vector<cMatchType> tmp; l1 = ns_t[j].second+1; if (j == ns_t.size ()-1) u1 = c->listOfParts.size ()-1; else u1 = ns_t[j+1].first-1; l2 = it->second+1; if (j == ns_t.size ()-1) u2 = listOfParts.size ()-1; else u2 = (++it)->first-1; //it add one object // no substituent-type parts if (j == ns_t.size ()-1) if (u1 < l1 && u2 < l2) continue; bool mok = substituent_m (l1, u1, l2, u2, c, tmp); if (mok && tmp.size () > 0) { vector<cMatchType> __matchExp; for (int k =0; k < AssembleMatch.size ();k++) { for (int l=0; l< tmp.size (); l++) { cMatchType expandMatch = AssembleMatch[k]; list<markType>::iterator iter = tmp[l].begin (); while (iter != tmp[l].end ()) expandMatch.push_back (*iter++); __matchExp.push_back (expandMatch); } } AssembleMatch.clear (); AssembleMatch = __matchExp; } else { found = false; break; } } if (!found) continue; else { for (int j=0; j < AssembleMatch.size (); j++) res.push_back (AssembleMatch[j]); } } if (res.empty ()) return false; return true; } }