std::string sequenceToPattern(std::string seq) { checkSequence(seq); Normalizer n; std::string sub = ""; if (seq.length()>10) { sub = seq.substr(0,seq.length()-10); seq = seq.substr(seq.length()-10,seq.length()); } string pat = n.denorm(submain(n.norm(seq))); pat = sub + pat; return pat; }
std::string extendSequence(std::string seq, std::string pattern, int len) { srand (std::time(0)); checkSequence(seq); Normalizer n; int pruneLen = 100; seq = n.norm(seq); pattern = n.norm(pattern); string refPattern = pattern; pattern = localizePattern(pattern); DBG cout << "extend " << seq << " with " << pattern << endl; n.add("ABCDEFGHIJKLMNOPQRSTUVWXYZ"); list<string> lst; lst.push_back(seq); for (int k=0; k<len; k++) { lst = extendSequence(lst,pattern); list<string> lst2; for (list<string>::const_iterator it = lst.begin(); it!=lst.end(); it++) { string nextSeq = *it; string nextPat = refPattern; if (nextSeq.length()<=10) { nextPat = sequenceToPattern(nextSeq); } if (nextPat==refPattern) { lst2.push_back(nextSeq); } else { DBG cout << nextSeq << ": " << "mismatch " << nextPat << " versus " << refPattern << endl; } } if (lst2.size()>0) { lst = lst2; } if (lst.size()>pruneLen) { DBG cout << "NEED TO PRUNE" << endl; vector<string> v(lst.begin(),lst.end()); random_shuffle(v.begin(),v.end()); lst.clear(); lst = list<string>(v.begin(),v.begin()+pruneLen); //lst.erase((++(++(lst.begin()))),lst.end()); } DBG { cout << "possibilities: " << endl; for (list<string>::const_iterator it = lst.begin(); it!=lst.end(); it++) { cout << " -- " << n.denorm(*it) << endl; } } } for (list<string>::const_iterator it = lst.begin(); it!=lst.end(); it++) { DBG cout << " final possibility " << n.denorm(*it) << endl; } vector<string> v(lst.begin(),lst.end()); random_shuffle(v.begin(),v.end()); if (v.size()>0) { string result = n.denorm(v[0]); result = result.substr(seq.length(),result.length()); return result; } return ""; }