Esempio n. 1
0
std::string sequenceToPattern(std::string seq) {
  checkSequence(seq);

  Normalizer n;

  std::string sub = "";
  if (seq.length()>10) {
    sub = seq.substr(0,seq.length()-10);
    seq = seq.substr(seq.length()-10,seq.length());
  }

  string pat = n.denorm(submain(n.norm(seq)));
  pat = sub + pat;

  return pat;
}
Esempio n. 2
0
std::string extendSequence(std::string seq, std::string pattern, int len) {

  srand (std::time(0));

  checkSequence(seq);

  Normalizer n;
  int pruneLen = 100;

  seq = n.norm(seq);
  pattern = n.norm(pattern);
  string refPattern = pattern;
  pattern = localizePattern(pattern);

  DBG cout << "extend " << seq << " with " << pattern << endl;

  n.add("ABCDEFGHIJKLMNOPQRSTUVWXYZ");

  list<string> lst;
  lst.push_back(seq);
  for (int k=0; k<len; k++) {
    lst = extendSequence(lst,pattern);

    
    list<string> lst2;
    for (list<string>::const_iterator it = lst.begin(); it!=lst.end(); 
	 it++) {
      string nextSeq = *it;
      string nextPat = refPattern;
      if (nextSeq.length()<=10) {
	nextPat = sequenceToPattern(nextSeq);
      }
      if (nextPat==refPattern) {
	lst2.push_back(nextSeq);
      } else {
	DBG cout << nextSeq << ": " << "mismatch " << nextPat << " versus " << refPattern << endl;
      }
    }
    if (lst2.size()>0) {
      lst = lst2;
    }


    if (lst.size()>pruneLen) {
      DBG cout << "NEED TO PRUNE" << endl;
      vector<string> v(lst.begin(),lst.end());
      random_shuffle(v.begin(),v.end());
      lst.clear();
      lst = list<string>(v.begin(),v.begin()+pruneLen);
      //lst.erase((++(++(lst.begin()))),lst.end());
    }
    DBG {
      cout << "possibilities: " << endl;
      for (list<string>::const_iterator it = lst.begin(); it!=lst.end(); 
	   it++) {
	cout << " -- " << n.denorm(*it) << endl;
      }
    }
  }

  for (list<string>::const_iterator it = lst.begin(); it!=lst.end(); 
       it++) {
    DBG cout << "  final possibility " << n.denorm(*it) << endl;
  }
  vector<string> v(lst.begin(),lst.end());
  random_shuffle(v.begin(),v.end());
  if (v.size()>0) {
    string result = n.denorm(v[0]);
    result = result.substr(seq.length(),result.length());
    return result;
  }

  return "";
}