Exemplo n.º 1
0
/*
  Extend every sequence in `seq` by one symbol, in every way the pattern
  accepts, and return all resulting candidate sequences.

  Operates on normalized and localized sequences and patterns only.

  The alphabet can be modified.

  seq: sequences to extend; the list itself is not modified.
  pat: regular expression (Perl syntax).  Taken by value because it is
       anchored with a leading '^' before being compiled.

  For each input sequence a fresh Normalizer is fed that sequence, and every
  character of n.getInput() plus one extra 'z' is tried as the next symbol.
  A candidate is kept when the anchored pattern matches more characters than
  the existing sequence holds, i.e. the match reaches into the appended
  symbol.

  Returns the accepted extensions in the order they were found; the list is
  empty when nothing matched.

  re.assign() is outside the try block, so an exception raised while
  compiling the pattern propagates to the caller.  A boost::regex_error
  raised while searching is reported on cerr and that candidate is skipped.
 */
list<string> extendSequence(const list<string>& seq,
			    string pat) {

  list<string> results;

  boost::regex re;
  boost::smatch what;

  // Anchor the pattern so a match has to start at the head of the sequence.
  pat = "^" + pat;
  re.assign(pat, boost::regex_constants::perl);

  for (list<string>::const_iterator it = seq.begin(); it != seq.end(); ++it) {
    // Kept as a non-const copy: Normalizer::norm's parameter type is not
    // visible here.
    string activeSeq = *it;

    // The return value of norm() is ignored; the call is made only so that
    // getInput() can be queried afterwards.
    // NOTE(review): presumably getInput() yields the symbols seen so far and
    // 'z' stands for "one new symbol" -- confirm against Normalizer.
    Normalizer n;
    n.norm(activeSeq);
    const string input = n.getInput() + "z";

    for (string::size_type i = 0; i < input.length(); ++i) {
      // NOTE(review): the '~' padding presumably gives the remainder of the
      // pattern something to match after the candidate symbol -- confirm.
      const string ref = activeSeq + input[i] + "~~~~~~~~~~~~~~~~~~";
      DBG cout << "pattern " << pat << " against " << ref << endl;
      try {
	if (regex_search(ref, what, re)) {
	  const string m = what[0];
	  // Only a match that runs past the existing sequence says anything
	  // about the appended symbol.
	  if (m.length() > activeSeq.length()) {
	    string bit(1, input[i]);
	    const string ext = activeSeq + n.norm(bit);
	    DBG cout << "  got something " << ext << endl;
	    results.push_back(ext);
	  }
	}
      } catch (const boost::regex_error& regErr) {
	cerr << "regular expression failed: " << regErr.what() << endl;
      }
    }
  }

  return results;
}
Exemplo n.º 2
0
std::string sequenceToPattern(std::string seq) {
  checkSequence(seq);

  Normalizer n;

  std::string sub = "";
  if (seq.length()>10) {
    sub = seq.substr(0,seq.length()-10);
    seq = seq.substr(seq.length()-10,seq.length());
  }

  string pat = n.denorm(submain(n.norm(seq)));
  pat = sub + pat;

  return pat;
}
Exemplo n.º 3
0
std::string extendSequence(std::string seq, std::string pattern, int len) {

  srand (std::time(0));

  checkSequence(seq);

  Normalizer n;
  int pruneLen = 100;

  seq = n.norm(seq);
  pattern = n.norm(pattern);
  string refPattern = pattern;
  pattern = localizePattern(pattern);

  DBG cout << "extend " << seq << " with " << pattern << endl;

  n.add("ABCDEFGHIJKLMNOPQRSTUVWXYZ");

  list<string> lst;
  lst.push_back(seq);
  for (int k=0; k<len; k++) {
    lst = extendSequence(lst,pattern);

    
    list<string> lst2;
    for (list<string>::const_iterator it = lst.begin(); it!=lst.end(); 
	 it++) {
      string nextSeq = *it;
      string nextPat = refPattern;
      if (nextSeq.length()<=10) {
	nextPat = sequenceToPattern(nextSeq);
      }
      if (nextPat==refPattern) {
	lst2.push_back(nextSeq);
      } else {
	DBG cout << nextSeq << ": " << "mismatch " << nextPat << " versus " << refPattern << endl;
      }
    }
    if (lst2.size()>0) {
      lst = lst2;
    }


    if (lst.size()>pruneLen) {
      DBG cout << "NEED TO PRUNE" << endl;
      vector<string> v(lst.begin(),lst.end());
      random_shuffle(v.begin(),v.end());
      lst.clear();
      lst = list<string>(v.begin(),v.begin()+pruneLen);
      //lst.erase((++(++(lst.begin()))),lst.end());
    }
    DBG {
      cout << "possibilities: " << endl;
      for (list<string>::const_iterator it = lst.begin(); it!=lst.end(); 
	   it++) {
	cout << " -- " << n.denorm(*it) << endl;
      }
    }
  }

  for (list<string>::const_iterator it = lst.begin(); it!=lst.end(); 
       it++) {
    DBG cout << "  final possibility " << n.denorm(*it) << endl;
  }
  vector<string> v(lst.begin(),lst.end());
  random_shuffle(v.begin(),v.end());
  if (v.size()>0) {
    string result = n.denorm(v[0]);
    result = result.substr(seq.length(),result.length());
    return result;
  }

  return "";
}