/**
 * Builds a deterministic automaton from a regular expression.
 *
 * The expression is first handed to a dNonDeterministicFiniteAutonata, and
 * that automaton is then passed to CreateDeterministicFiniteAutomaton().
 * m_startState is initialised to NULL before that call.
 *
 * @param regularExpression  Regular expression text forwarded unchanged to
 *                           the non-deterministic automaton's constructor.
 */
dDeterministicFiniteAutonata::dDeterministicFiniteAutonata(const char* const regularExpression)
    : dFiniteAutomata(),
      m_startState(NULL)
{
    dNonDeterministicFiniteAutonata sourceAutomaton(regularExpression);
    CreateDeterministicFiniteAutomaton(sourceAutomaton);
}
int main() { InputTokenizer inpTok; inpTok.parseFile("input.txt"); NFABuilder nfa(inpTok); nfa.test(); return 0; }
void eskoTest() { unsigned step = 5000; unsigned start = 30000; unsigned stop = 120000; unsigned topL = 300; unsigned minL = 5; for(unsigned cur = start; cur <= stop; cur += step) { takeSubstring("data/ecoli.seq", "data/tmpseq.seq", 0, cur); //takeSubstring("data/ecoli.seq", "data/tmpseq.seq", 278000, 293000); unsigned L = minL; unsigned R = topL; unsigned ans = 0U; while (L<=R) { unsigned mid = (L+R) / 2U; unsigned k = mid-1U; Genome genome; readGenome("data/tmpseq.seq", mid, genome); AhoCorasick * aho = new AhoCorasick(genome); aho->filterOverlaps(k); NFA_Automata nfa(*aho, genome, 0, genome.generatedReads.size()-1); delete aho; DFA_Automata dfa(nfa, genome, 1.0); // remove reads since we do not need them anymore genome.generatedReads.resize(0); genome.sequence.resize(0); bool uniqueOk = dfa.isCOAUnique(); if (uniqueOk) { R = mid-1U; ans = mid; } else { L = mid+1U; } } std::string seq = readSequence("data/tmpseq.seq"); std::cout << cur << " " << ans << " " << getLongestSingleRepeat(seq) << std::endl; } }
int main() { XYZ_NBA_Generator nba_generator; NFA_Generator<XYZ_NBA_Generator::Representation_t> nfa_generator; CPP_Code_Generator<XYZ_NBA_Generator::Representation_t> code_generator; //XYZ_NBA_Generator::Representation_t == Graphviz_Representation. Nondet_Buechi_Automonton<XYZ_NBA_Generator::Representation_t> * nba(nba_generator.translate("[]<>p")); Nondet_Finite_Automonton<XYZ_NBA_Generator::Representation_t> * nfa(nfa_generator.translate(nba)); code_generator->generate(nfa, ::std::cout); delete nba; delete nfa; return 0; }
void MaxMeaningfulClustering::build_merge_info(t_float *Z, t_float *X, int N, int dim, bool use_full_merge_rule, vector<HCluster> *merge_info, vector< vector<int> > *meaningful_clusters) { // walk the whole dendogram for (int i=0; i<(N-1)*4; i=i+4) { HCluster cluster; cluster.num_elem = Z[i+3]; //number of elements int node1 = Z[i]; int node2 = Z[i+1]; float dist = Z[i+2]; if (node1<N) { vector<float> point; for (int n=0; n<dim; n++) point.push_back(X[node1*dim+n]); cluster.points.push_back(point); cluster.elements.push_back((int)node1); } else { for (int i=0; i<merge_info->at(node1-N).points.size(); i++) { cluster.points.push_back(merge_info->at(node1-N).points[i]); cluster.elements.push_back(merge_info->at(node1-N).elements[i]); } //update the extended volume of node1 using the dist where this cluster merge with another merge_info->at(node1-N).dist_ext = dist; } if (node2<N) { vector<float> point; for (int n=0; n<dim; n++) point.push_back(X[node2*dim+n]); cluster.points.push_back(point); cluster.elements.push_back((int)node2); } else { for (int i=0; i<merge_info->at(node2-N).points.size(); i++) { cluster.points.push_back(merge_info->at(node2-N).points[i]); cluster.elements.push_back(merge_info->at(node2-N).elements[i]); } //update the extended volume of node2 using the dist where this cluster merge with another merge_info->at(node2-N).dist_ext = dist; } Minibox mb; for (int i=0; i<cluster.points.size(); i++) { mb.check_in(&cluster.points.at(i)); } cluster.dist = dist; cluster.volume = mb.volume(); if (cluster.volume >= 1) cluster.volume = 0.999999; if (cluster.volume == 0) cluster.volume = 0.001; //TODO is this the minimum we can get? 
cluster.volume_ext=1; if (node1>=N) { merge_info->at(node1-N).volume_ext = cluster.volume; } if (node2>=N) { merge_info->at(node2-N).volume_ext = cluster.volume; } cluster.node1 = node1; cluster.node2 = node2; merge_info->push_back(cluster); } for (int i=0; i<merge_info->size(); i++) { merge_info->at(i).nfa = nfa(merge_info->at(i).volume, merge_info->at(i).volume_ext, merge_info->at(i).num_elem, N); int node1 = merge_info->at(i).node1; int node2 = merge_info->at(i).node2; { if ((node1<N)&&(node2<N)) { //els dos nodes son single samples (nfa=1) per tant aquest cluster es maxim merge_info->at(i).max_meaningful = true; merge_info->at(i).max_in_branch.push_back(i); merge_info->at(i).min_nfa_in_branch = merge_info->at(i).nfa; } else { if ((node1>=N)&&(node2>=N)) { //els dos nodes son "sets" per tant hem d'avaluar el merging condition if ( ( (use_full_merge_rule) && ((merge_info->at(i).nfa < merge_info->at(node1-N).nfa + merge_info->at(node2-N).nfa) && (merge_info->at(i).nfa<min(merge_info->at(node1-N).min_nfa_in_branch,merge_info->at(node2-N).min_nfa_in_branch))) ) || ( (!use_full_merge_rule) && ((merge_info->at(i).nfa<min(merge_info->at(node1-N).min_nfa_in_branch,merge_info->at(node2-N).min_nfa_in_branch))) ) ) { merge_info->at(i).max_meaningful = true; merge_info->at(i).max_in_branch.push_back(i); merge_info->at(i).min_nfa_in_branch = merge_info->at(i).nfa; for (int k =0; k<merge_info->at(node1-N).max_in_branch.size(); k++) merge_info->at(merge_info->at(node1-N).max_in_branch.at(k)).max_meaningful = false; for (int k =0; k<merge_info->at(node2-N).max_in_branch.size(); k++) merge_info->at(merge_info->at(node2-N).max_in_branch.at(k)).max_meaningful = false; } else { merge_info->at(i).max_meaningful = false; merge_info->at(i).max_in_branch.insert(merge_info->at(i).max_in_branch.end(),merge_info->at(node1-N).max_in_branch.begin(),merge_info->at(node1-N).max_in_branch.end()); 
merge_info->at(i).max_in_branch.insert(merge_info->at(i).max_in_branch.end(),merge_info->at(node2-N).max_in_branch.begin(),merge_info->at(node2-N).max_in_branch.end()); if (merge_info->at(i).nfa<min(merge_info->at(node1-N).min_nfa_in_branch,merge_info->at(node2-N).min_nfa_in_branch)) merge_info->at(i).min_nfa_in_branch = merge_info->at(i).nfa; else merge_info->at(i).min_nfa_in_branch = min(merge_info->at(node1-N).min_nfa_in_branch,merge_info->at(node2-N).min_nfa_in_branch); } } else { //un dels nodes es un "set" i l'altre es un single sample, s'avalua el merging condition pero amb compte if (node1>=N) { if ((merge_info->at(i).nfa < merge_info->at(node1-N).nfa + 1) && (merge_info->at(i).nfa<merge_info->at(node1-N).min_nfa_in_branch)) { merge_info->at(i).max_meaningful = true; merge_info->at(i).max_in_branch.push_back(i); merge_info->at(i).min_nfa_in_branch = merge_info->at(i).nfa; for (int k =0; k<merge_info->at(node1-N).max_in_branch.size(); k++) merge_info->at(merge_info->at(node1-N).max_in_branch.at(k)).max_meaningful = false; } else { merge_info->at(i).max_meaningful = false; merge_info->at(i).max_in_branch.insert(merge_info->at(i).max_in_branch.end(),merge_info->at(node1-N).max_in_branch.begin(),merge_info->at(node1-N).max_in_branch.end()); merge_info->at(i).min_nfa_in_branch = min(merge_info->at(i).nfa,merge_info->at(node1-N).min_nfa_in_branch); } } else { if ((merge_info->at(i).nfa < merge_info->at(node2-N).nfa + 1) && (merge_info->at(i).nfa<merge_info->at(node2-N).min_nfa_in_branch)) { merge_info->at(i).max_meaningful = true; merge_info->at(i).max_in_branch.push_back(i); merge_info->at(i).min_nfa_in_branch = merge_info->at(i).nfa; for (int k =0; k<merge_info->at(node2-N).max_in_branch.size(); k++) merge_info->at(merge_info->at(node2-N).max_in_branch.at(k)).max_meaningful = false; } else { merge_info->at(i).max_meaningful = false; 
merge_info->at(i).max_in_branch.insert(merge_info->at(i).max_in_branch.end(),merge_info->at(node2-N).max_in_branch.begin(),merge_info->at(node2-N).max_in_branch.end()); merge_info->at(i).min_nfa_in_branch = min(merge_info->at(i).nfa,merge_info->at(node2-N).min_nfa_in_branch); } } } } } } for (int i=0; i<merge_info->size(); i++) { if (merge_info->at(i).max_meaningful) { vector<int> cluster; for (int k=0; k<merge_info->at(i).elements.size();k++) cluster.push_back(merge_info->at(i).elements.at(k)); meaningful_clusters->push_back(cluster); } } }
void MaxMeaningfulClustering::build_merge_info(t_float *Z, int N, vector<HCluster> *merge_info, vector< vector<int> > *meaningful_clusters) { // walk the whole dendogram for (int i=0; i<(N-1)*4; i=i+4) { HCluster cluster; cluster.num_elem = Z[i+3]; //number of elements int node1 = Z[i]; int node2 = Z[i+1]; float dist = Z[i+2]; if (dist != dist) //this is to avoid NaN values dist=0; //fprintf(stderr," merging %d %d\n",node1,node2); if (node1<N) { cluster.elements.push_back((int)node1); } else { for (int i=0; i<merge_info->at(node1-N).elements.size(); i++) { cluster.elements.push_back(merge_info->at(node1-N).elements[i]); } } if (node2<N) { cluster.elements.push_back((int)node2); } else { for (int i=0; i<merge_info->at(node2-N).elements.size(); i++) { cluster.elements.push_back(merge_info->at(node2-N).elements[i]); } } cluster.dist = dist; if (cluster.dist >= 1) cluster.dist = 0.999999; if (cluster.dist == 0) cluster.dist = 1.e-25; //TODO is this the minimum we can get? cluster.dist_ext = 1; if (node1>=N) { merge_info->at(node1-N).dist_ext = cluster.dist; } if (node2>=N) { merge_info->at(node2-N).dist_ext = cluster.dist; } cluster.node1 = node1; cluster.node2 = node2; merge_info->push_back(cluster); } //print all merge info //cout << "---------------------------------------------------------" << endl; //cout << "-- MERGE INFO ---- Evidence Accumulation " << endl; //cout << "---------------------------------------------------------" << endl; for (int i=0; i<merge_info->size(); i++) { merge_info->at(i).nfa = nfa(merge_info->at(i).dist, merge_info->at(i).dist_ext, merge_info->at(i).num_elem, N); int node1 = merge_info->at(i).node1; int node2 = merge_info->at(i).node2; { if ((node1<N)&&(node2<N)) { //els dos nodes son single samples (nfa=1) per tant aquest cluster es maxim merge_info->at(i).max_meaningful = true; merge_info->at(i).max_in_branch.push_back(i); merge_info->at(i).min_nfa_in_branch = merge_info->at(i).nfa; //fprintf(stderr,"%d = (%d,%d) els dos nodes son 
single samples (nfa=1) per tant aquest merge_info->at(i) es maxim min_nfa_in_branch = %d \n",i,node1-N,node2-N,merge_info->at(i).min_nfa_in_branch); } else { if ((node1>=N)&&(node2>=N)) { //els dos nodes son "sets" per tant hem d'avaluar el merging condition if ((merge_info->at(i).nfa < merge_info->at(node1-N).nfa + merge_info->at(node2-N).nfa) && (merge_info->at(i).nfa<min(merge_info->at(node1-N).min_nfa_in_branch,merge_info->at(node2-N).min_nfa_in_branch))) { //fprintf(stderr,"%d = (%d,%d) MAX because merging condition 1 (%d < %d + %d ) && (%d<min(%d,%d)) \n",i,node1-N,node2-N,merge_info->at(i).nfa,merge_info->at(node1-N).nfa, merge_info->at(node2-N).nfa, merge_info->at(i).nfa, merge_info->at(node1-N).nfa,merge_info->at(node2-N).nfa); merge_info->at(i).max_meaningful = true; merge_info->at(i).max_in_branch.push_back(i); merge_info->at(i).min_nfa_in_branch = merge_info->at(i).nfa; for (int k =0; k<merge_info->at(node1-N).max_in_branch.size(); k++) merge_info->at(merge_info->at(node1-N).max_in_branch.at(k)).max_meaningful = false; for (int k =0; k<merge_info->at(node2-N).max_in_branch.size(); k++) merge_info->at(merge_info->at(node2-N).max_in_branch.at(k)).max_meaningful = false; //fprintf(stderr," min_nfa_in_branch = %d \n",merge_info->at(i).min_nfa_in_branch); } else { merge_info->at(i).max_meaningful = false; merge_info->at(i).max_in_branch.insert(merge_info->at(i).max_in_branch.end(),merge_info->at(node1-N).max_in_branch.begin(),merge_info->at(node1-N).max_in_branch.end()); merge_info->at(i).max_in_branch.insert(merge_info->at(i).max_in_branch.end(),merge_info->at(node2-N).max_in_branch.begin(),merge_info->at(node2-N).max_in_branch.end()); if (merge_info->at(i).nfa<min(merge_info->at(node1-N).min_nfa_in_branch,merge_info->at(node2-N).min_nfa_in_branch)) merge_info->at(i).min_nfa_in_branch = merge_info->at(i).nfa; else merge_info->at(i).min_nfa_in_branch = min(merge_info->at(node1-N).min_nfa_in_branch,merge_info->at(node2-N).min_nfa_in_branch); 
//fprintf(stderr,"%d = (%d,%d) NONmax min_nfa_in_branch = %d \n",i,node1-N,node2-N,merge_info->at(i).min_nfa_in_branch); } } else { //un dels nodes es un "set" i l'altre es un single sample, s'avalua el merging condition pero amb compte if (node1>=N) { if ((merge_info->at(i).nfa < merge_info->at(node1-N).nfa + 1) && (merge_info->at(i).nfa<merge_info->at(node1-N).min_nfa_in_branch)) { //fprintf(stderr,"%d = (%d,%d) MAX because merging condition 2 (%d < %d + 1 ) && (%d<%d) \n",i,node1-N,node2-N,merge_info->at(i).nfa,merge_info->at(node1-N).nfa, merge_info->at(i).nfa, merge_info->at(node1-N).min_nfa_in_branch); merge_info->at(i).max_meaningful = true; merge_info->at(i).max_in_branch.push_back(i); merge_info->at(i).min_nfa_in_branch = merge_info->at(i).nfa; for (int k =0; k<merge_info->at(node1-N).max_in_branch.size(); k++) merge_info->at(merge_info->at(node1-N).max_in_branch.at(k)).max_meaningful = false; } else { merge_info->at(i).max_meaningful = false; merge_info->at(i).max_in_branch.insert(merge_info->at(i).max_in_branch.end(),merge_info->at(node1-N).max_in_branch.begin(),merge_info->at(node1-N).max_in_branch.end()); merge_info->at(i).min_nfa_in_branch = min(merge_info->at(i).nfa,merge_info->at(node1-N).min_nfa_in_branch); //fprintf(stderr,"%d = (%d,%d) NONmax2 min_nfa_in_branch = %d \n",i,node1-N,node2-N,merge_info->at(i).min_nfa_in_branch); } } else { if ((merge_info->at(i).nfa < merge_info->at(node2-N).nfa + 1) && (merge_info->at(i).nfa<merge_info->at(node2-N).min_nfa_in_branch)) { //fprintf(stderr,"%d = (%d,%d) MAX because merging condition 3 (%d < %d + 1 ) && (%d<%d) \n ",i,node1-N,node2-N,merge_info->at(i).nfa,merge_info->at(node2-N).nfa, merge_info->at(i).nfa, merge_info->at(node2-N).min_nfa_in_branch); merge_info->at(i).max_meaningful = true; merge_info->at(i).max_in_branch.push_back(i); merge_info->at(i).min_nfa_in_branch = merge_info->at(i).nfa; for (int k =0; k<merge_info->at(node2-N).max_in_branch.size(); k++) 
merge_info->at(merge_info->at(node2-N).max_in_branch.at(k)).max_meaningful = false; } else { merge_info->at(i).max_meaningful = false; merge_info->at(i).max_in_branch.insert(merge_info->at(i).max_in_branch.end(),merge_info->at(node2-N).max_in_branch.begin(),merge_info->at(node2-N).max_in_branch.end()); merge_info->at(i).min_nfa_in_branch = min(merge_info->at(i).nfa,merge_info->at(node2-N).min_nfa_in_branch); //fprintf(stderr,"%d = (%d,%d) NONmax3 min_nfa_in_branch = %d \n",i,node1-N,node2-N,merge_info->at(i).min_nfa_in_branch); } } } } } } for (int i=0; i<merge_info->size(); i++) { if (merge_info->at(i).max_meaningful) { vector<int> cluster; for (int k=0; k<merge_info->at(i).elements.size();k++) cluster.push_back(merge_info->at(i).elements.at(k)); meaningful_clusters->push_back(cluster); } } }