//構築したルールを音声認識エンジンにコミットします。 xreturn::r<bool> JuliusPlus::CommitRule() { if (! this->IsNeedUpdateRule ) {//アップデートする必要なし return true; } //マイクから入力用 std::ofstream dfa("__temp__regexp_test.dfa"); std::ofstream dict("__temp__regexp_test.dict"); this->MakeJuliusRule(this->Grammer,false, &dfa,&dict); //ディクテーションフィルター用 std::ofstream dfaFile("__temp__regexp_test_file.dfa"); std::ofstream dictFile("__temp__regexp_test_file.dict"); this->MakeJuliusRule(this->YobikakeRuleHandle,true,&dfaFile,&dictFile); this->JuliusStop(); this->JuliusFileStart(); this->JuliusStart(); //アップデートが終わったので再びルールに変更が加わるまではアップデートしない。 this->IsNeedUpdateRule = false; return true; }
main(int argc, char **argv) { int i, sw = 0; long minbox = 0L, maxbox = 0L, npts, temp; /* Read and interpret the command line. */ pname = argv[0]; for (i = 1; i < argc && *argv[i] == '-'; i++) { switch(argv[i][1]) { case 'd': /* set nfit (the order of the regression fit) */ if ((nfit = atoi(argv[++i])+1) < 2) error("order must be greater than 0"); break; case 'i': /* input data are already integrated */ iflag = 0; break; case 'l': /* set minbox (the minimum box size) */ minbox = atol(argv[++i]); break; case 'u': /* set maxbox (the maximum box size) */ maxbox = atol(argv[++i]); break; case 's': /* enable sliding window mode */ sw = 1; break; case 'h': /* print usage information and quit */ default: help(); exit(1); } } /* Allocate and fill the input data array seq[]. */ npts = input(); /* Set minimum and maximum box sizes. */ if (minbox < 2*nfit) minbox = 2*nfit; if (maxbox == 0 || maxbox > npts/4) maxbox = npts/4; if (minbox > maxbox) { SWAP(minbox, maxbox); if (minbox < 2*nfit) minbox = 2*nfit; } /* Allocate and fill the box size array rs[]. rscale's third argument specifies that the ratio between successive box sizes is 2^(1/8). */ nr = rscale(minbox, maxbox, pow(2.0, 1.0/8.0)); /* Allocate memory for dfa() and the functions it calls. */ setup(); /* Measure the fluctuations of the detrended input data at each box size using the DFA algorithm; fill mse[] with these results. */ dfa(seq, npts, nfit, rs, nr, sw); /* Output the results. */ for (i = 1; i <= nr; i++) printf("%g\n", log10(mse[i])/2.0); /* Release allocated memory. */ cleanup(); exit(0); }
void eskoTest() { unsigned step = 5000; unsigned start = 30000; unsigned stop = 120000; unsigned topL = 300; unsigned minL = 5; for(unsigned cur = start; cur <= stop; cur += step) { takeSubstring("data/ecoli.seq", "data/tmpseq.seq", 0, cur); //takeSubstring("data/ecoli.seq", "data/tmpseq.seq", 278000, 293000); unsigned L = minL; unsigned R = topL; unsigned ans = 0U; while (L<=R) { unsigned mid = (L+R) / 2U; unsigned k = mid-1U; Genome genome; readGenome("data/tmpseq.seq", mid, genome); AhoCorasick * aho = new AhoCorasick(genome); aho->filterOverlaps(k); NFA_Automata nfa(*aho, genome, 0, genome.generatedReads.size()-1); delete aho; DFA_Automata dfa(nfa, genome, 1.0); // remove reads since we do not need them anymore genome.generatedReads.resize(0); genome.sequence.resize(0); bool uniqueOk = dfa.isCOAUnique(); if (uniqueOk) { R = mid-1U; ans = mid; } else { L = mid+1U; } } std::string seq = readSequence("data/tmpseq.seq"); std::cout << cur << " " << ans << " " << getLongestSingleRepeat(seq) << std::endl; } }
int main() { string re; cout << "input regex:"; cin >> re; DFA dfa(re); string text; cout << "input text:"; cin >> text; dfa.match(text); return 0; }
/*
 * Reads a regular expression from standard input, builds its tree with
 * createTree(), lists the tree in post-order and then calls dfa() on it.
 *
 * Returns 0 on success, 1 when no expression could be read.
 */
int main()
{
    char re[20];
    tnode *head;

    printf("\n enter a RE expression : ");

    /* Limit the read to the buffer size (19 characters + terminator) and
       refuse to continue with an uninitialised buffer. */
    if (scanf("%19s", re) != 1) {
        fprintf(stderr, "no expression read\n");
        return 1;
    }

    head = createTree(re);

    printf("\n\n Ind Char NUL First Last Follow ");
    listInPostOrder(head);
    printf("\n\n");

    dfa(head);
    return 0;
}
void FSMThingy::finalizeGraph(bool determinize) { if (Fsm->verticesSize() < 2) { throw std::runtime_error("No valid patterns were parsed"); } if (determinize && !Fsm->Deterministic) { NFAPtr dfa(new NFA(1, 2 * Fsm->verticesSize(), Fsm->edgesSize())); dfa->TransFac = Fsm->TransFac; Comp.subsetDFA(*dfa, *Fsm); Fsm = dfa; } Comp.labelGuardStates(*Fsm); }
void run_nums_test() { #define NODES_NUM 10 TestDG *d = create_circular_graph(NODES_NUM); // B is pointer to the last node, but since the graph is a circle, // it doesn't matter what BB we'll use DataFlowA dfa(d->getEntryBB(), no_change); dfa.run(); for (int i = 0; i < NODES_NUM; ++i) { check(d->getNode(i)->counter == 1, "did not go through the node only one time but %d", d->getNode(i)->counter); // zero out the counter for next dataflow run d->getNode(i)->counter = 0; } const analysis::DataFlowStatistics& stats = dfa.getStatistics(); check(stats.getBBlocksNum() == NODES_NUM, "wrong number of blocks: %d", stats.getBBlocksNum()); check(stats.processedBlocks == NODES_NUM, "processed more blocks than %d - %d", NODES_NUM, stats.processedBlocks); check(stats.getIterationsNum() == 1, "did wrong number of iterations: %d", stats.getIterationsNum()); DataFlowA dfa2(d->getEntryBB(), one_change); dfa2.run(); for (int i = 0; i < NODES_NUM; ++i) { check(d->getNode(i)->counter == 2, "did not go through the node only one time but %d", d->getNode(i)->counter); } const analysis::DataFlowStatistics& stats2 = dfa2.getStatistics(); check(stats2.getBBlocksNum() == NODES_NUM, "wrong number of blocks: %d", stats2.getBBlocksNum()); check(stats2.processedBlocks == 2*NODES_NUM, "processed more blocks than %d - %d", 2*NODES_NUM, stats2.processedBlocks); check(stats2.getIterationsNum() == 2, "did wrong number of iterations: %d", stats2.getIterationsNum()); #undef NODES_NUM }
void main() { //clrscr(); //system("clear"); char str[500]; inpt[0]=NULL; printf("Enter the postfix expression\n"); scanf("%s",str); node * root; int l; strcat(str,"#.\0"); l=strlen(str); l--; int i, j=0; for(i=0;i<l-1;++i) { j=0; while(inpt[j]!=NULL) { if(inpt[j]==str[i]) break; j++; } if(inpt[j]!=str[i] && str[i]!='|' && str[i]!='*' && str[i]!='.') { inpt[j]=str[i]; inpt[j+1]=NULL; } } int pos=1; root=create(str,&l); create_nullable(root,&pos); printf("NULLABLE TABLE\nElement\tFPOS\tLPOS\n"); print_nullable(root->lc); print_follow(pos-2); dfa(); display_dfa(); printf("\n"); }
// Writes a textual description of one DFA to stream_.
//
// For every state this prints the state index, then (for end states) the
// push/pop information, id, user id and next DFA, then the BOL index (state
// 0 only) and EOL index when they are not npos, and finally one bracketed
// character class plus target state per transition.
//
// dfa_    : the DFA to describe.
// stream_ : destination stream.
static void dump_ex(const typename char_state_machine::dfa &dfa_,
    ostream &stream_)
{
    const std::size_t states_ = dfa_._states.size();
    const id_type bol_index_ = dfa_._bol_index;
    typename dfa_state::id_type_string_token_map::const_iterator iter_;
    typename dfa_state::id_type_string_token_map::const_iterator end_;

    for (std::size_t i_ = 0; i_ < states_; ++i_)
    {
        const dfa_state &state_ = dfa_._states[i_];

        state(stream_);
        stream_ << i_ << std::endl;

        if (state_._end_state)
        {
            end_state(stream_);

            if (state_._push_pop_dfa == dfa_state::push_dfa)
            {
                push(stream_);
                stream_ << state_._push_dfa;
            }
            else if (state_._push_pop_dfa == dfa_state::pop_dfa)
            {
                pop(stream_);
            }

            id(stream_);
            stream_ << static_cast<std::size_t>(state_._id);
            user_id(stream_);
            stream_ << static_cast<std::size_t>(state_._user_id);
            dfa(stream_);
            stream_ << static_cast<std::size_t>(state_._next_dfa);
            stream_ << std::endl;
        }

        // The beginning-of-line index belongs to the DFA as a whole and is
        // reported once, together with state 0.
        if (i_ == 0 && bol_index_ != char_state_machine::npos())
        {
            bol(stream_);
            stream_ << static_cast<std::size_t>(bol_index_) << std::endl;
        }

        if (state_._eol_index != char_state_machine::npos())
        {
            eol(stream_);
            stream_ << static_cast<std::size_t>(state_._eol_index) <<
                std::endl;
        }

        iter_ = state_._transitions.begin();
        end_ = state_._transitions.end();

        for (; iter_ != end_; ++iter_)
        {
            // Work on a copy: the token may be negated for display.
            string_token token_ = iter_->second;

            open_bracket(stream_);

            if (!iter_->second.any() && iter_->second.negatable())
            {
                token_.negate();
                negated(stream_);
            }

            typename string_token::range_vector::const_iterator
                ranges_iter_ = token_._ranges.begin();
            typename string_token::range_vector::const_iterator
                ranges_end_ = token_._ranges.end();

            for (; ranges_iter_ != ranges_end_; ++ranges_iter_)
            {
                // The whole range is assembled in chars_ before it is
                // written, so that each escaping backslash lands directly in
                // front of the character it belongs to. (Previously the
                // backslash for the upper bound was streamed ahead of the
                // lower bound.)
                string chars_;

                if (ranges_iter_->first == '-' ||
                    ranges_iter_->first == '^' ||
                    ranges_iter_->first == ']')
                {
                    chars_ += '\\';
                }

                chars_ += string_token::escape_char(ranges_iter_->first);

                if (ranges_iter_->first != ranges_iter_->second)
                {
                    // Two adjacent characters are printed without a dash.
                    if (ranges_iter_->first + 1 < ranges_iter_->second)
                    {
                        chars_ += '-';
                    }

                    if (ranges_iter_->second == '-' ||
                        ranges_iter_->second == '^' ||
                        ranges_iter_->second == ']')
                    {
                        chars_ += '\\';
                    }

                    chars_ += string_token::escape_char(ranges_iter_->second);
                }

                stream_ << chars_;
            }

            close_bracket(stream_);
            stream_ << static_cast<std::size_t>(iter_->first) << std::endl;
        }

        stream_ << std::endl;
    }
}
/**
 * Convert an nfa to a dfa.
 *
 * The automaton pointed to by `nfa` is replaced in place by the DFA built
 * with the subset construction. The last symbol of the NFA alphabet is
 * treated as epsilon and is dropped from the DFA alphabet. Progress is
 * traced on std::cout.
 *
 * @param nfa automaton to convert; overwritten with the resulting DFA.
 */
void convert_nfa_dfa( Automata* nfa )
{
    std::vector<bool> marked;
    std::vector<TempState> dfaStates;

    /**
     * -- Algorithm --
     *
     * while unmarked states exist:
     *     if empty ( dfaStates ):
     *         curTempState.anchors = e-closure(nfa.Start)
     *         dfaStates.addState(curTempState)
     *         markedState = curTempState
     *     else:
     *         foreach a in Alphabet:
     *             newState_i.anchors = e-closure(a-closure(markedState))
     *             if not dfaStates.contains(newState_i):
     *                 dfaStates.addState(newState_i)
     *                 dfaStates.addTrans(markedState, newstate_i)
     *             else:
     *                 dfaStates.addTrans(markedState, dfaStates.get(newState_i))
     *
     *         if state was not last:
     *             mark dfa.nextUnmarkedState
     */
    int curMarked = -1;
    do {
        if (dfaStates.size() == 0) {
            TempState DFAStartState;
            DFAStartState.anchors = nfa->EClosure({nfa->Start()});
            dfaStates.push_back(DFAStartState);
            marked.push_back(false);
            curMarked = 0;

            // -- Output check --
            std::cout << "E-closure(IO) = ";
            _print_vec(DFAStartState.anchors);
            std::cout << " = " << curMarked+1 << "\n";
            std::cout << "\nMark " << curMarked+1 << std::endl;
            // -- End output check --
        } else {
            // The last alphabet symbol (epsilon) is skipped here.
            for (int i = 0; i < nfa->Alphabet().length()-1; i++) {
                TempState cmSymClosure, newState;

                cmSymClosure.anchors =
                    nfa->SymClosure(dfaStates[curMarked].anchors, nfa->Alphabet(i));
                newState.anchors = nfa->EClosure({cmSymClosure.anchors});

                // make sure the new state doesn't already exist
                int spos = _find_state(newState.anchors, dfaStates);

                if (newState.anchors.size() > 0 && spos < 0) {
                    dfaStates.push_back(newState);
                    marked.push_back(false);
                    dfaStates[curMarked].next[nfa->Alphabet(i)] = dfaStates.size()-1;
                } else {
                    // Either an existing state, or a negative value when the
                    // closure is empty and nothing was found (see the
                    // ">= 0" test when the transitions are added below).
                    dfaStates[curMarked].next[nfa->Alphabet(i)] = spos;
                }

                // -- Output check --
                if (newState.anchors.size() != 0) {
                    _print_vec(dfaStates[curMarked].anchors);
                    std::cout << "--" << nfa->Alphabet(i) << "--> ";
                    _print_vec(cmSymClosure.anchors);
                    std::cout << "\nE-closure";
                    _print_vec(cmSymClosure.anchors);
                    std::cout << "= ";
                    _print_vec(newState.anchors);
                    std::cout << " = " << (spos >= 0 ? spos+1 : dfaStates.size()) << "\n";
                }
                // -- End output check --
            }
            marked[curMarked++] = true;

            // -- Output check
            if (curMarked < dfaStates.size()) {
                std::cout << "\nMark " << curMarked+1 << std::endl;
            }
            // -- End output check --
        }
    } while (std::find(marked.begin(),marked.end(), false) != marked.end());

    // Trim Epsilon from alphabet
    std::string dfaAlpha = nfa->Alphabet().substr(0,nfa->Alphabet().length()-1);

    // Load DFA
    int dfaStart = 0;
    std::vector<int> dfaFinalStates;

    // A DFA state is final when it contains at least one NFA final state.
    // Each DFA state is recorded only once, even if it contains several NFA
    // final states.
    for (int i = 0; i<nfa->FinalStates().size(); i++) {
        for (int j = 0; j<dfaStates.size(); j++) {
            auto start = dfaStates[j].anchors.begin(),
                 end   = dfaStates[j].anchors.end();

            const bool containsFinal =
                std::find(start, end, nfa->FinalStates()[i]) != end;
            const bool alreadyListed =
                std::find(dfaFinalStates.begin(), dfaFinalStates.end(), j)
                    != dfaFinalStates.end();

            if (containsFinal && !alreadyListed) {
                dfaFinalStates.push_back(j);
            }
        }
    }

    // Add transitions
    Automata dfa(dfaAlpha, dfaStart, dfaStates.size(), dfaFinalStates);
    for (int i = 0; i<dfaStates.size(); i++) {
        for (int j = 0; j<dfa.Alphabet().length(); j++) {
            if (dfaStates[i].next[dfa.Alphabet().at(j)] >= 0)
                dfa.AddTrans(i, dfaStates[i].next[dfa.Alphabet().at(j)], dfa.Alphabet().at(j));
        }
    }

    *nfa = dfa;
}
void run_nums_test_interproc() { #define NODES_NUM 5 TestDG *d = create_circular_graph(NODES_NUM); TestNode *last; for (auto It : *d) { TestDG *sub = create_circular_graph(NODES_NUM); It.second->addSubgraph(sub); } // B is pointer to the last node, but since the graph is a circle, // it doesn't matter what BB we'll use DataFlowA dfa(d->getEntryBB(), no_change); dfa.run(); for (int i = 0; i < NODES_NUM; ++i) { TestNode *n = d->getNode(i); check(n->counter == 1, "did not go through the node only one time but %d", n->counter); // check that subgraphs are untouched by the dataflow // analysis for (auto sub : n->getSubgraphs()) { // iterate over nodes for (auto It : *sub) { TestNode *n = It.second; check(n->counter == 0, "intrAproc. dataflow went to procedures (%d - %d)", n->getKey(), n->counter); TestBBlock *BB = n->getBBlock(); assert(BB); check(BB->getDFSOrder() == 0, "DataFlow went into subgraph blocks"); } } // zero out the counter for next dataflow run n->counter = 0; } // this did not go into the procedures, so we should have only // the parent graph const analysis::DataFlowStatistics& stats = dfa.getStatistics(); check(stats.getBBlocksNum() == NODES_NUM, "wrong number of blocks: %d", stats.getBBlocksNum()); check(stats.processedBlocks == NODES_NUM, "processed more blocks than %d - %d", NODES_NUM, stats.processedBlocks); check(stats.getIterationsNum() == 1, "did wrong number of iterations: %d", stats.getIterationsNum()); DataFlowA dfa2(d->getEntryBB(), one_change, analysis::DATAFLOW_INTERPROCEDURAL | analysis::DATAFLOW_BB_NO_CALLSITES); dfa2.run(); for (int i = 0; i < NODES_NUM; ++i) { TestNode *n = d->getNode(i); check(n->counter == 2, "did not go through the node only one time but %d", n->counter); // check that subgraphs are untouched by the dataflow // analysis for (auto sub : n->getSubgraphs()) { // iterate over nodes for (auto It : *sub) { TestNode *n = It.second; check(n->counter == 2, "intErproc. 
dataflow did NOT went to procedures (%d - %d)", n->getKey(), n->counter); TestBBlock *BB = n->getBBlock(); assert(BB); check(BB->getDFSOrder() != 0, "intErproc DataFlow did NOT went into subgraph blocks"); n->counter = 0; } } // zero out the counter for next dataflow run n->counter = 0; } // we have NODES_NUM nodes and each node has subgraph of the // same size + the blocks in parent graph // we don't go through the parameters! uint64_t blocks_num = (NODES_NUM + 1) * NODES_NUM; const analysis::DataFlowStatistics& stats2 = dfa2.getStatistics(); check(stats2.getBBlocksNum() == blocks_num, "wrong number of blocks: %d", stats2.getBBlocksNum()); check(stats2.processedBlocks == 2*blocks_num, "processed more blocks than %d - %d", 2*blocks_num, stats2.processedBlocks); check(stats2.getIterationsNum() == 2, "did wrong number of iterations: %d", stats2.getIterationsNum()); // BBlocks now keep call-sites information, so now // this should work too DataFlowA dfa3(d->getEntryBB(), one_change, analysis::DATAFLOW_INTERPROCEDURAL); dfa3.run(); for (int i = 0; i < NODES_NUM; ++i) { TestNode *n = d->getNode(i); check(n->counter == 2, "did not go through the node only one time but %d", n->counter); // check that subgraphs are untouched by the dataflow // analysis for (auto sub : n->getSubgraphs()) { // iterate over nodes for (auto It : *sub) { TestNode *n = It.second; check(n->counter == 2, "intErproc. 
dataflow did NOT went to procedures (%d - %d)", n->getKey(), n->counter); TestBBlock *BB = n->getBBlock(); assert(BB); check(BB->getDFSOrder() != 0, "intErproc DataFlow did NOT went into subgraph blocks"); n->counter = 0; } } // zero out the counter for next dataflow run n->counter = 0; } // we have NODES_NUM nodes and each node has subgraph of the // same size + the blocks in parent graph const analysis::DataFlowStatistics& stats3 = dfa3.getStatistics(); check(stats3.getBBlocksNum() == blocks_num, "wrong number of blocks: %d", stats3.getBBlocksNum()); check(stats3.processedBlocks == 2*blocks_num, "processed more blocks than %d - %d", 2*blocks_num, stats3.processedBlocks); check(stats3.getIterationsNum() == 2, "did wrong number of iterations: %d", stats3.getIterationsNum()); #undef NODES_NUM }
// Writes a textual description of every DFA in state_machine_ to stream_.
//
// The state machine is read through a single iterator (iter_) that is shared
// by all loops below: it is advanced once per transition, or once for a
// state that has no transitions. The loop bounds come from the counts
// exposed by the iterator (states, transitions), not from end_.
//
// state_machine_ : state machine to describe.
// stream_        : destination stream.
static void dump (const basic_state_machine<CharT> &state_machine_,
    ostream &stream_)
{
    typename basic_state_machine<CharT>::iterator iter_ =
        state_machine_.begin ();
    // NOTE(review): end_ is initialised but never read in this function.
    typename basic_state_machine<CharT>::iterator end_ =
        state_machine_.end ();

    for (std::size_t dfa_ = 0, dfas_ = state_machine_.size ();
        dfa_ < dfas_; ++dfa_)
    {
        // Number of states of the current DFA, taken before iter_ moves on.
        const std::size_t states_ = iter_->states;

        for (std::size_t i_ = 0; i_ < states_; ++i_)
        {
            state (stream_);
            stream_ << i_ << std::endl;

            // End states additionally report their id and the DFA to go to.
            if (iter_->end_state)
            {
                end_state (stream_);
                stream_ << iter_->id;
                dfa (stream_);
                stream_ << iter_->goto_dfa;
                stream_ << std::endl;
            }

            if (iter_->bol_index != npos)
            {
                bol (stream_);
                stream_ << iter_->bol_index << std::endl;
            }

            if (iter_->eol_index != npos)
            {
                eol (stream_);
                stream_ << iter_->eol_index << std::endl;
            }

            const std::size_t transitions_ = iter_->transitions;

            // A state without transitions still occupies one iterator
            // position; step over it here because the loop below will not
            // run.
            if (transitions_ == 0)
            {
                ++iter_;
            }

            for (std::size_t t_ = 0; t_ < transitions_; ++t_)
            {
                std::size_t goto_state_ = iter_->goto_state;

                if (iter_->token.any ())
                {
                    any (stream_);
                }
                else
                {
                    open_bracket (stream_);

                    if (iter_->token._negated)
                    {
                        negated (stream_);
                    }

                    string charset_;
                    CharT c_ = 0;

                    escape_control_chars (iter_->token._charset,
                        charset_);
                    // First character of the escaped set (0 when empty).
                    c_ = *charset_.c_str ();

                    // In a non-negated set a leading '^' or ']' is preceded
                    // by a backslash.
                    if (!iter_->token._negated &&
                        (c_ == '^' || c_ == ']'))
                    {
                        stream_ << '\\';
                    }

                    stream_ << charset_;
                    close_bracket (stream_);
                }

                stream_ << goto_state_ << std::endl;
                // One iterator position per transition.
                ++iter_;
            }

            stream_ << std::endl;
        }
    }
}
int main(int argc, char* argv[]) { std::string regex; std::string fileName; std::string dfaFileName; for( int i = 1 ; i < argc ; ++i){ if( strcmp(argv[i], "-r") == 0 ){ REGEX = true; regex = argv[++i]; continue; } if( strcmp(argv[i], "-f") == 0 ) { WORDS_FILE = true; fileName = argv[++i]; continue; } if( strcmp(argv[i], "-st") == 0 ) { SHOW_TREE = true; continue; } if( strcmp(argv[i], "-sa") == 0 ) { SHOW_DFA = true; dfaFileName = argv[++i]; continue; } HELP = true; break; } if( HELP ) { std::cout << "HELP" << std::endl; std::cout << std::endl; std::cout << "OPCJE : " << std::endl; std::cout << " -f po tej faldze podać nazwę pliku z lancuchami do sprawdzenia na" << std::endl; std::cout << " przynaleznosc do języka generowanego przez wyrazenie regularne." << std::endl; std::cout << " -r po tej faldze podac wyrazenie regularne." << std::endl; std::cout << " -st flaga oznaczająca wygenerowanie pliku z graficznym przedstawieniem" << std::endl; std::cout << " drzewa rozbioru wyrazenia regularnego." << std::endl; std::cout << " -sa flaga oznaczająca wygenerowanie pliku z tekstowym przedstawieniem" << std::endl; std::cout << " automatu wyrazenia regularnego." << std::endl; return 0; } if( !REGEX ) { std::cerr << "You have to specify regex to do anything!" 
<< std::endl; exit(-1); } std::cout << "REGEX : " << regex << std::endl; Scanner scanner(regex); try { scanner.tokenize(); } catch (ScannerException & e) { std::cerr << e.what() << std::endl; exit(-1); } Syntax syntax(scanner.getTokens()); try { syntax.buildTree(); } catch (SyntaxException & e) { std::cerr << e.what() << std::endl; exit(-1); } DFA dfa(syntax.getTree()); if(WORDS_FILE){ std::ifstream words; words.open(fileName); std::string chain; while(std::getline(words, chain)) { std::cout << chain << " => "; try { scanner.tokenize(chain); } catch (ScannerException & e) { std::cerr << e.what() << std::endl; continue; } if( dfa.checkWord(scanner.getTokens()) ){ std::cout << "TRUE" << std::endl; } else { std::cout << "FALSE" << std::endl; } } } if(SHOW_TREE){ syntax.showTree(); } if(SHOW_DFA){ std::ofstream file; file.open(dfaFileName); file << dfa; file.close(); } return 0; }