/** Creates a prefix grammar from a base grammar and a input prefix (but prefix not included in grammar) * @param base_grammar the grammar that will be used as base to construct the prefix grammar * @param input_prefix * @param output_prefix * * Note: There is a problem when there are arcs in the grammar with lambda output. * If that happens, the algorithm enters in an infinite loop, since no output symbol * is consumed when using that symbol * * XXX: Currently, it only works for only input single word n-grams which are the most common case */ grammar_t * grammar_create_conditioned_to_prefix(const grammar_t *base_grammar, const symbol_t *input_prefix) { REQUIRE(base_grammar->vocab_type == GV_BILINGUAL, "Error: cannot create a wordlist grammar from secondary grammars"); REQUIRE(base_grammar->list_initial->num_elements == 1, "Error: cannot create a wordlist grammar from non n-gram grammars"); grammar_t * grammar = (grammar_t *) malloc(sizeof(grammar_t)); MEMTEST(grammar); // copy all base grammar fields *grammar = *base_grammar; grammar->vector = (state_grammar_t **) malloc(sizeof(state_grammar_t *)); MEMTEST(grammar->vector); grammar->num_states = 0; // find n-gram state that matches the prefix state_grammar_t *initial_state = base_grammar->list_initial->vector[0].state; symbol_t const * word = input_prefix; while (*word != VOCAB_NONE) { words_state_t ws = { STATE_NONE, *word, LOG_ZERO }; state_grammar_fill_word_state(initial_state, &ws); if (verbose > 1) { char *name = NULL, *name2 = NULL; extended_vocab_symbols_to_string(initial_state->name, grammar->vocab, &name); if (ws.state_next != STATE_NONE) { extended_vocab_symbols_to_string(ws.state_next->name, grammar->vocab, &name2); } PRINT("state: %s - %s -> %s, %s, p = %g\n", name, extended_vocab_get_string(grammar->vocab, *word), name2, extended_vocab_get_string(grammar->vocab, ws.word), ws.prob); free(name); free(name2); } initial_state = ws.state_next; word++; } if (initial_state == NULL) { ERROR("Unknown initial 
state"); } //Create list of initial states grammar->list_initial = (list_states_t *) malloc(sizeof(list_states_t)); MEMTEST(grammar->list_initial); grammar->list_initial->vector = NULL; grammar->list_initial->num_elements = 0; list_states_append(grammar->list_initial, initial_state, LOG_ONE); //Create list of end states grammar->list_end = (list_states_t *) malloc(sizeof(list_states_t)); MEMTEST(grammar->list_end); grammar->list_end->vector = NULL; grammar->list_end->num_elements = 0; for (int l = 0; l < base_grammar->list_end->num_elements; l++) { const prob_state_t *ps = &base_grammar->list_end->vector[l]; list_states_append(grammar->list_end, ps->state, ps->prob); } grammar_sort_by_prob(grammar); grammar_build_word_search(grammar); //size_t fifo_idx = 0; return grammar; }
/**
 * Records an arc from the search node prefix_info to the search node
 * identified by (word_state->state_next, ip, op), creating and enqueueing
 * that node the first time the key is seen.
 *
 * @param search      prefix search context; new nodes are appended to
 *                    search->fifo and indexed in search->hash
 * @param prefix_info search node the arc leaves from
 * @param word_state  base-grammar arc being followed (word, prob, state_next)
 * @param ip          input prefix stored in the target node
 * @param op          output prefix stored in the target node
 * @param bo          value added to word_state->prob to form the new arc's
 *                    probability (presumably an accumulated back-off
 *                    weight -- confirm against callers)
 */
void expand_word_state(search_prefix_t *search, prefix_fifo_t * prefix_info, const struct words_state_t *word_state, const symbol_t *ip, const symbol_t *op, float bo) {
  prefix_fifo_key_t key = { word_state->state_next, ip, op };
  prefix_fifo_t *pf = (prefix_fifo_t *)hash_search((void *)&key, sizeof(prefix_fifo_key_t), search->hash);

  if (pf == NULL) {
    pf = (prefix_fifo_t *) malloc(sizeof(prefix_fifo_t));
    MEMTEST(pf);
    pf->base_state = word_state->state_next;
    pf->input = ip;
    pf->output = op;

    // the new state carries the same name as the base state it mirrors
    pf->state = state_grammar_create();
    pf->state->name = (symbol_t*) realloc(pf->state->name, sizeof(symbol_t) * (symlen(pf->base_state->name) + 1));
    MEMTEST(pf->state->name);
    symcpy(pf->state->name, pf->base_state->name);

    pf->initial_prob = LOG_ZERO;
    pf->is_accesible = false;
    pf->incoming = vector_create();

    vector_append(search->fifo, pf);
    hash_insert((void *)&key, sizeof(prefix_fifo_key_t), pf, search->hash);
  }

  // remember which arc of the previous node points at pf so it can be
  // revisited later through pf->incoming
  incoming_t *incoming = (incoming_t *) malloc(sizeof(incoming_t));
  MEMTEST(incoming);
  incoming->prev_pf = prefix_info;
  incoming->word_idx = state_grammar_append(prefix_info->state, word_state->word, word_state->prob + bo, pf->state);
  vector_append(pf->incoming, incoming);
}
/*
 * UI command handler that runs a pattern-based memory test over an
 * address range.
 *
 * The address, length and address type come from getaddrargs(). The
 * address is aligned down to an 8-byte boundary and the length is divided
 * by 8 before the range is accessed through a uint64_t pointer. With the
 * "-loop" switch the patterns are repeated, printing the pass number each
 * time, until console_status() returns nonzero; otherwise a single pass is
 * made. argc and argv are not used here.
 *
 * Always returns 0.
 *
 * NOTE(review): MEMTEST is a macro defined outside this view. The locals
 * `idx` and `error` are not otherwise used in this function, so the macro
 * presumably references them (and possibly `loopmode`) by name -- do not
 * rename or remove them without checking the macro.
 */
static int ui_cmd_memtest(ui_cmdline_t *cmd,int argc,char *argv[]) {
    long addr = 0;
    int len = 0;
    int wtype = 0;
    int wlen;
    int idx = 0;
    uint64_t *ptr;
    int error = 0;
    int loopmode = 0;
    int pass =0;

    getaddrargs(cmd,&wtype,&addr,&len);

    /* work in 64-bit words: align the start address down to 8 bytes */
    wlen = 8;
    addr &= ~(wlen-1);

    /*
     * NOTE(review): this tests prev_wtype (declared outside this view), not
     * the wtype just filled in by getaddrargs() -- confirm this is intended.
     */
    if ((prev_wtype & ATYPE_TYPE_MASK) == ATYPE_TYPE_PHYS) {
        addr = UNCADDR(addr);
    }

    if (cmd_sw_isset(cmd,"-loop")) {
        loopmode = 1;
    }

    /* len is divided by the word size (presumably bytes -> 64-bit words) */
    len /= wlen;

    ptr = (uint64_t *) addr;
    pass = 0;
    for (;;) {
        if (loopmode) {
            printf("Pass %d\n",pass);
            /* a nonzero console status ends the loop */
            if (console_status()) break;
        }
        /* first pattern depends on idx, the remaining ones are constants */
        MEMTEST(ptr,len,(idx*8));
        MEMTEST(ptr,len, 0);
        MEMTEST(ptr,len,0xFFFFFFFFFFFFFFFFLL);
        MEMTEST(ptr,len,0x5555555555555555LL);
        MEMTEST(ptr,len,0xAAAAAAAAAAAAAAAALL);
        MEMTEST(ptr,len,0xFF00FF00FF00FF00LL);
        MEMTEST(ptr,len,0x00FF00FF00FF00FFLL);
        if (!loopmode) break;
        pass++;
    }

    return 0;
}
/** Creates a wordlist grammar from a base grammar and a input/ouput prefix * @param base_grammar the grammar that will be used as base to construct the prefix grammar * @param input_prefix * @param output_prefix * * A wordlist grammar is a grammar that can only recognize one word. * Note: There is a problem when there are arcs in the grammar with lambda output. * If that happens, the algorithm enters in an infinite loop, since no output symbol * is consumed when using that symbol * * XXX: Currently, it only works for only input single word n-grams which are the most common case */ grammar_t * grammar_create_wordlist_from_prefix(const grammar_t *base_grammar, const symbol_t *input_prefix) { REQUIRE(base_grammar->vocab_type == GV_BILINGUAL, "Error: cannot create a wordlist grammar from secondary grammars"); REQUIRE(base_grammar->list_initial->num_elements == 1, "Error: cannot create a wordlist grammar from non n-gram grammars"); grammar_t * grammar = (grammar_t *) malloc(sizeof(grammar_t)); MEMTEST(grammar); // copy all base grammar fields *grammar = *base_grammar; grammar->vector = (state_grammar_t **) malloc(sizeof(state_grammar_t *)); MEMTEST(grammar->vector); grammar->num_states = 0; grammar->is_ngram = false; grammar->n = 1; //Create list of initial states grammar->list_initial = (list_states_t *) malloc(sizeof(list_states_t)); MEMTEST(grammar->list_initial); grammar->list_initial->vector = NULL; grammar->list_initial->num_elements = 0; state_grammar_t *initial_state = state_grammar_create(); grammar_append(grammar, initial_state); list_states_append(grammar->list_initial, initial_state, LOG_ONE); //Create list of end states grammar->list_end = (list_states_t *) malloc(sizeof(list_states_t)); MEMTEST(grammar->list_end); grammar->list_end->vector = NULL; grammar->list_end->num_elements = 0; state_grammar_t *end_state = state_grammar_create(); grammar_append(grammar, end_state); list_states_append(grammar->list_end, end_state, LOG_ONE); // find n-gram state that 
matches the prefix state_grammar_t *current_state = base_grammar->list_initial->vector[0].state; symbol_t const * word = input_prefix; while (*word != VOCAB_NONE) { words_state_t ws = { STATE_NONE, *word, LOG_ZERO }; state_grammar_fill_word_state(current_state, &ws); current_state = ws.state_next; word++; } //Create vector of visits for language model expansion bool *visit = (bool *) malloc(grammar->vocab->extended->last * sizeof(bool)); MEMTEST(visit); memset(visit, false, (grammar->vocab->extended->last) * sizeof(bool)); float backoff = 0; while (current_state != NULL) { for (int w = 0; w < current_state->num_words; w++) { if (!visit[current_state->words[w].word]) { state_grammar_append(initial_state, current_state->words[w].word, current_state->words[w].prob + backoff, end_state); visit[current_state->words[w].word] = true; } } if (current_state->state_bo != NULL && isfinite(current_state->bo)) { backoff += current_state->bo; current_state = current_state->state_bo; } else { current_state = NULL; } } free(visit); grammar_sort_by_prob(grammar); grammar_build_word_search(grammar); //size_t fifo_idx = 0; return grammar; }
/** Creates a prefix grammar from a base grammar and a input/ouput prefix * @param base_grammar the grammar that will be used as base to construct the prefix grammar * @param input_prefix * @param output_prefix * * Note: There is a problem when there are arcs in the grammar with lambda output. * If that happens, the algorithm enters in an infinite loop, since no output symbol * is consumed when using that symbol */ grammar_t * grammar_create_from_prefix(const grammar_t *base_grammar, const symbol_t *input_prefix, const symbol_t *output_prefix) { REQUIRE(base_grammar->vocab_type == GV_BILINGUAL, "Error: cannot create a prefix from secondary grammars"); if (base_grammar->start != VOCAB_NONE) { if (input_prefix != NULL && *input_prefix == base_grammar->start) { input_prefix++; } if (output_prefix != NULL && *output_prefix == base_grammar->start) { output_prefix++; } } grammar_t * grammar = (grammar_t *) malloc(sizeof(grammar_t)); MEMTEST(grammar); // copy all base grammar fields *grammar = *base_grammar; grammar->vector = (state_grammar_t **) malloc(sizeof(state_grammar_t *)); MEMTEST(grammar->vector); grammar->num_states = 0; //Create list of initial states grammar->list_initial = (list_states_t *) malloc(sizeof(list_states_t)); MEMTEST(grammar->list_initial); grammar->list_initial->vector = NULL; grammar->list_initial->num_elements = 0; //Create list of end states grammar->list_end = (list_states_t *) malloc(sizeof(list_states_t)); MEMTEST(grammar->list_end); grammar->list_end->vector = NULL; grammar->list_end->num_elements = 0; for (int l = 0; l < base_grammar->list_end->num_elements; l++) { const prob_state_t *ps = &base_grammar->list_end->vector[l]; list_states_append(grammar->list_end, ps->state, ps->prob); } if ((input_prefix == NULL || *input_prefix == VOCAB_NONE) && (output_prefix == NULL || *output_prefix == VOCAB_NONE)) { for (int l = 0; l < base_grammar->list_initial->num_elements; l++) { const prob_state_t *ps = &base_grammar->list_initial->vector[l]; 
list_states_append(grammar->list_initial, ps->state, ps->prob); } } else { // initialise fifo with that states search_prefix_t search; search.fifo = vector_create(); search.hash = hash_create(271, NULL); for (int l = 0; l < base_grammar->list_initial->num_elements; l++) { prefix_fifo_key_t key = { base_grammar->list_initial->vector[l].state, input_prefix, output_prefix }; prefix_fifo_t *pf = (prefix_fifo_t *)hash_search((void *) &key, sizeof(prefix_fifo_key_t), search.hash); if (pf == NULL) { pf = (prefix_fifo_t *) malloc(sizeof(prefix_fifo_t)); pf->base_state = base_grammar->list_initial->vector[l].state; pf->input = input_prefix; pf->output = output_prefix; pf->state = state_grammar_create(); pf->state->name = (symbol_t*) realloc(pf->state->name, sizeof(symbol_t) * (symlen(pf->base_state->name) + 1)); symcpy(pf->state->name, pf->base_state->name); pf->incoming = vector_create(); pf->initial_prob = base_grammar->list_initial->vector[l].prob; pf->is_accesible = false; vector_append(search.fifo, pf); hash_insert((void *) &key, sizeof(prefix_fifo_key_t), pf, search.hash); } } size_t fifo_idx = 0; // build grammar from preffix while (fifo_idx < search.fifo->n_elems) { prefix_fifo_t *prev_pf = (prefix_fifo_t *) search.fifo->data[fifo_idx++]; expand_from_prefix(&search, prev_pf, base_grammar->vocab, NULL, NULL); } for (int i = search.fifo->n_elems - 1; i >= 0; i--) { prefix_fifo_t *pf = (prefix_fifo_t *) search.fifo->data[i]; // XXX: this while loop should be made in reverse topological order // until it is done, we just ignore that some paths should be pruned // this is valid for complete prefixes pf->is_accesible = true; if (pf->is_accesible) { int l = grammar_is_final_state(base_grammar, pf->base_state); bool is_final = false; if (l != -1) { list_states_append(grammar->list_end, pf->state, base_grammar->list_end->vector[l].prob); is_final = true; } // it is an initial state if (!is_logzero(pf->initial_prob)) { list_states_append(grammar->list_initial, pf->state, 
pf->initial_prob); } else if (pf->incoming == NULL || pf->incoming->n_elems == 0) { pf->is_accesible = false; } // it is a regular state for (size_t n = 0; n < pf->incoming->n_elems; n++) { incoming_t *incoming = (incoming_t *)pf->incoming->data[n]; incoming->prev_pf->is_accesible = true; } // link the arc with the base grammar if we are at the end of the prefix if ((pf->input == NULL || *pf->input == VOCAB_NONE) && (pf->output == NULL || *pf->output == VOCAB_NONE) && !is_final) { for (size_t n = 0; n < pf->incoming->n_elems; n++) { incoming_t *incoming = (incoming_t *)pf->incoming->data[n]; incoming->prev_pf->state->words[incoming->word_idx].state_next = (state_grammar_t *)pf->base_state; } pf->is_accesible = false; } } else { for (size_t n = 0; n < pf->incoming->n_elems; n++) { incoming_t *incoming = (incoming_t *)pf->incoming->data[n]; incoming->prev_pf->state->words[incoming->word_idx].prob = LOG_ZERO; } } } for (size_t i = 0; i < search.fifo->n_elems; i++) { prefix_fifo_t *pf = (prefix_fifo_t *) search.fifo->data[i]; if (pf->is_accesible) { grammar_append(grammar, pf->state); } else { state_grammar_delete(pf->state); } if (pf->incoming != NULL) { for (size_t n = 0; n < pf->incoming->n_elems; n++) { free(pf->incoming->data[n]); } vector_delete(pf->incoming); } free(pf); } vector_delete(search.fifo); hash_delete(search.hash, false); grammar_sort_by_prob(grammar); grammar_build_word_search(grammar); } //size_t fifo_idx = 0; return grammar; }