void LMPainPoints::GenerateInitial(WERD_RES *word_res) {
  MATRIX *ratings = word_res->ratings;
  AssociateStats associate_stats;
  for (int col = 0; col < ratings->dimension(); ++col) {
    int row_end = MIN(ratings->dimension(), col + ratings->bandwidth() + 1);
    for (int row = col + 1; row < row_end; ++row) {
      MATRIX_COORD coord(col, row);
      if (coord.Valid(*ratings) &&
          ratings->get(col, row) != NOT_CLASSIFIED) continue;
      // Add an initial pain point if needed.
      if (ratings->Classified(col, row - 1, dict_->WildcardID()) ||
          (col + 1 < ratings->dimension() &&
              ratings->Classified(col + 1, row, dict_->WildcardID()))) {
        GeneratePainPoint(col, row, LM_PPTYPE_SHAPE, 0.0,
                          true, max_char_wh_ratio_, word_res);
      }
    }
  }
}
示例#2
0
// Helper recursively prints all paths through the ratings matrix, starting
// at column col.
static void PrintMatrixPaths(int col, int dim,
                             const MATRIX& ratings,
                             int length, const BLOB_CHOICE** blob_choices,
                             const UNICHARSET& unicharset,
                             const char *label, FILE *output_file) {
  for (int row = col; row < dim && row - col < ratings.bandwidth(); ++row) {
    if (ratings.get(col, row) != NOT_CLASSIFIED) {
      BLOB_CHOICE_IT bc_it(ratings.get(col, row));
      for (bc_it.mark_cycle_pt(); !bc_it.cycled_list(); bc_it.forward()) {
        blob_choices[length] = bc_it.data();
        if (row + 1 < dim) {
          PrintMatrixPaths(row + 1, dim, ratings, length + 1, blob_choices,
                           unicharset, label, output_file);
        } else {
          PrintPath(length + 1, blob_choices, unicharset, label, output_file);
        }
      }
    }
  }
}
示例#3
0
void Wordrec::UpdateSegSearchNodes(
    float rating_cert_scale,
    int starting_col,
    GenericVector<SegSearchPending>* pending,
    WERD_RES *word_res,
    LMPainPoints *pain_points,
    BestChoiceBundle *best_choice_bundle,
    BlamerBundle *blamer_bundle) {
  MATRIX *ratings = word_res->ratings;
  ASSERT_HOST(ratings->dimension() == pending->size());
  ASSERT_HOST(ratings->dimension() == best_choice_bundle->beam.size());
  for (int col = starting_col; col < ratings->dimension(); ++col) {
    if (!(*pending)[col].WorkToDo()) continue;
    int first_row = col;
    int last_row = MIN(ratings->dimension() - 1,
                       col + ratings->bandwidth() - 1);
    if ((*pending)[col].SingleRow() >= 0) {
      first_row = last_row = (*pending)[col].SingleRow();
    }
    if (segsearch_debug_level > 0) {
      tprintf("\n\nUpdateSegSearchNodes: col=%d, rows=[%d,%d], alljust=%d\n",
              col, first_row, last_row,
              (*pending)[col].IsRowJustClassified(INT32_MAX));
    }
    // Iterate over the pending list for this column.
    for (int row = first_row; row <= last_row; ++row) {
      // Update language model state of this child+parent pair.
      BLOB_CHOICE_LIST *current_node = ratings->get(col, row);
      LanguageModelState *parent_node =
          col == 0 ? NULL : best_choice_bundle->beam[col - 1];
      if (current_node != NULL &&
          language_model_->UpdateState((*pending)[col].IsRowJustClassified(row),
                                       col, row, current_node, parent_node,
                                       pain_points, word_res,
                                       best_choice_bundle, blamer_bundle) &&
          row + 1 < ratings->dimension()) {
        // Since the language model state of this entry changed, process all
        // the child column.
        (*pending)[row + 1].RevisitWholeColumn();
        if (segsearch_debug_level > 0) {
          tprintf("Added child col=%d to pending\n", row + 1);
        }
      }  // end if UpdateState.
    }  // end for row.
  }  // end for col.
  if (best_choice_bundle->best_vse != NULL) {
    ASSERT_HOST(word_res->StatesAllValid());
    if (best_choice_bundle->best_vse->updated) {
      pain_points->GenerateFromPath(rating_cert_scale,
                                    best_choice_bundle->best_vse, word_res);
      if (!best_choice_bundle->fixpt.empty()) {
        pain_points->GenerateFromAmbigs(best_choice_bundle->fixpt,
                                        best_choice_bundle->best_vse, word_res);
      }
    }
  }
  // The segsearch is completed. Reset all updated flags on all VSEs and reset
  // all pendings.
  for (int col = 0; col < pending->size(); ++col) {
    (*pending)[col].Clear();
    ViterbiStateEntry_IT
        vse_it(&best_choice_bundle->beam[col]->viterbi_state_entries);
    for (vse_it.mark_cycle_pt(); !vse_it.cycled_list(); vse_it.forward()) {
      vse_it.data()->updated = false;
    }
  }
}