示例#1
0
    // Builds a multi-line textual dump of a recognised word.
    //
    // The report contains, in this order:
    //   - "Blanks= "   the value of word->space()
    //   - the word's bounding box as formatted by printTBOX(box, height, true)
    //   - "Correct= "  the unichar string of wordres->best_choice
    //   - "Flags = "   the collected flag bits, in decimal and then in octal
    //   - one "   W_xxx = TRUE/FALSE " line per inspected word flag
    //   - "Rejected cblob count = " the length of the rejected-blob list
    //
    // The last line is not newline-terminated.
    //
    // wordres: recognition result whose word is described.
    // height:  forwarded unchanged to printTBOX().
    //
    // NOTE(review): wordres, wordres->word and wordres->best_choice are
    // dereferenced without null checks - confirm callers guarantee them.
    QString printWORD(WERD_RES *wordres,int height)
    {
        // Local helper: substitutes "TRUE" or "FALSE " (the trailing blank is
        // part of the emitted text) for %1 in the given line template.
        struct FlagLine
        {
            static QString make(const char *lineTemplate, bool isSet)
            {
                return QString(lineTemplate).arg(isSet ? "TRUE" : "FALSE ");
            }
        };

        WERD *word = wordres->word;

        // Mirror every inspected word flag into one bit set so the combined
        // value can be printed numerically below.
        BITS16 flags;
        flags.set_bit(W_SEGMENTED, word->flag(W_SEGMENTED));
        flags.set_bit(W_ITALIC, word->flag(W_ITALIC));
        flags.set_bit(W_BOL, word->flag(W_BOL));
        flags.set_bit(W_EOL, word->flag(W_EOL));
        flags.set_bit(W_NORMALIZED, word->flag(W_NORMALIZED));
        flags.set_bit(W_POLYGON, word->flag(W_POLYGON));
        flags.set_bit(W_LINEARC, word->flag(W_LINEARC));
        flags.set_bit(W_DONT_CHOP, word->flag(W_DONT_CHOP));
        flags.set_bit(W_REP_CHAR, word->flag(W_REP_CHAR));
        flags.set_bit(W_FUZZY_SP, word->flag(W_FUZZY_SP));
        flags.set_bit(W_FUZZY_NON, word->flag(W_FUZZY_NON));

        QString report;

        // Header: spacing, geometry and the recognised text.
        report += QString("Blanks= %1\n").arg(word->space());
        report += printTBOX(word->bounding_box(), height, true);
        report += QString("Correct= %1\n")
                      .arg(wordres->best_choice->unichar_string().string());

        // Combined flag value: %1 is decimal, %2 is base 8 (the literal '0'
        // in the template supplies the octal prefix).
        report += QString("Flags = %1 = 0%2\n").arg(flags.val)
                      .arg(flags.val, 0, 8);

        // One line per individual flag.
        report += FlagLine::make("   W_SEGMENTED = %1\n", word->flag(W_SEGMENTED));
        report += FlagLine::make("   W_ITALIC = %1\n", word->flag(W_ITALIC));
        report += FlagLine::make("   W_BOL = %1\n", word->flag(W_BOL));
        report += FlagLine::make("   W_EOL = %1\n", word->flag(W_EOL));
        report += FlagLine::make("   W_NORMALIZED = %1\n", word->flag(W_NORMALIZED));
        report += FlagLine::make("   W_POLYGON = %1\n", word->flag(W_POLYGON));
        report += FlagLine::make("   W_LINEARC = %1\n", word->flag(W_LINEARC));
        report += FlagLine::make("   W_DONT_CHOP = %1\n", word->flag(W_DONT_CHOP));
        report += FlagLine::make("   W_REP_CHAR = %1\n", word->flag(W_REP_CHAR));
        report += FlagLine::make("   W_FUZZY_SP = %1\n", word->flag(W_FUZZY_SP));
        report += FlagLine::make("   W_FUZZY_NON = %1\n", word->flag(W_FUZZY_NON));

        // Trailer: number of blobs on the word's rejected list.
        report += QString("Rejected cblob count = %1")
                      .arg(word->rej_cblob_list()->length());

        return report;
    }
示例#2
0
void ExtractBlobsFromSegmentation(BLOCK_LIST* blocks,
                                  C_BLOB_LIST* output_blob_list) {
  C_BLOB_IT return_list_it(output_blob_list);
  BLOCK_IT block_it(blocks);
  for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
    BLOCK* block = block_it.data();
    ROW_IT row_it(block->row_list());
    for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
      ROW* row = row_it.data();
      // Iterate over all werds in the row.
      WERD_IT werd_it(row->word_list());
      for (werd_it.mark_cycle_pt(); !werd_it.cycled_list(); werd_it.forward()) {
        WERD* werd = werd_it.data();
        return_list_it.move_to_last();
        return_list_it.add_list_after(werd->cblob_list());
        return_list_it.move_to_last();
        return_list_it.add_list_after(werd->rej_cblob_list());
      }
    }
  }
}
示例#3
0
// Scores every word of a row as noise or not and rejects the blobs of the
// words judged to be noise.
//
// Pass 1 gives each word a verdict in word_dud[]:
//   0 = keep, 1 = borderline, 2 = noise.
// The verdict compares dot_count (small or oversized objects) with
// norm_count (normal-looking objects), scaled by textord_noise_normratio.
// A word with dot_count <= 2 is always kept.
//
// Pass 2 transfers the blob list of each word marked 2 onto that word's
// rejected-blob list. Words marked 1 are treated the same way, but only
// when the row has more noise words than remaining words.
//
// Does nothing when the row has no words or textord_no_rejects is set.
void Textord::clean_noise_from_words(          //remove empties
                                     ROW *row  //row to clean
                                    ) {
  TBOX blob_box;                  //bounding box
  inT8 *word_dud;                //per-word verdict: 0, 1 or 2
  C_BLOB *blob;                  //current blob
  C_OUTLINE *outline;            //current outline
  WERD *word;                    //current word
  inT32 blob_size;               //larger of box width and height
  inT32 trans_count;             //no of transitions
  inT32 trans_threshold;         //noise tolerance
  inT32 dot_count;               //small objects
  inT32 norm_count;              //normal objects
  inT32 dud_words;               //number discarded
  inT32 ok_words;                //number remaining
  inT32 word_index;              //current word
                                 //words of row
  WERD_IT word_it = row->word_list ();
  C_BLOB_IT blob_it;             //blob iterator
  C_OUTLINE_IT out_it;           //outline iterator

  // ok_words temporarily holds the total word count; it sizes the verdict
  // array and is reset to zero before the real counting starts.
  ok_words = word_it.length ();
  if (ok_words == 0 || textord_no_rejects)
    return;
  word_dud = (inT8 *) alloc_mem (ok_words * sizeof (inT8));
  dud_words = 0;
  ok_words = 0;
  word_index = 0;
  // Pass 1: score each word. word_index advances in step with word_it so
  // that word_dud[word_index] always belongs to the current word.
  for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) {
    word = word_it.data ();      //current word
    dot_count = 0;
    norm_count = 0;
                                 //blobs in word
    blob_it.set_to_list (word->cblob_list ());
    for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
    blob_it.forward ()) {
      blob = blob_it.data ();
      if (!word->flag (W_DONT_CHOP)) {
                                 //get outlines
        out_it.set_to_list (blob->out_list ());
        for (out_it.mark_cycle_pt (); !out_it.cycled_list ();
        out_it.forward ()) {
          outline = out_it.data ();
          blob_box = outline->bounding_box ();
          blob_size =
            blob_box.width () >
            blob_box.height ()? blob_box.width () : blob_box.
            height();
          if (blob_size < textord_noise_sizelimit * row->x_height ())
            dot_count++;         //count small outlines
          // An outline that has child outlines and whose height and width
          // both lie within the syfract/sxfract tolerance of the row's
          // x-height counts as a normal object.
          if (!outline->child ()->empty ()
            && blob_box.height () <
            (1 + textord_noise_syfract) * row->x_height ()
            && blob_box.height () >
            (1 - textord_noise_syfract) * row->x_height ()
            && blob_box.width () <
            (1 + textord_noise_sxfract) * row->x_height ()
            && blob_box.width () >
            (1 - textord_noise_sxfract) * row->x_height ())
            norm_count++;        //count x-height-sized outlines with children
        }
      }
      else
        // Words flagged W_DONT_CHOP skip the outline inspection; each of
        // their blobs counts as one normal object.
        norm_count++;
      // Whole-blob test, applied regardless of W_DONT_CHOP.
      blob_box = blob->bounding_box ();
      blob_size =
        blob_box.width () >
        blob_box.height ()? blob_box.width () : blob_box.height ();
      // Mid-sized blob (at least the noise size limit, below twice the
      // x-height): it counts as normal when its transition count, taken
      // with a threshold proportional to its size, is under the limit.
      if (blob_size >= textord_noise_sizelimit * row->x_height ()
      && blob_size < row->x_height () * 2) {
        trans_threshold = blob_size / textord_noise_sizefraction;
        trans_count = blob->count_transitions (trans_threshold);
        if (trans_count < textord_noise_translimit)
          norm_count++;
      }
      // A blob taller than twice the x-height adds 2 to dot_count, unless
      // it is the first blob of the first word in the row.
      else if (blob_box.height () > row->x_height () * 2
        && (!word_it.at_first () || !blob_it.at_first ()))
        dot_count += 2;
    }
    // Turn the counts into a verdict; words with at most two dots are kept.
    if (dot_count > 2) {
      if (dot_count > norm_count * textord_noise_normratio * 2)
        word_dud[word_index] = 2;
      else if (dot_count > norm_count * textord_noise_normratio)
        word_dud[word_index] = 1;
      else
        word_dud[word_index] = 0;
    }
    else
      word_dud[word_index] = 0;
    // Only verdict 2 counts as discarded here; borderline words (1) are
    // tallied with the remaining words.
    if (word_dud[word_index] == 2)
      dud_words++;
    else
      ok_words++;
    word_index++;
  }

  // Pass 2: reject the blobs of noise words. Borderline words are rejected
  // only when discarded words outnumber the remaining ones.
  word_index = 0;
  for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) {
    if (word_dud[word_index] == 2
    || (word_dud[word_index] == 1 && dud_words > ok_words)) {
      word = word_it.data ();    //current word
                                 //rejected blobs
      blob_it.set_to_list (word->rej_cblob_list ());
                                 //move from blobs
      blob_it.add_list_after (word->cblob_list ());
    }
    word_index++;
  }
  free_mem(word_dud);
}