/**
 * Sets up auto page segmentation, determines the orientation, and corrects it.
 * Somewhat arbitrary chunk of functionality, factored out of AutoPageSeg to
 * facilitate testing.
 * photo_mask_pix is a pointer to a NULL pointer that will be filled on return
 * with the leptonica photo mask, which must be pixDestroyed by the caller.
 * music_mask_pix is handed unchanged to LineFinder::FindAndRemoveLines.
 * to_blocks is an empty list that will be filled with (usually a single)
 * block that is used during layout analysis. This ugly API is required
 * because of the possibility of a unlv zone file.
 * TODO(rays) clean this up.
 * See AutoPageSeg for other arguments.
 * The returned ColumnFinder must be deleted after use.
 * Returns NULL in two cases:
 *  - the single input block has line_size < 2, so no ColumnFinder is built;
 *  - OSD was run (OSD enabled, osd_tess and osr both non-NULL) and
 *    pageseg_mode is PSM_OSD_ONLY; the finder is deleted before returning.
 * pix_binary_ must be non-NULL and to_blocks must end up holding exactly one
 * block; both conditions are enforced with ASSERT_HOST.
 */
ColumnFinder* Tesseract::SetupPageSegAndDetectOrientation(
    PageSegMode pageseg_mode, BLOCK_LIST* blocks, Tesseract* osd_tess,
    OSResults* osr, TO_BLOCK_LIST* to_blocks, Pix** photo_mask_pix,
    Pix** music_mask_pix) {
  int vertical_x = 0;
  int vertical_y = 1;
  TabVector_LIST v_lines;
  TabVector_LIST h_lines;
  // NOTE(review): bleft is constructed but not referenced again in this
  // function.
  ICOORD bleft(0, 0);

  ASSERT_HOST(pix_binary_ != NULL);
  if (tessedit_dump_pageseg_images) {
    pixWrite("tessinput.png", pix_binary_, IFF_PNG);
  }
  // Leptonica is used to find the rule/separator lines in the input.
  // vertical_x/vertical_y and the two line lists are filled in here and are
  // later passed to the ColumnFinder constructor.
  LineFinder::FindAndRemoveLines(source_resolution_,
                                 textord_tabfind_show_vlines, pix_binary_,
                                 &vertical_x, &vertical_y, music_mask_pix,
                                 &v_lines, &h_lines);
  if (tessedit_dump_pageseg_images)
    pixWrite("tessnolines.png", pix_binary_, IFF_PNG);
  // Leptonica is used to find a mask of the photo regions in the input.
  *photo_mask_pix = ImageFind::FindImages(pix_binary_);
  if (tessedit_dump_pageseg_images)
    pixWrite("tessnoimages.png", pix_binary_, IFF_PNG);
  // Without column finding the vertical lines are discarded.
  if (!PSM_COL_FIND_ENABLED(pageseg_mode)) v_lines.clear();

  // The rest of the algorithm uses the usual connected components.
  textord_.find_components(pix_binary_, blocks, to_blocks);

  TO_BLOCK_IT to_block_it(to_blocks);
  // There must be exactly one input block.
  // TODO(rays) handle new textline finding with a UNLV zone file.
  ASSERT_HOST(to_blocks->singleton());
  TO_BLOCK* to_block = to_block_it.data();
  TBOX blkbox = to_block->block->bounding_box();
  ColumnFinder* finder = NULL;

  // A finder is only created when the block's line_size is at least 2;
  // otherwise the function falls through and returns NULL.
  if (to_block->line_size >= 2) {
    finder = new ColumnFinder(static_cast<int>(to_block->line_size),
                              blkbox.botleft(), blkbox.topright(),
                              source_resolution_, textord_use_cjk_fp_model,
                              textord_tabfind_aligned_gap_fraction,
                              &v_lines, &h_lines, vertical_x, vertical_y);

    finder->SetupAndFilterNoise(pageseg_mode, *photo_mask_pix, to_block);

    if (equ_detect_) {
      equ_detect_->LabelSpecialText(to_block);
    }

    BLOBNBOX_CLIST osd_blobs;
    // osd_orientation is the number of 90 degree rotations to make the
    // characters upright. (See osdetect.h for precise definition.)
    // We want the text lines horizontal, (vertical text indicates vertical
    // textlines) which may conflict (eg vertically written CJK).
    int osd_orientation = 0;
    // Vertical text is either forced (by variable or page segmentation mode)
    // or, when allowed, decided by the finder, which also fills osd_blobs.
    bool vertical_text = textord_tabfind_force_vertical_text ||
                         pageseg_mode == PSM_SINGLE_BLOCK_VERT_TEXT;
    if (!vertical_text && textord_tabfind_vertical_text &&
        PSM_ORIENTATION_ENABLED(pageseg_mode)) {
      vertical_text =
          finder->IsVerticallyAlignedText(textord_tabfind_vertical_text_ratio,
                                          to_block, &osd_blobs);
    }
    if (PSM_OSD_ENABLED(pageseg_mode) && osd_tess != NULL && osr != NULL) {
      GenericVector<int> osd_scripts;
      if (osd_tess != this) {
        // We are running osd as part of layout analysis, so constrain the
        // scripts to those allowed by *this.
        AddAllScriptsConverted(unicharset, osd_tess->unicharset, &osd_scripts);
        for (int s = 0; s < sub_langs_.size(); ++s) {
          AddAllScriptsConverted(sub_langs_[s]->unicharset,
                                 osd_tess->unicharset, &osd_scripts);
        }
      }
      os_detect_blobs(&osd_scripts, &osd_blobs, osr, osd_tess);
      if (pageseg_mode == PSM_OSD_ONLY) {
        // Only the OSD results in osr were wanted; no layout analysis follows.
        delete finder;
        return NULL;
      }
      osd_orientation = osr->best_result.orientation_id;
      double osd_score = osr->orientations[osd_orientation];
      // osd_margin ends up as the smallest gap between the best orientation's
      // score and any of the other three, capped at twice
      // min_orientation_margin.
      double osd_margin = min_orientation_margin * 2;
      for (int i = 0; i < 4; ++i) {
        if (i != osd_orientation &&
            osd_score - osr->orientations[i] < osd_margin) {
          osd_margin = osd_score - osr->orientations[i];
        }
      }
      int best_script_id = osr->best_result.script_id;
      const char* best_script_str =
          osd_tess->unicharset.get_script_from_script_id(best_script_id);
      // The script counts as CJK if its id is one of the han/hiragana/katakana
      // ids of osd_tess, or its name is Japanese, Korean or Hangul.
      bool cjk = best_script_id == osd_tess->unicharset.han_sid() ||
                 best_script_id == osd_tess->unicharset.hiragana_sid() ||
                 best_script_id == osd_tess->unicharset.katakana_sid() ||
                 strcmp("Japanese", best_script_str) == 0 ||
                 strcmp("Korean", best_script_str) == 0 ||
                 strcmp("Hangul", best_script_str) == 0;
      if (cjk) {
        finder->set_cjk_script(true);
      }
      if (osd_margin < min_orientation_margin) {
        // The margin is weak.
        if (!cjk && !vertical_text && osd_orientation == 2) {
          // upside down latin text is improbable with such a weak margin.
          tprintf("OSD: Weak margin (%.2f), horiz textlines, not CJK: "
                  "Don't rotate.\n", osd_margin);
          osd_orientation = 0;
        } else {
          tprintf("OSD: Weak margin (%.2f) for %d blob text block, "
                  "but using orientation anyway: %d\n",
                  osd_margin, osd_blobs.length(), osd_orientation);
        }
      }
    }
    // Empty the temporary blob list before it goes out of scope, then apply
    // the chosen orientation to the block.
    osd_blobs.shallow_clear();
    finder->CorrectOrientation(to_block, vertical_text, osd_orientation);
  }

  return finder;
}
static int tprint_timex(struct tcb *tcp, long addr) { struct timex tx; #if SUPPORTED_PERSONALITIES > 1 if (current_wordsize == 4) return tprint_timex32(tcp, addr); #endif if (umove(tcp, addr, &tx) < 0) return -1; #if LINUX_VERSION_CODE < 66332 tprintf("{mode=%d, offset=%ld, frequency=%ld, ", tx.mode, tx.offset, tx.frequency); tprintf("maxerror=%ld, esterror=%lu, status=%u, ", tx.maxerror, tx.esterror, tx.status); tprintf("time_constant=%ld, precision=%lu, ", tx.time_constant, tx.precision); tprintf("tolerance=%ld, time=", tx.tolerance); tprint_timeval(tcp, &tx.time); #else tprints("{modes="); printflags(adjtimex_modes, tx.modes, "ADJ_???"); tprintf(", offset=%ld, freq=%ld, maxerror=%ld, ", (long) tx.offset, (long) tx.freq, (long) tx.maxerror); tprintf("esterror=%lu, status=", (long) tx.esterror); printflags(adjtimex_status, tx.status, "STA_???"); tprintf(", constant=%ld, precision=%lu, ", (long) tx.constant, (long) tx.precision); tprintf("tolerance=%ld, time=", (long) tx.tolerance); tprint_timeval(tcp, &tx.time); tprintf(", tick=%ld, ppsfreq=%ld, jitter=%ld", (long) tx.tick, (long) tx.ppsfreq, (long) tx.jitter); tprintf(", shift=%d, stabil=%ld, jitcnt=%ld", tx.shift, (long) tx.stabil, (long) tx.jitcnt); tprintf(", calcnt=%ld, errcnt=%ld, stbcnt=%ld", (long) tx.calcnt, (long) tx.errcnt, (long) tx.stbcnt); #endif tprints("}"); return 0; }
// Accumulates the errors from the classifier results on a single sample.
// Returns true only if debug is true and the sample ended up flagged as an
// error (either a reject, i.e. no results, or an error of the type selected
// by boosting_mode). In that case the results are printed and the function
// returns before the bad-score histogram is updated. In every other case it
// returns false.
// boosting_mode selects the type of error to be used for boosting and the
// is_error_ member of sample is set according to whether the required type
// of error occurred. The font_table provides access to font properties
// for error counting.
// results is the list of classifier answers for the sample; results[0] is
// treated as the top choice.
bool ErrorCounter::AccumulateErrors(bool debug, CountTypes boosting_mode,
                                    const FontInfoTable& font_table,
                                    const GenericVector<UnicharRating>& results,
                                    TrainingSample* sample) {
  int num_results = results.size();
  // Index in results of the first entry matching the sample's unichar, or -1.
  int answer_actual_rank = -1;
  int font_id = sample->font_id();
  int unichar_id = sample->class_id();
  sample->set_is_error(false);
  if (num_results == 0) {
    // Reject. We count rejects as a separate category, but still mark the
    // sample as an error in case any training module wants to use that to
    // improve the classifier.
    sample->set_is_error(true);
    ++font_counts_[font_id].n[CT_REJECT];
  } else {
    // Find rank of correct unichar answer, using rating_epsilon_ to allow
    // different answers to score as equal. (Ignoring the font.)
    int epsilon_rank = 0;
    int answer_epsilon_rank = -1;
    int num_top_answers = 0;
    double prev_rating = results[0].rating;
    bool joined = false;
    bool broken = false;
    int res_index = 0;
    while (res_index < num_results) {
      // A new epsilon rank starts when the rating drops by more than
      // rating_epsilon_ below the rating that opened the current rank.
      if (results[res_index].rating < prev_rating - rating_epsilon_) {
        ++epsilon_rank;
        prev_rating = results[res_index].rating;
      }
      // Only the first occurrence of the correct unichar is recorded.
      if (results[res_index].unichar_id == unichar_id &&
          answer_epsilon_rank < 0) {
        answer_epsilon_rank = epsilon_rank;
        answer_actual_rank = res_index;
      }
      // Joined/broken special codes are noted but not counted as top
      // answers; anything else at epsilon rank 0 is a top answer.
      if (results[res_index].unichar_id == UNICHAR_JOINED &&
          unicharset_.has_special_codes())
        joined = true;
      else if (results[res_index].unichar_id == UNICHAR_BROKEN &&
               unicharset_.has_special_codes())
        broken = true;
      else if (epsilon_rank == 0)
        ++num_top_answers;
      ++res_index;
    }
    if (answer_actual_rank != 0) {
      // Correct result is not absolute top.
      ++font_counts_[font_id].n[CT_UNICHAR_TOPTOP_ERR];
      if (boosting_mode == CT_UNICHAR_TOPTOP_ERR) sample->set_is_error(true);
    }
    if (answer_epsilon_rank == 0) {
      ++font_counts_[font_id].n[CT_UNICHAR_TOP_OK];
      // Unichar OK, but count if multiple unichars.
      if (num_top_answers > 1) {
        ++font_counts_[font_id].n[CT_OK_MULTI_UNICHAR];
        ++multi_unichar_counts_[unichar_id];
      }
      // Check to see if any font in the top choice has attributes that match.
      // TODO(rays) It is easy to add counters for individual font attributes
      // here if we want them.
      if (font_table.SetContainsFontProperties(
              font_id, results[answer_actual_rank].fonts)) {
        // Font attributes were matched.
        // Check for multiple properties.
        if (font_table.SetContainsMultipleFontProperties(
                results[answer_actual_rank].fonts))
          ++font_counts_[font_id].n[CT_OK_MULTI_FONT];
      } else {
        // Font attributes weren't matched.
        ++font_counts_[font_id].n[CT_FONT_ATTR_ERR];
      }
    } else {
      // This is a top unichar error.
      ++font_counts_[font_id].n[CT_UNICHAR_TOP1_ERR];
      if (boosting_mode == CT_UNICHAR_TOP1_ERR) sample->set_is_error(true);
      // Count maps from unichar id to wrong unichar id.
      ++unichar_counts_(unichar_id, results[0].unichar_id);
      if (answer_epsilon_rank < 0 || answer_epsilon_rank >= 2) {
        // It is also a 2nd choice unichar error.
        ++font_counts_[font_id].n[CT_UNICHAR_TOP2_ERR];
        if (boosting_mode == CT_UNICHAR_TOP2_ERR) sample->set_is_error(true);
      }
      if (answer_epsilon_rank < 0) {
        // It is also a top-n choice unichar error.
        ++font_counts_[font_id].n[CT_UNICHAR_TOPN_ERR];
        if (boosting_mode == CT_UNICHAR_TOPN_ERR) sample->set_is_error(true);
        // The answer was not found at all: use the last epsilon rank reached
        // as its rank for the CT_RANK total below.
        answer_epsilon_rank = epsilon_rank;
      }
    }
    // Compute mean number of return values and mean rank of correct answer.
    font_counts_[font_id].n[CT_NUM_RESULTS] += num_results;
    font_counts_[font_id].n[CT_RANK] += answer_epsilon_rank;
    if (joined) ++font_counts_[font_id].n[CT_OK_JOINED];
    if (broken) ++font_counts_[font_id].n[CT_OK_BROKEN];
  }
  // If it was an error for boosting then sum the weight.
  if (sample->is_error()) {
    scaled_error_ += sample->weight();
    if (debug) {
      tprintf("%d results for char %s font %d :",
              num_results, unicharset_.id_to_unichar(unichar_id), font_id);
      for (int i = 0; i < num_results; ++i) {
        tprintf(" %.3f : %s\n",
                results[i].rating,
                unicharset_.id_to_unichar(results[i].unichar_id));
      }
      // Early exit: the histogram update below is skipped in debug mode.
      return true;
    }
    // Histogram of the top rating (as a rounded percentage) for errors;
    // 0 is used when there were no results.
    int percent = 0;
    if (num_results > 0) percent = IntCastRounded(results[0].rating * 100);
    bad_score_hist_.add(percent, 1);
  } else {
    // Histogram of the correct answer's rating for non-errors; 0 is used when
    // the correct answer was not among the results.
    int percent = 0;
    if (answer_actual_rank >= 0)
      percent = IntCastRounded(results[answer_actual_rank].rating * 100);
    ok_score_hist_.add(percent, 1);
  }
  return false;
}
/* * get a result from the slave */ static int get_slave_result() { char *buf; char *token; char *os; char *userid; char *host; int local_port, remote_port; char *p; DESC *d; int len; buf = alloc_lbuf("slave_buf"); len = read(slave_socket, buf, LBUF_SIZE - 1); if (len < 0) { if ((errno == EAGAIN) || (errno == EWOULDBLOCK)) { free_lbuf(buf); return (-1); } close(slave_socket); slave_socket = -1; free_lbuf(buf); return (-1); } else if (len == 0) { free_lbuf(buf); return (-1); } buf[len] = '\0'; token = alloc_lbuf("slave_token"); os = alloc_lbuf("slave_os"); userid = alloc_lbuf("slave_userid"); host = alloc_lbuf("slave_host"); if (sscanf(buf, "%s %s", host, token) != 2) { free_lbuf(buf); free_lbuf(token); free_lbuf(os); free_lbuf(userid); free_lbuf(host); return (0); } p = strchr(buf, '\n'); *p = '\0'; for (d = descriptor_list; d; d = d->next) { if (strcmp(d->addr, host)) continue; if (mudconf.use_hostname) { StringCopyTrunc(d->addr, token, 50); d->addr[50] = '\0'; if (d->player != 0) { if (d->username[0]) atr_add_raw(d->player, A_LASTSITE, tprintf("%s@%s", d->username, d->addr)); else atr_add_raw(d->player, A_LASTSITE, d->addr); } } } if (sscanf(p + 1, "%s %d , %d : %s : %s : %s", host, &remote_port, &local_port, token, os, userid) != 6) { free_lbuf(buf); free_lbuf(token); free_lbuf(os); free_lbuf(userid); free_lbuf(host); return (0); } for (d = descriptor_list; d; d = d->next) { if (ntohs((d->address).sin_port) != remote_port) continue; StringCopyTrunc(d->username, userid, 10); d->username[10] = '\0'; if (d->player != 0) { atr_add_raw(d->player, A_LASTSITE, tprintf("%s@%s", d->username, d->addr)); } free_lbuf(buf); free_lbuf(token); free_lbuf(os); free_lbuf(userid); free_lbuf(host); return (0); } free_lbuf(buf); free_lbuf(token); free_lbuf(os); free_lbuf(userid); free_lbuf(host); return (0); }
/*
 * Print the timeval32 pointed to by TV as "{tv_sec, tv_usec}", formatting
 * both fields with %u. TV is dereferenced directly, so it must point to a
 * local copy of the structure. TCP is not used by this function.
 */
static void
tprint_timeval32(struct tcb *tcp, const struct timeval32 *tv)
{
	tprintf("{%u, %u}", tv->tv_sec, tv->tv_usec);
}
// Estimates how "complex" an outline is from the outlines nested inside it.
// Every outline in the buckets covered by the parent's bounding box that
// compares as inside the parent (operator<) counts as one child; each child's
// own complexity is computed recursively and added, weighted by
// edges_children_per_grandchild.
// The walk stops early, returning a value greater than max_count, when
//  - the recursion depth exceeds edges_max_children_layers,
//  - the child count exceeds edges_max_children_per_outline, or
//  - children plus weighted grandchildren exceed max_count.
// Otherwise the total of children and weighted grandchildren is returned.
inT32 OL_BUCKETS::outline_complexity(
    C_OUTLINE *outline,  // parent outline
    inT32 max_count,     // max output
    inT16 depth          // recursion depth
    ) {
  C_OUTLINE_IT bucket_it;  // iterates over the outlines of one bucket

  // Range of bucket coordinates overlapped by the parent's bounding box.
  TBOX parent_box = outline->bounding_box();
  const inT16 first_col = (parent_box.left() - bl.x()) / BUCKETSIZE;
  const inT16 last_col = (parent_box.right() - bl.x()) / BUCKETSIZE;
  const inT16 first_row = (parent_box.bottom() - bl.y()) / BUCKETSIZE;
  const inT16 last_row = (parent_box.top() - bl.y()) / BUCKETSIZE;

  inT32 num_children = 0;
  inT32 weighted_grandchildren = 0;

  depth++;
  if (depth > edges_max_children_layers) {
    // Nested loops are too deep.
    return max_count + depth;
  }

  for (inT16 row = first_row; row <= last_row; row++) {
    for (inT16 col = first_col; col <= last_col; col++) {
      bucket_it.set_to_list(&buckets[row * bxdim + col]);
      if (bucket_it.empty())
        continue;
      for (bucket_it.mark_cycle_pt(); !bucket_it.cycled_list();
           bucket_it.forward()) {
        C_OUTLINE *candidate = bucket_it.data();
        if (candidate == outline)
          continue;  // the parent itself
        if (!(*candidate < *outline))
          continue;  // not inside the parent

        ++num_children;
        if (num_children > edges_max_children_per_outline) {
          // Too fragmented.
          if (edges_debug) {
            tprintf("Discard outline on child_count=%d > "
                    "max_children_per_outline=%d\n",
                    num_children,
                    static_cast<inT32>(edges_max_children_per_outline));
          }
          return max_count + num_children;
        }

        // Compute the "complexity" of each child recursively, but only
        // while some of the budget is left.
        const inT32 budget =
            max_count - num_children - weighted_grandchildren;
        if (budget > 0) {
          weighted_grandchildren += edges_children_per_grandchild *
              outline_complexity(candidate, budget, depth);
        }

        if (num_children + weighted_grandchildren > max_count) {
          // Too complex.
          if (edges_debug) {
            tprintf("Disgard outline on child_count=%d + grandchild_count=%d "
                    "> max_count=%d\n",
                    num_children, weighted_grandchildren, max_count);
          }
          return num_children + weighted_grandchildren;
        }
      }
    }
  }
  return num_children + weighted_grandchildren;
}
int sys_query_module(struct tcb *tcp) { if (entering(tcp)) { printstr(tcp, tcp->u_arg[0], -1); tprintf(", "); printxval(which, tcp->u_arg[1], "QM_???"); tprintf(", "); } else { size_t ret; if (!verbose(tcp) || syserror(tcp) || umove(tcp, tcp->u_arg[4], &ret) < 0) { tprintf("%#lx, %lu, %#lx", tcp->u_arg[2], tcp->u_arg[3], tcp->u_arg[4]); } else if (tcp->u_arg[1]==QM_INFO) { struct module_info mi; if (umove(tcp, tcp->u_arg[2], &mi) < 0) { tprintf("%#lx, ", tcp->u_arg[2]); } else { tprintf("{address=%#lx, size=%lu, flags=", mi.addr, mi.size); printflags(modflags, mi.flags, "MOD_???"); tprintf(", usecount=%lu}, ", mi.usecount); } tprintf("%Zu", ret); } else if ((tcp->u_arg[1]==QM_MODULES) || (tcp->u_arg[1]==QM_DEPS) || (tcp->u_arg[1]==QM_REFS)) { tprintf("{"); if (!abbrev(tcp)) { char* data = malloc(tcp->u_arg[3]); char* mod = data; size_t idx; if (!data) { fprintf(stderr, "out of memory\n"); tprintf(" /* %Zu entries */ ", ret); } else { if (umoven(tcp, tcp->u_arg[2], tcp->u_arg[3], data) < 0) { tprintf(" /* %Zu entries */ ", ret); } else { for (idx=0; idx<ret; idx++) { tprintf("%s%s", (idx ? ", " : ""), mod); mod += strlen(mod)+1; } } free(data); } } else tprintf(" /* %Zu entries */ ", ret); tprintf("}, %Zu", ret); } else if (tcp->u_arg[1]==QM_SYMBOLS) { tprintf("{"); if (!abbrev(tcp)) { char* data = malloc(tcp->u_arg[3]); struct module_symbol* sym = (struct module_symbol*)data; size_t idx; if (!data) { fprintf(stderr, "out of memory\n"); tprintf(" /* %Zu entries */ ", ret); } else { if (umoven(tcp, tcp->u_arg[2], tcp->u_arg[3], data) < 0) { tprintf(" /* %Zu entries */ ", ret); } else { for (idx=0; idx<ret; idx++) { tprintf("%s{name=%s, value=%lu}", (idx ? " " : ""), data+(long)sym->name, sym->value); sym++; } } free(data); } } else tprintf(" /* %Zu entries */ ", ret); tprintf("}, %Zd", ret); } else { printstr(tcp, tcp->u_arg[2], tcp->u_arg[3]); tprintf(", %#lx", tcp->u_arg[4]); } } return 0; }
/*
 * Create a model input file from a template file.
 *
 * The template fn_in_t is copied to fn_out line by line. Text enclosed
 * between two parameter tokens (e.g. "# name #") is a parameter keyword and
 * is replaced with the value of the matching entry of par[] (names in
 * par_id[], npar entries). If the keyword field is more than two characters
 * wider than the trimmed name, the value is formatted to fit the field
 * width; otherwise it is written with 15 significant digits.
 *
 * The first line selects the token character: if its first word contains
 * "ptf" or "template" (any case), the first character of the second word is
 * the token; otherwise the token is '#' and the first line is treated as
 * regular template content.
 *
 * debug - non-zero prints progress through tprintf.
 *
 * Returns 0 on success, -1 if a file cannot be opened or written, or if a
 * keyword does not match any defined parameter.
 */
int par_tpl( int npar, char **par_id, double *par, char *fn_in_t, char *fn_out, int debug )
{
	FILE *in, *out;
	char *sep = " \t\n";
	// number is large enough for any "%.*g" output whose precision is bounded by the line buffer size
	char *word, token[2] = "#", number[1024], buf[1000], *pnt_inst;
	int i, l, l2, c, start, bad_data = 0, preserve;
	if( ( in = fopen( fn_in_t, "r" ) ) == NULL )
	{
		tprintf( "\n\nERROR: File %s cannot be opened to read template data!\n", fn_in_t );
		return( -1 );
	}
	if( debug ) tprintf( "Remove files for model inputs: %s\n", fn_out );
	remove( fn_out );
	if( ( out = fopen( fn_out, "w" ) ) == NULL )
	{
		tprintf( "\n\nERROR: File %s cannot be opened to write data!\n", fn_out );
		fclose( in ); // do not leak the template stream
		return( -1 );
	}
	if( debug ) tprintf( "\nCreating model input file \'%s\' for external model execution using template file \'%s\'.\n", fn_out, fn_in_t );
	if( fgets( buf, sizeof( buf ), in ) == NULL ) buf[0] = 0; // empty template: nothing to parse, default token is kept
	for( c = 0, word = strtok_r( buf, sep, &pnt_inst ); word; c++, word = strtok_r( NULL, sep, &pnt_inst ) )
	{
		if( c == 0 ) // first entry
		{
			white_trim( word );
			if( strcasestr( word, "ptf" ) )
			{
				if( debug ) tprintf( "PEST Template file\n" );
			}
			else if( strcasestr( word, "template" ) )
			{
				if( debug ) tprintf( "MADS Template file; user-specified parameter token is expected\n" );
			}
			else
			{
				if( debug ) tprintf( "MADS Template file\n" );
				rewind( in ); // the first line is template content
				token[0] = '#';
				break; // quit the loop; done
			}
		}
		if( c == 1 ) // second entry in the case of PEST Template file
		{
			white_trim( word );
			token[0] = word[0];
			if( token[0] == 0 ) token[0] = '#';
			// the warning is printed after the token is set so that it reports the character actually assumed
			if( strlen( word ) > 1 )
				tprintf( "WARNING: expecting a single character as parameter keyword separator on the first line of template file (\'%s\'; assumed \'%s\')\n", word, token );
			break;
		}
	}
	token[1] = 0;
	if( debug > 1 ) tprintf( "Parameter separator: %s\n", token );
	while( !feof( in ) )
	{
		if( fgets( buf, sizeof( buf ), in ) == NULL ) { if( debug > 1 ) tprintf( "END of template file.\n" ); break; }
		l = strlen( buf );
		if( l > 0 && buf[l - 1] == '\n' ) buf[l - 1] = 0; // remove 'new line' character only if it is present
		if( buf[0] == token[0] ) start = 0;
		else start = 1; // if first character is a token it will be not considered a separator
		for( c = 0, word = strtok_r( buf, token, &pnt_inst ); word; c++, word = strtok_r( NULL, token, &pnt_inst ) ) // separation between the tokens is expected; e.g. "# a # space # b #"
		{
			if( c % 2 == start ) // parameter keyword
			{
				if( debug ) tprintf( "Parameter keyword \'%s\' ", word );
				l = strlen( word ); // field width including surrounding white space
				white_skip( &word );
				white_trim( word );
				l2 = strlen( word );
				if( l > ( l2 + 2 ) ) preserve = 1;
				else preserve = 0;
				for( i = 0; i < npar; i++ )
				{
					if( strcmp( word, par_id[i] ) == 0 )
					{
						if( preserve == 1 )
						{
							// one character less for positive values and two less for the others (room for the sign)
							if( par[i] > 0 ) snprintf( number, sizeof( number ), "%.*g", l - 1, par[i] );
							else snprintf( number, sizeof( number ), "%.*g", l - 2, par[i] );
							l2 = strlen( number );
							if( l2 > l ) tprintf( "WARNING: The parameter does not fit the requested field (%s length %d > %d)!\n", number, l2, l );
						}
						else
							snprintf( number, sizeof( number ), "%.15g", par[i] );
						fprintf( out, "%s", number ); // pieces are written back to back, without a separator
						if( debug ) tprintf( "is replaced with \'%s\'\n", number );
						break;
					}
				}
				if( i == npar )
				{
					if( debug ) tprintf( "ERROR: does not match defined model parameters!!!\n" );
					else tprintf( "\nERROR: Parameter keyword \'%s\' in template file \'%s\' does not match defined model parameters!\n", word, fn_in_t );
					bad_data = 1;
				}
			}
			else
				fprintf( out, "%s", word ); // literal template text
		}
		fprintf( out, "\n" );
	}
	fclose( in );
	if( fclose( out ) != 0 ) // buffered data is flushed here; a failure means the model input file is incomplete
	{
		tprintf( "\n\nERROR: File %s cannot be opened to write data!\n", fn_out );
		bad_data = 1;
	}
	if( bad_data == 1 ) return( -1 );
	else return( 0 );
}
void AssociateUtils::ComputeStats(int col, int row, const AssociateStats *parent_stats, int parent_path_length, bool fixed_pitch, float max_char_wh_ratio, const DENORM *denorm, CHUNKS_RECORD *chunks_record, int debug_level, AssociateStats *stats) { stats->Clear(); if (debug_level > 0) { tprintf("AssociateUtils::ComputeStats() for col=%d, row=%d%s\n", col, row, fixed_pitch ? " (fixed pitch)" : ""); } float normalizing_height = BASELINE_SCALE; // TODO(rays/daria) Can unicharset.script_has_xheight be useful here? if (fixed_pitch && denorm != NULL && denorm->row() != NULL) { // For fixed pitch language like CJK, we use the full text height // as the normalizing factor so we are not dependent on xheight // calculation. if (denorm->row()->body_size() > 0.0f) { normalizing_height = denorm->y_scale() * denorm->row()->body_size(); } else { normalizing_height = denorm->y_scale() * (denorm->row()->x_height() + denorm->row()->ascenders()); } if (debug_level > 0) { tprintf("normalizing height = %g (scale %g xheight %g ascenders %g)\n", normalizing_height, denorm->y_scale(), denorm->row()->x_height(), denorm->row()->ascenders()); } } float wh_ratio = GetChunksWidth(chunks_record->chunk_widths, col, row) / normalizing_height; if (debug_level) tprintf("wh_ratio %g\n", wh_ratio); if (wh_ratio > max_char_wh_ratio) stats->bad_shape = true; if (fixed_pitch) { bool end_row = (row == (chunks_record->ratings->dimension() - 1)); // Ensure that the blob has gaps on the left and the right sides // (except for beginning and ending punctuation) and that there is // no cutting through ink at the blob boundaries. 
if (col > 0) { float left_gap = GetChunksGap(chunks_record->chunk_widths, col-1) / normalizing_height; SEAM *left_seam = static_cast<SEAM *>(array_value(chunks_record->splits, col-1)); if (debug_level) { tprintf("left_gap %g, left_seam %g\n", left_gap, left_seam->priority); } if ((!end_row && left_gap < kMinGap) || left_seam->priority > 0.0f) { stats->bad_shape = true; } } float right_gap = 0.0f; if (!end_row) { right_gap = GetChunksGap(chunks_record->chunk_widths, row) / normalizing_height; SEAM *right_seam = static_cast<SEAM *>(array_value(chunks_record->splits, row)); if (debug_level) { tprintf("right_gap %g right_seam %g\n", right_gap, right_seam->priority); } if (right_gap < kMinGap || right_seam->priority > 0.0f) { stats->bad_shape = true; if (right_gap < kMinGap) stats->bad_fixed_pitch_right_gap = true; } } // Impose additional segmentation penalties if blob widths or gaps // distribution don't fit a fixed-pitch model. // Since we only know the widths and gaps of the path explored so far, // the means and variances are computed for the path so far (not // considering characters to the right of the last character on the path). stats->full_wh_ratio = wh_ratio + right_gap; if (parent_stats != NULL) { stats->full_wh_ratio_total = (parent_stats->full_wh_ratio_total + stats->full_wh_ratio); float mean = stats->full_wh_ratio_total / static_cast<float>(parent_path_length+1); stats->full_wh_ratio_var = parent_stats->full_wh_ratio_var + pow(mean-stats->full_wh_ratio, 2); } else { stats->full_wh_ratio_total = stats->full_wh_ratio; } if (debug_level) { tprintf("full_wh_ratio %g full_wh_ratio_total %g full_wh_ratio_var %g\n", stats->full_wh_ratio, stats->full_wh_ratio_total, stats->full_wh_ratio_var); } stats->shape_cost = FixedPitchWidthCost(wh_ratio, right_gap, end_row, max_char_wh_ratio); // For some reason Tesseract prefers to treat the whole CJ words // as one blob when the initial segmentation is particularly bad. // This hack is to avoid favoring such states. 
if (col == 0 && end_row && wh_ratio > max_char_wh_ratio) { stats->shape_cost += 10; } stats->shape_cost += stats->full_wh_ratio_var; if (debug_level) tprintf("shape_cost %g\n", stats->shape_cost); } }
int load_pst( char *filename, struct opt_data *op ) { FILE *in; double d; char code[20], buf[1000]; int i, j, k, npar_groups, nobs_groups, bad_data = 0; struct calc_data *cd; struct param_data *pd; struct obs_data *od; struct extrn_data *ed; cd = op->cd; pd = op->pd; od = op->od; ed = op->ed; pd->nParam = pd->nFlgParam = pd->nOptParam = 0; od->nTObs = od->nCObs = od->nObs = 0; ed->ntpl = ed->nins = 0; bad_data = 0; op->gd->min_t = op-> gd->time = 0; if( ( in = fopen( filename, "r" ) ) == NULL ) { tprintf( "PEST control file %s cannot be opened to read problem data!\n", filename ); return( -1 ); } cd->opt_method = ( char * ) malloc( 50 * sizeof( char ) ); cd->solution_id = ( char * ) malloc( 50 * sizeof( char ) ); cd->solution_type = ( int * ) malloc( 1 * sizeof( int ) ); strcpy( cd->solution_id, "external" ); cd->num_sources = 1; cd->solution_type = ( int * ) malloc( sizeof( int ) ); cd->solution_type[0] = EXTERNAL; for( i = 0; i < 4; i++ ) // skip 4 lines fgets( buf, 1000, in ); sscanf( buf, "%d %d %d %*d %d", &pd->nParam, &od->nTObs, &npar_groups, &nobs_groups ); tprintf( "Parameters = %d (groups %d)\n", pd->nParam, npar_groups ); tprintf( "Observations = %d (groups %d)\n", od->nTObs, nobs_groups ); od->nObs = od->nCObs = od->nTObs; fgets( buf, 1000, in ); sscanf( buf, "%d %d", &ed->ntpl, &ed->nins ); tprintf( "Number of template files = %d\nNumber of instruction files = %d\n", ed->ntpl, ed->nins ); pd->var_name = char_matrix( pd->nParam, 50 ); pd->var_id = char_matrix( pd->nParam, 50 ); pd->var = ( double * ) malloc( pd->nParam * sizeof( double ) ); pd->var_current = ( double * ) malloc( pd->nParam * sizeof( double ) ); pd->var_best = ( double * ) malloc( pd->nParam * sizeof( double ) ); cd->var = ( double * ) malloc( pd->nParam * sizeof( double ) ); pd->var_opt = ( int * ) malloc( pd->nParam * sizeof( int ) ); pd->var_log = ( int * ) malloc( pd->nParam * sizeof( int ) ); pd->var_dx = ( double * ) malloc( pd->nParam * sizeof( double ) ); pd->var_min = ( double * 
) malloc( pd->nParam * sizeof( double ) ); pd->var_max = ( double * ) malloc( pd->nParam * sizeof( double ) ); pd->var_range = ( double * ) malloc( pd->nParam * sizeof( double ) ); tprintf( "Parameters = %d:\n", pd->nParam ); for( i = 0; i < 6; i++ ) // skip 6 lines fgets( buf, 1000, in ); for( i = 0; i < npar_groups; i++ ) fgets( buf, 1000, in ); fgets( buf, 1000, in ); pd->nFlgParam = 0; pd->nOptParam = 0; for( i = 0; i < pd->nParam; i++ ) { fscanf( in, "%s %s %*s %lf %lf %lf %*s %*f %*f %*f\n", pd->var_id[i], code, &pd->var[i], &pd->var_min[i], &pd->var_max[i] ); strcpy( pd->var_name[i], pd->var_id[i] ); tprintf( "%-27s: init %15.12g min %12g max %12g\n", pd->var_name[i], pd->var[i], pd->var_min[i], pd->var_max[i] ); if( strcmp( code, "fixed" ) == 0 ) pd->var_opt[i] = 0; else { pd->nOptParam++; pd->var_opt[i] = 1; } if( strcmp( code, "log" ) == 0 ) pd->var_log[i] = 1; else pd->var_log[i] = 0; if( pd->var_log[i] == 1 ) { pd->var[i] = log10( pd->var[i] ); pd->var_min[i] = log10( pd->var_min[i] ); pd->var_max[i] = log10( pd->var_max[i] ); } pd->var_range[i] = pd->var_max[i] - pd->var_min[i]; pd->var_dx[i] = pd->var_range[i] / 10; } pd->var_index = ( int * ) malloc( pd->nOptParam * sizeof( int ) ); tprintf( "Optimized parameters = %d\n", pd->nOptParam ); for( k = i = 0; i < pd->nParam; i++ ) if( pd->var_opt[i] == 1 ) { if( pd->var_log[i] == 1 ) d = log10( pd->var[i] ); else d = pd->var[i]; tprintf( "%-27s: init %15.12g min %12g max %12g\n", pd->var_name[i], d, pd->var_min[i], pd->var_max[i] ); pd->var_index[k++] = i; } for( i = 0; i < pd->nParam; i++ ) for( j = i + 1; j < pd->nParam; j++ ) if( strcmp( pd->var_name[i], pd->var_name[j] ) == 0 ) { tprintf( "ERROR: Parameter names #%i (%s) and #%i (%s) are identical!\n", i + 1, pd->var_name[i], j + 1, pd->var_name[j] ); bad_data = 1; } if( bad_data ) return( 0 ); fgets( buf, 1000, in ); // skip line for( i = 0; i < nobs_groups; i++ ) fgets( buf, 1000, in ); fgets( buf, 1000, in ); // skip line od->obs_id = char_matrix( 
od->nTObs, 50 ); od->obs_target = ( double * ) malloc( od->nTObs * sizeof( double ) ); od->obs_weight = ( double * ) malloc( od->nTObs * sizeof( double ) ); od->obs_min = ( double * ) malloc( od->nTObs * sizeof( double ) ); od->obs_max = ( double * ) malloc( od->nTObs * sizeof( double ) ); od->obs_current = ( double * ) malloc( od->nTObs * sizeof( double ) ); od->obs_best = ( double * ) malloc( od->nTObs * sizeof( double ) ); od->res = ( double * ) malloc( od->nTObs * sizeof( double ) ); od->obs_log = ( int * ) malloc( od->nTObs * sizeof( int ) ); for( i = 0; i < od->nTObs; i++ ) fscanf( in, "%s %lf %lf %*s\n", od->obs_id[i], &od->obs_target[i], &od->obs_weight[i] ); tprintf( "Calibration targets = %d\n", od->nTObs ); for( i = 0; i < od->nTObs; i++ ) { if( od->nTObs < 50 || ( i < 20 || i > od->nTObs - 20 ) ) tprintf( "%-13s: value %15.12g weight %g\n", od->obs_id[i], od->obs_target[i], od->obs_weight[i] ); if( od->nTObs > 50 && i == 21 ) tprintf( "...\n" ); od->obs_min[i] = 0; od->obs_max[i] = od->obs_target[i] * 2; od->obs_log[i] = 0; } if( od->nObs < 10000 || cd->problem_type == CHECK || cd->debug > 10 ) { tprintf( "Checking for duplicate observations ... \n" ); if( od->nObs >= 10000 ) tprintf( "WARNING: The number of observations is large (%d); this may take a long time ... 
\n", od->nObs ); for( i = 0; i < od->nTObs; i++ ) for( j = i + 1; j < od->nTObs; j++ ) if( strcmp( od->obs_id[i], od->obs_id[j] ) == 0 ) { tprintf( "ERROR: Observation names #%i (%s) and #%i (%s) are identical!\n", i + 1, od->obs_id[i], j + 1, od->obs_id[j] ); bad_data = 1; } } if( bad_data ) return( 0 ); fgets( buf, 1000, in ); // skip line ed->cmdline = ( char * ) malloc( 255 * sizeof( char ) ); fgets( ed->cmdline, 255, in ); ed->cmdline[strlen( ed->cmdline ) - 1] = 0; tprintf( "Execution command: %s\n", ed->cmdline ); tprintf( "External files:\n" ); ed->fn_ins = char_matrix( ed->nins, 255 ); ed->fn_obs = char_matrix( ed->nins, 255 ); ed->fn_tpl = char_matrix( ed->ntpl, 255 ); ed->fn_out = char_matrix( ed->ntpl, 255 ); fgets( buf, 1000, in ); // skip line for( i = 0; i < ed->ntpl; i++ ) fscanf( in, "%s %s\n", ed->fn_tpl[i], ed->fn_out[i] ); tprintf( "- to provide current model parameters:\n" ); for( i = 0; i < ed->ntpl; i++ ) tprintf( "%s -> %s\n", ed->fn_tpl[i], ed->fn_out[i] ); for( i = 0; i < ed->nins; i++ ) fscanf( in, "%s %s\n", ed->fn_ins[i], ed->fn_obs[i] ); tprintf( "- to read current model predictions:\n" ); for( i = 0; i < ed->nins; i++ ) tprintf( "%s <- %s\n", ed->fn_ins[i], ed->fn_obs[i] ); fclose( in ); tprintf( "\n" ); return( 1 ); }
/*
 * Check a template file against the list of known model parameters.
 *
 * The first line selects the flavour of the file:
 *   - a word containing "ptf"      : PEST template; the second word is the parameter token
 *   - a word containing "template" : MADS template; the second word is the parameter token
 *   - anything else                : MADS template without a header; the file is rewound
 *                                    and the default token '#' is used
 * Every following line is split on the token; every other piece is taken as a
 * parameter keyword and matched against par_id[0..npar-1].
 *
 * npar      - number of model parameters
 * par_id    - parameter names
 * par_count - per-parameter usage counter; set to 1 if it was negative, otherwise incremented
 * fn_in_t   - template file name
 * debug     - verbosity level
 *
 * Returns 0 when every keyword matches a parameter, -1 if the file cannot be
 * opened or at least one keyword is unknown.
 */
int check_par_tpl( int npar, char **par_id, int *par_count, char *fn_in_t, int debug )
{
	FILE *in;
	char *sep = " \t\n"; // White spaces
	char *word, buf[1000], *pnt_inst;
	char token[2] = "#"; // default token; always NUL-terminated so it is safe to print
	int i, c, start = 0, bad_data = 0;
	size_t l;
	if( ( in = fopen( fn_in_t, "r" ) ) == NULL )
	{
		tprintf( "\n\nERROR: File %s cannot be opened to read template data!\n", fn_in_t );
		return( -1 );
	}
	if( debug ) tprintf( "\nChecking the template file \'%s\'.\n", fn_in_t );
	if( fgets( buf, 1000, in ) == NULL ) buf[0] = 0; // empty file: nothing to tokenize
	pnt_inst = &buf[0];
	for( c = 0, word = strtok_r( buf, sep, &pnt_inst ); word; c++, word = strtok_r( NULL, sep, &pnt_inst ) )
	{
		if( c == 0 ) // first entry
		{
			white_trim( word );
			if( strstr( word, "ptf" ) )
			{
				if( debug ) tprintf( "PEST Template file\n" );
			}
			else if( strcasestr( word, "template" ) )
			{
				if( debug ) tprintf( "MADS Template file; user-specified parameter token is expected\n" );
			}
			else
			{
				if( debug ) tprintf( "MADS Template file\n" );
				rewind( in ); // no header line: the first line is already template content
				token[0] = '#'; // default token
				break; // quit the loop; done
			}
		}
		if( c == 1 ) // second entry in the case of PEST Template file
		{
			white_trim( word );
			// Assign the token first so that the warning below reports the value actually assumed
			token[0] = word[0];
			if( token[0] == 0 ) token[0] = '#';
			if( strlen( word ) > 1 )
				tprintf( "WARNING: expecting a single character as parameter keyword separator on the first line of template file (\'%s\'; assumed \'%s\')\n", word, token );
			break;
		}
	}
	token[1] = 0;
	if( debug ) tprintf( "Parameter separator: %s\n", token );
	while( !feof( in ) )
	{
		if( fgets( buf, 1000, in ) == NULL ) { if( debug > 1 ) tprintf( "END of template file.\n" ); break; }
		// Strip the trailing newline only when it is really there (the last line may lack one)
		l = strlen( buf );
		if( l > 0 && buf[l - 1] == '\n' ) buf[l - 1] = 0;
		// strtok_r skips leading delimiters, so a line that begins with the token
		// has its first piece as a keyword; otherwise keywords are the odd pieces
		if( buf[0] == token[0] ) start = 0; else start = 1;
		pnt_inst = &buf[0];
		for( c = 0, word = strtok_r( buf, token, &pnt_inst ); word; c++, word = strtok_r( NULL, token, &pnt_inst ) ) // separation between the tokens is expected
		{
			if( c % 2 == start )
			{
				if( debug ) tprintf( "Parameter keyword \'%s\' ", word );
				white_skip( &word );
				white_trim( word );
				for( i = 0; i < npar; i++ )
				{
					if( strcmp( word, par_id[i] ) == 0 )
					{
						if( debug ) tprintf( "will be replaced with the value of model parameter \'%s\'\n", par_id[i] );
						if( par_count[i] < 0 ) par_count[i] = 1;
						else par_count[i] += 1;
						break;
					}
				}
				if( i == npar )
				{
					if( debug ) tprintf( "ERROR: does not match defined model parameters!!!\n" );
					else tprintf( "\nERROR: Parameter keyword \'%s\' in template file \'%s\' does not match defined model parameters!\n", word, fn_in_t );
					bad_data = 1;
				}
			}
		}
	}
	fclose( in );
	if( bad_data == 1 ) return( -1 );
	else return( 0 );
}
/*
 * Read model-predicted observations from a model output ("data") file by
 * following an instruction file.
 *
 * The first instruction line may be a header:
 *   - a word containing "pif"         : PEST instruction file (no comment token by default)
 *   - a word containing "instruction" : MADS instruction file
 *   followed by optional search, variable and comment token characters.
 *   Any other first word means there is no header; the file is rewound and the
 *   defaults '@' (search), '!' (variable) and '#' (comment) are used.
 * Each following instruction line may contain:
 *   l<N>        read N lines from the data file
 *   @key@       search forward in the data file for "key"
 *   w           skip a white-space separated field
 *   !dum!       skip a data field
 *   !name!      read the current data field as observation "name"
 *
 * nobs      - number of observations
 * obs_id    - observation names
 * obs       - receives the values; repeated observations are summed
 * obs_count - per-observation number of values read so far
 * fn_in_i   - instruction file name
 * fn_in_d   - model output file name
 * debug     - verbosity level
 *
 * Returns 0 on success and -1 on any error.
 */
int ins_obs( int nobs, char **obs_id, double *obs, int *obs_count, char *fn_in_i, char *fn_in_d, int debug )
{
	FILE *infile_inst, *infile_data;
	char *separator = " \t\n";
	char *word_inst, *word_data, *word_search, dummy_var[6], buf_data[1000], buf_inst[1000], *pnt_inst, *pnt_data;
	// Tokens start with their defaults and are always NUL-terminated so they can be printed safely
	char token_search[2] = "@", token_obs[2] = "!", comment[2] = "#";
	int i, c, bad_data = 0, found, sl;
	double v;
	if( ( infile_inst = fopen( fn_in_i, "r" ) ) == NULL )
	{
		tprintf( "\nERROR: File %s cannot be opened to read template data!\n", fn_in_i );
		return( -1 );
	}
	if( ( infile_data = fopen( fn_in_d, "r" ) ) == NULL )
	{
		tprintf( "\nERROR: File %s cannot be opened to read the model-predicted observations!\n", fn_in_d );
		fclose( infile_inst ); // do not leak the instruction file handle
		return( -1 );
	}
	if( debug ) tprintf( "\nReading output file \'%s\' obtained from external model execution using instruction file \'%s\'.\n", fn_in_d, fn_in_i );
	if( fgets( buf_inst, 1000, infile_inst ) == NULL ) buf_inst[0] = 0; // empty instruction file
	if( debug > 1 ) tprintf( "First instruction line: %s\n", buf_inst );
	pnt_inst = &buf_inst[0];
	for( c = 0, word_inst = strtok_r( buf_inst, separator, &pnt_inst ); word_inst; c++, word_inst = strtok_r( NULL, separator, &pnt_inst ) )
	{
		if( c == 0 ) // first entry
		{
			white_trim( word_inst );
			if( strcasestr( word_inst, "pif" ) )
			{
				if( debug > 1 ) tprintf( "PEST Instruction file\n" );
				token_search[0] = '@'; // just in case
				token_obs[0] = '!';
				comment[0] = 0;
			}
			else if( strcasestr( word_inst, "instruction" ) )
			{
				if( debug > 1 ) tprintf( "MADS Instruction file; user-specified search/variable tokens are expected\n" );
				token_search[0] = '@'; // just in case
				token_obs[0] = '!';
				comment[0] = '#';
			}
			else
			{
				if( debug > 1 ) tprintf( "MADS Instruction file\n" );
				rewind( infile_inst ); // no header: the first line is a regular instruction
				token_search[0] = '@';
				token_obs[0] = '!';
				comment[0] = '#';
				break;
			}
		}
		else if( c == 1 ) // second entry; "search" token
		{
			white_trim( word_inst );
			token_search[0] = word_inst[0];
			if( strlen( word_inst ) > 1 )
				tprintf( "WARNING: expecting a single character as search separator on the first line of instruction file (\'%s\'; assumed \'%s\')\n", word_inst, token_search );
			if( token_search[0] == 0 ) token_search[0] = '@';
		}
		else if( c == 2 ) // third entry; "variable" token
		{
			white_trim( word_inst );
			token_obs[0] = word_inst[0];
			if( strlen( word_inst ) > 1 )
				tprintf( "WARNING: expecting a single character as search separator on the first line of instruction file (\'%s\'; assumed \'%s\')\n", word_inst, token_obs );
			if( token_obs[0] == 0 ) token_obs[0] = '!';
		}
		else if( c == 3 ) // fourth entry; "comment" token
		{
			white_trim( word_inst );
			comment[0] = word_inst[0];
			if( strlen( word_inst ) > 1 )
				tprintf( "WARNING: expecting a single character as search separator on the first line of instruction file (\'%s\'; assumed \'%s\')\n", word_inst, comment );
			if( comment[0] == 0 ) comment[0] = '#';
			break;
		}
	}
	token_search[1] = token_obs[1] = 0;
	// Build the dummy-observation keyword, e.g. "!dum!"
	dummy_var[0] = token_obs[0];
	dummy_var[1] = 0;
	strcat( dummy_var, "dum" );
	dummy_var[4] = token_obs[0];
	dummy_var[5] = 0;
	token_obs[1] = token_search[1] = comment[1] = 0;
	if( debug > 1 )
	{
		tprintf( "Search separator: %s\n", token_search );
		tprintf( "Observation separator: %s\n", token_obs );
		tprintf( "Dummy observation: %s\n", dummy_var );
		if( comment[0] ) tprintf( "Comment: %s\n", comment );
	}
	buf_data[0] = 0; word_data = NULL;
	while( !feof( infile_inst ) )
	{
		if( fgets( buf_inst, 1000, infile_inst ) == NULL ) { if( debug > 1 ) tprintf( "END of instruction file.\n" ); break; }
		pnt_inst = &buf_inst[0];
		word_inst = 0;
		white_trim( pnt_inst ); white_skip( &pnt_inst );
		if( debug > 1 ) tprintf( "\n\nCurrent instruction line: %s\n", pnt_inst );
		if( comment[0] && pnt_inst[0] == comment[0] ) { if( debug > 1 ) tprintf( "Comment; skip this line.\n" ); continue; } // Instruction line is a comment
		if( strlen( pnt_inst ) == 0 ) { if( debug ) tprintf( "Empty line; will be skipped.\n" ); continue; }
		pnt_data = NULL;
		if( pnt_inst[0] == 'l' ) // skip lines in the "data" file
		{
			sscanf( &pnt_inst[1], "%d", &c );
			if( debug > 1 ) tprintf( "Skip %d lines\n", c );
			for( i = 0; i < c; i++ )
				if( fgets( buf_data, 1000, infile_data ) == NULL )
				{
					tprintf( "\nERROR: Model output file \'%s\' is incomplete or instruction file \'%s\' is inaccurate!\n Model output file \'%s\' ended before instruction file \'%s\' is completely processed!\n", fn_in_d, fn_in_i, fn_in_d, fn_in_i );
					bad_data = 1; // a truncated model output must not be reported as success
					break;
				}
			word_inst = strtok_r( NULL, separator, &pnt_inst ); // skip l command
			if( feof( infile_data ) )
			{
				tprintf( "\nERROR: Model output file \'%s\' is incomplete or instruction file \'%s\' is inaccurate!\n Model output file \'%s\' ended before instruction file \'%s\' is completely processed!\n", fn_in_d, fn_in_i, fn_in_d, fn_in_i );
				bad_data = 1;
				break;
			}
			white_trim( buf_data );
			pnt_data = &buf_data[0];
			word_data = NULL;
		}
		if( pnt_data == NULL ) // if there was no "l" (skip line) command, read the next "data" line
		{
			if( debug > 1 ) tprintf( "Read the next \'data\' line (there was no \'l\' (skip line) command)\n" );
			fgets( buf_data, 1000, infile_data );
			white_trim( buf_data );
			pnt_data = &buf_data[0];
			word_data = NULL;
		}
		if( debug > 1 ) tprintf( "Current location in model output file: => \'%s\' <= \'%s\'\n", word_data, pnt_data );
		c = 0; // number of observation variables read from the current instruction line
		while( 1 )
		{
			if( pnt_inst[0] == token_search[0] ) // search for keyword
			{
				if( debug > 1 ) tprintf( "KEYWORD search " );
				word_search = strtok_r( NULL, token_search, &pnt_inst ); // read search keyword
				if( word_search == NULL ) // search token without a keyword
				{
					tprintf( "\nERROR: Instruction file %s does not follow the expected format!\n", fn_in_i );
					tprintf( "White space (w), search (%s) or observation (%s) tokens are expected!\n", token_search, token_obs );
					bad_data = 1;
					break;
				}
				if( debug > 1 ) tprintf( "\'%s\' in the data file ...\n", word_search );
				// A dedicated flag is used so that a successful search cannot erase an error recorded earlier in bad_data
				found = 0;
				for( ;; ) // look in the line already buffered first, then keep reading
				{
					if( ( pnt_data = strstr( pnt_data, word_search ) ) != NULL )
					{
						pnt_data += strlen( word_search );
						if( debug > 1 ) tprintf( "Matching data file location \'=>%s<=%s\'\n", word_search, pnt_data );
						found = 1;
						break;
					}
					if( fgets( buf_data, 1000, infile_data ) == NULL )
					{
						tprintf( "\nERROR: Model output file \'%s\' is incomplete or instruction file \'%s\' is inaccurate!\n Model output file \'%s\' ended before instruction file \'%s\' is completely processed!\n", fn_in_d, fn_in_i, fn_in_d, fn_in_i );
						break;
					}
					white_trim( buf_data );
					pnt_data = &buf_data[0];
					word_data = NULL; // Force reading
				}
				if( !found )
				{
					tprintf( "\nERROR: Search keyword \'%s\' cannot be found in the data file \'%s\'!\n", word_search, fn_in_d );
					fclose( infile_data ); fclose( infile_inst );
					return( -1 );
				}
			}
			else // no keyword search
			{
				word_inst = strtok_r( NULL, separator, &pnt_inst ); // read TEMPLATE word
				if( word_inst == NULL ) break; // nothing but separators left on the instruction line
				if( debug > 1 ) tprintf( "Current location in instruction input file: => \'%s\' <= \'%s\'\n", word_inst, pnt_inst );
				white_trim( word_inst );
				if( debug > 1 ) tprintf( "INSTRUCTION word \'%s\' : ", word_inst );
				if( strncmp( word_inst, dummy_var, 5 ) == 0 ) // dummy variable
				{
					if( debug > 1 ) tprintf( "Skip dummy data!\n" );
					if( word_data == NULL ) word_data = strtok_r( NULL, separator, &pnt_data );
					word_data = strtok_r( NULL, separator, &pnt_data );
					if( debug > 1 ) tprintf( "Current location in model output file: => \'%s\' <= \'%s\'\n", word_data, pnt_data );
				}
				else if( word_inst[0] == 'w' ) // white space
				{
					if( debug > 1 ) tprintf( "Skip white space!\n" );
					if( !iswhite( pnt_data[0] ) )
					{
						if( word_data == NULL ) word_data = strtok_r( NULL, separator, &pnt_data );
						word_data = strtok_r( NULL, separator, &pnt_data );
					}
					else
					{
						word_data = strtok_r( NULL, separator, &pnt_data );
					}
					if( debug > 1 ) tprintf( "Current location in model output file: => \'%s\' <= \'%s\'\n", word_data, pnt_data );
				}
				else if( word_inst[0] == token_obs[0] ) // observation variable
				{
					if( debug ) tprintf( "Observation variable\n" );
					c++;
					if( word_data == NULL || c > 1 ) word_data = strtok_r( NULL, separator, &pnt_data );
					// The name may follow the opening token directly ("!name!") or after a space ("! name !")
					if( strlen( word_inst ) == 1 ) word_inst = strtok_r( NULL, separator, &pnt_inst );
					else word_inst = &word_inst[1];
					if( word_inst == NULL ) // opening token without an observation name
					{
						tprintf( "\nERROR: Instruction file %s does not follow the expected format!\n", fn_in_i );
						tprintf( "White space (w), search (%s) or observation (%s) tokens are expected!\n", token_search, token_obs );
						bad_data = 1;
						break;
					}
					sl = strlen( word_inst );
					if( sl > 0 && word_inst[sl - 1] == token_obs[0] ) word_inst[sl - 1] = 0;
					else strtok_r( NULL, separator, &pnt_inst ); // consume the detached closing token
					white_skip( &word_inst ); white_trim( word_inst );
					if( debug ) tprintf( "Observation keyword \'%s\' & data field \'%s\' ... ", word_inst, word_data );
					if( word_data == NULL || strlen( word_data ) == 0 )
					{
						tprintf( "ERROR: Mismatch between the instruction file \'%s\' and the data file \'%s\'!\n", fn_in_i, fn_in_d );
						tprintf( "INSTRUCTION word \'%s\'\n", word_inst );
						tprintf( "Current location in instruction input file: => \'%s\' <= \'%s\'\n", word_inst, pnt_inst );
						tprintf( "Current location in model output file: => \'%s\' <= \'%s\'\n", word_data, pnt_data );
						bad_data = 1;
						break;
					}
					for( i = 0; i < nobs; i++ )
					{
						if( strcmp( word_inst, obs_id[i] ) == 0 )
						{
							if( sscanf( word_data, "%lf", &v ) != 1 ) // data field is not a number; do not store garbage
							{
								tprintf( "ERROR: Mismatch between the instruction file \'%s\' and the data file \'%s\'!\n", fn_in_i, fn_in_d );
								tprintf( "INSTRUCTION word \'%s\'\n", word_inst );
								bad_data = 1;
								break;
							}
							if( obs_count[i] == 0 ) { obs[i] = v; obs_count[i] = 1; }
							else { obs[i] += v; obs_count[i]++; }
							if( debug ) tprintf( "\'%s\'=%g\n", obs_id[i], obs[i] );
							break;
						}
					}
					if( nobs == i )
					{
						tprintf( "\nERROR: Observation keyword \'%s\' does not match any of observation variables!\n", word_inst );
						bad_data = 1;
					}
				}
				else if( comment[0] && word_inst[0] == comment[0] ) // comment
				{
					if( debug > 1 ) tprintf( "Comment. Skip rest of the instruction line!\n" );
					break;
				}
				else
				{
					tprintf( "\nERROR: Instruction file %s does not follow the expected format!\n", fn_in_i );
					tprintf( "White space (w), search (%s) or observation (%s) tokens are expected!\n", token_search, token_obs );
					bad_data = 1;
					break;
				}
			}
			if( pnt_inst == NULL || strlen( pnt_inst ) == 0 ) break; // end of the instruction line
		}
	}
	fclose( infile_data ); fclose( infile_inst );
	if( bad_data ) return( -1 );
	else return( 0 );
}
/*
 * Check an instruction file against the list of known observations without
 * reading any model output.
 *
 * The first line may be a header ("pif" for PEST, "instruction" for MADS)
 * optionally followed by the search, variable and comment token characters;
 * otherwise the file is rewound and the defaults '@', '!' and '#' are used.
 * Every observation keyword found on the following lines is matched against
 * obs_id[0..nobs-1] and the matching obs_count[] entry is incremented.
 *
 * nobs      - number of observations
 * obs_id    - observation names
 * obs_count - per-observation counter of occurrences in the instruction file
 * fn_in_i   - instruction file name
 * debug     - verbosity level
 *
 * Returns 0 when the file is well formed and all keywords are known, -1 otherwise.
 */
int check_ins_obs( int nobs, char **obs_id, int *obs_count, char *fn_in_i, int debug )
{
	FILE *infile_inst;
	char *separator = " \t\n";
	char *word_inst, *word_search, dummy_var[6], buf_inst[1000], *pnt_inst;
	// Tokens start with their defaults and are always NUL-terminated so they can be printed safely
	char token_obs[2] = "!", token_search[2] = "@", comment[2] = "#";
	int i, c, bad_data = 0;
	if( debug ) tprintf( "\nChecking instruction file \'%s\'.\n", fn_in_i );
	if( ( infile_inst = fopen( fn_in_i, "r" ) ) == NULL )
	{
		tprintf( "\n\nERROR: File %s cannot be opened to read template data!\n", fn_in_i );
		return( -1 );
	}
	if( fgets( buf_inst, 1000, infile_inst ) == NULL ) buf_inst[0] = 0; // empty instruction file
	if( debug ) tprintf( "\nFirst instruction line: %s\n", buf_inst );
	pnt_inst = &buf_inst[0];
	for( c = 0, word_inst = strtok_r( buf_inst, separator, &pnt_inst ); word_inst; c++, word_inst = strtok_r( NULL, separator, &pnt_inst ) )
	{
		if( c == 0 ) // first entry
		{
			white_trim( word_inst );
			if( strcasestr( word_inst, "pif" ) )
			{
				if( debug ) tprintf( "PEST Instruction file\n" );
				token_search[0] = '@'; // just in case
				token_obs[0] = '!';
				comment[0] = 0;
			}
			else if( strcasestr( word_inst, "instruction" ) )
			{
				if( debug ) tprintf( "MADS Instruction file; user-specified search/variable tokens are expected\n" );
				token_search[0] = '@'; // just in case
				token_obs[0] = '!';
				comment[0] = '#';
			}
			else
			{
				if( debug ) tprintf( "MADS Instruction file\n" );
				rewind( infile_inst ); // no header: the first line is a regular instruction
				token_search[0] = '@';
				token_obs[0] = '!';
				comment[0] = '#';
				break;
			}
		}
		else if( c == 1 ) // second entry; "search" token
		{
			white_trim( word_inst );
			if( debug > 1 ) tprintf( "Search token %s\n", word_inst );
			token_search[0] = word_inst[0];
			if( strlen( word_inst ) > 1 )
				tprintf( "WARNING: expecting a single character as search separator on the first line of instruction file (\'%s\'; assumed \'%s\')\n", word_inst, token_search );
			if( token_search[0] == 0 ) token_search[0] = '@';
		}
		else if( c == 2 ) // third entry; "variable" token
		{
			white_trim( word_inst );
			if( debug > 1 ) tprintf( "Variable token %s\n", word_inst );
			token_obs[0] = word_inst[0];
			if( strlen( word_inst ) > 1 )
				tprintf( "WARNING: expecting a single character as search separator on the first line of instruction file (\'%s\'; assumed \'%s\')\n", word_inst, token_obs );
			if( token_obs[0] == 0 ) token_obs[0] = '!';
		}
		else if( c == 3 ) // fourth entry; "comment" token
		{
			white_trim( word_inst );
			if( debug > 1 ) tprintf( "Comment token %s\n", word_inst );
			comment[0] = word_inst[0];
			if( strlen( word_inst ) > 1 )
				tprintf( "WARNING: expecting a single character as search separator on the first line of instruction file (\'%s\'; assumed \'%s\')\n", word_inst, comment );
			if( comment[0] == 0 ) comment[0] = '#';
		}
	}
	token_search[1] = token_obs[1] = 0;
	// Build the dummy-observation keyword, e.g. "!dum!"
	dummy_var[0] = token_obs[0];
	dummy_var[1] = 0;
	strcat( dummy_var, "dum" );
	dummy_var[4] = token_obs[0];
	dummy_var[5] = 0;
	token_obs[1] = token_search[1] = comment[1] = 0;
	if( debug )
	{
		tprintf( "Search separator: %s\n", token_search );
		tprintf( "Observation separator: %s\n", token_obs );
		tprintf( "Dummy observation: %s\n", dummy_var );
		if( comment[0] ) tprintf( "Comment: %s\n", comment );
	}
	while( !feof( infile_inst ) ) // IMPORTANT: strtok below modifies buf_inst by adding '\0's; if needed strcpy buf_inst
	{
		if( fgets( buf_inst, 1000, infile_inst ) == NULL ) { if( debug > 1 ) tprintf( "END of instruction file.\n" ); break; }
		pnt_inst = &buf_inst[0];
		word_inst = 0;
		white_trim( pnt_inst ); white_skip( &pnt_inst );
		if( debug ) tprintf( "\nCurrent instruction line: %s\n", pnt_inst );
		if( comment[0] && pnt_inst[0] == comment[0] ) { if( debug > 1 ) tprintf( "Comment; skip this line.\n" ); continue; } // Instruction line is a comment
		if( strlen( pnt_inst ) == 0 ) { if( debug ) tprintf( "Empty line; will be skipped.\n" ); continue; } // Empty line
		if( pnt_inst[0] == 'l' ) // skip lines in the "data" file
		{
			sscanf( &pnt_inst[1], "%d", &c );
			if( debug > 1 ) tprintf( "Skip %d lines\n", c );
			word_inst = strtok_r( NULL, separator, &pnt_inst ); // skip l command
		}
		while( 1 )
		{
			if( pnt_inst[0] == token_search[0] ) // search for keyword
			{
				if( debug ) tprintf( "KEYWORD search " );
				word_search = strtok_r( NULL, token_search, &pnt_inst ); // read search keyword
				if( word_search == NULL ) // search token without a keyword
				{
					tprintf( "\nERROR: Instruction file %s does not follow the expected format!\n", fn_in_i );
					tprintf( "White space (w), search (%s) or observation (%s) tokens are expected!\n", token_search, token_obs );
					bad_data = 1;
					break;
				}
				if( debug ) tprintf( "\'%s\' in the data file ...\n", word_search );
			}
			else
			{
				word_inst = strtok_r( NULL, separator, &pnt_inst ); // read TEMPLATE word
				if( word_inst == NULL ) break; // nothing but separators left on the instruction line
				if( debug > 1 ) tprintf( "Current location in instruction input file: => \'%s\' <= \'%s\'\n", word_inst, pnt_inst );
				white_trim( word_inst );
				if( debug ) tprintf( "INSTRUCTION word \'%s\' : ", word_inst );
				if( strncmp( word_inst, dummy_var, 5 ) == 0 ) // dummy variable
				{
					if( debug ) tprintf( "Skip dummy data!\n" );
				}
				else if( word_inst[0] == 'w' ) // white space
				{
					if( debug ) tprintf( "Skip white space!\n" );
				}
				else if( word_inst[0] == token_obs[0] ) // observation variable
				{
					c = 0;
					// The name may follow the opening token directly ("!name!") or after a space ("! name !")
					if( strlen( word_inst ) == 1 ) word_inst = strtok_r( NULL, separator, &pnt_inst );
					else word_inst = &word_inst[1];
					if( word_inst == NULL ) // opening token without an observation name
					{
						tprintf( "\nERROR: Instruction file %s does not follow the expected format!\n", fn_in_i );
						tprintf( "White space (w), search (%s) or observation (%s) tokens are expected!\n", token_search, token_obs );
						bad_data = 1;
						break;
					}
					if( strlen( word_inst ) > 0 && word_inst[strlen( word_inst ) - 1] == token_obs[0] ) word_inst[strlen( word_inst ) - 1] = 0;
					else strtok_r( NULL, separator, &pnt_inst ); // consume the detached closing token
					if( debug ) tprintf( "Observation keyword \'%s\' ... ", word_inst );
					white_skip( &word_inst ); white_trim( word_inst );
					for( i = 0; i < nobs; i++ )
					{
						if( strcmp( word_inst, obs_id[i] ) == 0 )
						{
							obs_count[i]++;
							if( debug ) tprintf( "\'%s\' detected %d times\n", obs_id[i], obs_count[i] );
							break;
						}
					}
					if( nobs == i )
					{
						tprintf( "\nERROR: Observation keyword \'%s\' does not match any of observation variables!\n", word_inst );
						bad_data = 1;
					}
				}
				else if( comment[0] && word_inst[0] == comment[0] ) // comment
				{
					if( debug ) tprintf( "Comment. Skip rest of the instruction line!\n" );
					break;
				}
				else
				{
					tprintf( "\nERROR: Instruction file %s does not follow the expected format!\n", fn_in_i );
					tprintf( "White space (w), search (%s) or observation (%s) tokens are expected!\n", token_search, token_obs );
					bad_data = 1;
					break;
				}
			}
			if( pnt_inst == NULL || strlen( pnt_inst ) == 0 ) break; // end of the instruction line
		}
	}
	fclose( infile_inst );
	if( bad_data ) return( -1 );
	else return( 0 );
}
void BLOCK::print( //print list of sides FILE *, //< file to print on BOOL8 dump //< print full detail ) { ICOORDELT_IT it = &leftside; //iterator box.print (); tprintf ("Proportional= %s\n", proportional ? "TRUE" : "FALSE"); tprintf ("Kerning= %d\n", kerning); tprintf ("Spacing= %d\n", spacing); tprintf ("Fixed_pitch=%d\n", pitch); tprintf ("Filename= %s\n", filename.string ()); if (dump) { tprintf ("Left side coords are:\n"); for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) tprintf ("(%d,%d) ", it.data ()->x (), it.data ()->y ()); tprintf ("\n"); tprintf ("Right side coords are:\n"); it.set_to_list (&rightside); for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) tprintf ("(%d,%d) ", it.data ()->x (), it.data ()->y ()); tprintf ("\n"); } }
/**
 * Builds a new WERD from blobs taken out of all_blobs.
 *
 * Every blob of this word is removed from the word and compared against the
 * blobs in all_blobs; each blob of all_blobs that is contained in, or has a
 * major overlap with, the word blob is extracted from all_blobs and added to
 * the new word. Word blobs with no match are kept aside; those that overlap a
 * blob already placed in the new word are deleted, the others are appended to
 * orphan_blobs when it is non-NULL.
 *
 * Returns the new word, or NULL when no blob of all_blobs matched. In the NULL
 * case whatever is still in the not-found list is put back into this word.
 */
WERD* WERD::ConstructWerdWithNewBlobs(C_BLOB_LIST* all_blobs,
                                      C_BLOB_LIST* orphan_blobs) {
  C_BLOB_LIST current_blob_list;
  C_BLOB_IT werd_blobs_it(&current_blob_list);
  // Add the word's c_blobs.
  werd_blobs_it.add_list_after(cblob_list());

  // New blob list. These contain the blobs which will form the new word.
  C_BLOB_LIST new_werd_blobs;
  C_BLOB_IT new_blobs_it(&new_werd_blobs);

  // not_found_blobs contains the list of current word's blobs for which a
  // corresponding blob wasn't found in the input all_blobs list.
  C_BLOB_LIST not_found_blobs;
  C_BLOB_IT not_found_it(&not_found_blobs);
  not_found_it.move_to_last();

  werd_blobs_it.move_to_first();
  for (werd_blobs_it.mark_cycle_pt(); !werd_blobs_it.cycled_list();
       werd_blobs_it.forward()) {
    C_BLOB* werd_blob = werd_blobs_it.extract();
    TBOX werd_blob_box = werd_blob->bounding_box();
    bool found = false;
    // Now find the corresponding blob for this blob in the all_blobs
    // list. For now, follow the inefficient method of pairwise
    // comparisons. Ideally, one can pre-bucket the blobs by row.
    C_BLOB_IT all_blobs_it(all_blobs);
    for (all_blobs_it.mark_cycle_pt(); !all_blobs_it.cycled_list();
         all_blobs_it.forward()) {
      C_BLOB* a_blob = all_blobs_it.data();
      // Compute the overlap of the two blobs. If major, a_blob should
      // be added to the new blobs list.
      TBOX a_blob_box = a_blob->bounding_box();
      if (a_blob_box.null_box()) {
        tprintf("Bounding box couldn't be ascertained\n");
      }
      if (werd_blob_box.contains(a_blob_box) ||
          werd_blob_box.major_overlap(a_blob_box)) {
        // Old blobs are from minimal splits, therefore are expected to be
        // bigger. The new small blobs should cover a significant portion.
        // This is it.
        all_blobs_it.extract();
        new_blobs_it.add_after_then_move(a_blob);
        found = true;
      }
    }
    if (!found) {
      not_found_it.add_after_then_move(werd_blob);
    } else {
      // Replaced by the matching blob(s) taken from all_blobs.
      delete werd_blob;
    }
  }
  // Iterate over all not found blobs. Some of them may be due to
  // under-segmentation (which is OK, since the corresponding blob is already
  // in the list in that case).
  not_found_it.move_to_first();
  for (not_found_it.mark_cycle_pt(); !not_found_it.cycled_list();
       not_found_it.forward()) {
    C_BLOB* not_found = not_found_it.data();
    TBOX not_found_box = not_found->bounding_box();
    C_BLOB_IT existing_blobs_it(new_blobs_it);
    for (existing_blobs_it.mark_cycle_pt(); !existing_blobs_it.cycled_list();
         existing_blobs_it.forward()) {
      C_BLOB* a_blob = existing_blobs_it.data();
      TBOX a_blob_box = a_blob->bounding_box();
      if ((not_found_box.major_overlap(a_blob_box) ||
           a_blob_box.major_overlap(not_found_box)) &&
          not_found_box.y_overlap_fraction(a_blob_box) > 0.8) {
        // Already taken care of.
        delete not_found_it.extract();
        break;
      }
    }
  }
  if (orphan_blobs) {
    C_BLOB_IT orphan_blobs_it(orphan_blobs);
    orphan_blobs_it.move_to_last();
    orphan_blobs_it.add_list_after(&not_found_blobs);
  }

  // New blobs are ready. Create a new werd object with these.
  WERD* new_werd = NULL;
  if (!new_werd_blobs.empty()) {
    new_werd = new WERD(&new_werd_blobs, this);
  } else {
    // Add the blobs back to this word so that it can be reused.
    C_BLOB_IT this_list_it(cblob_list());
    this_list_it.add_list_after(&not_found_blobs);
  }
  return new_werd;
}
int main(int argc, char **argv) { if ((argc == 2 && strcmp(argv[1], "-v") == 0) || (argc == 2 && strcmp(argv[1], "--version") == 0)) { char *versionStrP; fprintf(stderr, "tesseract %s\n", tesseract::TessBaseAPI::Version()); versionStrP = getLeptonicaVersion(); fprintf(stderr, " %s\n", versionStrP); lept_free(versionStrP); versionStrP = getImagelibVersions(); fprintf(stderr, " %s\n", versionStrP); lept_free(versionStrP); #ifdef USE_OPENCL cl_platform_id platform; cl_uint num_platforms; cl_device_id devices[2]; cl_uint num_devices; char info[256]; int i; fprintf(stderr, " OpenCL info:\n"); clGetPlatformIDs(1, &platform, &num_platforms); fprintf(stderr, " Found %d platforms.\n", num_platforms); clGetPlatformInfo(platform, CL_PLATFORM_NAME, 256, info, 0); fprintf(stderr, " Platform name: %s.\n", info); clGetPlatformInfo(platform, CL_PLATFORM_VERSION, 256, info, 0); fprintf(stderr, " Version: %s.\n", info); clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 2, devices, &num_devices); fprintf(stderr, " Found %d devices.\n", num_devices); for (i = 0; i < num_devices; ++i) { clGetDeviceInfo(devices[i], CL_DEVICE_NAME, 256, info, 0); fprintf(stderr, " Device %d name: %s.\n", i+1, info); } #endif exit(0); } // Make the order of args a bit more forgiving than it used to be. 
const char* lang = "eng"; const char* image = NULL; const char* output = NULL; const char* datapath = NULL; bool noocr = false; bool list_langs = false; bool print_parameters = false; tesseract::PageSegMode pagesegmode = tesseract::PSM_AUTO; int arg = 1; while (arg < argc && (output == NULL || argv[arg][0] == '-')) { if (strcmp(argv[arg], "-l") == 0 && arg + 1 < argc) { lang = argv[arg + 1]; ++arg; } else if (strcmp(argv[arg], "--tessdata-dir") == 0 && arg + 1 < argc) { datapath = argv[arg + 1]; ++arg; } else if (strcmp(argv[arg], "--list-langs") == 0) { noocr = true; list_langs = true; } else if (strcmp(argv[arg], "-psm") == 0 && arg + 1 < argc) { pagesegmode = static_cast<tesseract::PageSegMode>(atoi(argv[arg + 1])); ++arg; } else if (strcmp(argv[arg], "--print-parameters") == 0) { noocr = true; print_parameters = true; } else if (strcmp(argv[arg], "-c") == 0 && arg + 1 < argc) { // handled properly after api init ++arg; } else if (image == NULL) { image = argv[arg]; } else if (output == NULL) { output = argv[arg]; } ++arg; } if (argc == 2 && strcmp(argv[1], "--list-langs") == 0) { list_langs = true; noocr = true; } if (output == NULL && noocr == false) { fprintf(stderr, "Usage:\n %s imagename|stdin outputbase|stdout " "[options...] [configfile...]\n\n", argv[0]); fprintf(stderr, "OCR options:\n"); fprintf(stderr, " --tessdata-dir /path\tspecify location of tessdata" " path\n"); fprintf(stderr, " -l lang[+lang]\tspecify language(s) used for OCR\n"); fprintf(stderr, " -c configvar=value\tset value for control parameter.\n" "\t\t\tMultiple -c arguments are allowed.\n"); fprintf(stderr, " -psm pagesegmode\tspecify page segmentation mode.\n"); fprintf(stderr, "These options must occur before any configfile.\n\n"); fprintf(stderr, "pagesegmode values are:\n" " 0 = Orientation and script detection (OSD) only.\n" " 1 = Automatic page segmentation with OSD.\n" " 2 = Automatic page segmentation, but no OSD, or OCR\n" " 3 = Fully automatic page segmentation, but no OSD. 
(Default)\n" " 4 = Assume a single column of text of variable sizes.\n" " 5 = Assume a single uniform block of vertically aligned text.\n" " 6 = Assume a single uniform block of text.\n" " 7 = Treat the image as a single text line.\n" " 8 = Treat the image as a single word.\n" " 9 = Treat the image as a single word in a circle.\n" " 10 = Treat the image as a single character.\n\n"); fprintf(stderr, "Single options:\n"); fprintf(stderr, " -v --version: version info\n"); fprintf(stderr, " --list-langs: list available languages for tesseract " "engine. Can be used with --tessdata-dir.\n"); fprintf(stderr, " --print-parameters: print tesseract parameters to the " "stdout.\n"); exit(1); } if (output != NULL && strcmp(output, "-") && strcmp(output, "stdout")) { tprintf("Tesseract Open Source OCR Engine v%s with Leptonica\n", tesseract::TessBaseAPI::Version()); } PERF_COUNT_START("Tesseract:main") tesseract::TessBaseAPI api; api.SetOutputName(output); int rc = api.Init(datapath, lang, tesseract::OEM_DEFAULT, &(argv[arg]), argc - arg, NULL, NULL, false); if (rc) { fprintf(stderr, "Could not initialize tesseract.\n"); exit(1); } char opt1[255], opt2[255]; for (arg = 0; arg < argc; arg++) { if (strcmp(argv[arg], "-c") == 0 && arg + 1 < argc) { strncpy(opt1, argv[arg + 1], 255); *(strchr(opt1, '=')) = 0; strncpy(opt2, strchr(argv[arg + 1], '=') + 1, 255); opt2[254] = 0; ++arg; if (!api.SetVariable(opt1, opt2)) { fprintf(stderr, "Could not set option: %s=%s\n", opt1, opt2); } } } if (list_langs) { GenericVector<STRING> languages; api.GetAvailableLanguagesAsVector(&languages); fprintf(stderr, "List of available languages (%d):\n", languages.size()); for (int index = 0; index < languages.size(); ++index) { STRING& string = languages[index]; fprintf(stderr, "%s\n", string.string()); } api.End(); exit(0); } if (print_parameters) { FILE* fout = stdout; fprintf(stdout, "Tesseract parameters:\n"); api.PrintVariables(fout); api.End(); exit(0); } // We have 2 possible sources of 
pagesegmode: a config file and // the command line. For backwards compatability reasons, the // default in tesseract is tesseract::PSM_SINGLE_BLOCK, but the // default for this program is tesseract::PSM_AUTO. We will let // the config file take priority, so the command-line default // can take priority over the tesseract default, so we use the // value from the command line only if the retrieved mode // is still tesseract::PSM_SINGLE_BLOCK, indicating no change // in any config file. Therefore the only way to force // tesseract::PSM_SINGLE_BLOCK is from the command line. // It would be simpler if we could set the value before Init, // but that doesn't work. if (api.GetPageSegMode() == tesseract::PSM_SINGLE_BLOCK) api.SetPageSegMode(pagesegmode); bool stdInput = !strcmp(image, "stdin") || !strcmp(image, "-"); Pix* pixs = NULL; if (stdInput) { char byt; GenericVector<l_uint8> ch_data; std::istream file(std::cin.rdbuf()); #ifdef WIN32 if (_setmode(_fileno(stdin), _O_BINARY) == -1) tprintf("ERROR: cin to binary: %s", strerror(errno)); #endif // WIN32 while (file.get(byt)) { ch_data.push_back(byt); } std::cin.ignore(std::cin.rdbuf()->in_avail() + 1); pixs = pixReadMem(&ch_data[0], ch_data.size()); } if (pagesegmode == tesseract::PSM_AUTO_ONLY || pagesegmode == tesseract::PSM_OSD_ONLY) { int ret_val = 0; if (!pixs) pixs = pixRead(image); if (!pixs) { fprintf(stderr, "Cannot open input file: %s\n", image); exit(2); } api.SetImage(pixs); if (pagesegmode == tesseract::PSM_OSD_ONLY) { OSResults osr; if (api.DetectOS(&osr)) { int orient = osr.best_result.orientation_id; int script_id = osr.get_best_script(orient); float orient_oco = osr.best_result.oconfidence; float orient_sco = osr.best_result.sconfidence; tprintf("Orientation: %d\nOrientation in degrees: %d\n" \ "Orientation confidence: %.2f\n" \ "Script: %d\nScript confidence: %.2f\n", orient, OrientationIdToValue(orient), orient_oco, script_id, orient_sco); } else { ret_val = 1; } } else { tesseract::Orientation 
orientation; tesseract::WritingDirection direction; tesseract::TextlineOrder order; float deskew_angle; tesseract::PageIterator* it = api.AnalyseLayout(); if (it) { it->Orientation(&orientation, &direction, &order, &deskew_angle); tprintf("Orientation: %d\nWritingDirection: %d\nTextlineOrder: %d\n" \ "Deskew angle: %.4f\n", orientation, direction, order, deskew_angle); } else { ret_val = 1; } delete it; } pixDestroy(&pixs); exit(ret_val); } tesseract::TessResultRenderer* renderer = NULL; bool b; api.GetBoolVariable("tessedit_create_hocr", &b); if (b && renderer == NULL) renderer = new tesseract::TessHOcrRenderer(); api.GetBoolVariable("tessedit_create_pdf", &b); if (b && renderer == NULL) renderer = new tesseract::TessPDFRenderer(api.GetDatapath()); api.GetBoolVariable("tessedit_create_boxfile", &b); if (b && renderer == NULL) renderer = new tesseract::TessBoxTextRenderer(); if (renderer == NULL) renderer = new tesseract::TessTextRenderer(); if (pixs) { api.ProcessPage(pixs, 0, NULL, NULL, 0, renderer); pixDestroy(&pixs); } else { FILE* fin = fopen(image, "rb"); if (fin == NULL) { fprintf(stderr, "Cannot open input file: %s\n", image); exit(2); } fclose(fin); if (!api.ProcessPages(image, NULL, 0, renderer)) { fprintf(stderr, "Error during processing.\n"); exit(1); } } FILE* fout = stdout; if (strcmp(output, "-") && strcmp(output, "stdout")) { STRING outfile = STRING(output) + STRING(".") + STRING(renderer->file_extension()); fout = fopen(outfile.string(), "wb"); if (fout == NULL) { fprintf(stderr, "Cannot create output file %s\n", outfile.string()); exit(1); } } const char* data; inT32 data_len; if (renderer->GetOutput(&data, &data_len)) { fwrite(data, 1, data_len, fout); if (fout != stdout) fclose(fout); else clearerr(fout); } PERF_COUNT_END return 0; // Normal exit }
/*
 * Test driver for rt_sigprocmask.  It issues a fixed sequence of calls
 * through k_sigprocmask() and, next to each call, prints with tprintf() a
 * line naming the call, its arguments and its result.  A call whose result
 * differs from the printed one is reported through perror_msg_and_fail() or
 * trips an assert().
 *
 * The order of the calls matters: every old-mask value printed below is the
 * mask left behind by the calls that precede it.
 */
int main(void) {
	tprintf("%s", "");

	const unsigned int big_size = 1024 / 8;
	unsigned int set_size;

	/*
	 * Probe for a signal set size that the call accepts: start with
	 * big_size bytes and halve after every failure, printing the EINVAL
	 * line for each size that was rejected.
	 */
	for (set_size = big_size; set_size; set_size >>= 1) {
		if (!k_sigprocmask(SIG_SETMASK, NULL, NULL, set_size))
			break;
		tprintf("rt_sigprocmask(SIG_SETMASK, NULL, NULL, %u)"
			" = -1 EINVAL (%m)\n", set_size);
	}
	/* No size down to 1 byte was accepted. */
	if (!set_size)
		perror_msg_and_fail("rt_sigprocmask");
	tprintf("rt_sigprocmask(SIG_SETMASK, NULL, NULL, %u) = 0\n", set_size);

	/*
	 * k_set and old_set are raw buffers of exactly set_size bytes;
	 * libc_set is a full sigset_t that is edited with the libc helpers
	 * and then copied (first set_size bytes only) into k_set.
	 */
	void *const k_set = tail_alloc(set_size);
	void *const old_set = tail_alloc(set_size);
	sigset_t *const libc_set = tail_alloc(sizeof(sigset_t));

	/* Install an empty mask. */
	memset(k_set, 0, set_size);
	if (k_sigprocmask(SIG_SETMASK, k_set, NULL, set_size))
		perror_msg_and_fail("rt_sigprocmask");
	tprintf("rt_sigprocmask(SIG_SETMASK, [], NULL, %u) = 0\n", set_size);

	/*
	 * NOTE(review): the set is read from the set_size bytes *before*
	 * k_set and is expected to decode as ~[].  Presumably tail_alloc()
	 * leaves readable, all-ones memory in front of the pointer it
	 * returns -- confirm against tail_alloc().
	 */
	if (k_sigprocmask(SIG_UNBLOCK, k_set - set_size, old_set, set_size))
		perror_msg_and_fail("rt_sigprocmask");
	tprintf("rt_sigprocmask(SIG_UNBLOCK, ~[], [], %u) = 0\n", set_size);

	/* Twice the accepted size must be rejected (printed as EINVAL). */
	assert(k_sigprocmask(SIG_SETMASK, k_set - set_size, old_set, set_size << 1) == -1);
	tprintf("rt_sigprocmask(SIG_SETMASK, %p, %p, %u) = -1 EINVAL (%m)\n",
		k_set - set_size, old_set, set_size << 1);

	/* Repeat with smaller sizes, starting from half the accepted one. */
	iterate("~[]", k_set - set_size, old_set, set_size >> 1);

	/* Block SIGHUP; the previous mask is expected to be empty. */
	sigemptyset(libc_set);
	sigaddset(libc_set, SIGHUP);
	memcpy(k_set, libc_set, set_size);
	if (k_sigprocmask(SIG_BLOCK, k_set, old_set, set_size))
		perror_msg_and_fail("rt_sigprocmask");
	tprintf("rt_sigprocmask(SIG_BLOCK, [HUP], [], %u) = 0\n", set_size);

	/* Unblock everything except SIGHUP, so SIGHUP stays blocked. */
	memset(libc_set, -1, sizeof(sigset_t));
	sigdelset(libc_set, SIGHUP);
	memcpy(k_set, libc_set, set_size);
	if (k_sigprocmask(SIG_UNBLOCK, k_set, old_set, set_size))
		perror_msg_and_fail("rt_sigprocmask");
	tprintf("rt_sigprocmask(SIG_UNBLOCK, ~[HUP], [HUP], %u) = 0\n", set_size);

	/* Same again with SIGKILL also removed from the set. */
	sigdelset(libc_set, SIGKILL);
	memcpy(k_set, libc_set, set_size);
	if (k_sigprocmask(SIG_UNBLOCK, k_set, old_set, set_size))
		perror_msg_and_fail("rt_sigprocmask");
	tprintf("rt_sigprocmask(SIG_UNBLOCK, ~[HUP KILL], [HUP], %u) = 0\n", set_size);

	/* Block a five-signal set on top of the still-blocked SIGHUP. */
	sigemptyset(libc_set);
	sigaddset(libc_set, SIGHUP);
	sigaddset(libc_set, SIGINT);
	sigaddset(libc_set, SIGQUIT);
	sigaddset(libc_set, SIGALRM);
	sigaddset(libc_set, SIGTERM);
	memcpy(k_set, libc_set, set_size);
	if (k_sigprocmask(SIG_BLOCK, k_set, old_set, set_size))
		perror_msg_and_fail("rt_sigprocmask");
	tprintf("rt_sigprocmask(SIG_BLOCK, %s, [HUP], %u) = 0\n",
		"[HUP INT QUIT ALRM TERM]", set_size);

	/* Query only: a NULL new set leaves the mask alone and returns it. */
	if (k_sigprocmask(SIG_SETMASK, NULL, old_set, set_size))
		perror_msg_and_fail("rt_sigprocmask");
	tprintf("rt_sigprocmask(SIG_SETMASK, NULL, %s, %u) = 0\n",
		"[HUP INT QUIT ALRM TERM]", set_size);

	/*
	 * NOTE(review): pointers half a set past the start of each buffer
	 * are expected to fail with EFAULT.  Presumably the memory right
	 * after a tail_alloc() buffer is inaccessible -- confirm against
	 * tail_alloc().
	 */
	assert(k_sigprocmask(SIG_SETMASK, k_set + (set_size >> 1), NULL, set_size) == -1);
	tprintf("rt_sigprocmask(SIG_SETMASK, %p, NULL, %u) = -1 EFAULT (%m)\n",
		k_set + (set_size >> 1), set_size);

	assert(k_sigprocmask(SIG_SETMASK, k_set, old_set + (set_size >> 1), set_size) == -1);
	tprintf("rt_sigprocmask(SIG_SETMASK, %s, %p, %u) = -1 EFAULT (%m)\n",
		"[HUP INT QUIT ALRM TERM]", old_set + (set_size >> 1), set_size);

	tprintf("+++ exited with 0 +++\n");
	return 0;
}
// Records "no evidence" in the row: zero min_space and max_nonspace.
// Always returns 0 so callers can write "return row_words_no_evidence(row)".
static inT32 row_words_no_evidence(TO_ROW *row) {
  row->min_space = 0;
  row->max_nonspace = 0;
  return 0;
}

/**
 * row_words
 *
 * Estimates the space and non-space (kern) gap sizes of one row by
 * clustering the horizontal gaps between consecutive blobs, and stores
 * min_space, max_nonspace, space_threshold, space_size and kern_size in the
 * row.
 *
 * Returns the sample count of cluster 2, or 0 (with row->min_space and
 * row->max_nonspace zeroed) when the gaps give no usable evidence.
 * The rotation argument is not used.
 */
inT32 row_words(                    //compute space size
                TO_BLOCK *block,    //block it came from
                TO_ROW *row,        //row to operate on
                inT32 maxwidth,     //max expected space size
                FCOORD rotation,    //for drawing
                BOOL8 testing_on    //for debug
               ) {
  BLOBNBOX_IT blob_it = row->blob_list();
  STATS gap_stats(0, maxwidth);
  STATS cluster_stats[4];           //clusters
  float gaps[3];                    //cluster medians

  ICOORD testpt(textord_test_x, textord_test_y);
  const inT32 smooth_factor =
      (inT32) (block->xheight * textord_wordstats_smooth_factor + 1.5);
  const bool show_debug = testing_on && textord_show_initial_words;

  // Pass 1: blob widths go into the histogram, and we note whether the
  // debug test point falls inside any blob of this row.
  BOOL8 testing_row = FALSE;
  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
    TBOX width_box = blob_it.data()->bounding_box();
    if (width_box.contains(testpt))
      testing_row = TRUE;
    gap_stats.add(width_box.width(), 1);
  }
  // The width percentile is still computed, but the blob-width filter that
  // consumed it is disabled, so every blob counts as valid in pass 2.
  const inT32 min_gap = (inT32) floor(gap_stats.ile(textord_words_width_ile));
  (void) min_gap;
  gap_stats.clear();

  // Pass 2: histogram of gaps between each blob and its predecessor.  The
  // first blob has no predecessor, and the gap is only evaluated once one
  // exists, so the sentinel in prev_x never enters the subtraction.
  bool have_prev = false;
  inT32 prev_x = -MAX_INT32;        //end of prev blob
  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
    BLOBNBOX *blob = blob_it.data();
    if (blob->joined_to_prev())
      continue;
    TBOX blob_box = blob->bounding_box();
    if (have_prev && blob_box.left() - prev_x < maxwidth)
      gap_stats.add(blob_box.left() - prev_x, 1);
    prev_x = blob_box.right();
    have_prev = true;
  }
  if (gap_stats.get_total() == 0)
    return row_words_no_evidence(row);

  gap_stats.smooth(smooth_factor);
  float lower = row->xheight * textord_words_initial_lower;
  float upper = row->xheight * textord_words_initial_upper;
  inT32 cluster_count = gap_stats.cluster(lower, upper,
                                          textord_spacesize_ratioprop,
                                          3, cluster_stats);
  // Narrow the [lower, upper] window until at least two clusters appear or
  // the window cannot shrink further.  Note that the new lower is derived
  // from the already-updated upper.
  while (cluster_count < 2 && ceil(lower) < floor(upper)) {
    upper = (upper * 3 + lower) / 4;
    lower = (lower * 3 + upper) / 4;
    cluster_count = gap_stats.cluster(lower, upper,
                                      textord_spacesize_ratioprop,
                                      3, cluster_stats);
  }
  if (cluster_count < 2)
    return row_words_no_evidence(row);

  for (inT32 index = 0; index < cluster_count; index++)
    gaps[index] = cluster_stats[index + 1].ile(0.5);  //get medians

  // Smallest gap that is still acceptable as a space.
  const double min_space_limit = block->xheight * textord_words_min_minspace;

  if (cluster_count > 2) {
    if (show_debug) {
      tprintf ("Row at %g has 3 sizes of gap:%g,%g,%g\n",
               row->intercept(),
               cluster_stats[1].ile(0.5),
               cluster_stats[2].ile(0.5),
               cluster_stats[3].ile(0.5));
    }
    lower = gaps[0];
    if (gaps[1] > lower) {
      upper = gaps[1];              //prefer most frequent
      if (upper < min_space_limit && gaps[2] > gaps[1])
        upper = gaps[2];
    } else if (gaps[2] > lower && gaps[2] >= min_space_limit) {
      upper = gaps[2];
    } else if (lower >= min_space_limit) {
      upper = lower;                //not nice
      lower = gaps[1];
      if (show_debug) {
        tprintf ("Had to switch most common from lower to upper!!\n");
        gap_stats.print(stdout, TRUE);
      }
    } else {
      return row_words_no_evidence(row);
    }
  } else {
    // Exactly two clusters: the smaller median is the kern, the larger
    // one the space.
    const bool swapped = gaps[1] < gaps[0];
    if (swapped && show_debug) {
      tprintf ("Had to switch most common from lower to upper!!\n");
      gap_stats.print(stdout, TRUE);
    }
    lower = swapped ? gaps[1] : gaps[0];
    upper = swapped ? gaps[0] : gaps[1];
  }

  if (upper < min_space_limit)
    return row_words_no_evidence(row);

  // Only reported, never acted on: the row estimate lies on one side of the
  // block-level threshold.
  if (upper * 3 < block->min_space * 2 + block->max_nonspace
      || lower * 3 > block->min_space * 2 + block->max_nonspace) {
    if (show_debug) {
      tprintf ("Disagreement between block and row at %g!!\n",
               row->intercept());
      tprintf ("Lower=%g, upper=%g, Stats:\n", lower, upper);
      gap_stats.print(stdout, TRUE);
    }
  }

  row->min_space =
      (inT32) ceil(upper - (upper - lower) * textord_words_definite_spread);
  row->max_nonspace =
      (inT32) floor(lower + (upper - lower) * textord_words_definite_spread);
  row->space_threshold = (row->max_nonspace + row->min_space) / 2;
  row->space_size = upper;
  row->kern_size = lower;

  if (show_debug) {
    if (testing_row) {
      tprintf ("GAP STATS\n");
      gap_stats.print(stdout, TRUE);
      tprintf ("SPACE stats\n");
      cluster_stats[2].print(stdout, FALSE);
      tprintf ("NONSPACE stats\n");
      cluster_stats[1].print(stdout, FALSE);
    }
    tprintf ("Row at %g has minspace=%d(%g), max_non=%d(%g)\n",
             row->intercept(), row->min_space, upper,
             row->max_nonspace, lower);
  }
  return cluster_stats[2].get_total();
}
// TODO(rays) Merge with outline_complexity. inT32 OL_BUCKETS::count_children( // recursive count C_OUTLINE *outline, // parent outline inT32 max_count // max output ) { BOOL8 parent_box; // could it be boxy inT16 xmin, xmax; // coord limits inT16 ymin, ymax; inT16 xindex, yindex; // current bucket C_OUTLINE *child; // current child inT32 child_count; // no of children inT32 grandchild_count; // no of grandchildren inT32 parent_area; // potential box FLOAT32 max_parent_area; // potential box inT32 child_area; // current child inT32 child_length; // current child TBOX olbox; C_OUTLINE_IT child_it; // search iterator olbox = outline->bounding_box(); xmin =(olbox.left() - bl.x()) / BUCKETSIZE; xmax =(olbox.right() - bl.x()) / BUCKETSIZE; ymin =(olbox.bottom() - bl.y()) / BUCKETSIZE; ymax =(olbox.top() - bl.y()) / BUCKETSIZE; child_count = 0; grandchild_count = 0; parent_area = 0; max_parent_area = 0; parent_box = TRUE; for (yindex = ymin; yindex <= ymax; yindex++) { for (xindex = xmin; xindex <= xmax; xindex++) { child_it.set_to_list(&buckets[yindex * bxdim + xindex]); if (child_it.empty()) continue; for (child_it.mark_cycle_pt(); !child_it.cycled_list(); child_it.forward()) { child = child_it.data(); if (child != outline && *child < *outline) { child_count++; if (child_count <= max_count) { int max_grand =(max_count - child_count) / edges_children_per_grandchild; if (max_grand > 0) grandchild_count += count_children(child, max_grand) * edges_children_per_grandchild; else grandchild_count += count_children(child, 1); } if (child_count + grandchild_count > max_count) { if (edges_debug) tprintf("Discarding parent with child count=%d, gc=%d\n", child_count,grandchild_count); return child_count + grandchild_count; } if (parent_area == 0) { parent_area = outline->outer_area(); if (parent_area < 0) parent_area = -parent_area; max_parent_area = outline->bounding_box().area() * edges_boxarea; if (parent_area < max_parent_area) parent_box = FALSE; } if (parent_box && 
(!edges_children_fix || child->bounding_box().height() > edges_min_nonhole)) { child_area = child->outer_area(); if (child_area < 0) child_area = -child_area; if (edges_children_fix) { if (parent_area - child_area < max_parent_area) { parent_box = FALSE; continue; } if (grandchild_count > 0) { if (edges_debug) tprintf("Discarding parent of area %d, child area=%d, max%g " "with gc=%d\n", parent_area, child_area, max_parent_area, grandchild_count); return max_count + 1; } child_length = child->pathlength(); if (child_length * child_length > child_area * edges_patharea_ratio) { if (edges_debug) tprintf("Discarding parent of area %d, child area=%d, max%g " "with child length=%d\n", parent_area, child_area, max_parent_area, child_length); return max_count + 1; } } if (child_area < child->bounding_box().area() * edges_childarea) { if (edges_debug) tprintf("Discarding parent of area %d, child area=%d, max%g " "with child rect=%d\n", parent_area, child_area, max_parent_area, child->bounding_box().area()); return max_count + 1; } } } } } } return child_count + grandchild_count; }
/**
 * row_words2
 *
 * Estimates the space and non-space (kern) gap sizes of one row from the
 * clustered gaps between its blobs, using the block-level max_nonspace as
 * the dividing line and the block's pr_space / pr_nonsp as fallbacks.
 * Stores min_space, max_nonspace, space_threshold, space_size and kern_size
 * in the row.
 *
 * Returns 1 on success, or 0 (with row->min_space and row->max_nonspace
 * zeroed) when there are no gaps or no clusters.
 * The rotation argument is not used.
 */
inT32 row_words2(                    //compute space size
                 TO_BLOCK *block,    //block it came from
                 TO_ROW *row,        //row to operate on
                 inT32 maxwidth,     //max expected space size
                 FCOORD rotation,    //for drawing
                 BOOL8 testing_on    //for debug
                ) {
  BLOBNBOX_IT blob_it = row->blob_list();
  STATS gap_stats(0, maxwidth);      //gap sizes
  float gaps[BLOCK_STATS_CLUSTERS];  //cluster medians
  STATS cluster_stats[BLOCK_STATS_CLUSTERS + 1];  //clusters

  // NOTE(review): the test point is built but never compared with any blob
  // in this function, so testing_row stays FALSE and the full stats dump at
  // the end does not run.
  ICOORD testpt(textord_test_x, textord_test_y);
  const BOOL8 testing_row = FALSE;
  const inT32 smooth_factor =
      (inT32) (block->xheight * textord_wordstats_smooth_factor + 1.5);

  // First attempt: gaps between consecutive unjoined blobs.  The blob-width
  // validity test is switched off, so every blob is treated as valid and
  // only the very first blob lacks a predecessor.
  bool have_prev = false;
  inT32 prev_x = -MAX_INT16;         //end of prev blob
  inT32 total_count = 0;             //total gaps
  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
    BLOBNBOX *blob = blob_it.data();
    if (blob->joined_to_prev())
      continue;
    TBOX blob_box = blob->bounding_box();
    if (have_prev && blob_box.left() - prev_x < maxwidth)
      gap_stats.add(blob_box.left() - prev_x, 1);
    total_count++;                   //count possibles
    prev_x = blob_box.right();
    have_prev = true;
  }

  // Too few usable gaps: redo the histogram without the predecessor check.
  const inT32 valid_count = gap_stats.get_total();
  if (valid_count < total_count * textord_words_minlarge) {
    gap_stats.clear();
    prev_x = -MAX_INT16;
    for (blob_it.mark_cycle_pt(); !blob_it.cycled_list();
         blob_it.forward()) {
      BLOBNBOX *blob = blob_it.data();
      if (blob->joined_to_prev())
        continue;
      TBOX blob_box = blob->bounding_box();
      if (blob_box.left() - prev_x < maxwidth)
        gap_stats.add(blob_box.left() - prev_x, 1);
      prev_x = blob_box.right();
    }
  }
  if (gap_stats.get_total() == 0) {
    row->min_space = 0;              //no evidence
    row->max_nonspace = 0;
    return 0;
  }

  float lower = block->xheight * words_initial_lower;
  float upper = block->xheight * words_initial_upper;
  gap_stats.smooth(smooth_factor);

  // Re-cluster for as long as each pass finds more clusters than the one
  // before and the cluster limit has not been reached.
  inT32 cluster_count = 0;           //no of clusters
  inT32 prev_count;                  //previous cluster_count
  do {
    prev_count = cluster_count;
    cluster_count = gap_stats.cluster(lower, upper,
                                      textord_spacesize_ratioprop,
                                      BLOCK_STATS_CLUSTERS, cluster_stats);
  } while (cluster_count > prev_count
           && cluster_count < BLOCK_STATS_CLUSTERS);
  if (cluster_count < 1) {
    row->min_space = 0;
    row->max_nonspace = 0;
    return 0;
  }

  inT32 gap_index;                   //which cluster
  for (gap_index = 0; gap_index < cluster_count; gap_index++)
    gaps[gap_index] = cluster_stats[gap_index + 1].ile(0.5);  //get medians
  if (testing_on) {
    tprintf ("cluster_count=%d:", cluster_count);
    for (gap_index = 0; gap_index < cluster_count; gap_index++)
      tprintf (" %g(%d)", gaps[gap_index],
               cluster_stats[gap_index + 1].get_total());
    tprintf ("\n");
  }

  //Try to find proportional non-space and space for row.
  // First cluster whose median is at or below the block threshold.
  gap_index = 0;
  while (gap_index < cluster_count
         && gaps[gap_index] > block->max_nonspace)
    gap_index++;
  if (gap_index < cluster_count) {
    lower = gaps[gap_index];         //most frequent below
  } else {
    if (testing_on)
      tprintf ("No cluster below block threshold!, using default=%g\n",
               block->pr_nonsp);
    lower = block->pr_nonsp;
  }

  // First cluster whose median is above the block threshold.
  gap_index = 0;
  while (gap_index < cluster_count
         && gaps[gap_index] <= block->max_nonspace)
    gap_index++;
  if (gap_index < cluster_count) {
    upper = gaps[gap_index];         //most frequent above
  } else {
    if (testing_on)
      tprintf ("No cluster above block threshold!, using default=%g\n",
               block->pr_space);
    upper = block->pr_space;
  }

  row->min_space =
      (inT32) ceil(upper - (upper - lower) * textord_words_definite_spread);
  row->max_nonspace =
      (inT32) floor(lower + (upper - lower) * textord_words_definite_spread);
  row->space_threshold = (row->max_nonspace + row->min_space) / 2;
  row->space_size = upper;
  row->kern_size = lower;

  if (testing_on) {
    if (testing_row) {
      tprintf ("GAP STATS\n");
      gap_stats.print(stdout, TRUE);
      tprintf ("SPACE stats\n");
      cluster_stats[2].print(stdout, FALSE);
      tprintf ("NONSPACE stats\n");
      cluster_stats[1].print(stdout, FALSE);
    }
    tprintf ("Row at %g has minspace=%d(%g), max_non=%d(%g)\n",
             row->intercept(), row->min_space, upper,
             row->max_nonspace, lower);
  }
  return 1;
}
/*
 * Print both arguments of the call being decoded: flags as an unsigned
 * decimal followed by ", ", then addr through printaddr().
 */
static void
decode_seccomp_set_mode_strict(unsigned int flags, unsigned long addr)
{
	tprintf("%u, ", flags);
	printaddr(addr);
}
double vp8_calcpsnr_tester(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest, double *ypsnr, double *upsnr, double *vpsnr, double *sq_error, int print_out, int& possible_artifact) { int i, j; int diff; double frame_psnr; double total; double grand_total; unsigned char *src = source->y_buffer; unsigned char *dst = dest->y_buffer; double sub_frame_ypsnr[16][16] = {0}; // break the frame into 16 by 16 double sub_frame_total[16][16] = {0}; // hold 16 by 16 frame total data // try to keep at least 64 pixel segments int width_segments = source->y_width / 64; int height_segments = width_segments; if(height_segments > 16) height_segments = 16; if(width_segments > 16) width_segments = 16; total = 0.0; grand_total = 0.0; // Loop throught the Y plane raw and reconstruction data summing // (square differences) for (i = 0; i < source->y_height; i++) { for (j = 0; j < source->y_width; j++) { diff = (int)(src[j]) - (int)(dst[j]); total += diff * diff; // gather totals for internal segments if(possible_artifact == kRunArtifactDetection) sub_frame_total[i / ((source->y_height / height_segments == 0) ? 1 : ((height_segments - 1) + source->y_height) / height_segments)] [j / ((source->y_width/width_segments ==0 ) ? 
1 : ((width_segments - 1 ) + source->y_width) / width_segments)] += diff * diff; } src += source->y_stride; dst += dest->y_stride; } // Work out Y PSNR *ypsnr = vp8_mse_2_psnr_tester(source->y_height * source->y_width, 255.0, total); double max_psnr_1 = 0; double max_psnr_2 = 0; double max_psnr_3 = 0; double min_psnr = 61; if(possible_artifact == kRunArtifactDetection) { // Work out Y PSNRs for internal segments and find min and max for(i=0; i < height_segments; i++){ for(j=0; j < width_segments; j++){ int sub_frame_height = 1; if(i == (height_segments - 1)) sub_frame_height = source->y_height - ((height_segments-1) * (((height_segments - 1) + source->y_height) / height_segments)); else sub_frame_height = ((height_segments - 1) + source->y_height ) / height_segments; int sub_frame_width = 1; if(j == (width_segments - 1)) sub_frame_width = source->y_width - ((width_segments - 1) * (((width_segments - 1) + source->y_width) / width_segments)); else sub_frame_width = (15 + source->y_width) / width_segments; sub_frame_ypsnr[i][j] = vp8_mse_2_psnr_tester(sub_frame_height * sub_frame_width, 255.0, sub_frame_total[i][j]); // Get min and top three max sub psnrs if(sub_frame_ypsnr[i][j] != 60 && sub_frame_ypsnr[i][j] > max_psnr_1){ max_psnr_2 = max_psnr_1; max_psnr_3 = max_psnr_2; max_psnr_1 = sub_frame_ypsnr[i][j]; } else if(sub_frame_ypsnr[i][j] != 60 && sub_frame_ypsnr[i][j] > max_psnr_2){ max_psnr_3 = max_psnr_2; max_psnr_2 = sub_frame_ypsnr[i][j]; } else if(sub_frame_ypsnr[i][j] != 60 && sub_frame_ypsnr[i][j] > max_psnr_3){ max_psnr_3 = sub_frame_ypsnr[i][j]; } if(sub_frame_ypsnr[i][j] < min_psnr) min_psnr = sub_frame_ypsnr[i][j]; } } // if min sub psnr is not within ~57% of top three psnr // average then flag as potential artifact. 
if((max_psnr_1 + max_psnr_2 + max_psnr_3) / 7 >= min_psnr) possible_artifact = kPossibleArtifactFound; else possible_artifact = kNoArtifactFound; if(possible_artifact == kPossibleArtifactFound && print_out) { tprintf(print_out, "min: %.0f Max: %.0f %.0f %.0f", min_psnr, max_psnr_1, max_psnr_2, max_psnr_3); for(i=0; i < height_segments; i++){ tprintf(print_out, "\n"); for(int z = 0; z < (width_segments*3)+1; z++){ tprintf(print_out, "-"); } tprintf(print_out, "\n|"); for(j=0; j < width_segments; j++){ tprintf(print_out, "%.0f|",sub_frame_ypsnr[i][j]); } } tprintf(print_out, "\n"); for(int z = 0; z < (width_segments*3)+1; z++){ tprintf(print_out, "-"); } tprintf(print_out, "\n"); } } grand_total += total; total = 0; // Loop through the U plane src = source->u_buffer; dst = dest->u_buffer; for (i = 0; i < source->uv_height; i++) { for (j = 0; j < source->uv_width; j++) { diff = (int)(src[j]) - (int)(dst[j]); total += diff * diff; } src += source->uv_stride; dst += dest->uv_stride; } // Work out U PSNR *upsnr = vp8_mse_2_psnr_tester(source->uv_height * source->uv_width, 255.0, total); grand_total += total; total = 0; // V PSNR src = source->v_buffer; dst = dest->v_buffer; for (i = 0; i < source->uv_height; i++) { for (j = 0; j < source->uv_width; j++) { diff = (int)(src[j]) - (int)(dst[j]); total += diff * diff; } src += source->uv_stride; dst += dest->uv_stride; } // Work out UV PSNR *vpsnr = vp8_mse_2_psnr_tester(source->uv_height * source->uv_width, 255.0, total); grand_total += total; total = 0; // Work out total PSNR frame_psnr = vp8_mse_2_psnr_tester(source->y_height * source->y_width * 3 / 2 , 255.0, grand_total); *sq_error = 1.0 * grand_total; return frame_psnr; }
/** * @name cube_recognize * * Call cube on the current word, and write the result to word. * Sets up a fake result and returns false if something goes wrong. */ bool Tesseract::cube_recognize(CubeObject *cube_obj, BLOCK* block, WERD_RES *word) { // Run cube WordAltList *cube_alt_list = cube_obj->RecognizeWord(); if (!cube_alt_list || cube_alt_list->AltCount() <= 0) { if (cube_debug_level > 0) { tprintf("Cube returned nothing for word at:"); word->word->bounding_box().print(); } word->SetupFake(unicharset); return false; } // Get cube's best result and its probability, mapped to tesseract's // certainty range char_32 *cube_best_32 = cube_alt_list->Alt(0); double cube_prob = CubeUtils::Cost2Prob(cube_alt_list->AltCost(0)); float cube_certainty = convert_prob_to_tess_certainty(cube_prob); string cube_best_str; CubeUtils::UTF32ToUTF8(cube_best_32, &cube_best_str); // Retrieve Cube's character bounding boxes and CharSamples, // corresponding to the most recent call to RecognizeWord(). Boxa *char_boxes = NULL; CharSamp **char_samples = NULL;; int num_chars; if (!extract_cube_state(cube_obj, &num_chars, &char_boxes, &char_samples) && cube_debug_level > 0) { tprintf("Cube WARNING (Tesseract::cube_recognize): Cannot extract " "cube state.\n"); word->SetupFake(unicharset); return false; } // Convert cube's character bounding boxes to a BoxWord. BoxWord cube_box_word; TBOX tess_word_box = word->word->bounding_box(); if (word->denorm.block() != NULL) tess_word_box.rotate(word->denorm.block()->re_rotation()); bool box_word_success = create_cube_box_word(char_boxes, num_chars, tess_word_box, &cube_box_word); boxaDestroy(&char_boxes); if (!box_word_success) { if (cube_debug_level > 0) { tprintf("Cube WARNING (Tesseract::cube_recognize): Could not " "create cube BoxWord\n"); } word->SetupFake(unicharset); return false; } // Fill tesseract result's fields with cube results fill_werd_res(cube_box_word, cube_best_str.c_str(), word); // Create cube's best choice. 
BLOB_CHOICE** choices = new BLOB_CHOICE*[num_chars]; for (int i = 0; i < num_chars; ++i) { UNICHAR_ID uch_id = cube_cntxt_->CharacterSet()->UnicharID(char_samples[i]->StrLabel()); choices[i] = new BLOB_CHOICE(uch_id, -cube_certainty, cube_certainty, -1, 0.0f, 0.0f, 0.0f, BCC_STATIC_CLASSIFIER); } word->FakeClassifyWord(num_chars, choices); // within a word, cube recognizes the word in reading order. word->best_choice->set_unichars_in_script_order(true); delete [] choices; delete [] char_samples; // Some sanity checks ASSERT_HOST(word->best_choice->length() == word->reject_map.length()); if (cube_debug_level || classify_debug_level) { tprintf("Cube result: %s r=%g, c=%g\n", word->best_choice->unichar_string().string(), word->best_choice->rating(), word->best_choice->certainty()); } return true; }
static int parse_setup_cpu_list(void) { struct thread_data *td; char *str0, *str; int t; if (!g->p.cpu_list_str) return 0; dprintf("g->p.nr_tasks: %d\n", g->p.nr_tasks); str0 = str = strdup(g->p.cpu_list_str); t = 0; BUG_ON(!str); tprintf("# binding tasks to CPUs:\n"); tprintf("# "); while (true) { int bind_cpu, bind_cpu_0, bind_cpu_1; char *tok, *tok_end, *tok_step, *tok_len, *tok_mul; int bind_len; int step; int mul; tok = strsep(&str, ","); if (!tok) break; tok_end = strstr(tok, "-"); dprintf("\ntoken: {%s}, end: {%s}\n", tok, tok_end); if (!tok_end) { /* Single CPU specified: */ bind_cpu_0 = bind_cpu_1 = atol(tok); } else { /* CPU range specified (for example: "5-11"): */ bind_cpu_0 = atol(tok); bind_cpu_1 = atol(tok_end + 1); } step = 1; tok_step = strstr(tok, "#"); if (tok_step) { step = atol(tok_step + 1); BUG_ON(step <= 0 || step >= g->p.nr_cpus); } /* * Mask length. * Eg: "--cpus 8_4-16#4" means: '--cpus 8_4,12_4,16_4', * where the _4 means the next 4 CPUs are allowed. */ bind_len = 1; tok_len = strstr(tok, "_"); if (tok_len) { bind_len = atol(tok_len + 1); BUG_ON(bind_len <= 0 || bind_len > g->p.nr_cpus); } /* Multiplicator shortcut, "0x8" is a shortcut for: "0,0,0,0,0,0,0,0" */ mul = 1; tok_mul = strstr(tok, "x"); if (tok_mul) { mul = atol(tok_mul + 1); BUG_ON(mul <= 0); } dprintf("CPUs: %d_%d-%d#%dx%d\n", bind_cpu_0, bind_len, bind_cpu_1, step, mul); if (bind_cpu_0 >= g->p.nr_cpus || bind_cpu_1 >= g->p.nr_cpus) { printf("\nTest not applicable, system has only %d CPUs.\n", g->p.nr_cpus); return -1; } BUG_ON(bind_cpu_0 < 0 || bind_cpu_1 < 0); BUG_ON(bind_cpu_0 > bind_cpu_1); for (bind_cpu = bind_cpu_0; bind_cpu <= bind_cpu_1; bind_cpu += step) { int i; for (i = 0; i < mul; i++) { int cpu; if (t >= g->p.nr_tasks) { printf("\n# NOTE: ignoring bind CPUs starting at CPU#%d\n #", bind_cpu); goto out; } td = g->threads + t; if (t) tprintf(","); if (bind_len > 1) { tprintf("%2d/%d", bind_cpu, bind_len); } else { tprintf("%2d", bind_cpu); } 
CPU_ZERO(&td->bind_cpumask); for (cpu = bind_cpu; cpu < bind_cpu+bind_len; cpu++) { BUG_ON(cpu < 0 || cpu >= g->p.nr_cpus); CPU_SET(cpu, &td->bind_cpumask); } t++; } } } out: tprintf("\n"); if (t < g->p.nr_tasks) printf("# NOTE: %d tasks bound, %d tasks unbound\n", t, g->p.nr_tasks - t); free(str0); return 0; }
/*
 * Print *tv as "{seconds, microseconds}".  Both fields are converted to
 * unsigned long before printing.  tcp is not used.
 */
static void
tprint_timeval(struct tcb *tcp, const struct timeval *tv)
{
	const unsigned long sec = (unsigned long) tv->tv_sec;
	const unsigned long usec = (unsigned long) tv->tv_usec;

	tprintf("{%lu, %lu}", sec, usec);
}
// Returns false if a unicharset file for the specified language was not found // or was invalid. // This function initializes TessdataManager. After TessdataManager is // no longer needed, TessdataManager::End() should be called. // // This function sets tessedit_oem_mode to the given OcrEngineMode oem, unless // it is OEM_DEFAULT, in which case the value of the variable will be obtained // from the language-specific config file (stored in [lang].traineddata), from // the config files specified on the command line or left as the default // OEM_TESSERACT_ONLY if none of the configs specify this variable. bool Tesseract::init_tesseract_lang_data( const char *arg0, const char *textbase, const char *language, OcrEngineMode oem, char **configs, int configs_size, const GenericVector<STRING> *vars_vec, const GenericVector<STRING> *vars_values, bool set_only_init_params) { // Set the basename, compute the data directory. #if _BUILDASDLL imagebasename = textbase; /*name of image */ STRING dll_module_name; #ifdef __MSW32__ dll_module_name = tessedit_module_name; #endif if (getpath(arg0, dll_module_name, datadir) < 0) return false; #else main_setup(arg0, textbase); #endif // Set the language data path prefix lang = language != NULL ? language : "eng"; language_data_path_prefix = datadir; language_data_path_prefix += lang; language_data_path_prefix += "."; // Initialize TessdataManager. STRING tessdata_path = language_data_path_prefix + kTrainedDataSuffix; if (!tessdata_manager.Init(tessdata_path.string(), tessdata_manager_debug_level)) { return false; } // If a language specific config file (lang.config) exists, load it in. if (tessdata_manager.SeekToStart(TESSDATA_LANG_CONFIG)) { ParamUtils::ReadParamsFromFp( tessdata_manager.GetDataFilePtr(), tessdata_manager.GetEndOffset(TESSDATA_LANG_CONFIG), false, this->params()); if (tessdata_manager_debug_level) { tprintf("Loaded language config file\n"); } } // Load tesseract variables from config files. 
This is done after loading // language-specific variables from [lang].traineddata file, so that custom // config files can override values in [lang].traineddata file. for (int i = 0; i < configs_size; ++i) { read_config_file(configs[i], set_only_init_params); } // Set params specified in vars_vec (done after setting params from config // files, so that params in vars_vec can override those from files). if (vars_vec != NULL && vars_values != NULL) { for (int i = 0; i < vars_vec->size(); ++i) { if (!ParamUtils::SetParam((*vars_vec)[i].string(), (*vars_values)[i].string(), set_only_init_params, this->params())) { tprintf("Error setting param %s\n", (*vars_vec)[i].string()); exit(1); } } } if (((STRING &)tessedit_write_params_to_file).length() > 0) { FILE *params_file = fopen(tessedit_write_params_to_file.string(), "wb"); if (params_file != NULL) { ParamUtils::PrintParams(params_file, this->params()); fclose(params_file); if (tessdata_manager_debug_level > 0) { tprintf("Wrote parameters to %s\n", tessedit_write_params_to_file.string()); } } else { tprintf("Failed to open %s for writing params.\n", tessedit_write_params_to_file.string()); } } // Determine which ocr engine(s) should be loaded and used for recognition. 
if (oem != OEM_DEFAULT) tessedit_ocr_engine_mode.set_value(oem); if (tessdata_manager_debug_level) { tprintf("Loading Tesseract/Cube with tessedit_ocr_engine_mode %d\n", static_cast<int>(tessedit_ocr_engine_mode)); } // Load the unicharset if (!tessdata_manager.SeekToStart(TESSDATA_UNICHARSET) || !unicharset.load_from_file(tessdata_manager.GetDataFilePtr())) { return false; } if (unicharset.size() > MAX_NUM_CLASSES) { tprintf("Error: Size of unicharset is greater than MAX_NUM_CLASSES\n"); return false; } right_to_left_ = unicharset.any_right_to_left(); if (tessdata_manager_debug_level) tprintf("Loaded unicharset\n"); if (!tessedit_ambigs_training && tessdata_manager.SeekToStart(TESSDATA_AMBIGS)) { unichar_ambigs.LoadUnicharAmbigs( tessdata_manager.GetDataFilePtr(), tessdata_manager.GetEndOffset(TESSDATA_AMBIGS), ambigs_debug_level, use_ambigs_for_adaption, &unicharset); if (tessdata_manager_debug_level) tprintf("Loaded ambigs\n"); } // Load Cube objects if necessary. if (tessedit_ocr_engine_mode == OEM_CUBE_ONLY) { ASSERT_HOST(init_cube_objects(false, &tessdata_manager)); if (tessdata_manager_debug_level) tprintf("Loaded Cube w/out combiner\n"); } else if (tessedit_ocr_engine_mode == OEM_TESSERACT_CUBE_COMBINED) { ASSERT_HOST(init_cube_objects(true, &tessdata_manager)); if (tessdata_manager_debug_level) tprintf("Loaded Cube with combiner\n"); } return true; }
static void dump_enemy() { /* .scroll_line = 1, .weapon = EW_GUN, .x = 100, .y = 100, .route = { [3] = { .shape = ES_JEEP, .dir = DIR16_WNW, .start_step = 0, .vel = 2, }, [4] = { .shape = ES_SOLDIER1_RIGHT, .dir = DIR16_NNW, .start_step = 128, .vel = 2, }, }, */ #define tprintf(tabs, args...) do { int __tabs; for(__tabs = 0; __tabs < tabs; __tabs++) printf("\t"); printf(args); } while(0) printf("XXX screen %d\n", map_spawn_screen_index); tprintf(2, ".scroll_line = %d,\n", tag_enemy_spawnline); tprintf(2, ".weapon = %s,\n", enemy_weapon_string_lut[tag_enemy.weapon]); tprintf(2, ".x = %d,\n", tag_enemy.x); tprintf(2, ".y = %d,\n", tag_enemy_y); tprintf(2, ".route = {\n"); int i; for(i = 0; i < ENEMY_MAX_ROUTE && tag_enemy.route[i].shape != ES_INVALID; i++) { tprintf(3, "[%d] = {\n", i); tprintf(4, ".shape = %s,\n", enemy_shape_string_lut[tag_enemy.route[i].shape]); tprintf(4, ".dir = %s,\n", dir16_string_lut[tag_enemy.route[i].dir]); tprintf(4, ".start_step = %d,\n", tag_enemy.route[i].start_step); tprintf(4, ".vel = %d,\n", tag_enemy.route[i].vel); tprintf(3, "},\n"); } tprintf(2, "},\n"); tprintf(2, ".shots = {\n"); for(i = 0; i < ENEMY_MAX_SHOT; i++) tprintf(3, "[%d] = %d,\n", i, tag_enemy.shots[i]); tprintf(2, "},\n"); }
void WERD::print() { tprintf("Blanks= %d\n", blanks); bounding_box().print(); tprintf("Flags = %d = 0%o\n", flags.val, flags.val); tprintf(" W_SEGMENTED = %s\n", flags.bit(W_SEGMENTED) ? "TRUE" : "FALSE "); tprintf(" W_ITALIC = %s\n", flags.bit(W_ITALIC) ? "TRUE" : "FALSE "); tprintf(" W_BOL = %s\n", flags.bit(W_BOL) ? "TRUE" : "FALSE "); tprintf(" W_EOL = %s\n", flags.bit(W_EOL) ? "TRUE" : "FALSE "); tprintf(" W_NORMALIZED = %s\n", flags.bit(W_NORMALIZED) ? "TRUE" : "FALSE "); tprintf(" W_SCRIPT_HAS_XHEIGHT = %s\n", flags.bit(W_SCRIPT_HAS_XHEIGHT) ? "TRUE" : "FALSE "); tprintf(" W_SCRIPT_IS_LATIN = %s\n", flags.bit(W_SCRIPT_IS_LATIN) ? "TRUE" : "FALSE "); tprintf(" W_DONT_CHOP = %s\n", flags.bit(W_DONT_CHOP) ? "TRUE" : "FALSE "); tprintf(" W_REP_CHAR = %s\n", flags.bit(W_REP_CHAR) ? "TRUE" : "FALSE "); tprintf(" W_FUZZY_SP = %s\n", flags.bit(W_FUZZY_SP) ? "TRUE" : "FALSE "); tprintf(" W_FUZZY_NON = %s\n", flags.bit(W_FUZZY_NON) ? "TRUE" : "FALSE "); tprintf("Correct= %s\n", correct.string()); tprintf("Rejected cblob count = %d\n", rej_cblobs.length()); tprintf("Script = %d\n", script_id_); }
// Creates a report of the error rate. The report_level controls the detail // that is reported to stderr via tprintf: // 0 -> no output. // >=1 -> bottom-line error rate. // >=3 -> font-level error rate. // boosting_mode determines the return value. It selects which (un-weighted) // error rate to return. // The fontinfo_table from MasterTrainer provides the names of fonts. // The it determines the current subset of the training samples. // If not NULL, the top-choice unichar error rate is saved in unichar_error. // If not NULL, the report string is saved in fonts_report. // (Ignoring report_level). double ErrorCounter::ReportErrors(int report_level, CountTypes boosting_mode, const FontInfoTable& fontinfo_table, const SampleIterator& it, double* unichar_error, STRING* fonts_report) { // Compute totals over all the fonts and report individual font results // when required. Counts totals; int fontsize = font_counts_.size(); for (int f = 0; f < fontsize; ++f) { // Accumulate counts over fonts. totals += font_counts_[f]; STRING font_report; if (ReportString(false, font_counts_[f], &font_report)) { if (fonts_report != NULL) { *fonts_report += fontinfo_table.get(f).name; *fonts_report += ": "; *fonts_report += font_report; *fonts_report += "\n"; } if (report_level > 2) { // Report individual font error rates. tprintf("%s: %s\n", fontinfo_table.get(f).name, font_report.string()); } } } // Report the totals. STRING total_report; bool any_results = ReportString(true, totals, &total_report); if (fonts_report != NULL && fonts_report->length() == 0) { // Make sure we return something even if there were no samples. *fonts_report = "NoSamplesFound: "; *fonts_report += total_report; *fonts_report += "\n"; } if (report_level > 0) { // Report the totals. STRING total_report; if (any_results) { tprintf("TOTAL Scaled Err=%.4g%%, %s\n", scaled_error_ * 100.0, total_report.string()); } // Report the worst substitution error only for now. 
if (totals.n[CT_UNICHAR_TOP1_ERR] > 0) { int charsetsize = unicharset_.size(); int worst_uni_id = 0; int worst_result_id = 0; int worst_err = 0; for (int u = 0; u < charsetsize; ++u) { for (int v = 0; v < charsetsize; ++v) { if (unichar_counts_(u, v) > worst_err) { worst_err = unichar_counts_(u, v); worst_uni_id = u; worst_result_id = v; } } } if (worst_err > 0) { tprintf("Worst error = %d:%s -> %s with %d/%d=%.2f%% errors\n", worst_uni_id, unicharset_.id_to_unichar(worst_uni_id), unicharset_.id_to_unichar(worst_result_id), worst_err, totals.n[CT_UNICHAR_TOP1_ERR], 100.0 * worst_err / totals.n[CT_UNICHAR_TOP1_ERR]); } } tprintf("Multi-unichar shape use:\n"); for (int u = 0; u < multi_unichar_counts_.size(); ++u) { if (multi_unichar_counts_[u] > 0) { tprintf("%d multiple answers for unichar: %s\n", multi_unichar_counts_[u], unicharset_.id_to_unichar(u)); } } tprintf("OK Score histogram:\n"); ok_score_hist_.print(); tprintf("ERROR Score histogram:\n"); bad_score_hist_.print(); } double rates[CT_SIZE]; if (!ComputeRates(totals, rates)) return 0.0; // Set output values if asked for. if (unichar_error != NULL) *unichar_error = rates[CT_UNICHAR_TOP1_ERR]; return rates[boosting_mode]; }
/** * Segment the page according to the current value of tessedit_pageseg_mode. * pix_binary_ is used as the source image and should not be NULL. * On return the blocks list owns all the constructed page layout. */ int Tesseract::SegmentPage(const STRING* input_file, BLOCK_LIST* blocks, Tesseract* osd_tess, OSResults* osr) { ASSERT_HOST(pix_binary_ != NULL); int width = pixGetWidth(pix_binary_); int height = pixGetHeight(pix_binary_); // Get page segmentation mode. PageSegMode pageseg_mode = static_cast<PageSegMode>( static_cast<int>(tessedit_pageseg_mode)); // If a UNLV zone file can be found, use that instead of segmentation. if (!PSM_COL_FIND_ENABLED(pageseg_mode) && input_file != NULL && input_file->length() > 0) { STRING name = *input_file; const char* lastdot = strrchr(name.string(), '.'); if (lastdot != NULL) name[lastdot - name.string()] = '\0'; read_unlv_file(name, width, height, blocks); } if (blocks->empty()) { // No UNLV file present. Work according to the PageSegMode. // First make a single block covering the whole image. BLOCK_IT block_it(blocks); BLOCK* block = new BLOCK("", TRUE, 0, 0, 0, 0, width, height); block->set_right_to_left(right_to_left()); block_it.add_to_end(block); } else { // UNLV file present. Use PSM_SINGLE_BLOCK. pageseg_mode = PSM_SINGLE_BLOCK; } // The diacritic_blobs holds noise blobs that may be diacritics. They // are separated out on areas of the image that seem noisy and short-circuit // the layout process, going straight from the initial partition creation // right through to after word segmentation, where they are added to the // rej_cblobs list of the most appropriate word. From there classification // will determine whether they are used. BLOBNBOX_LIST diacritic_blobs; int auto_page_seg_ret_val = 0; TO_BLOCK_LIST to_blocks; if (PSM_OSD_ENABLED(pageseg_mode) || PSM_BLOCK_FIND_ENABLED(pageseg_mode) || PSM_SPARSE(pageseg_mode)) { auto_page_seg_ret_val = AutoPageSeg( pageseg_mode, blocks, &to_blocks, enable_noise_removal ? 
&diacritic_blobs : NULL, osd_tess, osr); if (pageseg_mode == PSM_OSD_ONLY) return auto_page_seg_ret_val; // To create blobs from the image region bounds uncomment this line: // to_blocks.clear(); // Uncomment to go back to the old mode. } else { deskew_ = FCOORD(1.0f, 0.0f); reskew_ = FCOORD(1.0f, 0.0f); if (pageseg_mode == PSM_CIRCLE_WORD) { Pix* pixcleaned = RemoveEnclosingCircle(pix_binary_); if (pixcleaned != NULL) { pixDestroy(&pix_binary_); pix_binary_ = pixcleaned; } } } if (auto_page_seg_ret_val < 0) { return -1; } if (blocks->empty()) { if (textord_debug_tabfind) tprintf("Empty page\n"); return 0; // AutoPageSeg found an empty page. } bool splitting = pageseg_devanagari_split_strategy != ShiroRekhaSplitter::NO_SPLIT; bool cjk_mode = textord_use_cjk_fp_model; textord_.TextordPage(pageseg_mode, reskew_, width, height, pix_binary_, pix_thresholds_, pix_grey_, splitting || cjk_mode, &diacritic_blobs, blocks, &to_blocks); return auto_page_seg_ret_val; }