//
// German-specific context pass over the word that starts at cell C.
//
//  - If the best version of the first cell is ss_deaf_sound, its
//    probability is lowered by GER_DOUBLE_S_MONUS (not below MIN_PROB)
//    and the versions are re-sorted.
//  - For words longer than one letter: whenever a cell has 'c' as its
//    first or second version with probability >= ADM_PROB, and the next
//    cell has both 'h' and 'b' with 'b' ranked above 'h' by less than
//    REFUSE_BOUND, the probabilities of 'h' and 'b' are exchanged.
//
// Returns CONTINUE_ACTION only for a one-letter word whose best version
// is ss_deaf_sound; NO_ACTION in every other case.
//
int16_t german_context_process( cell* C )
{
    cell    *cur;
    cell    *succ;
    uchar    idx, saved_prob;
    int16_t  pos_c, pos_h, pos_b;

    if ( n_ltr < 1 )
        return NO_ACTION;

    if ( C->vers[0].let == ss_deaf_sound )
    {
        C->vers[0].prob = MAX( MIN_PROB, C->vers[0].prob - GER_DOUBLE_S_MONUS );
        sort_vers( C );
        if ( n_ltr == 1 )
            return CONTINUE_ACTION;
    }

    // For a one-letter word the loop body is never entered.
    for ( idx = 0, cur = C; idx < n_ltr - 1; cur = cur->nextl, idx++ )
    {
        pos_c = get_nvers( cur, 'c' );
        if ( ( pos_c < 0 ) || ( pos_c > 1 ) || ( cur->vers[pos_c].prob < ADM_PROB ) )
            continue;

        succ = cur->nextl;

        pos_h = get_nvers( succ, 'h' );
        if ( pos_h < 0 )
            continue;

        pos_b = get_nvers( succ, 'b' );
        if ( pos_b < 0 )
            continue;

        // Only act when 'b' is ranked above 'h' and the gap is small.
        if ( pos_h <= pos_b )
            continue;
        if ( succ->vers[pos_b].prob - succ->vers[pos_h].prob >= REFUSE_BOUND )
            continue;

        saved_prob             = succ->vers[pos_h].prob;
        succ->vers[pos_h].prob = succ->vers[pos_b].prob;
        succ->vers[pos_b].prob = saved_prob;
        sort_vers( succ );
    }

    return NO_ACTION;
}
//
// Appends 'l', 'I' and '1' (each with the probability of the current best
// version) to the version list of cell c, zeroing the probability of any
// earlier occurrences of those letters.
//
// Nothing is done (return 0) when:
//  - the bottom of the cell reaches bl.b3 plus a small tolerance,
//  - the cell is neither c_f_let nor c_f_bad, or it is c_f_dust (AL 940318),
//  - the neighbouring letters' combined font has c_fp_ser without c_fp_gelv.
//
// If the cell has an envelope and is_slash() rejects it, the best version
// is additionally lowered by 50 (not below 2).  Returns 1 after the
// version list has been extended and re-sorted.
//
static int16_t final_slash_l(cell *c)
{
    int16_t k;
    int16_t slot;
    uchar   nb_font;

    get_b_lines(c, &bl);
    if (c->row + c->h >= bl.b3 + MIN(3, MAX(1, c->h / 10)))
        return 0;
    if (!(c->flg & (c_f_let | c_f_bad)))
        return 0;
    if (c->flg & c_f_dust)
        return 0;

    nb_font = c->prevl->font | c->nextl->font;
    if ((nb_font & c_fp_ser) && !(nb_font & c_fp_gelv))
        return 0;

    // Leave room for three new versions plus the terminator.
    if (c->nvers > VERS_IN_CELL - 4)
        c->nvers = VERS_IN_CELL - 4;
    slot = c->nvers;

    for (k = 0; k < 3; k++)
    {
        c->vers[slot + k].let  = "lI1"[k];
        c->vers[slot + k].prob = c->vers[0].prob;
    }

    // Kill the old copies of these letters (the new ones lie past nvers).
    for (k = 0; k < c->nvers; k++)
    {
        if (memchr("lI1", c->vers[k].let, 3))
            c->vers[k].prob = 0;
    }

    c->nvers += 3;
    c->vers[c->nvers].let = 0;

    if (c->env != NULL && !is_slash(c))
        c->vers[0].prob = MAX(2, c->vers[0].prob - 50);

    sort_vers(c);
    return 1;
}
//
// One-letter-word helper: favours 'I'.
//
// bon_to_dig() is applied first.  Then, if 'I' is among the versions of NC,
// every other version is lowered by MONUS (not below 10) and the list is
// re-sorted.
//
// Returns 1 if 'I' was present (penalties applied), 0 otherwise.
//
static int16_t LiliI(cell *NC)
{
    int16_t  k;
    int16_t  has_I = 0;
    version *alt;

    bon_to_dig(NC);

    for (k = 0; k < NC->nvers; k++)
    {
        if (NC->vers[k].let == 'I')
        {
            has_I = 1;
            break;
        }
    }
    if (!has_I)
        return 0;

    for (k = 0; k < NC->nvers; k++)
    {
        alt = &NC->vers[k];
        if (alt->let != 'I')
            alt->prob = MAX(alt->prob - MONUS, 10);
    }

    sort_vers(NC);
    return 1;
}
void AKClearVers() { cell * Count; char Let; //uchar Prob; int i; // проверяем цепочку на случай зацикливания или обрыва AKCheckChain(); for (Count = cell_f()->next; Count != cell_l(); Count=Count->next) { for ( i = 0; /*Count->vers[i].let != 0x0 &&*/ i < VERS_IN_CELL/*Count->nvers*/ ; i++ ) { Let = Count->vers[i].let; if ( !proplet(Let) /*&& Let != bad_char*/ ) { // если символ не от сюда Count->vers[i].let = 0; //bad_char; Count->vers[i].prob = 0; } } sort_vers(Count); /* if (Count->vers[0].let == bad_char) { Count = del_cell(Count); } */ } }
//
// Recognises cell c with EVNRecog_lp() and stores the answers as the
// cell's versions (each with probability 254).
//
// Returns 0 immediately when the cell has c_cg_comp set or has no
// envelope; otherwise returns c->nvers after v2_pidx_crit() and
// sort_vers() have been applied.
//
int16_t short_recog_cell (cell *c)
{
    int16_t   n, k;
    uchar     res[20];
    CCOM_comp cc;

    // Originally the two tests were joined with &&, but without c->env
    // there is nothing to recognise.
    if ( !c->env || (c->cg_flag & c_cg_comp) )
        return 0;

    comp_from_kit(c);

    // Fill the CCOM_comp structure with the data EVNRecog_lp needs.
    cc.w     = c->env->w;
    cc.h     = c->env->h;
    cc.rw    = c->env->rw;
    cc.nl    = c->env->nl;
    cc.begs  = c->env->begs;
    cc.ends  = c->env->ends;
    cc.scale = c->env->scale;

    n = (int16_t)EVNRecog_lp(&cc, lpool, lpool_lth, &res[0]);

    // Copy the results EVNRecog_lp left in CCOM_comp back to the envelope.
    c->env->type   = cc.type;
    c->env->cs     = cc.cs;
    c->env->pidx   = cc.pidx;
    c->env->reasno = cc.reasno;

    if ( !n )
    {
        // Nothing recognised: mark the cell as bad.
        c->vers[0].let = 176;
        c->vers[1].let = 0;
        c->flg = c_f_bad;
    }
    else
    {
        for (k = 0; k < n; k++)
        {
            c->vers[k].let  = res[k];
            c->vers[k].prob = 254;
        }
        c->vers[n].let = 0;
        c->flg        = c_f_let;
        c->recsource |= c_rs_ev;
        c->history   |= c_rs_ev;
    }
    c->nvers = n;

    v2_pidx_crit(c);
    sort_vers(c);
    return c->nvers;
}
//
// Computes penalties for the 'u' and 'n' versions of cell GC from the
// shape of the glued component GL and applies them.
//
//  GC      - cell whose 'u' / 'n' versions are penalised
//  GL      - glue description of the component(s)
//  cod_let - bit mask (Let_n, Let_u) selecting which letters to study
//
// Returns without any penalty for a single-cell component marked italic,
// and for a multi-cell component when is_italic() is false.
//
void discrim_un( cell *GC, s_glue *GL, int16_t cod_let )
{
    int16_t incline;

    memset( l_tab_shift, 0, sizeof (l_tab_shift) );
    memset( r_tab_shift, 0, sizeof (r_tab_shift) );
    pen_u   = 0;
    pen_n   = 0;
    un_code = (uchar)cod_let;

    comp_row_col( GL );            // coordinates of the composed box
    incline = letincl( GC );       // check italic incline of the component

    if ( (GL->ncell == 1) && ((GC->font | GC->font_new) & c_fp_it) )
        return;

    make_straight_abris( GL, l_tab_shift, r_tab_shift );

    if ( GL->ncell > 1 )
    {
        if ( !is_italic() )
            return;
        compare_corners_mass();
    }
    else
    {
        if ( cod_let & Let_n )
        {
            // study right upper corner against arounding
            pen_n  = n_around_ur_corner( GL );
            pen_n += left_right_dist();
        }
        if ( cod_let & Let_u )
        {
            // study left bottom corner against arounding
            pen_u = u_around_bl_corner( GL );
        }
    }

    if ( pen_u )
    {
        cell_bonus_let( GC, 'u', (int16_t)(-(pen_u)) );
        sort_vers( GC );
    }
    if ( pen_n )
    {
        cell_bonus_let( GC, 'n', (int16_t)(-(pen_n)) );
        sort_vers( GC );
    }
}
//
// Makes sure the counterpart of a 'b'/'h' answer stays close to the top.
//
// If 'b' or 'h' already occurs among the non-first versions, its
// probability is raised to at least (best probability - 10) and the list
// is re-sorted.  Otherwise the counterpart letter ('h' when the best
// version is 'b', 'b' in every other case) is appended with probability
// (best probability - 10), replacing the last version when the list is
// already full.
//
// When the best probability is below 10 nothing is appended: the
// difference would be negative and, stored into a byte-sized probability,
// would wrap around to a near-maximal value.  final_eg() and final_OQ()
// guard their appended alternative in the same way.
//
static void final_bh(cell *c)
{
    enum { BH_GAP = 10 };
    int16_t i;

    for (i = 1; i < c->nvers; i++)
    {
        if (memchr("bh", c->vers[i].let, 2))
        {
            c->vers[i].prob = MAX(c->vers[i].prob, c->vers[0].prob - BH_GAP);
            sort_vers(c);
            return;
        }
    }

    // Too weak a leader to derive a meaningful alternative from.
    if (c->vers[0].prob < BH_GAP)
        return;

    // Keep one slot free for the terminator.
    if (c->nvers == VERS_IN_CELL - 1)
        c->nvers--;

    c->vers[c->nvers].let  = (c->vers[0].let == 'b') ? 'h' : 'b';
    c->vers[c->nvers].prob = c->vers[0].prob - BH_GAP;
    c->nvers++;
    c->vers[c->nvers].let = 0;
    sort_vers(c);
}
//
// Appends 'g' as an alternative with probability (best probability - DEG).
// Does nothing when the best probability does not exceed DEG.  When the
// version list is full, the last version is overwritten.
//
static void final_eg(cell *c)
{
    int16_t slot;

    if (c->vers[0].prob > DEG)
    {
        slot = c->nvers;
        if (slot == VERS_IN_CELL - 1)
            slot--;                       // sacrifice the last version

        c->vers[slot].let  = 'g';
        c->vers[slot].prob = c->vers[0].prob - DEG;
        slot++;
        c->vers[slot].let = 0;            // terminator
        c->nvers = slot;

        sort_vers(c);
    }
}
//
// Adds version *wv to cell bc and re-sorts the versions.
//
// When the list already holds VERS_IN_CELL-1 versions, the last one is
// replaced, but only if the new version has a strictly higher
// probability; otherwise the call is a no-op.
//
void add_vers(cell *bc, version *wv)
{
    int16_t slot = bc->nvers;

    if (slot != VERS_IN_CELL - 1)
    {
        // There is room: append.
        bc->vers[slot].let  = wv->let;
        bc->vers[slot].prob = wv->prob;
        bc->nvers++;
        sort_vers(bc);
        return;
    }

    // Full list: compete with the last version.
    slot--;
    if (wv->prob <= bc->vers[slot].prob)
        return;

    bc->vers[slot].let  = wv->let;
    bc->vers[slot].prob = wv->prob;
    sort_vers(bc);
}
//
// Appends 'Q' as an alternative with probability (best probability - DOQ).
// Does nothing when the best probability does not exceed DOQ.  When the
// version list is full, the last version is overwritten.
//
static void final_OQ(cell *c)
{
    int16_t slot;

    if (c->vers[0].prob > DOQ)
    {
        slot = c->nvers;
        if (slot == VERS_IN_CELL - 1)
            slot--;                       // sacrifice the last version

        c->vers[slot].let  = 'Q';
        c->vers[slot].prob = c->vers[0].prob - DOQ;
        slot++;
        c->vers[slot].let = 0;            // terminator
        c->nvers = slot;

        sort_vers(c);
    }
}
//
// Swedish-specific context pass over the word that starts at cell C.
//
//  - Single letter not followed by a '.' punctuation cell: LiliI() is
//    applied and CONTINUE_ACTION is returned.
//  - Word longer than two letters: if the first three cells have 'l', 'j'
//    and 'u' respectively, each with probability >= ADM_PROB, each of
//    those versions receives SWED_LJU_BONUS and swed_lju_flag is set.
//
// swed_lju_flag is reset to FALSE on entry.  Returns NO_ACTION in every
// case except the single-letter one.
//
int16_t swedish_context_process ( cell* C )
{
    cell    *second;
    cell    *third;
    int16_t  pos_l, pos_j, pos_u;

    swed_lju_flag = FALSE;

    /** single letter case: **/
    if ( n_ltr == 1 &&
         ( !(C->next->flg & c_f_punct) || C->next->vers[0].let != '.' ) )
    {
        LiliI( C );
        return CONTINUE_ACTION;
    }

    if ( n_ltr <= 2 )
        return NO_ACTION;

    // Process 'lju' at start of word
    pos_l = get_nvers( C, 'l' );
    if ( pos_l < 0 || C->vers[pos_l].prob < ADM_PROB )
        return NO_ACTION;

    second = C->nextl;
    pos_j = get_nvers( second, 'j' );
    if ( pos_j < 0 || second->vers[pos_j].prob < ADM_PROB )
        return NO_ACTION;

    third = second->nextl;
    pos_u = get_nvers( third, 'u' );
    if ( pos_u < 0 || third->vers[pos_u].prob < ADM_PROB )
        return NO_ACTION;

    cell_bonus( C, &(C->vers[pos_l]), SWED_LJU_BONUS );
    sort_vers( C );
    cell_bonus( second, &(second->vers[pos_j]), SWED_LJU_BONUS );
    sort_vers( second );
    cell_bonus( third, &(third->vers[pos_u]), SWED_LJU_BONUS );
    sort_vers( third );

    swed_lju_flag = TRUE;
    return NO_ACTION;
}
//
// Favours "ll" in a two-letter word that follows an apostrophe
// (example: he'll).
//
// ON ENTRY: NC - address of the cell for the first letter.
//
// If both NC and NC->nextl have an 'l' version, every non-'l' version in
// both cells is lowered by MONUS (not below 10) and both cells are
// re-sorted; TRUE is returned.  Otherwise nothing changes and FALSE is
// returned.
//
static Bool apostr_ll(cell *NC)
{
    int16_t  k;
    int16_t  found;
    cell    *second;
    version *alt;

    found = 0;
    for (k = 0; k < NC->nvers && !found; k++)
        found = (NC->vers[k].let == 'l');
    if (!found)
        return FALSE;

    second = NC->nextl;
    found = 0;
    for (k = 0; k < second->nvers && !found; k++)
        found = (second->vers[k].let == 'l');
    if (!found)
        return FALSE;

    // Nothing other than 'l' is expected in the first cell ...
    for (k = 0; k < NC->nvers; k++)
    {
        alt = &NC->vers[k];
        if (alt->let != 'l')
            alt->prob = MAX(alt->prob - MONUS, 10);
    }
    sort_vers(NC);

    // ... nor in the second one.
    for (k = 0; k < second->nvers; k++)
    {
        alt = &second->vers[k];
        if (alt->let != 'l')
            alt->prob = MAX(alt->prob - MONUS, 10);
    }
    sort_vers(second);

    return TRUE;
}
void dif_II( void ) { c_comp *cp; lnhead *lp; int16_t h=workI.c->h/2,ii=0; if( !workI.twoDot && workI.ld == NULL && workI.rd && workI.rh > 0 ){ workI.c->vers[1].prob -= 20; goto ret_dif; } if( workI.ld && workI.rd && /*workI.c->prevl->vers[0].let != UKR_i && */ abs(workI.lh-workI.rh) >= 2 && (workI.lw > 2*workI.rw || workI.rw > 2*workI.lw) ){ workI.c->vers[1].prob -= 20; goto ret_dif; } if( workI.ld && workI.rd && workI.lh > 0 && workI.rh > 0 ){ workI.c->vers[0].prob -= 20; goto ret_dif; } cp = workI.c->env; for(;cp!=NULL;cp=cp->next){ lp = (lnhead *)((puchar)cp + cp->lines + sizeof(uint16_t)); while( lp->lth != 0 ){ if( lp->row <= workI.c->h/8 ){ ii++; if( h > lp->h ) h = lp->h; } lp = (lnhead *)((puchar)lp + lp->lth); } } workI.c->vers[0].prob -= 24*workI.twoDot; if( ii >= 2 && h >= 2 ){ if( !workI.twoDot ) workI.c->vers[0].prob -= 20; } else workI.c->vers[1].prob -= 20; ret_dif: sort_vers(workI.c); return; }/*dif_II*/
//
// One-letter-word helper: favours digits.
//
// If any version of NC is a digit ('0'..'9'), every non-digit version is
// changed by (-MONUS + MONUS1I), not going below 10, and the list is
// re-sorted.
//
// Returns 1 if a digit version was present, 0 otherwise.
//
static int16_t bon_to_dig(cell *NC)
{
    int16_t  k;
    int16_t  has_digit = 0;
    version *alt;

    for (k = 0; k < NC->nvers; k++)
    {
        alt = &NC->vers[k];
        if (alt->let >= '0' && alt->let <= '9')
        {
            has_digit = 1;
            break;
        }
    }
    if (!has_digit)
        return 0;

    for (k = 0; k < NC->nvers; k++)
    {
        alt = &NC->vers[k];
        if (alt->let < '0' || alt->let > '9')
            alt->prob = MAX(alt->prob - MONUS + MONUS1I, 10);
    }

    sort_vers(NC);
    return 1;
}
//
// Walks the letter chain that starts after cell_f() and adjusts the '0'
// and '/' versions of every visited cell:
//
//  - a cell that has a '0' version gets cell_bonus() applied according to
//    the language rules below, and its versions are re-sorted;
//  - a cell that has a '/' version is passed to inc_or_dec_slash(), except
//    in the Serbian 'j' case.
//
void test_O_and_slash()
{
 cell *C;
 int16_t nv;

 C = cell_f();
 // The pointer is advanced before the test, so cell_f() itself and the
 // cell whose nextl is NULL are not processed.
 while ((C=C->nextl)->nextl)
  {
   nv = get_nvers(C, '0');
   if ( nv >= 0 )
   // Minimum decrease for 'O' to prefer '0' in the cases without
   // obvious decision
    {
#ifndef UFA
    // Non-UFA build: the bonus is given for every language except Russian.
    if( language!=PUMA_LANG_RUSSIAN )
       cell_bonus(C, &(C->vers[nv]), 2);
#else
    // UFA build: Russian text is handled too, depending on whether the
    // cell also has a '9', '6' or '8' version.
    if( language!=PUMA_LANG_RUSSIAN )
       cell_bonus(C, &(C->vers[nv]), 2);
    else if( language==PUMA_LANG_RUSSIAN )
      {
      if( get_nvers(C,'9')<0 && get_nvers(C,'6')<0 && get_nvers(C,'8')<0 )
        cell_bonus(C, &(C->vers[nv]), 2);
        // Oleg : 06-09-1994 : not exist '698' - bonus 0
      else
        { // Oleg : 06-09-1994 : exist any '698'-version
        // NOTE(review): the literal below is encoding-dependent; per the
        // "monus O" remark it presumably denotes the Cyrillic capital O in
        // the project's code page - confirm before re-encoding this file.
        nv=get_nvers(C,'Ћ');
        if( nv>=0 )
          cell_bonus(C, &(C->vers[nv]), -2); // monus O
        }
      }
#endif
    sort_vers(C);
    }
   nv = get_nvers(C, '/');
   // No '/' among the versions: nothing more to do for this cell.
   if ( nv == -1 )
     continue;
   // Serbian mode: leave the cell alone when SERB_j is the best version
   // and '/' is only an alternative (nv != 0).
   if( language == PUMA_LANG_RUSSIAN && langSer &&
       nv && C->nvers && C->vers[0].let==SERB_j )
     continue;
   inc_or_dec_slash(C, nv);
  }
}
//
// Adjusts the probability of the '/' version (index numv) of cell wc
// according to the style of its neighbours:
//   - both neighbours italic   : decrease '/' (to prefer 'l' or 'I');
//   - both neighbours straight : increase '/' (to avoid 'l' and 'I').
// When the cell has neither an 'I' nor an 'l' version and version numv is
// '/', a fixed bonus of 2 is used in both situations instead.
//
// Nothing is done when '/' trails the best version by more than
// REFUSE_BOUND, or when either neighbour (prev / next) is not a letter.
//
static void inc_or_dec_slash(cell *wc, int16_t numv)
{
    cell *left, *right;
    Bool  no_Il = (get_nvers(wc, 'I') == -1) &&
                  (get_nvers(wc, 'l') == -1 && wc->vers[numv].let == '/');

    if ( wc->vers[0].prob - wc->vers[numv].prob > REFUSE_BOUND )
        return;

    left  = wc->prev;
    right = wc->next;
    if ( !(left->flg & c_f_let) || !(right->flg & c_f_let) )
        return;

    if ( ((left->font | left->font_new) & c_fp_it) &&
         ((right->font | right->font_new) & c_fp_it) )
    {
        cell_bonus(wc, &(wc->vers[numv]), (int16_t)(no_Il ? 2 : -BONUS_SLASH));
    }
    else if ( ((left->font | left->font_new) & c_fp_str) &&
              ((right->font | right->font_new) & c_fp_str) )
    {
        cell_bonus(wc, &(wc->vers[numv]), (int16_t)(no_Il ? 2 : BONUS_SLASH));
    }

    sort_vers(wc);
}
//
// Final check of a '/' alternative in a cell whose best version is
// something else.
//
//  - No envelope, or no '/' among versions 1..nvers-1: return 0.
//  - Neighbouring letters' combined font has c_fp_ser without c_fp_gelv
//    while the cell itself lacks c_fp_ser: the letters of the best
//    version and of the '/' version are exchanged (probabilities stay in
//    place, no re-sort); return 1.
//  - Otherwise, if is_slash() accepts the cell, nothing changes (return
//    0); if not, the '/' version gets probability 0, the list is re-sorted
//    and 1 is returned.
//
static int16_t final_no_slash(cell *c)
{
    int16_t i;
    uchar   fnt;

    if (c->env == NULL)
        return 0;

    for (i = 1; i < c->nvers; i++)
        if (c->vers[i].let == '/')
            break;

    // '>=' instead of '==': the search starts at 1, so for a cell with no
    // versions (nvers == 0) an equality test would not fire and vers[1]
    // would be used as if it were a found '/'.
    if (i >= c->nvers)
        return 0;

    if ((fnt = c->prevl->font | c->nextl->font) & c_fp_ser &&
        !(fnt & c_fp_gelv) && !(c->font & c_fp_ser))
    {
        c->vers[i].let = c->vers[0].let;
        c->vers[0].let = '/';
        return 1;
    }

    if (is_slash(c))
        return 0;

    c->vers[i].prob = 0;
    sort_vers(c);
    return 1;
}
//////////////////// ///////////////// // Tools.c int16_t short_recog_cell(CSTR_rast c, int line_scale) { int16_t n, i; uchar res[20]; CSTR_rast_attr attr; CCOM_comp *comp; // ExtComponent ec; int16_t *pint16; uchar * lpool; UniVersions vers = { 0 }; CSTR_GetAttr(c, &attr); comp = CSTR_GetComp(c); // исходно была ошибка Tools.c - // если c->env == 0 в lpool лежало неизвестно что! // if( (c->cg_flag & CSTR_cg_comp) && !c->env ) // return 0; if ((attr.cg_flag & CSTR_cg_comp) || !comp) return 0; // comp_from_kit(c); // CCOM_comp -> to ExtComponent /* memset(&ec,0,sizeof(ExtComponent)); if( !Ccomp2ExtComponenet(comp,&ec,&attr,line_scale) ) return 0; */ // n = (int16_t)EVNRecog_lp(c->env,lpool,lpool_lth,&res[0]); pint16 = (int16_t *) comp->linerep; // *pint16 == comp->size_linerep ????? lpool = comp->linerep + 2; // n = (int16_t)EVNRecog_lp(&ec,lpool,*pint16,&res[0]); n = (int16_t) EVNRecog_lp(comp, lpool, *pint16, &res[0]); vers.lnAltMax = REC_MAX_VERS; if (n) { for (i = 0; i < n; i++) { //c->vers[i].let=res[i]; //c->vers[i].prob=254; vers.Alt[i].Liga = res[i]; vers.Alt[i].Prob = 254; vers.Alt[i].Method = REC_METHOD_EVN; } attr.flg = CSTR_f_let; attr.recsource |= CSTR_rs_ev; attr.RecogHistory |= CSTR_rs_ev; } else { attr.flg = CSTR_f_bad; } vers.lnAltCnt = n; CSTR_StoreCollectionUni(c, &vers); CSTR_SetAttr(c, &attr); v2_pidx_crit(c); sort_vers(c); CSTR_GetCollectionUni(c, &vers); return (int16_t) vers.lnAltCnt; }
//
// French-specific context pass over the word that starts at cell C.
//
//  - Shortened article (single letter followed by an APOSTROPH
//    punctuation cell): versions 'l', 'd', 'L', 'D' gain FRANCH_ART_BONUS
//    (capped at MAX_PROB), all others lose it (floored at MIN_PROB).  If
//    at least one of the four letters was present the cell is re-sorted
//    and CONTINUE_ACTION is returned.
//  - "Il" / "Ils" (2- or 3-letter word after a dot, or at the start of a
//    line as indicated by a fictive previous cell): when 'I', 'l' (and
//    's' for three letters) are all present with probability >= ADM_PROB,
//    they are moved to the first place in their cells and CONTINUE_ACTION
//    is returned.
//
// Returns NO_ACTION otherwise.
//
int16_t franch_context_process( cell* C )
{
    int16_t k, pos_I, pos_l, pos_s;
    int16_t hit;
    int     ch;
    cell   *second;
    cell   *third;

    /**** shorted article case: *****/
    if ( n_ltr == 1 && (C->next->flg & c_f_punct) &&
         (C->next->vers[0].let == APOSTROPH) )
    {
        hit = 0;
        for ( k = 0; k < C->nvers; k++ )
        {
            ch = C->vers[k].let;
            if ( ch == 'l' || ch == 'd' || ch == 'L' || ch == 'D' )
            {
                C->vers[k].prob = MIN( MAX_PROB, C->vers[k].prob + FRANCH_ART_BONUS );
                hit = 1;
            }
            else
            {
                C->vers[k].prob = MAX( MIN_PROB, C->vers[k].prob - FRANCH_ART_BONUS );
            }
        }
        if ( hit )
        {
            sort_vers( C );
            return CONTINUE_ACTION;
        }
    }

    /*** "Il" or "Ils" case ***/
    if ( n_ltr != 2 && n_ltr != 3 )
        return NO_ACTION;
    if ( !( ( word_flag & AFTERDOTCOM ) || ( word_flag & AFTERDOT ) ||
            ( C->prev->flg & c_f_fict ) ) )
        return NO_ACTION;

    pos_I = get_nvers( C, 'I' );
    if ( pos_I < 0 || C->vers[pos_I].prob < ADM_PROB )
        return NO_ACTION;

    second = C->nextl;
    pos_l = get_nvers( second, 'l' );
    if ( pos_l < 0 || second->vers[pos_l].prob < ADM_PROB )
        return NO_ACTION;

    if ( n_ltr != 2 )
    {
        // Three letters: the last one has to be a confident 's'.
        third = second->nextl;
        pos_s = get_nvers( third, 's' );
        if ( pos_s < 0 || third->vers[pos_s].prob < ADM_PROB )
            return NO_ACTION;
        vers_to_first_place( third, pos_s );
    }

    vers_to_first_place( C, pos_I );
    vers_to_first_place( second, pos_l );
    return CONTINUE_ACTION;
}