void setup_string() { int32_t number_of_cells=((intptr_t)CellsPageEnd_rstr-(intptr_t)CellsPage_rstr)/sizeof (cell); empty_cell=(cell *) (CellsPage_rstr); cell_boundary = empty_cell + number_of_cells;//NUMBER_OF_CELLS; free_cell_chain=NULL; //Paul 04-04-2002 cell_f()->next =cell_l(); cell_f()->nextl =cell_l(); cell_l()->prev =cell_f(); cell_l()->prevl =cell_f(); // }
void serbian_J2j(void) { B_LINES my_bases; cell *c,*e; int16_t n; char bb[90]; get_b_lines(NULL,&my_bases); for(c=cell_f()->nextl,e=cell_l();c!=e;c=c->nextl) if( (c->flg&c_f_let) && c->nvers && c->w*3<=c->h && (c->vers[0].let=='J' || memchr("[]!",c->vers[0].let,3)&& c->row<(my_bases.b2 + my_bases.b1)/2&& c->row+c->h>(my_bases.b3 + my_bases.b4)/2) ) { if( (n=geom_neck((int16_t)(my_bases.b2 - my_bases.b1),c))!=0 ) { if( db_status && snap_activity('c') ) { sprintf (bb,"serbian reverse J-->>j. bs= %d %d (%d). find neck on %d", my_bases.b1, my_bases.b2, my_bases.b2 - my_bases.b1, n); snap_show_text(bb); snap_monitor(); } c->vers[0].let='j'; } if( c->nvers && c->vers[0].let=='j' && check_let(c, '/') ) del_version( c,'/'); } return; }
void add_digital_versions(void) { cell *curr; uchar c,p; curr = cell_f(); while( (curr=curr->next)->next ) { if( !(curr->flg&(c_f_let|c_f_bad)) ) continue; c = curr->vers[0].let; p = curr->vers[0].prob; if( c=='|' && curr->nvers==2 ) { c = curr->vers[1].let; p = curr->vers[1].prob; } if( (curr->nvers==1 || c==(uchar)'Т' && curr->nvers==2)&& p>220 ) switch( c ) { case (uchar)'Т' : add_stick_vers (curr, '1', p);break; case (uchar)'°' : add_stick_vers (curr, '6', p);break; default : break; } } return; }
void AKClearVers() { cell * Count; char Let; //uchar Prob; int i; // проверяем цепочку на случай зацикливания или обрыва AKCheckChain(); for (Count = cell_f()->next; Count != cell_l(); Count=Count->next) { for ( i = 0; /*Count->vers[i].let != 0x0 &&*/ i < VERS_IN_CELL/*Count->nvers*/ ; i++ ) { Let = Count->vers[i].let; if ( !proplet(Let) /*&& Let != bad_char*/ ) { // если символ не от сюда Count->vers[i].let = 0; //bad_char; Count->vers[i].prob = 0; } } sort_vers(Count); /* if (Count->vers[0].let == bad_char) { Count = del_cell(Count); } */ } }
void criteries() { cell *c; if (!line_number || current_fragment!=curr_frag) { dist_point_of_i_1=dist_point_of_i_2=dist_point_of_i_3= dist_point_of_i_b=0; curr_frag=current_fragment; } c=cell_f(); while ((c=c->nextl)->nextl != NULL) { criteria(c); if(language == PUMA_LANG_RUSSIAN ) { r_criteria(c,NULL); if( c->nvers>0 && memchr("’вѓЈ",c->vers[0].let,4) && !is_russian_baltic_conflict(c->vers[0].let)&& // 17.07.2001 E.P. !is_russian_turkish_conflict(c->vers[0].let) // 21.05.2002 E.P. ) stick_center_study(c,NULL,1); } } }
// finding begin of word cell * erect_next_word(cell *cs) { cell *c=cs ; if( c==cell_l() ) return NULL; if( c==cell_f() ) c=c->next; return (c->flg&c_f_fict)?NULL:c; }
/********************************************************
 *                                                      *
 *  Special routines for the letters coded 0xA9 / 0x89  *
 *  (small and capital "short i", per the literals      *
 *  used below)                                         *
 *                                                      *
 ********************************************************/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "func.h"
#include "struct.h"
#include "linear.h"
#include "linutil.h"
#include "ligas.h"
#include "func.h"
#include "minmax.h"

extern uchar p2_active;

// Discriminate the short-i letter by base lines
static int16_t rec_ii(cell*c,cell*cap);
static int16_t rec_ii_halo(cell * c);

// Penalty tables indexed by the distance (0..4) of the glued group's top
// above the reference base line: lower-case and upper-case variants.
static uchar iot_pen_lc[]= { 120,60,10,0,0 };
static uchar iot_pen_uc[]= { 140,10,0 ,0,0 };

// Compute a position penalty for accepting `let` (0xA9 or 0x89) as the
// result of gluing the cells in `gl`.
//   gl  - glue descriptor; celist[0..ncell-1] are the participating cells
//   let - candidate letter code
// Returns 0 for any other letter.  For the two handled letters the result
// is taken from the tables above when the top of the group is 0..4 rows
// above the reference line (b2 for 0xA9, b1 for 0x89), 0 when it is higher
// still, and 160 when the top lies below the reference line.
int16_t cut_by_pos_ii(s_glue * const gl,uchar let)
{
    B_LINES bl;
    int16_t pen=0,upper=32000,dis,i;

    get_b_lines(gl->celist[0],&bl);
    // upper = topmost row among all glued cells
    for(i=0; i < gl->ncell; i++)
        upper = MIN(upper,gl->celist[i]->row);

    if(let==(uchar)'\xa9' /* й */ &&
       !is_russian_turkish_conflict(let) // 21.05.2002 E.P.
      )
    {
        if((dis=upper-bl.b2) <= 0)
        { // letter reaches above base line b2
            dis = abs(dis);
            if(dis < 5)
                pen = iot_pen_lc[dis];
        }
        else
            pen = 160; // letter top is below b2

        // Single cell whose versions came from the "events" recogniser:
        // add 60 when its top is less than 3 rows below b1 (or above it).
        if( gl->ncell==1 &&
            (gl->celist[0]->recsource == c_rs_ev ||
             gl->celist[0]->recsource == (c_rs_ev|c_rs_deskr) ))// events brought vers
            if((Ns1+Ns2)>0 && bl.b2-bl.b1>6)
            {
                dis=upper-bl.b1;
                pen += dis<3 ? 60 : 0;
            }
    }

    if(let==(uchar)'\x89' /* Й */)
    { // Capital letter
        if((dis=upper-bl.b1) <= 0 )
        { // letter reaches above base line b1
            dis = abs(dis);
            if(dis < 5)
                pen = iot_pen_uc[dis];
        }
        else
            pen = 160; // letter top is below b1
    }
    return pen;
}

// Walk the string letter by letter and try to recognise '\xa9' /* й */.
// For every letter whose best version is one of the seven base letters in
// the literal below, each dust cell lying between it and the neighbouring
// letters (first to the right, then to the left) is offered to rec_ii():
//   rec_ii() ==  1 : done with this letter, continue with the next one
//   rec_ii() == -1 : abandon the whole pass
//   rec_ii() ==  0 : keep trying
// If more than one dust cell was seen and none succeeded, rec_ii_halo() is
// tried with all of them together.
void proc_ii(void)
{
    cell * c,*cap;
    uchar let;
    int16_t ndust;

    c = cell_f();
    while((c=c->nextLetter()) != NULL )
    {
        if( !c->hasCellFlag(c_f_let + c_f_bad))
            continue;
        let = c->vers[0].let;
        ndust=0;
        //if( !memchr("иИнНпыў",let,7) ||
        if( !memchr("\xA8\x88\xAD\x8D\xAF\xEB\xF7",let,7) ||
            is_russian_baltic_conflict(let) // 17.07.2001 E.P.
          )
            continue;

        // dust cells to the right, up to the next letter
        cap = c;
        while((cap=cap->next)!=NULL && cap != c->nextLetter())
        {
            if(cap->isDust())
            {
                ndust++;
                switch(rec_ii(c,cap))
                {
                    case 1: goto next_let;
                    case -1: return;
                    case 0: break;
                }
            }
        }

        // dust cells to the left, back to the previous letter
        cap = c;
        while((cap=cap->prev)!=NULL && cap != c->previousLetter())
        {
            if(cap->isDust())
            {
                ndust++;
                //if(rec_ii(c,cap))goto next_let;
                switch( rec_ii(c,cap))
                {
                    case 1: goto next_let;
                    case -1: return;
                    case 0: break;
                }
            }
        }

        if( ndust > 1)
            rec_ii_halo(c); // many dusts, try all together
        next_let: ;
    } // while by letters
}
void proc_Ukr( void ) { cell *c; if (db_status) snap_newpass('b'); for (c = cell_f(); c != cell_l(); c = c -> next){ proc_UKR_I(c); } }/*proc_Ukr*/
/*
 * Determine which table column the current string lies in.
 *
 *   VertLines - array of VertNum column boundaries; dichotomy_array() is
 *               used to search it (presumably sorted ascending - confirm)
 *   VertNum   - number of entries in VertLines
 *
 * Returns the index `col` such that the string starts inside
 * [VertLines[0], VertLines[VertNum-1]) and ends no further right than
 * VertLines[col+1]; returns -1 when the string does not fit into a single
 * column.
 *
 * Fewer than two boundaries cannot delimit a column, so such input (and a
 * NULL array) now yields -1 instead of reading VertLines[-1] / VertLines[1]
 * out of bounds.  With exactly one boundary the original range test already
 * always returned -1, so valid callers see no change.
 */
int16_t GetColumn(int16_t *VertLines,int16_t VertNum )
{
    int16_t start_col, end_col;
    int16_t col;

    if (VertLines == NULL || VertNum < 2)
        return -1;

    start_col = cell_f()->next->col;
    end_col   = cell_l()->prev->col;

    if (start_col < VertLines[0] || start_col >= VertLines[VertNum-1])
        return -1;

    col = dichotomy_array(start_col, VertLines, 0, VertNum);

    /* never index past the last boundary, whatever the search returned */
    if (col < 0 || col >= VertNum - 1)
        return -1;

    if (end_col > VertLines[col+1])
        return -1;
    return col;
}
// Go by string and recog 'ч' void cf::proc_shortu() { cell * c, * cap; uchar let; int ndust; c = cell_f(); while((c = c->nextLetter()) != NULL) { if(! c->hasCellFlag(c_f_let + c_f_bad)) continue; let = c->vers[0].let; ndust = 0; if(!memchr("\xE3\x93", let, 2)) continue; cap = c; while((cap = cap->next) != NULL && cap != c->nextLetter()) { if(cap->isDust()) { ndust++; switch(rec_shortu(c,cap)){ case 1: goto next_let; case -1: return; case 0: break; } } } cap = c; while((cap = cap->prev) != NULL && cap != c->previousLetter()) { if(cap->isDust()) { ndust++; //if(rec_shortu(c,cap))goto next_let; switch(rec_shortu(c,cap)) { case 1: goto next_let; case -1: return; case 0: break; } } } if(ndust > 1) rec_shortu_halo(c); // many dusts, try all together next_let: ; } // while by letters }
// Go by string and recog 'ч' void proc_shortu(void) { cell * c,*cap; uchar let; int16_t ndust; c = cell_f(); while((c=c->nextl) != NULL ){ if( !(c->flg & (c_f_let+c_f_bad)) ) continue; let = c->vers[0].let; ndust=0; if( !memchr("гУ",let,2) ) continue; cap = c; while((cap=cap->next)!=NULL && cap != c->nextl ) { if( cap->flg & c_f_dust) { ndust++; switch( rec_shortu(c,cap)) { case 1: goto next_let; case -1: return; case 0: break; } } } cap = c; while((cap=cap->prev)!=NULL && cap != c->prevl ) { if( cap->flg & c_f_dust) { ndust++; //if(rec_shortu(c,cap))goto next_let; switch( rec_shortu(c,cap)) { case 1: goto next_let; case -1: return; case 0: break; } } } if( ndust > 1) rec_shortu_halo(c); // many dusts, try all together next_let: ; } // while by letters }
void analysis_words(void) { int16_t left,right,flag=0,flag_qual; cell *WB,*WE,*cl; char snap[380],*buf=snap; left=right=0; WB=clbeg; WE=clend; //AK! add crashguard while( (WB->flg & c_f_dust) || (WB->flg & c_f_fict) || (WB->flg & c_f_punct) ) { if ((WB->next == cell_l() || WB->next == NULL) ) return; WB=WB->next; } //AK! add c/g while( (WE->flg & c_f_dust) || (WB->flg & c_f_fict) || (WB->flg & c_f_punct)) { if ((WE->prev == cell_f() || WE->prev == NULL)) return; WE=WE->prev; } if(WB==WE) return; /* ---------------- LEFT -----------------------------*/ if(WB->vers[0].let==left_quocket) left=100; else if((WB->flg & c_f_bad) || (memchr("¢э™",WB->vers[0].let,6) && !is_russian_baltic_conflict(WB->vers[0].let) && // 17.07.2001 E.P. !is_russian_turkish_conflict(WB->vers[0].let)&& // 21.05.2002 E.P. WB->vers[0].prob < MAX_PROB) || (memchr("Cc",WB->vers[0].let,2) && WB->vers[0].prob < MIN_PROB) || (memchr("u",WB->vers[0].let,1) && WB->vers[0].prob <=SMAX_PROB && WB->h <= ok_K_hgt) || (WB->vers[0].let== r_cu_u && WB->vers[0].prob < MID_PROB) || (memchr("ae",WB->vers[0].let,2) && WB->vers[0].prob < MAX_PROB) ) { if(WB->vers[0].prob<MIN_PROB && (WB->h>>1)*3 <= (WB->nextl)->h) flag_qual=1; else flag_qual=0; if(WB->vers[0].let=='1') flag_qual=0; left=check_shevron(WB,flag_qual); }
/*
 * After repairing the chain, make sure *c still refers to a cell that is
 * part of it.  Only acts when (*c)->next is NULL and *c is not found in the
 * chain: *c is then moved to its predecessor if that one is in the chain,
 * otherwise to the first cell after the head sentinel.
 */
void AKTryChainRecover(cell ** c)
{
    AKCheckChain();

    /* a cell that still has a successor is left alone */
    if ((*c)->next != NULL)
        return;

    /* no successor, but the cell is reachable: nothing to do */
    if (AKCellInChain(*c))
        return;

    /* fall back to the previous cell when possible, else to the start */
    *c = AKCellInChain((*c)->prev) ? (*c)->prev : cell_f()->next;
}
//////////////////////////////////////////////////////////////////////////////////////////////////////////// // смотрим. есть ли такой cell в цепочке int16_t AKCellInChain(cell * Cell) { cell * Count; // проверяем цепочку на случай зацикливания или обрыва AKCheckChain(); for (Count = cell_f(); Count != cell_l(); Count = Count->next) { if (Count == Cell) return TRUE; } if (Cell == cell_l()) return TRUE; return FALSE; }
/*
 * Insert cell `c` into the chain near cell `ci`, keeping column order.
 *
 * Unless `ci` is a space cell, the insertion point is first moved:
 *   - left  while the previous cell is a space or lies right of c->col
 *           (never onto the head sentinel), or
 *   - right while the current cell is a space or lies at/left of c->col
 *           (never past the tail sentinel).
 * `c` is then linked in front of the insertion point.  A letter/bad cell
 * that is not dust is also linked into the letter chain in front of the
 * next fictitious/letter/bad non-dust cell; any other cell gets err_pnlet().
 *
 * AK: the bounded walks replace an older unbounded version that crashed when
 * ci->prev was the first cell or ci was the last cell.
 */
void insert_cell(cell *c,cell *ci)
{
    const int16_t new_col = c->col;

    if (!(ci->flg & c_f_space)) {   /* Valdemar 02-15-96, Paul 10-11-96 */
        if (new_col <= ci->col) {
            while (ci->prev && ci->prev != cell_f() &&
                   ((ci->prev->flg & c_f_space) || ci->prev->col > new_col))
                ci = ci->prev;
        } else {
            while (ci != cell_l() && ci->next &&
                   ((ci->flg & c_f_space) || ci->col <= new_col))
                ci = ci->next;
        }
    }

    /* splice c into the main chain just before ci */
    cell *left = ci->prev;
    left->next = c;
    c->prev    = left;
    c->next    = ci;
    ci->prev   = c;

    if (!(c->flg & (c_f_let+c_f_bad)) || (c->flg & c_f_dust)) {
        /* AL 940318: not a member of the letter chain */
        err_pnlet (c);
        return;
    }

    /* find the next member of the letter chain and splice c before it */
    cell *succ = c->next;
    while (!(succ->flg & (c_f_fict+c_f_let+c_f_bad)) || (succ->flg & c_f_dust))
        succ = succ->next;

    cell *pred = succ->prevl;
    pred->nextl = c;
    c->prevl    = pred;
    c->nextl    = succ;
    succ->prevl = c;
}
void make_simples_diff(int16_t lang) { extern char db_pass; cell *c,*e=cell_l(); int16_t dbp = db_pass; db_pass=0; for(c=cell_f()->next;c!=e;c=c->next) if( !c->env->scale ) { criteria(c); if( lang==PUMA_LANG_RUSSIAN ) r_criteria(c,NULL); } db_pass = (uchar)dbp; return; }
void test_O_and_slash() { cell *C; int16_t nv; C = cell_f(); while ((C=C->nextl)->nextl) { nv = get_nvers(C, '0'); if ( nv >= 0 ) // Minimum decrease for 'O' to prefer '0' in the cases without // obvious decision { #ifndef UFA if( language!=PUMA_LANG_RUSSIAN ) cell_bonus(C, &(C->vers[nv]), 2); #else if( language!=PUMA_LANG_RUSSIAN ) cell_bonus(C, &(C->vers[nv]), 2); else if( language==PUMA_LANG_RUSSIAN ) { if( get_nvers(C,'9')<0 && get_nvers(C,'6')<0 && get_nvers(C,'8')<0 ) cell_bonus(C, &(C->vers[nv]), 2); // Oleg : 06-09-1994 : not exist '698' - bonus 0 else { // Oleg : 06-09-1994 : exist any '698'-version nv=get_nvers(C,'Ћ'); if( nv>=0 ) cell_bonus(C, &(C->vers[nv]), -2); // monus O } } #endif sort_vers(C); } nv = get_nvers(C, '/'); if ( nv == -1 ) continue; if( language == PUMA_LANG_RUSSIAN && langSer && nv && C->nvers && C->vers[0].let==SERB_j ) continue; inc_or_dec_slash(C, nv); } }
void erection_delete(void) { cell *c=cell_f()->next, *e=cell_l(); erection_delete_seq(c,e); return; }
//
// Main erection function:
// finds inclined words in the current string and erects them.
//
//   pass - stored into local_pass; statistics are only collected when it is 0
//
// Returns non-zero when at least one word was found inclined while
// local_pass == 0, and 0 otherwise (including the early exit when
// setup_incline_word() fails).
//
// Side effects visible here: erect_page_words is incremented per word,
// erect_page_incline[] is a histogram indexed by inc/16, and erection_inc
// receives the average incline of the inclined words.
//
uchar erection_incline_words(int16_t pass)
{
    cell *c, *e, *tmp, *sc ;
    uchar buf[MAX_LEN_WORD+40] ;            // debug message under construction
    uchar wrd[MAX_LEN_WORD+40], word_len ;  // text of the current word
    Bool inc, incline, no_res ;
    B_LINES bl ;
#ifdef STEND_INC
    // State for the optional "erection.txt" log
    static char oldstr[256]="c:\\";         // last page name written
    char str[256] ;
    static int16_t oldline=-1 ;             // last line number written
    FILE *fp ;
    static int16_t first=1 ;                // the first open truncates the log
#endif
    int16_t av_inc=0, n_inc=0;              // sum / count of non-zero inclines

    local_pass=pass;
    if( !setup_incline_word(cell_f()->nextl,cell_l(), TRUE) )
    {
        if( db_status && snap_activity('i') )
        {
            snap_show_text("ERECT : No erection images");
            snap_monitor();
        }
        clear_incline_word(cell_f()->nextl,cell_l());
        return 0;
    }
    if( db_status && snap_activity('i') )
    {
        snap_show_text("ERECT : Begin erection words");
        snap_monitor();
    }

    c = cell_f() ;
    incline = FALSE ;
    while(1)
    {
        // c: first cell of the next word, e: value returned by erect_end_word()
        if( (c = erect_next_word(c))==NULL )
            break;
        no_res=1;
        inc =0;
        strcpy(buf,"ERECT : Word : ");
        get_b_lines(c,&bl);
        e = erect_end_word(c, wrd, &word_len, (int16_t)(bl.ps/2) );
        // the word text is written into the message starting at offset 14
        strcpy(&buf[14],wrd);
        erect_page_words++;
        sc = snap_stopcell(c,e)?snap_get_stopcell():NULL;
        // remember the predecessor; tmp->next is used for the snap below
        tmp = c->prev;
        inc = erection_incline_word(c, e, bl.b3, 0);
        if( inc && !local_pass )
        {
            incline = 1;
            av_inc+=inc;
            n_inc++;
            erect_page_incline[inc/16]++;
        }
#ifdef STEND_INC
        // NOTE(review): the fopen() results below are used without a NULL
        // check (except for the single retry) - confirm this is acceptable
        // for a debugging build.
        if( inc && !(db_status && snap_activity('i')) )
        {
            set_page_name(str);
            if( strcmp(str, oldstr) )
            {
                strcpy(oldstr, str);
                fp = fopen("erection.txt",first?"wt":"at");
                if( fp==NULL )
                    fp = fopen("erection.txt","wt");
                if( first)
                    first=0;
                fprintf(fp,"\nfile : %s", str);
                fclose(fp);
            }
            if( oldline!=line_number)
            {
                oldline=line_number;
                fp = fopen("erection.txt","at");
                fprintf(fp,"\nline %d : ", line_number+1);
                fclose(fp);
            }
            sprintf(wrd,"(%d)",inc);
            no_res=0;
            strcat(buf,wrd) ;
            fp = fopen("erection.txt","at");
            fprintf(fp," %s ", &buf[14]);
            fclose(fp);
        }
#endif
        if( db_status && snap_activity('i') )
        {
            if( inc || sc )
            {
                if( inc )
                    sprintf(wrd,"(%d)",inc);
                else
                    sprintf(wrd,"(no erection)");
                // append the result unless the STEND_INC block already did
                if( no_res )
                    strcat(buf,wrd) ;
                snap_newcell(sc?sc:tmp->next) ;
                snap_show_text(buf) ;
                snap_monitor() ;
            }
        }
        if( e==NULL )
            break;
        c = e;
    }

    if( n_inc )
        erection_inc = av_inc / n_inc;
    clear_incline_word(cell_f()->nextl,cell_l());
    if( db_status && snap_activity('i') )
    {
        snap_show_text("ERECT : End erection words ");
        snap_monitor();
    }
    return(uchar) incline ;
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////// //AK debug // Проверяем цепочку и пытаемся ее поправить void AKCheckChain() { cell * ForwardChain; cell * BackwardChain; cell * Back; cell * BackTwo; cell * Appendix = NULL; cell * LossCell; cell * Cicle; // проверяем цепочку вперед for ( Back = cell_f(), ForwardChain = cell_f()->next; ForwardChain != cell_l() && ForwardChain->next != NULL; ForwardChain = ForwardChain->next ) { // проверяем на зацикленность for ( Cicle = cell_f(); Cicle != ForwardChain; Cicle = Cicle->next ) if (ForwardChain->next == Cicle) { //если зацикливается... ForwardChain = NULL; break; } // проверка обратной связи if ( ForwardChain->prev != Back ) { LossCell = ForwardChain->prev; ForwardChain->prev = Back; } Back = ForwardChain; } // если прошли до конца... if ( ForwardChain == cell_l() ) return; // или не прошли... if ( ForwardChain->prev != Back ) ForwardChain->prev = Back; // проверяем цепочку надзад for ( Back = cell_l(), BackwardChain = cell_l()->prev; BackwardChain != cell_f() && BackwardChain->prev != NULL; BackwardChain = BackwardChain->prev ) { // проверка обратной связи if ( BackwardChain->next != Back ) { // смотрим, куда идет апендикс for ( BackTwo = BackwardChain, Appendix = BackwardChain->next; Appendix != cell_l() && Appendix->next != NULL; Appendix = Appendix->next ) { if ( Appendix->prev != BackTwo ) { LossCell = Appendix->prev; Appendix->prev = BackTwo; } } // если по апендиксу дошли до обрыва от начала, то соединяем if ( Appendix == ForwardChain ) { ForwardChain->next = Back; Back->prev = ForwardChain; return; } // если конец апендикса не обрыв от начала... if ( Appendix != cell_l() ) { Appendix->next = Back; Back->prev = Appendix; BackwardChain = Back; } } // проверяем на потерянный next if ( BackwardChain->prev == ForwardChain ) { ForwardChain->next = BackwardChain; return; } Back = BackwardChain; } // Если не прошли до начала... 
if ( BackwardChain != cell_f() ) { ForwardChain->next = BackwardChain; BackwardChain->prev = ForwardChain; return; } // Если назад прошли до начала, а вперед только до обрыва, // то есть аппендикс от начала, пристраеваем его в начало // (на всякий случай проверим, остался ли апендикс целым if ( BackwardChain == cell_f() && ForwardChain != cell_l() ) { ForwardChain->next = Back; Back->prev = ForwardChain; } }
/*
 * Final per-cell corrections over the whole string.
 *
 * Every non-space cell is examined once; depending on its best version
 * (`let`), font flags, cut flags and the current language, one or more of
 * the final_*() helpers is invoked.  `cnt` counts the corrections that
 * report a change; if any occurred, spaces are rebuilt and the line context
 * is re-processed at the end.
 *
 * `bl` and `clist` are not declared in this function; they come from the
 * enclosing file.
 */
void final_crit()
{
    cell *c;
    uchar let,let1;
    int16_t cnt;

    for (cnt=0,c=cell_f()->next; c->next!=NULL; c=c->next)
    {
        if (c->flg&c_f_space)
            continue;
        let=c->vers[0].let;
        if(final_crit_russian(c))
            cnt++;

        /* corrections that apply only to underlined cells */
        if (c->font&c_fp_undrln)
        {
            if (let=='v')
                final_vy(c);
            if (let=='e' || let=='u')
                final_eg(c);
            /* 'o', or 'a' whose second version is 'o' */
            if (let=='o' || let=='a' && c->nvers>=2 && c->vers[1].let=='o')
                final_ao_gpq(c);
            if (let=='O')
                final_OQ(c);
            if (let=='.')
                final_dotcom(c);
        }

        /* slash handling */
        if (let=='/')
            cnt+=final_slash_l(c);
        else
        {
            if ((c->font|c->font_new)&c_fp_it && memchr("Il1",let,3))
                cnt+=final_no_slash(c);
            if (c->flg&c_f_bad || c->vers[0].prob<PROBL)
                cnt+=final_back_slash(c);
        }

        /* 'c'/'C' with '<' as second version: keep the first version only */
        /* NOTE(review): vers[1] is read without checking nvers >= 2 */
        if ((let=='c' || let=='C') && c->vers[1].let=='<')
        {
            c->nvers=1;
            c->vers[1].let=0;
        }

        if ((let=='b' || let=='h') && (c->font|c->font_new)&c_fp_it)
            final_bh(c);

        /* Two adjacent i-like letters (i, i-ligature, fi/ffi ligatures, and
           the Turkish dotless/dotted i) that were cut apart or that touch in
           a straight font.  let1 is the best version of the next letter. */
        if ( (language==LANG_GERMAN || language==LANG_SPANISH ||
              language==LANG_FRENCH ||
              language==LANG_ESTONIAN || // 19.07.2001 E.P.
              language==LANG_TURKISH     // 30.05.2002 E.P.
             ) &&
             ((c->font|c->font_new)&c_fp_it || let=='i' || let==liga_i ||
               language == LANG_TURKISH && // 30.05.2002 E.P.
                (let==i_sans_accent||let==II_dot_accent)
             ) &&
             (let=='i' || let==liga_i ||
               language == LANG_TURKISH && // 30.05.2002 E.P.
                (let==i_sans_accent||let==II_dot_accent) ||
              let==liga_fi || let==liga_ffi
             ) &&
             ( (let1=c->nextl->vers[0].let)=='i' || let1==liga_i ||
               language == LANG_TURKISH && // 30.05.2002 E.P.
                (let1==i_sans_accent||let1==II_dot_accent)
             ) &&
             (c->cg_flag&c_cg_cutr && c->nextl->cg_flag&c_cg_cutl ||
              ((c->font|c->font_new)&c_fp_str) && c->nextl->col-(c->col+c->w)<=1
             )
           )
            final_ii_u(c);

        /* language-specific letters whose top lies above base line b1 */
        if (language==LANG_GERMAN && memchr("0AOU",let,4) ||
            language==LANG_SWEDISH && memchr("0AO",let,3) ||
            language==LANG_SPANISH && let=='U' ||
            language==LANG_ESTONIAN && memchr("0AOU",let,4) // 19.07.2001 E.P.
           )
        {
            get_b_lines(c,&bl);
            if (bl.b1>c->row)
                final_AOU_2dot(c);
        }

        /* Swedish 'A' rising well above b1 (by at least max(4, ps/4)) */
        if (language==LANG_SWEDISH && c->vers[0].let=='A')
        {
            get_b_lines(c,&bl);
            if (bl.b1>c->row+MAX(4,bl.ps/4))
                final_A_circle(c);
        }

        /* a wide '-' whose both neighbours are fictitious/space cells (not
           coded 0x1e) or one of ".,:;" becomes '_' */
        if (let=='-' && 3*c->h<c->w &&
            (c->prev->flg&(c_f_fict|c_f_space) && c->prev->vers[0].let!=0x1e ||
             memchr(".,:;",c->prev->vers[0].let,4)) &&
            (c->next->flg&(c_f_fict|c_f_space) && c->next->vers[0].let!=0x1e ||
             memchr(".,:;",c->next->vers[0].let,4)))
            c->vers[0].let='_';

        /* 'F' cut from a following '.'/',' that overlaps it: compose both
           cells and call the result 'E' */
        if (let=='F' && c->cg_flag&c_cg_cutr &&
            memchr(".,",c->next->vers[0].let,2) &&
            c->next->cg_flag&c_cg_cutl && c->col+c->w>c->next->col)
        {
            clist[0]=c;
            clist[1]=c->next;
            compose_cell(2,clist,c);
            c->vers[0].let='E';
            c->left=c->col;
            c->right=c->col+c->w;
        }

        /* two identical V/v cut apart */
        if (memchr("Vv",let,2) && c->cg_flag&c_cg_cutr &&
            c->next->vers[0].let==let && c->next->cg_flag&c_cg_cutl)
            final_vv_w(c);

        /* confident 'd' followed closely by a weak c/i/s/t/x */
        if (let=='d' && c->vers[0].prob>=PROBD &&
            memchr("cistx",c->next->vers[0].let,5) &&
            c->next->vers[0].prob<=PROBB && c->next->col-(c->col+c->w)<=1)
            final_d_chck(c);

        /* apostrophe (0x27) cut from a following 'I'/'l' */
        if (let==0x27 && c->cg_flag&c_cg_cutr &&
            memchr("Il",c->next->vers[0].let,2) &&
            c->next->cg_flag&c_cg_cutl)
        {
            c=c->next; // AL 940321
            final_Il_1(c);
            cnt++;
        }

        /* '7' cut from a following apostrophe */
        /* (`let` still holds the letter read at the top of the iteration,
           even if `c` was advanced just above) */
        if (let=='7' && c->cg_flag&c_cg_cutr &&
            c->next->vers[0].let==0x27 && c->next->cg_flag&c_cg_cutl)
        {
            final_7_T(c);
            cnt++;
        }
    }

    /* something changed: rebuild spaces and redo the line context */
    if (cnt)
    {
        delspace();
        space_size((int16_t)get_size());
        space_cell();
        context_proc_line();
        cont_space();
    }
}
/*
 * Last context pass for strings that look numeric.
 *
 * Steps, in order:
 *   1. cells whose best version has prob == 2 are marked bad;
 *   2. punctuation before the first and after the last letter/bad cell is
 *      deleted (leading '+', '-', '_' are kept);
 *   3. quote-like cells are deleted and letters, digit-like letters, bad
 *      cells and "broken zero" candidates are counted;
 *   4. if the string is judged digital, letter look-alikes are replaced by
 *      the corresponding digits;
 *   5. digital_mode is reset, the full alphabet restored, and bad cells are
 *      re-recognised;
 *   6. "()" pairs are composed into '0';
 *   7. the result is validated.
 *
 * Returns 1 when every cell of the letter chain has a version, no
 * digit-like cell is weaker than 150, and more than half of the cells are
 * digit-like; otherwise 0.
 *
 * `clist` is not declared here; it comes from the enclosing file.
 *
 * NOTE(review): the non-ASCII character and string literals in this
 * function are single-byte letter codes only if the file is compiled in its
 * original 8-bit code page.  In a UTF-8 source they become multi-byte and
 * the comparisons / memchr() calls below no longer test the intended
 * codes - confirm the source encoding used by the build.
 */
Bool digital_last_context(void)
{
    cell *curr,*tmp;
    char punct_list[]="'\"=:";
    char sign_list[]="+-_";
    int16_t l = strlen(punct_list),num,num_dig,num_bad,num_broken;
    uchar c,p;

    if( db_status && snap_activity('c') )
    {
        snap_show_text("DIG : Digital last context");
        snap_monitor();
    }

    /* 1. prob == 2 means "no real answer": make the cell bad */
    tmp = cell_f();
    while( (tmp=tmp->next)->next )
    {
        if( tmp->nvers && tmp->vers[0].prob==2 )
            set_bad_cell(tmp);
    }
    num=num_dig=num_bad=num_broken=0;

    /* 2a. strip punctuation in front of the first letter/bad cell */
    tmp = cell_f();
    while( (tmp=tmp->next)->next && !(tmp->flg&(c_f_let|c_f_bad)) ); /* find let */
    curr = cell_f();
    if( tmp->next )
    {
        while( (curr=curr->next)!=tmp )
            /* NOTE(review): sizeof(sign_list) includes the terminating NUL,
               so a zero letter code also counts as a sign */
            if( (curr->flg&c_f_punct) &&
                !(curr->nvers>0 && memchr(sign_list,curr->vers[0].let,sizeof(sign_list))) )
            {
                curr = del_cell(curr); /* kill first punctuation-cell */
                if( curr->next==NULL )
                    break;
            }
    }

    /* 2b. strip punctuation behind the last letter/bad cell */
    curr = cell_l();
    while( (curr=curr->prev)->prev && !(curr->flg&(c_f_let|c_f_bad)) ); /* find let */
    if( curr->next )
    {
        while( (curr=curr->next)->next )
            if( (curr->flg&c_f_punct) )
            {
                curr = del_cell(curr); /* kill last punctuation-cell */
                if( curr->next==NULL )
                    break;
            }
    }

    /* 3. delete quotes, gather statistics */
    curr = cell_f();
    while( (curr=curr->next)->next )
    {
        if( curr->flg&c_f_bad )
            num_bad++;
        if( !(curr->flg&(c_f_let|c_f_punct)))
            continue;
        c = curr->vers[0].let;
        if( memchr(punct_list,c,l) ||
            c==left_quocket || c==right_quocket || c==low_quotes // macros E.P.
          )
        {
            curr = del_cell(curr);
            if( curr->next==NULL )
                break;
            continue;
        }
        /*
        if( !enable_setup_alphabet && !test_alphabet_elem(c) )
            {
            curr = del_cell(curr);
            if( curr->next==NULL )break;
            continue;
            }
        */
        if( (curr->flg&c_f_let) )
        {
            //if( memchr("0123456789ОЃ°ЗІТВшl",c,19) &&
            if( isLikeDigit(c) &&          // 21.05.2002 E.P.
                curr->vers[0].prob > 150   // Nick 26.02.2001
              )
                num_dig++;
            if( memchr("С",c,1) )
                num_broken++;              /* candidate for a broken zero */
            if( curr->flg&c_f_let )
                num++;
        }
    }

    /* 4. digital string: at least 3/4 of the letters are confident digits,
          or one of the small special cases holds, or it was forced */
    if( digital_mode==2 || num_dig*4>=num*3 ||
        num_dig==2 && num==3 ||
        num_dig==1 && num==1 && num_bad==1 )
    { /* digital string */
        curr = cell_f();
        if( digital_mode==2 )
            /* forced digital mode: drop exclamation marks */
            while( (curr=curr->next)->next )
            {
                if( !(curr->flg&(c_f_let|c_f_punct)))
                    continue;
                c = curr->vers[0].let;
                if ( c==liga_exm || c=='!' )
                    curr=del_cell(curr);
            }

        /* replace letter look-alikes by digits in the best version */
        curr = cell_f();
        while( (curr=curr->next)->next )
        {
            if( curr->nvers<1 )
                continue;
            c = curr->vers[0].let;
            p = curr->vers[0].prob;
            if( c==(uchar)'_' )
                curr->vers[0].let='-' ;
            if( c==(uchar)'ш' )
                curr->vers[0].let='2' ;
            if( c==(uchar)'О' || // 21.05.2002 E.P.
                ( c==(uchar)'Ѓ')&& !is_russian_turkish_conflict(c) )
                curr->vers[0].let='0' ;
            if( c==(uchar)'°' )
                curr->vers[0].let='6' ;
            if( c==(uchar)'З' || c==(uchar)'І')
                curr->vers[0].let='3' ;
            if( c==(uchar)'В' )
                curr->vers[0].let='8' ;
            /* broken zero: only when weak, or when everything else is
               already a digit or another broken zero */
            if( c==(uchar)'С' && (p<190 || num_broken+num_dig==num ) )
                curr->vers[0].let='0' ;
            if( c==(uchar)'Т' || c==liga_exm || c=='!' || c=='|' )
                curr->vers[0].let='1' ;
            if( c==(uchar)'l' )
                curr->vers[0].let='1' ;
            /* a lone '$' becomes '5' with '$' kept as a weaker alternative */
            if( c=='$' && curr->nvers==1 )
            {
                curr->vers[0].let='5';
                curr->vers[1].let='$';
                curr->vers[1].prob=curr->vers[0].prob-10;
                curr->nvers=2;
            }
            if( c=='5' && curr->nvers>2 &&
                curr->vers[1].let==(uchar)'°' && p==curr->vers[1].prob &&
                curr->vers[2].let=='6' && curr->vers[2].prob>150 )
            { /* alternates : °56 -->>CONTEXT-->> 5°6 */
                curr->vers[0].let='6';
                curr->vers[2].let='5';
            }
        }
    }

    /* 5. back to the full alphabet; give bad cells another chance */
    digital_mode=0;
    set_all_alphabet();
    for(curr=cell_f()->nextl;curr!=cell_l();curr=curr->nextl)
    {
        if( curr->flg&c_f_bad )
        {
            s_glue GL={0};
            short_recog_cell(curr);
            if( curr->nvers<1 )
            {
                /* still nothing: describe the single cell as a glue group
                   and try crecell() / dmBOX() */
                GL.celist[0]=curr;
                GL.maxlist[0]=curr;
                GL.complist[0]=curr->env;
                GL.maxlist[1]=GL.celist[1]=NULL;
                GL.complist[1]=NULL;
                GL.ncell=1;
                GL.row = curr->row;
                GL.col = curr->col;
                GL.width = curr->w;
                GL.height = curr->h;
                if( crecell(curr,&GL,3)>=0)
                    dmBOX(curr,&GL);
            }
            criteria(curr);
            levcut(curr,1);
            if( curr->nvers<1 || curr->vers[0].prob<220 )
            { // restore bad symbol
                set_bad_cell(curr);
            }
            if( curr->nvers && curr->vers[0].let=='|' )
            {
                curr->vers[0].let='1';
                // Nick add 26.02.2001 - test artificial '1'
                levcut(curr,1);
                if( curr->nvers<1 ||
                    curr->nvers==1 && curr->vers[0].let!='1' ||
                    curr->vers[0].prob<220 )
                { // restore bad symbol
                    set_bad_cell(curr);
                }
            }
        }
    }

    /* 6. "(" immediately followed by ")" is composed into a single '0' */
    for(curr=cell_f()->nextl;curr!=cell_l();curr=curr->nextl)
    {
        if( curr->nvers && curr->vers[0].let=='(' &&
            curr->next && curr->next->nvers && curr->next->vers[0].let==')' )
        {
            clist[0]=curr;
            clist[1]=curr->next;
            compose_cell(2,clist,curr);
            curr->vers[0].let='0';
            /* NOTE(review): writes vers[1].prob although nvers is set to 1;
               vers[0].prob may have been intended - confirm */
            curr->vers[1].prob=220;
            curr->nvers=1;
        }
    }

    if( db_status && snap_activity('c') )
    {
        snap_show_text("DIG : Digital last context end");
        snap_monitor();
    }

    /* 7. validate the result */
    num_dig = 0; // Nick 26.02.2001
    num = 0;
    for(curr=cell_f()->nextl;curr!=cell_l();curr=curr->nextl,num++)
    {
        // 21.05.2002 E.P.
        /* NOTE(review): vers[0].let is read before nvers is checked */
        Bool likeDigit = isLikeDigit(curr->vers[0].let);
        if( curr->nvers<1 || curr->vers[0].prob<150 && likeDigit)
            return 0; // bad symbols or small probability
        if( likeDigit )
            num_dig++;
    }
    if( num_dig*2 <= num ) // Nick 26.02.2001
        return 0;
    return 1;
}
/*
 * Second-pass digit statistics for one string, accumulated into *str.
 *
 *   str->all       += number of good letter cells that are not punctuation,
 *                     plus every cell flagged both letter and bad
 *   str->dig       += digit-like cells, minus one penalty point for every
 *                     very confident (prob > 240) non-digit that has no
 *                     confident (prob >= 220) digit-like second version
 *   str->fuzzy_dig += cells whose best version is one of "036"
 *
 * digital_mode is reset and the full alphabet restored first.
 * `first_pass` is not used.
 *
 * The membership tests use sizeof() of the lists, so the terminating NUL is
 * part of each set, exactly as before.
 */
void setup_digital_mode_pass2(str_info *str, int16_t first_pass)
{
    char fuzzy_list[] = "036";
    /* letters_ini turns punctuation into letters, so filter it here;
       (){}[]/ are deliberately not in the list */
    char punct_list[] = "'\"=:!Љ?\\.,-";
    int16_t total = 0, digits = 0, fuzzy = 0;
    cell *cur;

    digital_mode = 0;
    set_all_alphabet();

    for (cur = cell_f()->next; cur->next; cur = cur->next) {
        if (!(cur->flg & c_f_let))
            continue;

        if (cur->flg & c_f_bad) {
            /* bad letters only count towards the total */
            total++;
            continue;
        }

        const uchar let  = cur->vers[0].let;
        const uchar prob = cur->vers[0].prob;

        if (memchr(punct_list, let, sizeof(punct_list)))
            continue;
        total++;

        if (isLikeDigit(let)) {
            digits++;
        } else if (prob > 240 &&                       /* Nick 11.05.2001 */
                   (cur->nvers < 2 ||
                    !isLikeDigit(cur->vers[1].let) ||
                    cur->vers[1].prob < 220)) {
            digits--;                                  /* penalty against "digital" */
        }

        if (memchr(fuzzy_list, let, sizeof(fuzzy_list)))
            fuzzy++;
    }

    str->all       += total;
    str->dig       += digits;
    str->fuzzy_dig += fuzzy;
}
/*
 * Decide whether the current string should be recognised in digital mode.
 *
 *   str        - per-column statistics; alphabet, dig, all and dollars are
 *                read here
 *   first_pass - non-zero lets a "digital" verdict through without the
 *                additional majority checks
 *
 * Sets the global digital_mode to 0 (not digital), 1 (fuzzy digital) or
 * 2 (digital).  In mode 2 the digital alphabet is installed, '$' is added
 * when the column contains dollars, digital_string_penalty is set, and
 * every non-digit letter cell is penalised with down_all_versions(.., 100).
 *
 * The membership tests use sizeof() of the lists, so the terminating NUL is
 * part of each set.
 */
void setup_digital_mode(str_info *str, int16_t first_pass)
{
    cell *curr;
    int16_t num_dig,num_let,num_bad,num_all, num_dig_let;
    int16_t num_asOne;
    //char dig_list[]="|1234567890°ОЃЗІВш¢",
    char dig_let_list[]="036",let_dig_list[]="В¢ш";
    //char punct_list[]="'\"=:!(){}[]!Љ?\\/";
    char punct_list[]="'\"=:;";
    char asOne_list[]="1[]/!|";   /* shapes that all look like a stick */
    uchar c,p;

    digital_mode=0;
    set_all_alphabet();

    /* ---- gather statistics over letter and bad cells ---- */
    curr = cell_f();
    num_all=num_dig=num_let=num_bad=num_dig_let=0;
    num_asOne = 0;
    while( (curr=curr->next)->next )
    {
        if( !(curr->flg&(c_f_let|c_f_bad)) )
            continue;
        num_all++;
        if( (curr->flg&c_f_bad) )
            num_bad++;
        else
        {
            c = curr->vers[0].let;
            p = curr->vers[0].prob;
            if( memchr(punct_list,c,sizeof(punct_list)) )
            {
                /* punctuation does not take part in the statistics */
                num_all--;
                continue;
            }
            if( memchr(asOne_list,c,sizeof(asOne_list)) )
            {
                num_asOne++;
            }
            /* confident non-digit */
            if( !isLikeDigit(c) && p>220 ) // 21.05.2002 E.P.
                num_let++;
            if( isLikeDigit(c) )
            {
                num_dig++;
                /* digit-like, but one of the letters in let_dig_list and
                   confident: counts as a letter too */
                if( memchr(let_dig_list,c,sizeof(let_dig_list)) && p>220 )
                    num_let++;
            }
            if( memchr(dig_let_list,c,sizeof(dig_let_list)) )
                num_dig_let++;
        }
    }

    /* ---- verdict ---- */
    if( str->alphabet==ALPHA_ALL )
    {
        int16_t dig=0;   /* tentative verdict: 0, 1 (fuzzy) or 2 (digital) */
        switch( num_all )
        {
            case 0 :
                break;
            case 1 :
                if( num_dig>0 && num_dig > num_asOne )
                    dig = 2;
                break;
            case 2 :
                if( num_dig>0 && num_dig > num_asOne && num_let==0 )
                    dig = 2;
                break;
            case 3 :
                if( num_dig>1 && num_dig > num_asOne && num_let==0 )
                    dig = 2;
                break;
            case 4 :
                if( num_dig>2 && num_let==0 && num_dig > num_asOne )
                    dig = 2;
                break;
            default :
                if( num_dig>MAX(2,(3*num_all)/4) && num_let==0 ||
                    num_dig>0 && num_dig > num_asOne && num_bad==0 && num_let==0 )
                    dig = 2;
                else if( num_dig*10>num_all*6 && num_all>5 /*&& num_bad*/)
                    dig = 1;
                break;
        }
        if( dig==1 && num_dig*10>num_all*6 && num_all>5 /*&& num_bad*/)
        {
            digital_mode=1;
        }
        /* a "digital" verdict is accepted on the first pass, for a long
           string with a digit majority, or when more than a quarter of the
           column's strings were digital */
        if( dig==2 &&
            (num_dig>num_all/2&&num_all>5 || str->dig*4>str->all || first_pass) )
        {
            digital_mode=2;
            set_digital_alphabet();
            if( str->dollars )
                add_alphabet_elem('$');
        }
    }
    else if (str->alphabet==ALPHA_DIGITAL || str->alphabet==ALPHA_DIGITAL_TRUE)
    {
        if( num_dig>=MAX(1,num_all/2) && num_let<MAX(1,num_all/4) ||
            num_dig>0 && num_bad==0 && num_let==0)
        {
            digital_mode=2;
            set_digital_alphabet();
            if( str->dollars )
                add_alphabet_elem('$');
        }
    }

    /* ---- digital mode: adjust the alphabet and penalise letters ---- */
    if( digital_mode==2 )
    {
        digital_string_penalty = (num_dig_let!=num_dig)?254:20;
        for(curr = cell_f();curr!=cell_l();curr=curr->nextl)
        {
            if( !(curr->flg&c_f_let) )
                continue;
            /* punctuation and stick-like symbols other than '1' are kept;
               confident ones (except parentheses) join the alphabet */
            if( curr->nvers>0 &&
                ( memchr(punct_list,curr->vers[0].let,sizeof(punct_list)) ||
                  curr->vers[0].let != '1' &&
                  memchr(asOne_list,curr->vers[0].let,sizeof(asOne_list)) ) )
            {
                if( curr->vers[0].prob>220 && !memchr("()",curr->vers[0].let,2) )
                    add_alphabet_elem(curr->vers[0].let);
                continue;
            }
            if( !is_digit(curr->vers[0].let) )
                down_all_versions(curr,100);
        }
    }
    return;
}
/*
 * Classify the current string (digital / fuzzy digital / plus-minus /
 * unknown) and update the statistics of its column.
 *
 *   str    - array of per-column statistics; only str[column] is touched
 *   column - index of the column the string belongs to
 *   ln     - CSTR line of the string; used for the plus/minus case
 *
 * str[column].all is always incremented.  Depending on the verdict of
 * setup_digital_mode() one of dig / fuzzy_dig is incremented; otherwise a
 * string consisting of a single cell is tested for being '-' or '+', and on
 * success pm is incremented and the result is stored into the CSTR line.
 * dollars is incremented when a confident '$' is present.
 *
 * When _USE_LEO_ is defined, unrecognised roughly square cells are first
 * offered to the LEO 3x5 recogniser with a digits-only alphabet.
 */
void save_alphabet_information(str_info *str,int16_t column,CSTR_line ln)
{
    uchar buf[256]={0};     /* alphabet table first, debug text later */
    cell * c;
    RecObject ro={0};
    CSTR_rast rst;
    RecRaster rrst;
    RecVersions ver;
    LeoFieldSetup fs={0};
    CSTR_attr lattr;
    CSTR_rast_attr attr;
    UniVersions uvs;

    if( db_status && snap_activity('n') )
    {
        snap_show_text("DIG : start selection");
        snap_monitor();
    }
    str[column].all++;

    /* alphabet table with only the ten digits enabled */
    fs.nStyle = LS_PRINT;
    buf['0']=buf['1']=buf['2']=buf['3']=buf['4']=
    buf['5']=buf['6']=buf['7']=buf['8']=buf['9']=1;
    memcpy(fs.AlphaTable,buf,256);
#ifdef _USE_LEO_
    LEOSetupField(&fs);
    for(c=cell_f()->nextl;c!=cell_l();c=c->nextl)
    {
        /* no versions yet, and neither side more than twice the other */
        if( c->nvers==0 && c->w*2>c->h && c->h*2>c->w )
        {
            puchar wr;
            // to work_rst
            wr=save_raster_align8(c);
            // rst -->> rrst
            rrst.lnPixHeight = c->h;
            rrst.lnPixWidth = c->w;
            rrst.lnRasterBufSize = REC_MAX_RASTER_SIZE;
            /* rows are padded to a multiple of 64 pixels (8 bytes) */
            memcpy(rrst.Raster,wr,((c->w+63)/64)*8*c->h);
            /* NOTE(review): copies REC_MAX_RASTER_SIZE bytes from the start
               of the RecRaster structure, not sizeof(rrst) - confirm */
            memcpy(&ro.recData.recRaster,&rrst,REC_MAX_RASTER_SIZE);
            LEORecogPrnMethod( &ro , REC_METHOD_3X5, 1);
            memcpy(&ver,&ro.recResults,sizeof(ver));
            if( ver.lnAltCnt && ver.Alt[0].Prob>220 )
            {
                /* accept the confident answer as the only version */
                c->nvers=1;
                c->vers[0].let=ver.Alt[0].Code;
                c->vers[0].prob=ver.Alt[0].Prob;
                c->vers[1].let=0;
                c->flg=c_f_let;
            }
        }
    }
#endif
    setup_digital_mode(&str[column], 1);

    /* remember that this column contains a confident '$' */
    for(c=cell_f()->next;c!=cell_l();c=c->next)
        if( c->nvers>0 && c->vers[0].let=='$' && c->vers[0].prob>220 )
        {
            str[column].dollars++;
            break;
        }

    if( digital_mode==2 )
    {
        str[column].dig++;
        if( db_status && snap_activity('n') )
        {
            sprintf(buf,"DIG : digital str, col %d, all %d, dig %d, plus_minus %d" ,
                    column, str[column].all,str[column].dig,str[column].pm);
            snap_show_text(buf);
            snap_monitor();
        }
    }
    else if( digital_mode==1 )
    {
        str[column].fuzzy_dig++;
        if( db_status && snap_activity('n') )
        {
            sprintf(buf,"DIG : fuzzy digital str, col %d, all %d, dig %d, plus_minus %d" ,
                    column, str[column].all,str[column].dig,str[column].pm);
            snap_show_text(buf);
            snap_monitor();
        }
    }
    else
    {
        plusminus_mode=0;
        /* exactly one cell between the sentinels */
        if( cell_f()->next && cell_f()->next==cell_l()->prev )
        {
            c=cell_f()->next;
            rst=CSTR_GetNextRaster(CSTR_GetFirstRaster(ln),CSTR_f_bad|CSTR_f_let);
            if( rst && CSTR_GetImage(rst,(uchar *)&rrst,CSTR_TYPE_IMAGE_RS))
            {
                /* very flat cell: a minus sign */
                if( c->w>c->h*5 ||
                    c->w>c->h*3 && c->env && c->env->nl == 1 // Nick 18.01.2002
                  )
                {
                    c->nvers=1;
                    c->vers[0].let='-';
                    c->vers[0].prob=254;
                    ver.Alt[0].Code='-';
                    ver.Alt[0].Prob=255;
                    ver.lnAltCnt =1;
                    CSTR_StoreCollection(rst,&ver);
                    CSTR_GetAttr(rst,&attr);
                    //attr.keg=Ps<<line_scale;
                    CSTR_SetAttr(rst,&attr);
                    CSTR_GetLineAttr(ln,&lattr);
                    lattr.Flags |= CSTR_STR_ReadyResults ;
                    CSTR_SetLineAttr(ln,&lattr);
                    plusminus_mode=1;
                }
                /* nearly square cell: possibly a plus sign */
                else if( abs(c->w-c->h)<5 )
                {
#ifdef _USE_LEO_
                    /* allow '+' in addition to the digits and ask LEO */
                    fs.nStyle = LS_PRINT;
                    buf['+']=1;
                    memcpy(fs.AlphaTable,buf,256);
                    LEOSetupField(&fs);
                    memcpy(&ro.recData.recRaster,&rrst,REC_MAX_RASTER_SIZE);
                    LEORecogPrnMethod( &ro , REC_METHOD_3X5, 1);
                    memcpy(&ver,&ro.recResults,sizeof(ver));
                    if( ver.lnAltCnt && ver.Alt[0].Code=='+' && ver.Alt[0].Prob>220 )
                    {
                        c->nvers=1;
                        c->vers[0].let='+';
                        c->vers[0].prob=254;
                        ver.lnAltCnt =1;
                        CSTR_StoreCollection(rst,&ver);
                        CSTR_GetAttr(rst,&attr);
                        //attr.keg=Ps<<line_scale;
                        CSTR_SetAttr(rst,&attr);
                        CSTR_GetLineAttr(ln,&lattr);
                        lattr.Flags |= CSTR_STR_ReadyResults ;
                        CSTR_SetLineAttr(ln,&lattr);
                        plusminus_mode=1;
                    }
#endif
                }
            }
        }
        if( plusminus_mode )
        {
            str[column].pm++;
            /* stamp the stored version with the code page of the language */
            rst=CSTR_GetNextRaster(CSTR_GetFirstRaster(ln),CSTR_f_bad|CSTR_f_let);
            CSTR_GetCollectionUni(rst,&uvs);
            if( uvs.lnAltCnt )
            {
                uvs.Alt[0].Charset=CodePages[language];
                CSTR_StoreCollectionUni(rst,&uvs);
            }
            if( db_status && snap_activity('n') )
            {
                sprintf(buf,"DIG : plus_minus str , col %d, all %d, dig %d, plus_min %d" ,
                        column, str[column].all,str[column].dig,str[column].pm);
                snap_show_text(buf);
                snap_monitor();
            }
        }
        else
        {
            if( db_status && snap_activity('n') )
            {
                sprintf(buf,"DIG : unknown alphabet , col %d, all %d, dig %d, plus_min %d" ,
                        column, str[column].all,str[column].dig,str[column].pm);
                snap_show_text(buf);
                snap_monitor();
            }
        }
    }
    return;
}