/*
 * Decide whether the configured phonetic keyboard is a pinyin layout.
 *
 * Fetches the PHONETIC_KEYBOARD setting through get_hime_conf_fstr()
 * ("zo-asdf" is handed over as the third argument -- presumably the
 * fallback value; confirm against get_hime_conf_fstr) and records in the
 * global b_pinyin whether that string contains the substring "pinyin".
 * When it does, load_pin_juyin() is called before returning.
 *
 * Returns the value of b_pinyin as read after the optional
 * load_pin_juyin() call.
 */
gboolean is_pinyin_kbm()
{
  char keyboard_name[32];

  get_hime_conf_fstr(PHONETIC_KEYBOARD, keyboard_name, "zo-asdf");

  /* A layout counts as pinyin when its name mentions "pinyin" anywhere. */
  b_pinyin = strstr(keyboard_name, "pinyin") != NULL;

  /* Nothing more to do for non-pinyin layouts. */
  if (!b_pinyin)
    return b_pinyin;

  load_pin_juyin();
  return b_pinyin;
}
int main(int argc, char **argv) { FILE *fp,*fw; char s[1024]; u_char chbuf[MAX_PHRASE_LEN * CH_SZ]; u_short phbuf[80]; u_int phbuf32[80]; u_int64_t phbuf64[80]; int i,j,idx,len, ofs; u_short kk; u_int64_t kk64; int hashidx[TSIN_HASH_N]; u_char clen; int lineCnt=0; gboolean reload = getenv("HIME_NO_RELOAD")==NULL; if (reload) { dbg("need reload\n"); } else { dbg("NO_GTK_INIT\n"); } if (getenv("NO_GTK_INIT")==NULL) gtk_init(&argc, &argv); dbg("enter %s\n", argv[0]); if (argc < 2) p_err("must specify input file"); init_TableDir(); if ((fp=fopen(argv[1], "rb"))==NULL) { printf("Cannot open %s\n", argv[1]); exit(-1); } skip_utf8_sigature(fp); char *outfile; int fofs = ftell(fp); myfgets(s, sizeof(s), fp); if (strstr(s, "!!pinyin")) { b_pinyin = TRUE; printf("is pinyin\n"); load_pin_juyin(); } else fseek(fp, fofs, SEEK_SET); fofs = ftell(fp); int keybits=0, maxkey=0; char keymap[128]; char kno[128]; bzero(kno, sizeof(kno)); myfgets(s, sizeof(s), fp); puts(s); if (strstr(s, TSIN_GTAB_KEY)) { is_gtab = TRUE; lineCnt++; if (argc < 3) p_err("useage %s input_file output_file", argv[0]); outfile = argv[2]; len=strlen((char *)s); if (s[len-1]=='\n') s[--len]=0; char aa[128]; keymap[0]=' '; sscanf(s, "%s %d %d %s", aa, &keybits, &maxkey, keymap+1); for(i=0; keymap[i]; i++) kno[keymap[i]]=i; if (maxkey * keybits > 32) gtabkey64 = TRUE; } else { if (argc==3) outfile = argv[2]; else outfile = "tsin32"; fseek(fp, fofs, SEEK_SET); } INMD inmd, *cur_inmd = &inmd; char *cphbuf; if (is_gtab) { cur_inmd->keybits = keybits; if (gtabkey64) { cphbuf = (char *)phbuf64; phsz = 8; key_cmp = key_cmp64; hash_shift = TSIN_HASH_SHIFT_64; cur_inmd->key64 = TRUE; } else { cphbuf = (char *)phbuf32; phsz = 4; hash_shift = TSIN_HASH_SHIFT_32; key_cmp = key_cmp32; cur_inmd->key64 = FALSE; } cur_inmd->last_k_bitn = (((cur_inmd->key64 ? 
64:32) / cur_inmd->keybits) - 1) * cur_inmd->keybits; dbg("cur_inmd->last_k_bitn %d\n", cur_inmd->last_k_bitn); } else { cphbuf = (char *)phbuf; phsz = 2; key_cmp = key_cmp16; hash_shift = TSIN_HASH_SHIFT; } dbg("phsz: %d\n", phsz); phcount=ofs=0; while (!feof(fp)) { usecount_t usecount=0; lineCnt++; myfgets((char *)s,sizeof(s),fp); len=strlen((char *)s); if (s[0]=='#') continue; if (strstr(s, TSIN_GTAB_KEY)) continue; if (s[len-1]=='\n') s[--len]=0; if (len==0) continue; i=0; int chbufN=0; int charN = 0; while (s[i]!=' ' && i<len) { int len = utf8_sz((char *)&s[i]); memcpy(&chbuf[chbufN], &s[i], len); i+=len; chbufN+=len; charN++; } while ((i < len && s[i]==' ') || s[i]=='\t') i++; int phbufN=0; while (i<len && phbufN < charN && s[i]!=' ') { if (is_gtab) { kk64=0; int idx=0; while (s[i]!=' ' && i<len) { int k = kno[s[i]]; kk64|=(u_int64_t)k << ( LAST_K_bitN - idx*keybits); i++; idx++; } if (phsz==8) phbuf64[phbufN++]=kk64; else phbuf32[phbufN++]=(u_int)kk64; } else { kk=0; if (b_pinyin) { kk = pinyin2phokey(s+i); while (s[i]!=' ' && i<len) i++; } else { while (s[i]!=' ' && i<len) { if (kk==(BACK_QUOTE_NO << 9)) kk|=s[i]; else kk |= lookup((u_char *)&s[i]); i+=utf8_sz((char *)&s[i]); } } phbuf[phbufN++]=kk; } i++; } if (phbufN!=charN) { p_err("%s Line %d problem in phbufN!=chbufN %d != %d\n", s, lineCnt, phbufN, chbufN); } clen=phbufN; while (i<len && s[i]==' ') i++; if (i==len) usecount = 0; else usecount = atoi((char *)&s[i]); /* printf("len:%d\n", clen); */ if (phcount >= phidxsize) { phidxsize+=1024; if (!(phidx=(int *)realloc(phidx,phidxsize*4))) { puts("realloc err"); exit(1); } } phidx[phcount++]=ofs; int new_bfN = ofs + 1 + sizeof(usecount_t)+ phsz * clen + chbufN; if (bfsize < new_bfN) { bfsize = new_bfN + 1024*1024; bf = (char *)realloc(bf, bfsize); } memcpy(&bf[ofs++],&clen,1); memcpy(&bf[ofs],&usecount, sizeof(usecount_t)); ofs+=sizeof(usecount_t); memcpy(&bf[ofs], cphbuf, clen * phsz); ofs+=clen * phsz; memcpy(&bf[ofs], chbuf, chbufN); ofs+=chbufN; } 
fclose(fp); /* dumpbf(bf,phidx); */ puts("Sorting ...."); qsort(phidx,phcount, sizeof(phidx[0]),qcmp); if (!(sf=(u_char *)malloc(bfsize))) { puts("malloc err"); exit(1); } if (!(sidx=(int *)malloc(phidxsize*sizeof(int)))) { puts("malloc err"); exit(1); } // delete duplicate ofs=0; j=0; for(i=0;i<phcount;i++) { idx = phidx[i]; sidx[j]=ofs; len=bf[idx]; int tlen = utf8_tlen(&bf[idx + 1 + sizeof(usecount_t) + phsz*len], len); clen= phsz*len + tlen + 1 + sizeof(usecount_t); if (i && !qcmp_eq(&phidx[i-1], &phidx[i])) continue; memcpy(&sf[ofs], &bf[idx], clen); j++; ofs+=clen; } phcount=j; #if 1 puts("Sorting by usecount ...."); qsort(sidx, phcount, 4, qcmp_usecount); #endif for(i=0;i<256;i++) hashidx[i]=-1; for(i=0;i<phcount;i++) { idx=sidx[i]; idx+= 1 + sizeof(usecount_t); int v; if (phsz==2) { phokey_t kk; memcpy(&kk, &sf[idx], phsz); v = kk >> TSIN_HASH_SHIFT; } else if (phsz==4) {