/*
 * "Add" button handler.
 *
 * Walks current_str one UTF-8 character at a time, storing the key sequences
 * found for each character in successive bigpho[] entries (bigphoN is reset
 * to 0 first).  If any character yields no key sequence the handler logs a
 * debug message and returns without touching the widgets.  Otherwise a new
 * selection area is created and packed into hbox_buttons.
 *
 * Neither parameter is used.
 */
static void cb_button_add(GtkButton *button, gpointer user_data)
{
    char *cursor;

    bigphoN = 0;

    for (cursor = current_str; *cursor != 0; ) {
        char_pho *slot = &bigpho[bigphoN];
        bigphoN++;

        /* ph_key_sz == 2 selects utf8_pho_keys(); any other size uses the gtab lookup. */
        if (ph_key_sz != 2)
            slot->phokeysN = lookup_gtab_key(cursor, slot->phokeys);
        else
            slot->phokeysN = utf8_pho_keys(cursor, (phokey_t *)slot->phokeys);

        cursor += utf8_sz(cursor);

        if (slot->phokeysN)
            continue;

        dbg(" no mapping to pho\n");
        return;
    }

    GtkWidget *sel_area = create_pho_sel_area();
    gtk_box_pack_start(GTK_BOX(hbox_buttons), sel_area, FALSE, FALSE, 20);
    gtk_widget_show_all(hbox_buttons);
}
/*
 * Looks up the leading UTF-8 character of s in the pho_chars[] tables.
 *
 * s   : text whose first character is looked up
 * num : receives the position of the character inside its table
 *       (byte offset divided by 3)
 * typ : receives the index (0..3) of the table that contained it
 *
 * Returns TRUE when *num / *typ were filled in and FALSE when the character
 * is in none of the four tables.  Two special cases come first:
 *   - "1" maps to num 0, typ 3;
 *   - any other byte without the high bit returns (*s - '0') directly and
 *     leaves *num and *typ untouched.
 */
int pho_lookup(char *s, char *num, char *typ)
{
    char needle[CH_SZ + 1];
    char *hit = NULL;
    int nbytes = utf8_sz(s);
    int table = 0;

    if (utf8_eq(s, "1")) {
        *num = 0;
        *typ = 3;
        return TRUE;
    }

    if ((*s & 0x80) == 0)
        return *s - '0';

    bchcpy(needle, s);
    needle[nbytes] = 0;

    while (table < 4) {
        hit = strstr(pho_chars[table], needle);
        if (hit)
            break;
        table++;
    }

    if (hit == NULL)
        return FALSE;

    *typ = table;
    *num = (hit - pho_chars[table]) / 3;
    return TRUE;
}
/*
 * Renders every key sequence of every character in u8_str into out.
 *
 * For each UTF-8 character, all keys returned by utf8_pho_keys() are
 * converted with phokey_to_str() and joined with single spaces; the groups
 * belonging to consecutive characters are joined with " | ".
 *
 * out must be large enough for the result; no bound is checked here.
 */
void str_to_all_phokey_chars(char *u8_str, char *out)
{
    *out = 0;

    for (; *u8_str != 0; ) {
        phokey_t keys[32];
        int count = utf8_pho_keys(u8_str, keys);
        int k;

        for (k = 0; k < count; k++) {
            if (k > 0)
                strcat(out, " ");
            strcat(out, phokey_to_str(keys[k]));
        }

        u8_str += utf8_sz(u8_str);

        /* separator only between characters, never after the last one */
        if (*u8_str != 0)
            strcat(out, " | ");
    }
}
/*
 * Expands a packed key value into the concatenated names of its keys.
 *
 * key is treated as a sequence of fields, KeyBits1(tinmd) bits each, scanned
 * from the highest slot (Max_tab_key_num1(tinmd) - 1) downwards; scanning
 * stops at the first field that is zero after masking with tinmd->kmask.
 *
 * t     : receives the NUL-terminated concatenation of the key names
 * rtlen : receives the number of bytes written to t (terminator excluded)
 *
 * Returns the number of key names appended.
 */
int gtab_key2name(INMD *tinmd, u_int64_t key, char *t, int *rtlen)
{
    int written = 0;
    int names = 0;
    int slot = Max_tab_key_num1(tinmd) - 1;

    while (slot >= 0) {
        int shift = slot * KeyBits1(tinmd);
        int code = (key >> shift) & tinmd->kmask;
        char *name;
        int nbytes;

        if (code == 0)
            break;

        if (!tinmd->keyname_lookup) {
            name = (char *)&tinmd->keyname[code * CH_SZ];
            /* multi-byte name: one UTF-8 character; otherwise a plain C string */
            if (*name & 0x80)
                nbytes = utf8_sz(name);
            else
                nbytes = strlen(name);
        } else {
            /* lookup table entries are copied as a single byte */
            name = (char *)&tinmd->keyname_lookup[code];
            nbytes = 1;
        }

        memcpy(t + written, name, nbytes);
        written += nbytes;
        names++;
        slot--;
    }

    t[written] = 0;
    *rtlen = written;
    return names;
}
void load_ts_phrase() { FILE *fp = tsin_hand.fph; int i; for(i=0; i < phraseN; i++) free(phrase[i]); free(phrase); phrase = NULL; phraseN = 0; dbg("fname %s\n", current_tsin_fname); int ofs = is_gtab ? sizeof(TSIN_GTAB_HEAD):0; fseek(fp, ofs, SEEK_SET); while (!feof(fp)) { u_int64_t phbuf[MAX_PHRASE_LEN]; char chbuf[MAX_PHRASE_LEN * CH_SZ + 1]; u_char clen; usecount_t usecount; clen = 0; fread(&clen,1,1,fp); if (clen > MAX_PHRASE_LEN) p_err("bad tsin db clen %d > MAX_PHRASE_LEN %d\n", clen, MAX_PHRASE_LEN); fread(&usecount,sizeof(usecount_t), 1, fp); fread(phbuf, ph_key_sz, clen, fp); int tlen = 0; for(i=0; i < clen; i++) { int n = fread(&chbuf[tlen], 1, 1, fp); if (n<=0) goto stop; int len=utf8_sz(&chbuf[tlen]); fread(&chbuf[tlen+1], 1, len-1, fp); tlen+=len; } if (clen < 2) continue; chbuf[tlen]=0; phrase = trealloc(phrase, char *, phraseN+1); phrase[phraseN++] = strdup(chbuf); } stop: // fclose(fp); qsort(phrase, phraseN, sizeof(char *), qcmp_str); dbg("phraseN: %d\n", phraseN); }
void load_ts_phrase() { FILE *fp = tsin_hand.fph; int i; dbg("fname %s\n", current_tsin_fname); int ofs = is_gtab ? sizeof(TSIN_GTAB_HEAD):0; fseek(fp, ofs, SEEK_SET); tsN=0; free(ts_idx); ts_idx=NULL; while (!feof(fp)) { ts_idx = trealloc(ts_idx, int, tsN); ts_idx[tsN] = ftell(fp); u_int64_t phbuf[MAX_PHRASE_LEN]; char chbuf[MAX_PHRASE_LEN * CH_SZ + 1]; u_char clen; usecount_t usecount; clen = 0; fread(&clen,1,1,fp); if (clen > MAX_PHRASE_LEN) { box_warn("bad tsin db clen %d > MAX_PHRASE_LEN %d\n", clen, MAX_PHRASE_LEN); break; } fread(&usecount,sizeof(usecount_t), 1, fp); fread(phbuf, ph_key_sz, clen, fp); int tlen = 0; for(i=0; i < clen; i++) { int n = fread(&chbuf[tlen], 1, 1, fp); if (n<=0) goto stop; int len=utf8_sz(&chbuf[tlen]); fread(&chbuf[tlen+1], 1, len-1, fp); tlen+=len; } if (clen < 2) continue; chbuf[tlen]=0; tsN++; } page_ofs = tsN - PAGE_LEN; if (page_ofs < 0) page_ofs = 0; stop: dbg("load_ts_phrase\n"); // fclose(fp); }
/*
 * Sends the single UTF-8 character that starts at bchar through send_text().
 * The character is copied into a NUL-terminated scratch buffer first, so
 * bchar itself need not be terminated after that character.
 */
void send_utf8_ch(char *bchar)
{
    char one_char[CH_SZ + 1];
    int nbytes = utf8_sz(bchar);

    one_char[nbytes] = 0;
    memcpy(one_char, bchar, nbytes);

    send_text(one_char);
}
int main() { FILE *fp; char fnamein[]="pin-juyin.src"; PIN_JUYIN pinju[1024]; short pinjuN=0; if ((fp=fopen(fnamein, "r"))==NULL) p_err("cannot open %s", fnamein); while (!feof(fp)) { char tt[128]; tt[0]=0; fgets(tt, sizeof(tt), fp); if (strlen(tt) < 3) break; char pin[16], ju[64]; bzero(pin, sizeof(pin)); sscanf(tt, "%s %s",pin, ju); phokey_t kk=0; int len = strlen(ju); int i=0; while (i<len) { kk |= lookup((u_char *)&ju[i]); i+=utf8_sz(&ju[i]); } // dbg("%s '%s' %d\n", pin, ju, kk); memcpy(pinju[pinjuN].pinyin, pin, sizeof(pinju[0].pinyin)); pinju[pinjuN].key = kk; pinjuN++; } fclose(fp); dbg("zz pinjuN:%d\n", pinjuN); qsort(pinju, pinjuN, sizeof(PIN_JUYIN), qcmp_str); char fnameout[]="pin-juyin.xlt"; if ((fp=fopen(fnameout, "wb"))==NULL) p_err("cannot create %s", fnameout); fwrite(&pinjuN, sizeof(pinjuN), 1, fp); fwrite(pinju, sizeof(PIN_JUYIN), pinjuN, fp); fclose(fp); return 0; }
/*
 * Copies whole UTF-8 characters from s into t and NUL-terminates t.
 *
 * t     : destination buffer
 * tsize : size of t in bytes
 * s     : NUL-terminated source
 *
 * Copying stops before the first character that would either extend past
 * the end of s (a truncated trailing sequence) or bring the copied length to
 * tsize - 1 bytes or more.
 */
void utf8ncpy(char *t, int tsize, char *s)
{
    int src_len = strlen(s);
    int done = 0;   /* bytes copied so far; also the read offset into s */

    while (s[done] != 0) {
        char step = utf8_sz(s + done);
        int after = done + step;

        if (!(after <= src_len && after < tsize - 1))
            break;   /* incomplete character, or no room left */

        memcpy(&t[done], &s[done], step);
        done = after;
    }

    t[done] = 0;
}
/*
 * Calls add_ch_time() once for every multi-byte UTF-8 character in s; bytes
 * without the high bit set are skipped.  Afterwards the statistics display
 * is refreshed when stat_enabled is set.
 */
void add_ch_time_str(char *s)
{
    int total = strlen(s);
    int pos;

    for (pos = 0; pos < total; ) {
        if (s[pos] & 0x80) {
            pos += utf8_sz(s + pos);
            add_ch_time();
        } else {
            pos++;
        }
    }

    if (stat_enabled)
        disp_stat();
}
/*
 * Writes into outstr the key sequences for every character of instr.
 *
 * For the PHO and TSIN method types the work is delegated to
 * str_to_all_phokey_chars().  For any other method type each character is
 * passed to lookup_gtab_out() and the per-character results are joined with
 * " | ".
 *
 * outstr must be large enough for the result; no bound is checked here.
 */
static void sym_lookup_key(char *instr, char *outstr)
{
    if (current_method_type() == method_type_PHO ||
        current_method_type() == method_type_TSIN) {
        str_to_all_phokey_chars(instr, outstr);
        return;
    }

    *outstr = 0;

    for (; *instr != 0; ) {
        char piece[512];

        piece[0] = 0;
        lookup_gtab_out(instr, piece);
        strcat(outstr, piece);

        instr += utf8_sz(instr);

        /* separator only between characters, never after the last one */
        if (*instr != 0)
            strcat(outstr, " | ");
    }
}
/*
 * Converts the first strN bytes of str character by character using the
 * table file named fname.
 *
 * fname : table name, resolved through get_sys_table_file_name()
 * str   : UTF-8 input
 * strN  : number of input bytes to convert
 * out   : receives a newly allocated, NUL-terminated result
 *
 * Returns the length of the result in bytes.  With strN == 0 the table is
 * not opened at all: *out is a strdup() of str and 0 is returned.
 *
 * The table is opened into the file-level fp and its record count (file
 * size / sizeof(T2S)) stored in the file-level N before the k_lookup()
 * calls; fp is closed again before returning.
 */
static int translate(char *fname, char *str, int strN, char **out)
{
    char fullname[128];
    struct stat st;

    if (strN == 0) {
        *out = strdup(str);
        return 0;
    }

    get_sys_table_file_name(fname, fullname);

    fp = fopen(fullname, "rb");
    if (fp == NULL)
        p_err("cannot open %s %s", fname, fullname);

    stat(fullname, &st);
    N = st.st_size / sizeof(T2S);

    char *result = NULL;
    int result_len = 0;
    char *stop = str + strN;
    char *cur;

    for (cur = str; cur < stop; cur += utf8_sz(cur)) {
        /* keep 5 spare bytes: room for one converted character plus the terminator */
        result = (char *)realloc(result, result_len + 5);
        result_len += k_lookup(cur, result + result_len);
    }

    fclose(fp);

    *out = result;
    result[result_len] = 0;
    return result_len;
}
/*
 * "Add" button handler.
 *
 * Copies the text currently selected in `buffer` into current_str; without a
 * selection nothing happens.  It then walks current_str one UTF-8 character
 * at a time, storing the key sequences found for each character in
 * successive bigpho[] entries (bigphoN is reset to 0 first).  If any
 * character yields no key sequence the handler logs a debug message and
 * returns.  Otherwise a new selection area is created and packed into
 * hbox_buttons.
 *
 * Neither parameter is used.
 */
static void cb_button_add(GtkButton *button, gpointer user_data)
{
    GtkTextIter sel_begin, sel_end;
    char *cursor;

    if (!gtk_text_buffer_get_selection_bounds(buffer, &sel_begin, &sel_end))
        return;

    char *selected = gtk_text_buffer_get_text(buffer, &sel_begin, &sel_end, FALSE);
    strcpy(current_str, selected);
    g_free(selected);

    bigphoN = 0;

    for (cursor = current_str; *cursor != 0; ) {
        char_pho *slot = &bigpho[bigphoN];
        bigphoN++;

        /* ph_key_sz == 2 selects utf8_pho_keys(); any other size uses the gtab lookup. */
        if (ph_key_sz != 2)
            slot->phokeysN = lookup_gtab_key(cursor, slot->phokeys);
        else
            slot->phokeysN = utf8_pho_keys(cursor, (phokey_t *)slot->phokeys);

        cursor += utf8_sz(cursor);

        if (slot->phokeysN)
            continue;

        dbg(" no mapping to pho\n");
        return;
    }

    GtkWidget *sel_area = create_pho_sel_area();
    gtk_box_pack_start(GTK_BOX(hbox_buttons), sel_area, FALSE, FALSE, 20);
    gtk_widget_show_all(hbox_buttons);
}
/*
 * Copies s into out, replacing '>', '<' and '&' with the entities "&gt;",
 * "&lt;" and "&amp;".  Every other character is copied unchanged, one whole
 * UTF-8 character at a time.
 *
 * out must be large enough for the expanded text; no bound is checked here.
 * Returns out.
 */
char *htmlspecialchars(char *s, char out[])
{
    int w = 0;   /* write offset into out */

    for (; *s != 0; ) {
        int width = utf8_sz(s);
        const char *entity;

        switch (*s) {
        case '>':
            entity = "gt";
            break;
        case '<':
            entity = "lt";
            break;
        case '&':
            entity = "amp";
            break;
        default:
            entity = NULL;
            break;
        }

        if (entity != NULL) {
            int elen = strlen(entity);

            out[w++] = '&';
            memcpy(&out[w], entity, elen);
            w += elen;
            out[w++] = ';';
            s++;
            continue;
        }

        memcpy(&out[w], s, width);
        w += width;
        s += width;
    }

    out[w] = 0;
    return out;
}
int main(int argc, char **argv) { char *fname = "pho.tab2.src"; FILE *fp; char s[64]; int phrase_area_N=0; char *phrase_area = NULL; if (!getenv("NO_GTK_INIT")) gtk_init(&argc, &argv); if (argc > 1) fname = argv[1]; if ((fp=fopen(fname,"rb"))==NULL) p_err("cannot open %s\n", fname); while (!feof(fp)) { s[0]=0; myfgets(s,sizeof(s),fp); int len=strlen(s); if (s[len-1]=='\n') s[--len]=0; if (len==0) continue; phokey_t kk=0; char *p = s; while (*p && *p!=' ' && *p!=9) { if (kk==(BACK_QUOTE_NO << 9)) kk|=*p; else kk |= lookup((u_char *)p); p += utf8_sz(p); } items[itemsN].key = kk; p++; char *str = p; while (*p && *p != ' ' && *p!=9) p++; *p = 0; p++; int slen = strlen(str); if (slen==utf8_sz(str)) { u8cpy((char *)items[itemsN].ch, str); } else { dbg("str %s\n", str); int newN = phrase_area_N + slen + 1; phrase_area = trealloc(phrase_area, char, newN); strcpy(phrase_area + phrase_area_N, str); items[itemsN].ch[0] = PHO_PHRASE_ESCAPE; items[itemsN].ch[1] = phrase_area_N & 0xff; items[itemsN].ch[2] = (phrase_area_N>>8) & 0xff; items[itemsN].ch[3] = (phrase_area_N>>16) & 0xff; phrase_area_N = newN; } items[itemsN].count = atoi(p); items[itemsN].oseq = itemsN; itemsN++; } fclose(fp); qsort(items, itemsN, sizeof(PHITEM), qcmp_key_del); int i; #if 1 int newN = 1; for(i=1;i<itemsN;i++) if (qcmp_key_del(&items[i-1], &items[i])) items[newN++] = items[i]; else { #if 0 prph(items[i].key); utf8_putchar((char *)items[i].ch); dbg("\n"); #endif } if (itemsN != newN) { dbg("deleted %d %d\n",itemsN, newN); itemsN = newN; } #endif qsort(items, itemsN, sizeof(PHITEM), qcmp_key); PHO_IDX pho_idx[3000]; u_short pho_idxN=0; for(i=0; i < itemsN; ) { phokey_t key = items[i].key; pho_idx[pho_idxN].key = key; pho_idx[pho_idxN].start = i; pho_idxN++; int j; for (j=i+1; j < itemsN && items[j].key == key; j++); int l; for(l=i; l<j; l++) { bchcpy(pho_items[pho_itemsN].ch, items[l].ch); pho_items[pho_itemsN].count = items[l].count; pho_itemsN++; } i = j; } char *tp = strstr(fname, ".tab2.src"); if 
(!tp) p_err("file name should be *.tab2.src"); tp = strstr(fname, ".src"); *tp=0; char *fname_out = fname; if ((fp=fopen(fname_out,"wb"))==NULL) p_err("cannot create %s\n", fname_out); fwrite("PH",1,2,fp); // dbg("pho_itemsN:%d pho_idxN:%d\n", pho_itemsN, pho_idxN); fwrite(&pho_idxN, sizeof(u_short), 1, fp); fwrite(&pho_itemsN, sizeof(pho_itemsN), 1, fp); fwrite(&phrase_area_N, sizeof(phrase_area_N), 1, fp); #if 0 fclose(fp); exit(0); #endif fwrite(pho_idx, sizeof(PHO_IDX), pho_idxN, fp); fwrite(pho_items, sizeof(PHO_ITEM), pho_itemsN, fp); fwrite(phrase_area, 1, phrase_area_N, fp); fclose(fp); if (getenv("HIME_NO_RELOAD")==NULL) { /* caleb- does found where "reload" is used. * caleb- think the send_hime_message() here does nothing. */ send_hime_message(GDK_DISPLAY(), "reload"); } return 0; }
int main(int argc, char **argv) { FILE *fp; char s[128]; int i,len; PHOKBM phkb; char num, typ, chk; char fnamesrc[40]; char fnameout[40]; if (argc < 2) { puts("file name expected"); exit(1); } bzero(&phkb,sizeof(phkb)); strcpy(fnameout,argv[1]); char *p; if ((p=strchr(fnameout, '.'))) *p = 0; strcpy(fnamesrc,fnameout); strcat(fnamesrc,".kbmsrc"); strcat(fnameout,".kbm"); if ((fp=fopen(fnamesrc,"r"))==NULL) { printf("Cannot open %s\n", fnamesrc); exit(1); } // fgets(s,sizeof(s),fp); // len=strlen(s); // s[len-1]=0; // strcpy(phkb.selkey, s); // phkb.selkeyN = strlen(s); while (!feof(fp)) { s[0]=0; fgets(s,sizeof(s),fp); len=strlen(s); if (!len) break; if (s[len-1]=='\n') s[--len]=0; if (!len) break; if (!pho_lookup(s, &num, &typ)) p_err("err found %s", s); int utf8sz = utf8_sz(s); chk=s[utf8sz + 1]; if (chk>='A' && chk<='Z') chk+=32; for(i=0;i<3;i++) { if (!phkb.phokbm[(int)chk][i].num) { phkb.phokbm[(int)chk][i].num=num; phkb.phokbm[(int)chk][i].typ=typ; // printf("%c %d %d i:%d\n", chk, num, typ, i); break; } } } fclose(fp); if (strstr(fnamesrc, "pinyin")) phkb.phokbm[' '][0].num=0; phkb.phokbm[' '][0].typ=3; if ((fp=fopen(fnameout,"w"))==NULL) { printf("Cannot create %s\n", fnameout); exit(1); } fwrite(&phkb,sizeof(phkb),1,fp); fclose(fp); exit(0); }
/*
 * main: command-line converter taking exactly two arguments, a .gtab input
 * file and an output file (see the usage message).
 *
 * Steps visible in the body:
 *  - reads a struct TableHead, then th.KeyS keymap bytes, th.KeyS key names
 *    of CH_SZ bytes each and KeyNum+1 idx1 entries from the input file;
 *  - defaults th.keybits to 6 when it is zero and sets key64 when
 *    th.MaxPress * th.keybits exceeds 32;
 *  - writes one header line (TSIN_GTAB_KEY, keybits, MaxPress, keymap+1) to
 *    the output file;
 *  - loads th.DefC items into it64 or it and sorts them with qcmp_ch64 or
 *    qcmp_ch;
 *  - opens the file named by get_gcin_user_fname(tsin32_f, ...) and reads it
 *    record by record: a 1-byte clen, a usecount_t, clen phokey_t values and
 *    clen UTF-8 characters.  Each character is looked up with find_ch64 or
 *    find_ch; a record with any failed lookup is skipped;
 *  - for every combination of the per-character candidates (permN is the
 *    product of the candidate counts, each combination decoded from z as a
 *    mixed-radix number into vz[]) writes "<phrase> <key strings> <usecount>".
 *
 * Reading stops at end of file, or at once when a character's first byte
 * cannot be read.
 *
 * NOTE(review): clen is not initialised before its fread and is never checked
 * against MAX_PHRASE_LEN - confirm the input file is trusted.
 * NOTE(review): the NEED_SWAP section uses i, KeyNum and idx1 ahead of their
 * declarations; it looks like it cannot compile when NEED_SWAP is set -
 * confirm.
 * NOTE(review): max_keyN, kname, kno and phbuf are filled in but never read
 * in this function.
 */
int main(int argc, char **argv) { gtk_init(&argc, &argv); #if 1 if (argc != 3) p_err("%s a_file.gtab outfile", argv[0]); #endif #if 1 char *infile = argv[1]; char *outfile = argv[2]; #else char *infile = "data/ar30.gtab"; char *outfile = "l"; #endif FILE *fr; if ((fr=fopen(infile, "rb"))==NULL) p_err("cannot err open %s", infile); FILE *fp_out; if ((fp_out=fopen(outfile,"w"))==NULL) { printf("Cannot open %s", outfile); exit(-1); } struct TableHead th; fread(&th,1, sizeof(th), fr); #if NEED_SWAP swap_byte_4(&th.version); swap_byte_4(&th.flag); swap_byte_4(&th.space_style); swap_byte_4(&th.KeyS); swap_byte_4(&th.MaxPress); swap_byte_4(&th.M_DUP_SEL); swap_byte_4(&th.DefC); for(i=0; i <= KeyNum; i++) swap_byte_4(&idx1[i]); #endif int KeyNum = th.KeyS; dbg("keys %d\n",KeyNum); if (!th.keybits) th.keybits = 6; dbg("keybits:%d maxPress:%d\n", th.keybits, th.MaxPress); int max_keyN; if (th.MaxPress*th.keybits > 32) { max_keyN = 64 / th.keybits; key64 = TRUE; dbg("it's a 64-bit .gtab\n"); } else { max_keyN = 32 / th.keybits; key64 = FALSE; } dbg("key64:%d\n", key64); char kname[128][CH_SZ]; char keymap[128]; gtab_idx1_t idx1[256]; static char kno[128]; itN = th.DefC; bzero(keymap, sizeof(keymap)); fread(keymap, 1, th.KeyS, fr); fread(kname, CH_SZ, th.KeyS, fr); fread(idx1, sizeof(gtab_idx1_t), KeyNum+1, fr); int i; for(i=0; i < th.KeyS; i++) { kno[keymap[i]] = i; } fprintf(fp_out,TSIN_GTAB_KEY" %d %d %s\n", th.keybits, th.MaxPress, keymap+1); if (key64) { fread(it64, sizeof(ITEM64), th.DefC, fr); qsort(it64, th.DefC, sizeof(ITEM64), qcmp_ch64); } else { fread(it, sizeof(ITEM), th.DefC, fr); qsort(it, th.DefC, sizeof(ITEM), qcmp_ch); } itN = th.DefC; // dbg("itN:%d\n", itN); #if 0 for(i=0; i < itN; i++) { printf("\n%d ", i); utf8_putchar(it64[i].ch); } #endif fclose(fr); char fname[128]; get_gcin_user_fname(tsin32_f, fname); FILE *fp; if ((fp=fopen(fname,"rb"))==NULL) { printf("Cannot open %s", fname); exit(-1); } while (!feof(fp)) { int i; phokey_t phbuf[MAX_PHRASE_LEN]; 
u_char clen; usecount_t usecount; fread(&clen,1,1,fp); fread(&usecount, sizeof(usecount_t), 1,fp); fread(phbuf,sizeof(phokey_t), clen, fp); char str[MAX_PHRASE_LEN * CH_SZ + 1]; int strN = 0; KKARR kk[MAX_PHRASE_LEN]; KKARR64 kk64[MAX_PHRASE_LEN]; gboolean has_err = FALSE; if (key64) bzero(kk64, sizeof(kk64)); else bzero(kk, sizeof(kk)); // dbg("clen %d\n", clen); for(i=0;i<clen;i++) { char ch[CH_SZ]; int n = fread(ch, 1, 1, fp); if (n<=0) goto stop; int len=utf8_sz(ch); fread(&ch[1], 1, len-1, fp); // utf8_putchar(ch); if (key64) { if (!(kk64[i].arr = find_ch64(ch, &kk64[i].N))) has_err = TRUE; } else { if (!(kk[i].arr = find_ch(ch, &kk[i].N))) has_err = TRUE; } memcpy(str+strN, ch, len); strN+=len; } if (has_err) { // dbg("has_error\n"); continue; } #if 0 for(i=0; i < clen; i++) printf("%d ", kk64[i].N); printf("\n"); #endif str[strN]=0; int permN; if (key64) { permN=kk64[0].N; for(i=1;i<clen;i++) permN *= kk64[i].N; } else { permN=kk[0].N; for(i=1;i<clen;i++) permN *= kk[i].N; } int z; for(z=0; z < permN; z++) { char vz[MAX_PHRASE_LEN]; int tz = z; if (key64) { for(i=0; i < clen; i++) { vz[i] = tz % kk64[i].N; tz /= kk64[i].N; } } else { for(i=0; i < clen; i++) { vz[i] = tz % kk[i].N; tz /= kk[i].N; } } char kstr[512]; kstr[0]=0; for(i=0;i<clen;i++) { char tkey[16]; u_int64_t k=0; if (key64) { memcpy(&k, kk64[i].arr[vz[i]].key, 8); } else { u_int t; memcpy(&t, kk[i].arr[vz[i]].key, 4); k = t; } get_keymap_str(k, keymap, th.keybits, tkey); strcat(kstr, tkey); strcat(kstr, " "); } fprintf(fp_out,"%s %s%d\n", str, kstr, usecount); } } stop: fclose(fp); fclose(fp_out); return 0; }
int main(int argc, char **argv) { FILE *fp,*fw; char s[1024]; u_char chbuf[MAX_PHRASE_LEN * CH_SZ]; u_short phbuf[80]; u_int phbuf32[80]; u_int64_t phbuf64[80]; int i,j,idx,len, ofs; u_short kk; u_int64_t kk64; int hashidx[TSIN_HASH_N]; u_char clen; int lineCnt=0; gboolean reload = getenv("HIME_NO_RELOAD")==NULL; if (reload) { dbg("need reload\n"); } else { dbg("NO_GTK_INIT\n"); } if (getenv("NO_GTK_INIT")==NULL) gtk_init(&argc, &argv); dbg("enter %s\n", argv[0]); if (argc < 2) p_err("must specify input file"); init_TableDir(); if ((fp=fopen(argv[1], "rb"))==NULL) { printf("Cannot open %s\n", argv[1]); exit(-1); } skip_utf8_sigature(fp); char *outfile; int fofs = ftell(fp); myfgets(s, sizeof(s), fp); if (strstr(s, "!!pinyin")) { b_pinyin = TRUE; printf("is pinyin\n"); load_pin_juyin(); } else fseek(fp, fofs, SEEK_SET); fofs = ftell(fp); int keybits=0, maxkey=0; char keymap[128]; char kno[128]; bzero(kno, sizeof(kno)); myfgets(s, sizeof(s), fp); puts(s); if (strstr(s, TSIN_GTAB_KEY)) { is_gtab = TRUE; lineCnt++; if (argc < 3) p_err("useage %s input_file output_file", argv[0]); outfile = argv[2]; len=strlen((char *)s); if (s[len-1]=='\n') s[--len]=0; char aa[128]; keymap[0]=' '; sscanf(s, "%s %d %d %s", aa, &keybits, &maxkey, keymap+1); for(i=0; keymap[i]; i++) kno[keymap[i]]=i; if (maxkey * keybits > 32) gtabkey64 = TRUE; } else { if (argc==3) outfile = argv[2]; else outfile = "tsin32"; fseek(fp, fofs, SEEK_SET); } INMD inmd, *cur_inmd = &inmd; char *cphbuf; if (is_gtab) { cur_inmd->keybits = keybits; if (gtabkey64) { cphbuf = (char *)phbuf64; phsz = 8; key_cmp = key_cmp64; hash_shift = TSIN_HASH_SHIFT_64; cur_inmd->key64 = TRUE; } else { cphbuf = (char *)phbuf32; phsz = 4; hash_shift = TSIN_HASH_SHIFT_32; key_cmp = key_cmp32; cur_inmd->key64 = FALSE; } cur_inmd->last_k_bitn = (((cur_inmd->key64 ? 
64:32) / cur_inmd->keybits) - 1) * cur_inmd->keybits; dbg("cur_inmd->last_k_bitn %d\n", cur_inmd->last_k_bitn); } else { cphbuf = (char *)phbuf; phsz = 2; key_cmp = key_cmp16; hash_shift = TSIN_HASH_SHIFT; } dbg("phsz: %d\n", phsz); phcount=ofs=0; while (!feof(fp)) { usecount_t usecount=0; lineCnt++; myfgets((char *)s,sizeof(s),fp); len=strlen((char *)s); if (s[0]=='#') continue; if (strstr(s, TSIN_GTAB_KEY)) continue; if (s[len-1]=='\n') s[--len]=0; if (len==0) continue; i=0; int chbufN=0; int charN = 0; while (s[i]!=' ' && i<len) { int len = utf8_sz((char *)&s[i]); memcpy(&chbuf[chbufN], &s[i], len); i+=len; chbufN+=len; charN++; } while ((i < len && s[i]==' ') || s[i]=='\t') i++; int phbufN=0; while (i<len && phbufN < charN && s[i]!=' ') { if (is_gtab) { kk64=0; int idx=0; while (s[i]!=' ' && i<len) { int k = kno[s[i]]; kk64|=(u_int64_t)k << ( LAST_K_bitN - idx*keybits); i++; idx++; } if (phsz==8) phbuf64[phbufN++]=kk64; else phbuf32[phbufN++]=(u_int)kk64; } else { kk=0; if (b_pinyin) { kk = pinyin2phokey(s+i); while (s[i]!=' ' && i<len) i++; } else { while (s[i]!=' ' && i<len) { if (kk==(BACK_QUOTE_NO << 9)) kk|=s[i]; else kk |= lookup((u_char *)&s[i]); i+=utf8_sz((char *)&s[i]); } } phbuf[phbufN++]=kk; } i++; } if (phbufN!=charN) { p_err("%s Line %d problem in phbufN!=chbufN %d != %d\n", s, lineCnt, phbufN, chbufN); } clen=phbufN; while (i<len && s[i]==' ') i++; if (i==len) usecount = 0; else usecount = atoi((char *)&s[i]); /* printf("len:%d\n", clen); */ if (phcount >= phidxsize) { phidxsize+=1024; if (!(phidx=(int *)realloc(phidx,phidxsize*4))) { puts("realloc err"); exit(1); } } phidx[phcount++]=ofs; int new_bfN = ofs + 1 + sizeof(usecount_t)+ phsz * clen + chbufN; if (bfsize < new_bfN) { bfsize = new_bfN + 1024*1024; bf = (char *)realloc(bf, bfsize); } memcpy(&bf[ofs++],&clen,1); memcpy(&bf[ofs],&usecount, sizeof(usecount_t)); ofs+=sizeof(usecount_t); memcpy(&bf[ofs], cphbuf, clen * phsz); ofs+=clen * phsz; memcpy(&bf[ofs], chbuf, chbufN); ofs+=chbufN; } 
fclose(fp); /* dumpbf(bf,phidx); */ puts("Sorting ...."); qsort(phidx,phcount, sizeof(phidx[0]),qcmp); if (!(sf=(u_char *)malloc(bfsize))) { puts("malloc err"); exit(1); } if (!(sidx=(int *)malloc(phidxsize*sizeof(int)))) { puts("malloc err"); exit(1); } // delete duplicate ofs=0; j=0; for(i=0;i<phcount;i++) { idx = phidx[i]; sidx[j]=ofs; len=bf[idx]; int tlen = utf8_tlen(&bf[idx + 1 + sizeof(usecount_t) + phsz*len], len); clen= phsz*len + tlen + 1 + sizeof(usecount_t); if (i && !qcmp_eq(&phidx[i-1], &phidx[i])) continue; memcpy(&sf[ofs], &bf[idx], clen); j++; ofs+=clen; } phcount=j; #if 1 puts("Sorting by usecount ...."); qsort(sidx, phcount, 4, qcmp_usecount); #endif for(i=0;i<256;i++) hashidx[i]=-1; for(i=0;i<phcount;i++) { idx=sidx[i]; idx+= 1 + sizeof(usecount_t); int v; if (phsz==2) { phokey_t kk; memcpy(&kk, &sf[idx], phsz); v = kk >> TSIN_HASH_SHIFT; } else if (phsz==4) {
int main(int argc, char **argv) { FILE *fp; int i; u_char clen; usecount_t usecount; gboolean pr_usecount = TRUE; char *fname; char *fname_out = NULL; if (argc <= 1) { printf("%s: file name expected\n", argv[0]); exit(1); } for(i=1; i < argc;) { if (!strcmp(argv[i], "-nousecount")) { i++; pr_usecount = FALSE; } else if (!strcmp(argv[i], "-o")) { if (i==argc-1) p_err("-o need out file name"); fname_out = argv[i+1]; i+=2; } else fname = argv[i++]; } FILE *fp_out; if (!fname_out) { fp_out = stdout; } else { dbg("%s use %s\n", argv[0], fname_out); fp_out = fopen(fname_out, "w"); if (!fp_out) p_err("cannot create %s\n", fname_out); } if ((fp=fopen(fname,"rb"))==NULL) p_err("Cannot open %s", argv[1]); TSIN_GTAB_HEAD head; int phsz = 2; fread(&head, sizeof(head), 1, fp); if (!strcmp(head.signature, TSIN_GTAB_KEY)) { if (head.maxkey * head.keybits > 32) phsz = 8; else phsz = 4; } else rewind(fp); if (phsz > 2) { fprintf(stderr, "phsz %d keybits:%d\n", phsz, head.keybits); fprintf(stderr, "keymap '%s'\n", head.keymap); fprintf(fp_out,TSIN_GTAB_KEY" %d %d %s\n", head.keybits, head.maxkey, head.keymap+1); } while (!feof(fp)) { phokey_t phbuf[MAX_PHRASE_LEN]; u_int phbuf32[MAX_PHRASE_LEN]; u_int64_t phbuf64[MAX_PHRASE_LEN]; fread(&clen,1,1,fp); fread(&usecount, sizeof(usecount_t), 1,fp); if (!pr_usecount) usecount = 0; if (phsz==2) fread(phbuf, sizeof(phokey_t), clen, fp); else if (phsz==4) fread(phbuf32, 4, clen, fp); else if (phsz==8) fread(phbuf64, 8, clen, fp); for(i=0;i<clen;i++) { char ch[CH_SZ]; int n = fread(ch, 1, 1, fp); if (n<=0) goto stop; int len=utf8_sz(ch); fread(&ch[1], 1, len-1, fp); int j; for(j=0; j < len; j++) fprintf(fp_out, "%c", ch[j]); } fprintf(fp_out, " "); for(i=0;i<clen;i++) { if (phsz==2) prph2(fp_out, phbuf[i]); else { u_int64_t k; if (phsz==4) k = phbuf32[i]; else k = phbuf64[i]; char tkey[16]; get_keymap_str(k, head.keymap, head.keybits, tkey); fprintf(fp_out, "%s", tkey); } if (i!=clen-1) fprintf(fp_out, " "); } fprintf(fp_out, " %d\n", 
usecount); } stop: fclose(fp); fclose(fp_out); exit(0); }
int main(int argc, char **argv) { FILE *fp; int i; char clen; usecount_t usecount; gboolean pr_usecount = TRUE; char *fname; char *fname_out = NULL; gtk_init(&argc, &argv); if (argc <= 1) { printf("%s: file name expected\n", argv[0]); exit(1); } init_TableDir(); gboolean b_pinyin = is_pinyin_kbm(); for(i=1; i < argc;) { if (!strcmp(argv[i], "-nousecount")) { i++; pr_usecount = FALSE; b_pinyin = FALSE; } else if (!strcmp(argv[i], "-o")) { if (i==argc-1) p_err("-o need out file name"); fname_out = argv[i+1]; i+=2; } else fname = argv[i++]; } FILE *fp_out; if (!fname_out) { fp_out = stdout; } else { dbg("%s use %s\n", argv[0], fname_out); fp_out = fopen(fname_out, "w"); if (!fp_out) p_err("cannot create %s\n", fname_out); } if (b_pinyin) fprintf(fp_out, "!!pinyin\n"); if ((fp=fopen(fname,"rb"))==NULL) p_err("Cannot open %s %s", fname, sys_err_strA()); TSIN_GTAB_HEAD head; int phsz = 2; fread(&head, sizeof(head), 1, fp); if (!strcmp(head.signature, TSIN_GTAB_KEY)) { if (head.maxkey * head.keybits > 32) phsz = 8; else phsz = 4; } else rewind(fp); if (phsz > 2) { fprintf(stderr, "phsz %d keybits:%d\n", phsz, head.keybits); fprintf(stderr, "keymap '%s'\n", head.keymap); fprintf(fp_out,TSIN_GTAB_KEY" %d %d %s\n", head.keybits, head.maxkey, head.keymap+1); } while (!feof(fp)) { phokey_t phbuf[MAX_PHRASE_LEN]; u_int phbuf32[MAX_PHRASE_LEN]; u_int64_t phbuf64[MAX_PHRASE_LEN]; gboolean is_deleted = FALSE; fread(&clen,1,1,fp); if (clen < 0) { clen = - clen; is_deleted = TRUE; } fread(&usecount, sizeof(usecount_t), 1,fp); if (!pr_usecount) usecount = 0; if (phsz==2) fread(phbuf, sizeof(phokey_t), clen, fp); else if (phsz==4) fread(phbuf32, 4, clen, fp); else if (phsz==8) fread(phbuf64, 8, clen, fp); char tt[512]; int ttlen=0; tt[0]=0; for(i=0;i<clen;i++) { char ch[CH_SZ]; int n = fread(ch, 1, 1, fp); if (n<=0) goto stop; int len=utf8_sz(ch); fread(&ch[1], 1, len-1, fp); memcpy(tt+ttlen, ch, len); ttlen+=len; } tt[ttlen]=0; if (!tt[0]) continue; if (is_deleted) continue; 
fprintf(fp_out, "%s ", tt); for(i=0;i<clen;i++) { if (phsz==2) { if (b_pinyin) { char *t = phokey2pinyin(phbuf[i]); // dbg("z %s\n", t); fprintf(fp_out, "%s", t); } else prph2(fp_out, phbuf[i]); } else { u_int64_t k; if (phsz==4) k = phbuf32[i]; else k = phbuf64[i]; char tkey[16]; get_keymap_str(k, head.keymap, head.keybits, tkey); fprintf(fp_out, "%s", tkey); } if (i!=clen-1) fprintf(fp_out, " "); } fprintf(fp_out, " %d\n", usecount); } stop: fclose(fp); fclose(fp_out); exit(0); }