Exemplo n.º 1
0
static void cb_button_add(GtkButton *button, gpointer user_data)
{
    bigphoN = 0;
    char *p = current_str;
    while (*p) {
        char_pho *pbigpho = &bigpho[bigphoN++];

        if (ph_key_sz==2) {
            pbigpho->phokeysN = utf8_pho_keys(p, (phokey_t*)pbigpho->phokeys);
        } else {
            pbigpho->phokeysN = lookup_gtab_key(p, pbigpho->phokeys);
        }

        p+=utf8_sz(p);

        if (!pbigpho->phokeysN) {
            dbg(" no mapping to pho\n");
            return;
        }
    }

    GtkWidget *sel =  create_pho_sel_area();
    gtk_box_pack_start (GTK_BOX (hbox_buttons), sel, FALSE, FALSE, 20);

    gtk_widget_show_all(hbox_buttons);

}
Exemplo n.º 2
0
int pho_lookup(char *s, char *num, char *typ)
{
  int i;
  char tt[CH_SZ+1], *pp;
  int len = utf8_sz(s);

  if (utf8_eq(s, "1")) {
    *num = 0;
    *typ = 3;
    return TRUE;
  }

  if (!(*s&0x80))
    return *s-'0';

  bchcpy(tt, s);
  tt[len]=0;

  for(i=0;i<4;i++) {
    if ((pp=strstr(pho_chars[i], tt)))
      break;
  }

  if (!pp)
    return FALSE;

  *typ=i;
  *num=(pp - pho_chars[i])/3;

  return TRUE;
}
Exemplo n.º 3
0
void str_to_all_phokey_chars(char *u8_str, char *out)
{
  out[0]=0;

  while (*u8_str) {
    phokey_t phos[32];

    int n=utf8_pho_keys(u8_str, phos);
#if 0
    utf8_putchar(u8_str);
    dbg("n %d\n", n);
#endif
    int i;
    for(i=0; i < n; i++) {
      char *pstr = phokey_to_str(phos[i]);
      strcat(out, pstr);
      if (i < n -1)
        strcat(out, " ");
    }

    u8_str+=utf8_sz(u8_str);

    if (*u8_str)
      strcat(out, " | ");
  }
}
Exemplo n.º 4
0
int gtab_key2name(INMD *tinmd, u_int64_t key, char *t, int *rtlen)
{
    int tlen=0, klen=0;

    int j;
    for(j=Max_tab_key_num1(tinmd) - 1; j>=0; j--) {
        int sh = j * KeyBits1(tinmd);
        int k = (key >> sh) & tinmd->kmask;

        if (!k)
            break;
        int len;
        char *keyname;

        if (tinmd->keyname_lookup) {
            len = 1;
            keyname = (char *)&tinmd->keyname_lookup[k];
        } else {
            keyname = (char *)&tinmd->keyname[k * CH_SZ];
            len = (*keyname & 0x80) ? utf8_sz(keyname) : strlen(keyname);
        }
//      dbg("uuuuuuuuuuuu %d %x len:%d\n", k, tinmd->keyname[k], len);
        memcpy(&t[tlen], keyname, len);
        tlen+=len;
        klen++;
    }

    t[tlen]=0;
    *rtlen = tlen;
    return klen;
}
Exemplo n.º 5
0
void load_ts_phrase()
{
  FILE *fp = tsin_hand.fph;

  int i;
  for(i=0; i < phraseN; i++)
    free(phrase[i]);
  free(phrase); phrase = NULL;
  phraseN = 0;

  dbg("fname %s\n", current_tsin_fname);

  int ofs = is_gtab ? sizeof(TSIN_GTAB_HEAD):0;
  fseek(fp, ofs, SEEK_SET);

  while (!feof(fp)) {
    u_int64_t phbuf[MAX_PHRASE_LEN];
    char chbuf[MAX_PHRASE_LEN * CH_SZ + 1];
    u_char clen;
    usecount_t usecount;

    clen = 0;

    fread(&clen,1,1,fp);

    if (clen > MAX_PHRASE_LEN)
      p_err("bad tsin db clen %d > MAX_PHRASE_LEN %d\n", clen, MAX_PHRASE_LEN);

    fread(&usecount,sizeof(usecount_t), 1, fp);
    fread(phbuf, ph_key_sz, clen, fp);
    int tlen = 0;

    for(i=0; i < clen; i++) {
      int n = fread(&chbuf[tlen], 1, 1, fp);
      if (n<=0)
        goto stop;
      int len=utf8_sz(&chbuf[tlen]);
      fread(&chbuf[tlen+1], 1, len-1, fp);
      tlen+=len;
    }

    if (clen < 2)
      continue;

    chbuf[tlen]=0;
    phrase = trealloc(phrase, char *, phraseN+1);

    phrase[phraseN++] = strdup(chbuf);
  }


stop:

//  fclose(fp);

  qsort(phrase, phraseN, sizeof(char *), qcmp_str);

  dbg("phraseN: %d\n", phraseN);
}
Exemplo n.º 6
0
void load_ts_phrase()
{
    FILE *fp = tsin_hand.fph;

    int i;
    dbg("fname %s\n", current_tsin_fname);

    int ofs = is_gtab ? sizeof(TSIN_GTAB_HEAD):0;
    fseek(fp, ofs, SEEK_SET);

    tsN=0;
    free(ts_idx);
    ts_idx=NULL;

    while (!feof(fp)) {
        ts_idx = trealloc(ts_idx, int, tsN);
        ts_idx[tsN] = ftell(fp);
        u_int64_t phbuf[MAX_PHRASE_LEN];
        char chbuf[MAX_PHRASE_LEN * CH_SZ + 1];
        u_char clen;
        usecount_t usecount;
        clen = 0;

        fread(&clen,1,1,fp);

        if (clen > MAX_PHRASE_LEN) {
            box_warn("bad tsin db clen %d > MAX_PHRASE_LEN %d\n", clen, MAX_PHRASE_LEN);
            break;
        }

        fread(&usecount,sizeof(usecount_t), 1, fp);
        fread(phbuf, ph_key_sz, clen, fp);
        int tlen = 0;

        for(i=0; i < clen; i++) {
            int n = fread(&chbuf[tlen], 1, 1, fp);
            if (n<=0)
                goto stop;
            int len=utf8_sz(&chbuf[tlen]);
            fread(&chbuf[tlen+1], 1, len-1, fp);
            tlen+=len;
        }

        if (clen < 2)
            continue;

        chbuf[tlen]=0;
        tsN++;
    }

    page_ofs = tsN - PAGE_LEN;
    if (page_ofs < 0)
        page_ofs = 0;

stop:
    dbg("load_ts_phrase\n");
//  fclose(fp);

}
Exemplo n.º 7
0
void send_utf8_ch(char *bchar)
{
  char tt[CH_SZ+1];
  int len = utf8_sz(bchar);

  memcpy(tt, bchar, len);
  tt[len]=0;

  send_text(tt);
}
Exemplo n.º 8
0
int main()
{
  FILE *fp;
  char fnamein[]="pin-juyin.src";
  PIN_JUYIN pinju[1024];
  short pinjuN=0;

  if ((fp=fopen(fnamein, "r"))==NULL)
    p_err("cannot open %s", fnamein);

  while (!feof(fp)) {
    char tt[128];

    tt[0]=0;
    fgets(tt, sizeof(tt), fp);
    if (strlen(tt) < 3)
      break;

    char pin[16], ju[64];
    bzero(pin, sizeof(pin));
    sscanf(tt, "%s %s",pin, ju);

    phokey_t kk=0;
    int len = strlen(ju);
    int i=0;
    while (i<len) {
      kk |= lookup((u_char *)&ju[i]);
      i+=utf8_sz(&ju[i]);
    }

//    dbg("%s '%s' %d\n", pin, ju, kk);

    memcpy(pinju[pinjuN].pinyin, pin, sizeof(pinju[0].pinyin));
    pinju[pinjuN].key = kk;
    pinjuN++;
  }

  fclose(fp);
  dbg("zz pinjuN:%d\n", pinjuN);
  
  qsort(pinju, pinjuN, sizeof(PIN_JUYIN), qcmp_str);

  char fnameout[]="pin-juyin.xlt";

  if ((fp=fopen(fnameout, "wb"))==NULL)
    p_err("cannot create %s", fnameout);

  fwrite(&pinjuN, sizeof(pinjuN), 1, fp);
  fwrite(pinju, sizeof(PIN_JUYIN), pinjuN, fp);
  fclose(fp);

  return 0;
}
Exemplo n.º 9
0
void utf8ncpy(char *t, int tsize, char *s)
{
   int tlen = 0;
   char *p=s;
   int slen = strlen(s);
   while (*p) {
     char sz = utf8_sz(p);
     if (tlen + sz > slen || tlen+sz >= tsize-1) // incomplete utf8 char
       break;
     memcpy(t + tlen, p, sz);
     tlen += sz;
     p+=sz;
   }
   t[tlen]=0;
}
Exemplo n.º 10
0
void add_ch_time_str(char *s)
{
  int len= strlen(s);
  int i=0;
  while (i< len) {
    if (!(s[i] & 0x80)) {
      i++;
      continue;
    }
    i+=utf8_sz(s+i);
    add_ch_time();
  }

  if (stat_enabled)
    disp_stat();
}
Exemplo n.º 11
0
static void sym_lookup_key(char *instr, char *outstr)
{
  if (current_method_type() == method_type_PHO || current_method_type() == method_type_TSIN) {
    str_to_all_phokey_chars(instr, outstr);
  } else {
    outstr[0]=0;

    while (*instr) {
      char tt[512];
      tt[0]=0;
      lookup_gtab_out(instr, tt);
      strcat(outstr, tt);

      instr+= utf8_sz(instr);

      if (*instr)
          strcat(outstr, " | ");
    }
  }
}
Exemplo n.º 12
0
static int translate(char *fname, char *str, int strN, char **out)
{
  char fullname[128];

  if (!strN) {
    *out = strdup(str);
    return 0;
  }

  get_sys_table_file_name(fname, fullname);

  if ((fp=fopen(fullname, "rb"))==NULL)
    p_err("cannot open %s %s", fname, fullname);

  struct stat st;

  stat(fullname, &st);
  N = st.st_size / sizeof(T2S);

  char *p=str;
  char *endp = str + strN;
  int opN=0;
  char *op = NULL;

  while (p < endp) {
    op = (char *)realloc(op, opN+5);
    opN += k_lookup(p, &op[opN]);
    p+=utf8_sz(p);
  }

  fclose(fp);

  *out = op;
  op[opN]=0;

  return opN;
}
Exemplo n.º 13
0
static void cb_button_add(GtkButton *button, gpointer user_data)
{
  GtkTextIter start, end;

  if (!gtk_text_buffer_get_selection_bounds(buffer, &start, &end))
    return;

  char *utf8 = gtk_text_buffer_get_text(buffer, &start, &end, FALSE);
  strcpy(current_str, utf8);

  g_free(utf8);

  bigphoN = 0;
  char *p = current_str;
  while (*p) {
    char_pho *pbigpho = &bigpho[bigphoN++];

    if (ph_key_sz==2) {
      pbigpho->phokeysN = utf8_pho_keys(p, (phokey_t*)pbigpho->phokeys);
    } else {
      pbigpho->phokeysN = lookup_gtab_key(p, pbigpho->phokeys);
    }

    p+=utf8_sz(p);

    if (!pbigpho->phokeysN) {
      dbg(" no mapping to pho\n");
      return;
    }
  }

  GtkWidget *sel =  create_pho_sel_area();
  gtk_box_pack_start (GTK_BOX (hbox_buttons), sel, FALSE, FALSE, 20);

  gtk_widget_show_all(hbox_buttons);

}
Exemplo n.º 14
0
char *htmlspecialchars(char *s, char out[])
{
  struct {
    char c;
    char *str;
  } chs[]= {{'>',"gt"}, {'<',"lt"}, {'&',"amp"}
#if 0
  , {' ',"nbsp"}
#endif
  };
  int chsN=sizeof(chs)/sizeof(chs[0]);

  int outn=0;
  while (*s) {
    int sz = utf8_sz(s);
    int i;
    for(i=0; i<chsN; i++)
      if (chs[i].c==*s)
        break;
    if (i==chsN) {
      memcpy(&out[outn],s, sz);
      outn+=sz;
      s+=sz;
    }
    else {
      out[outn++]='&';
      int len=strlen(chs[i].str);
      memcpy(&out[outn], chs[i].str, len);
      outn+=len;
      out[outn++]=';';
      s++;
    }
  }

  out[outn]=0;
  return out;
}
Exemplo n.º 15
0
int main(int argc, char **argv)
{
  char *fname = "pho.tab2.src";
  FILE *fp;
  char s[64];
  int phrase_area_N=0;
  char *phrase_area = NULL;

  if (!getenv("NO_GTK_INIT"))
    gtk_init(&argc, &argv);

  if (argc > 1)
    fname = argv[1];

  if ((fp=fopen(fname,"rb"))==NULL)
    p_err("cannot open %s\n", fname);


  while (!feof(fp)) {
    s[0]=0;
    myfgets(s,sizeof(s),fp);
    int len=strlen(s);

    if (s[len-1]=='\n')
      s[--len]=0;

    if (len==0)
      continue;

    phokey_t kk=0;
    char *p = s;

    while (*p && *p!=' ' && *p!=9) {
      if (kk==(BACK_QUOTE_NO << 9))
        kk|=*p;
      else
        kk |= lookup((u_char *)p);

      p += utf8_sz(p);
    }

    items[itemsN].key = kk;

    p++;

    char *str = p;
    while (*p && *p != ' ' && *p!=9)
      p++;

    *p = 0;
    p++;

    int slen = strlen(str);
    if (slen==utf8_sz(str)) {
      u8cpy((char *)items[itemsN].ch, str);
    } else {
      dbg("str %s\n", str);
      int newN = phrase_area_N + slen + 1;
      phrase_area = trealloc(phrase_area, char, newN);
      strcpy(phrase_area + phrase_area_N, str);
      items[itemsN].ch[0] = PHO_PHRASE_ESCAPE;
      items[itemsN].ch[1] = phrase_area_N & 0xff;
      items[itemsN].ch[2] = (phrase_area_N>>8) & 0xff;
      items[itemsN].ch[3] = (phrase_area_N>>16) & 0xff;
      phrase_area_N = newN;
    }

    items[itemsN].count = atoi(p);
    items[itemsN].oseq = itemsN;

    itemsN++;
  }

  fclose(fp);


  qsort(items, itemsN, sizeof(PHITEM), qcmp_key_del);
  int i;

#if 1
  int newN = 1;
  for(i=1;i<itemsN;i++)
    if (qcmp_key_del(&items[i-1], &items[i]))
      items[newN++] = items[i];
    else {
#if 0
      prph(items[i].key);
      utf8_putchar((char *)items[i].ch);
      dbg("\n");
#endif
    }

  if (itemsN != newN) {
    dbg("deleted %d %d\n",itemsN, newN);
    itemsN = newN;
  }
#endif

  qsort(items, itemsN, sizeof(PHITEM), qcmp_key);

  PHO_IDX pho_idx[3000];
  u_short pho_idxN=0;

  for(i=0; i < itemsN; ) {
    phokey_t key = items[i].key;
    pho_idx[pho_idxN].key = key;
    pho_idx[pho_idxN].start = i;
    pho_idxN++;

    int j;

    for (j=i+1; j < itemsN && items[j].key == key; j++);

    int l;
    for(l=i; l<j; l++) {
      bchcpy(pho_items[pho_itemsN].ch, items[l].ch);
      pho_items[pho_itemsN].count = items[l].count;
      pho_itemsN++;
    }

    i = j;
  }

  char *tp = strstr(fname, ".tab2.src");
  if (!tp)
    p_err("file name should be *.tab2.src");

  tp = strstr(fname, ".src");
  *tp=0;

  char *fname_out = fname;

  if ((fp=fopen(fname_out,"wb"))==NULL)
    p_err("cannot create %s\n", fname_out);

  fwrite("PH",1,2,fp);
//  dbg("pho_itemsN:%d  pho_idxN:%d\n", pho_itemsN, pho_idxN);
  fwrite(&pho_idxN, sizeof(u_short), 1, fp);
  fwrite(&pho_itemsN, sizeof(pho_itemsN), 1, fp);
  fwrite(&phrase_area_N, sizeof(phrase_area_N), 1, fp);
#if 0
  fclose(fp); exit(0);
#endif
  fwrite(pho_idx, sizeof(PHO_IDX), pho_idxN, fp);
  fwrite(pho_items, sizeof(PHO_ITEM), pho_itemsN, fp);

  fwrite(phrase_area, 1, phrase_area_N, fp);

  fclose(fp);

  if (getenv("HIME_NO_RELOAD")==NULL) {
    /* caleb- does found where "reload" is used.
     * caleb- think the send_hime_message() here does nothing.
     */
    send_hime_message(GDK_DISPLAY(), "reload");
  }

  return 0;
}
Exemplo n.º 16
0
int main(int argc, char **argv)
{
  FILE *fp;
  char s[128];
  int i,len;
  PHOKBM phkb;
  char num, typ, chk;
  char fnamesrc[40];
  char fnameout[40];

  if (argc < 2) {
    puts("file name expected");
    exit(1);
  }

  bzero(&phkb,sizeof(phkb));
  strcpy(fnameout,argv[1]);

  char *p;
  if ((p=strchr(fnameout, '.')))
    *p = 0;

  strcpy(fnamesrc,fnameout);
  strcat(fnamesrc,".kbmsrc");
  strcat(fnameout,".kbm");

  if ((fp=fopen(fnamesrc,"r"))==NULL) {
    printf("Cannot open %s\n", fnamesrc);
    exit(1);
  }

//  fgets(s,sizeof(s),fp);
//  len=strlen(s);
//  s[len-1]=0;
//  strcpy(phkb.selkey, s);
//  phkb.selkeyN = strlen(s);

  while (!feof(fp)) {
    s[0]=0;
    fgets(s,sizeof(s),fp);
    len=strlen(s);
    if (!len)
      break;

    if (s[len-1]=='\n')
      s[--len]=0;

    if (!len)
      break;

    if (!pho_lookup(s, &num, &typ))
      p_err("err found %s", s);

    int utf8sz = utf8_sz(s);
    chk=s[utf8sz + 1];

    if (chk>='A' && chk<='Z')
      chk+=32;

    for(i=0;i<3;i++) {
      if (!phkb.phokbm[(int)chk][i].num) {
        phkb.phokbm[(int)chk][i].num=num;
        phkb.phokbm[(int)chk][i].typ=typ;

//       printf("%c %d %d  i:%d\n", chk, num, typ, i);
        break;
      }
    }
  }
  fclose(fp);

  if (strstr(fnamesrc, "pinyin"))
    phkb.phokbm[' '][0].num=0;
    phkb.phokbm[' '][0].typ=3;

  if ((fp=fopen(fnameout,"w"))==NULL) {
    printf("Cannot create %s\n", fnameout);
    exit(1);
  }

  fwrite(&phkb,sizeof(phkb),1,fp);
  fclose(fp);
  exit(0);
}
Exemplo n.º 17
0
int main(int argc, char **argv)
{
  gtk_init(&argc, &argv);

#if 1
  if (argc != 3)
    p_err("%s a_file.gtab outfile", argv[0]);
#endif
#if 1
  char *infile = argv[1];
  char *outfile = argv[2];
#else
  char *infile = "data/ar30.gtab";
  char *outfile = "l";
#endif

  FILE *fr;
  if ((fr=fopen(infile, "rb"))==NULL)
      p_err("cannot err open %s", infile);

  FILE *fp_out;
  if ((fp_out=fopen(outfile,"w"))==NULL) {
    printf("Cannot open %s", outfile);
    exit(-1);
  }

  struct TableHead th;
  fread(&th,1, sizeof(th), fr);
#if NEED_SWAP
  swap_byte_4(&th.version);
  swap_byte_4(&th.flag);
  swap_byte_4(&th.space_style);
  swap_byte_4(&th.KeyS);
  swap_byte_4(&th.MaxPress);
  swap_byte_4(&th.M_DUP_SEL);
  swap_byte_4(&th.DefC);
  for(i=0; i <= KeyNum; i++)
    swap_byte_4(&idx1[i]);
#endif
  int KeyNum = th.KeyS;
  dbg("keys %d\n",KeyNum);

  if (!th.keybits)
    th.keybits = 6;
  dbg("keybits:%d  maxPress:%d\n", th.keybits, th.MaxPress);

  int max_keyN;
  if (th.MaxPress*th.keybits > 32) {
    max_keyN = 64 / th.keybits;
    key64 = TRUE;
    dbg("it's a 64-bit .gtab\n");
  } else {
    max_keyN = 32 / th.keybits;
    key64 = FALSE;
  }

  dbg("key64:%d\n", key64);

  char kname[128][CH_SZ];
  char keymap[128];
  gtab_idx1_t idx1[256];
  static char kno[128];

  itN = th.DefC;

  bzero(keymap, sizeof(keymap));
  fread(keymap, 1, th.KeyS, fr);
  fread(kname, CH_SZ, th.KeyS, fr);
  fread(idx1, sizeof(gtab_idx1_t), KeyNum+1, fr);

  int i;
  for(i=0; i < th.KeyS; i++) {
    kno[keymap[i]] = i;
  }

  fprintf(fp_out,TSIN_GTAB_KEY" %d %d %s\n", th.keybits, th.MaxPress, keymap+1);

  if (key64) {
    fread(it64, sizeof(ITEM64), th.DefC, fr);
    qsort(it64, th.DefC, sizeof(ITEM64), qcmp_ch64);
  }
  else {
    fread(it, sizeof(ITEM), th.DefC, fr);
    qsort(it, th.DefC, sizeof(ITEM), qcmp_ch);
  }

  itN = th.DefC;

//  dbg("itN:%d\n", itN);
#if 0
  for(i=0; i < itN; i++) {
    printf("\n%d ", i);
    utf8_putchar(it64[i].ch);
  }
#endif

  fclose(fr);

  char fname[128];
  get_gcin_user_fname(tsin32_f, fname);

  FILE *fp;
  if ((fp=fopen(fname,"rb"))==NULL) {
    printf("Cannot open %s", fname);
    exit(-1);
  }


  while (!feof(fp)) {
    int i;
    phokey_t phbuf[MAX_PHRASE_LEN];
    u_char clen;
    usecount_t usecount;

    fread(&clen,1,1,fp);
    fread(&usecount, sizeof(usecount_t), 1,fp);
    fread(phbuf,sizeof(phokey_t), clen, fp);

    char str[MAX_PHRASE_LEN * CH_SZ + 1];
    int strN = 0;
    KKARR kk[MAX_PHRASE_LEN];
    KKARR64 kk64[MAX_PHRASE_LEN];
    gboolean has_err = FALSE;

    if (key64)
      bzero(kk64, sizeof(kk64));
    else
      bzero(kk, sizeof(kk));

//    dbg("clen %d\n", clen);
    for(i=0;i<clen;i++) {
      char ch[CH_SZ];

      int n = fread(ch, 1, 1, fp);
      if (n<=0)
        goto stop;

      int len=utf8_sz(ch);

      fread(&ch[1], 1, len-1, fp);
//      utf8_putchar(ch);

      if (key64) {
        if (!(kk64[i].arr = find_ch64(ch, &kk64[i].N)))
          has_err = TRUE;
      } else {
        if (!(kk[i].arr = find_ch(ch, &kk[i].N)))
          has_err = TRUE;
      }

      memcpy(str+strN, ch, len);
      strN+=len;
    }

    if (has_err) {
//      dbg("has_error\n");
      continue;
    }
#if 0
    for(i=0; i < clen; i++)
      printf("%d ", kk64[i].N);
    printf("\n");
#endif
    str[strN]=0;

    int permN;
    if (key64) {
      permN=kk64[0].N;
      for(i=1;i<clen;i++)
        permN *= kk64[i].N;
    }
    else {
      permN=kk[0].N;
      for(i=1;i<clen;i++)
        permN *= kk[i].N;
    }

    int z;
    for(z=0; z < permN; z++) {
      char vz[MAX_PHRASE_LEN];

      int tz = z;

      if (key64) {
        for(i=0; i < clen; i++) {
          vz[i] = tz % kk64[i].N;
          tz /= kk64[i].N;
        }
      } else {
        for(i=0; i < clen; i++) {
          vz[i] = tz % kk[i].N;
          tz /= kk[i].N;
        }
      }

      char kstr[512];
      kstr[0]=0;

      for(i=0;i<clen;i++) {
         char tkey[16];
         u_int64_t k=0;

         if (key64) {
           memcpy(&k, kk64[i].arr[vz[i]].key, 8);
         } else {
           u_int t;
           memcpy(&t, kk[i].arr[vz[i]].key, 4);
           k = t;
         }

         get_keymap_str(k, keymap, th.keybits, tkey);

         strcat(kstr, tkey);
         strcat(kstr, " ");
      }

      fprintf(fp_out,"%s %s%d\n", str, kstr, usecount);
    }
  }
stop:

  fclose(fp);
  fclose(fp_out);
  return 0;
}
Exemplo n.º 18
0
int main(int argc, char **argv)
{
  FILE *fp,*fw;
  char s[1024];
  u_char chbuf[MAX_PHRASE_LEN * CH_SZ];
  u_short phbuf[80];
  u_int phbuf32[80];
  u_int64_t phbuf64[80];
  int i,j,idx,len, ofs;
  u_short kk;
  u_int64_t kk64;
  int hashidx[TSIN_HASH_N];
  u_char clen;
  int lineCnt=0;
  gboolean reload = getenv("HIME_NO_RELOAD")==NULL;

  if (reload) {
    dbg("need reload\n");
  } else {
    dbg("NO_GTK_INIT\n");
  }

  if (getenv("NO_GTK_INIT")==NULL)
    gtk_init(&argc, &argv);

  dbg("enter %s\n", argv[0]);

  if (argc < 2)
    p_err("must specify input file");


  init_TableDir();

  if ((fp=fopen(argv[1], "rb"))==NULL) {
     printf("Cannot open %s\n", argv[1]);
     exit(-1);
  }

  skip_utf8_sigature(fp);
  char *outfile;
  int fofs = ftell(fp);
  myfgets(s, sizeof(s), fp);
  if (strstr(s, "!!pinyin")) {
    b_pinyin = TRUE;
    printf("is pinyin\n");
    load_pin_juyin();
  } else
    fseek(fp, fofs, SEEK_SET);

  fofs = ftell(fp);
  int keybits=0, maxkey=0;
  char keymap[128];
  char kno[128];
  bzero(kno, sizeof(kno));
  myfgets(s, sizeof(s), fp);
  puts(s);
  if (strstr(s, TSIN_GTAB_KEY)) {
    is_gtab = TRUE;
    lineCnt++;

    if (argc < 3)
      p_err("useage %s input_file output_file", argv[0]);

    outfile = argv[2];

    len=strlen((char *)s);
    if (s[len-1]=='\n')
      s[--len]=0;
    char aa[128];
    keymap[0]=' ';
    sscanf(s, "%s %d %d %s", aa, &keybits, &maxkey, keymap+1);
    for(i=0; keymap[i]; i++)
      kno[keymap[i]]=i;

    if (maxkey * keybits > 32)
      gtabkey64 = TRUE;
  } else {
    if (argc==3)
      outfile = argv[2];
    else
      outfile = "tsin32";

    fseek(fp, fofs, SEEK_SET);
  }



  INMD inmd, *cur_inmd = &inmd;

  char *cphbuf;
  if (is_gtab) {
    cur_inmd->keybits = keybits;
    if (gtabkey64) {
      cphbuf = (char *)phbuf64;
      phsz = 8;
      key_cmp = key_cmp64;
      hash_shift = TSIN_HASH_SHIFT_64;
      cur_inmd->key64 = TRUE;
    } else {
      cphbuf = (char *)phbuf32;
      phsz = 4;
      hash_shift = TSIN_HASH_SHIFT_32;
      key_cmp = key_cmp32;
      cur_inmd->key64 = FALSE;
    }
    cur_inmd->last_k_bitn = (((cur_inmd->key64 ? 64:32) / cur_inmd->keybits) - 1) * cur_inmd->keybits;
    dbg("cur_inmd->last_k_bitn %d\n", cur_inmd->last_k_bitn);
  } else {
      cphbuf = (char *)phbuf;
      phsz = 2;
      key_cmp = key_cmp16;
      hash_shift = TSIN_HASH_SHIFT;
  }

  dbg("phsz: %d\n", phsz);

  phcount=ofs=0;
  while (!feof(fp)) {
    usecount_t usecount=0;

    lineCnt++;

    myfgets((char *)s,sizeof(s),fp);
    len=strlen((char *)s);
    if (s[0]=='#')
      continue;

    if (strstr(s, TSIN_GTAB_KEY))
      continue;

    if (s[len-1]=='\n')
      s[--len]=0;

    if (len==0)
      continue;

    i=0;
    int chbufN=0;
    int charN = 0;
    while (s[i]!=' ' && i<len) {
      int len = utf8_sz((char *)&s[i]);

      memcpy(&chbuf[chbufN], &s[i], len);

      i+=len;
      chbufN+=len;
      charN++;
    }

    while ((i < len && s[i]==' ') || s[i]=='\t')
      i++;

    int phbufN=0;
    while (i<len && phbufN < charN && s[i]!=' ') {
      if (is_gtab) {
        kk64=0;
        int idx=0;
        while (s[i]!=' ' && i<len) {
          int k = kno[s[i]];
          kk64|=(u_int64_t)k << ( LAST_K_bitN - idx*keybits);
          i++;
          idx++;
        }

        if (phsz==8)
          phbuf64[phbufN++]=kk64;
        else
          phbuf32[phbufN++]=(u_int)kk64;
      } else {
        kk=0;
        if (b_pinyin) {
          kk = pinyin2phokey(s+i);
          while (s[i]!=' ' && i<len)
            i++;
        } else {
          while (s[i]!=' ' && i<len) {
            if (kk==(BACK_QUOTE_NO << 9))
              kk|=s[i];
            else
              kk |= lookup((u_char *)&s[i]);

            i+=utf8_sz((char *)&s[i]);
          }
        }

        phbuf[phbufN++]=kk;
      }

      i++;
    }

    if (phbufN!=charN) {
      p_err("%s   Line %d problem in phbufN!=chbufN %d != %d\n", s, lineCnt, phbufN, chbufN);
    }

    clen=phbufN;

    while (i<len && s[i]==' ')
      i++;

    if (i==len)
      usecount = 0;
    else
      usecount = atoi((char *)&s[i]);

    /*      printf("len:%d\n", clen); */

    if (phcount >= phidxsize) {
      phidxsize+=1024;
      if (!(phidx=(int *)realloc(phidx,phidxsize*4))) {
        puts("realloc err");
        exit(1);
      }
    }

    phidx[phcount++]=ofs;

    int new_bfN = ofs + 1 + sizeof(usecount_t)+ phsz * clen + chbufN;

    if (bfsize < new_bfN) {
      bfsize = new_bfN + 1024*1024;
      bf = (char *)realloc(bf, bfsize);
    }

    memcpy(&bf[ofs++],&clen,1);
    memcpy(&bf[ofs],&usecount, sizeof(usecount_t)); ofs+=sizeof(usecount_t);

    memcpy(&bf[ofs], cphbuf, clen * phsz);
    ofs+=clen * phsz;

    memcpy(&bf[ofs], chbuf, chbufN);
    ofs+=chbufN;
  }
  fclose(fp);

  /* dumpbf(bf,phidx); */

  puts("Sorting ....");

  qsort(phidx,phcount, sizeof(phidx[0]),qcmp);

  if (!(sf=(u_char *)malloc(bfsize))) {
    puts("malloc err");
    exit(1);
  }

  if (!(sidx=(int *)malloc(phidxsize*sizeof(int)))) {
    puts("malloc err");
    exit(1);
  }


  // delete duplicate
  ofs=0;
  j=0;
  for(i=0;i<phcount;i++) {
    idx = phidx[i];
    sidx[j]=ofs;
    len=bf[idx];
    int tlen = utf8_tlen(&bf[idx + 1 + sizeof(usecount_t) + phsz*len], len);
    clen= phsz*len + tlen + 1 + sizeof(usecount_t);

    if (i && !qcmp_eq(&phidx[i-1], &phidx[i]))
      continue;

    memcpy(&sf[ofs], &bf[idx], clen);
    j++;
    ofs+=clen;
  }

  phcount=j;
#if 1
  puts("Sorting by usecount ....");
  qsort(sidx, phcount, 4, qcmp_usecount);
#endif

  for(i=0;i<256;i++)
    hashidx[i]=-1;

  for(i=0;i<phcount;i++) {
    idx=sidx[i];
    idx+= 1 + sizeof(usecount_t);
    int v;

    if (phsz==2) {
      phokey_t kk;
      memcpy(&kk, &sf[idx], phsz);
      v = kk >> TSIN_HASH_SHIFT;
    } else if (phsz==4) {
Exemplo n.º 19
0
int main(int argc, char **argv)
{
  FILE *fp;
  int i;
  u_char clen;
  usecount_t usecount;
  gboolean pr_usecount = TRUE;
  char *fname;
  char *fname_out = NULL;

  if (argc <= 1) {
    printf("%s: file name expected\n", argv[0]);
    exit(1);
  }

  for(i=1; i < argc;) {
    if (!strcmp(argv[i], "-nousecount")) {
      i++;
      pr_usecount = FALSE;
    } else
    if (!strcmp(argv[i], "-o")) {
      if (i==argc-1)
        p_err("-o need out file name");
        fname_out = argv[i+1];
        i+=2;
    } else
      fname = argv[i++];
  }

  FILE *fp_out;

  if (!fname_out) {
    fp_out = stdout;
  } else {
    dbg("%s use %s\n", argv[0], fname_out);
    fp_out = fopen(fname_out, "w");
    if (!fp_out)
      p_err("cannot create %s\n", fname_out);
  }

  if ((fp=fopen(fname,"rb"))==NULL)
    p_err("Cannot open %s", argv[1]);


  TSIN_GTAB_HEAD head;
  int phsz = 2;

  fread(&head, sizeof(head), 1, fp);
  if (!strcmp(head.signature, TSIN_GTAB_KEY)) {
    if (head.maxkey * head.keybits > 32)
      phsz = 8;
    else
      phsz = 4;
  } else
    rewind(fp);

  if (phsz > 2) {
    fprintf(stderr, "phsz %d keybits:%d\n", phsz, head.keybits);
    fprintf(stderr, "keymap '%s'\n", head.keymap);
    fprintf(fp_out,TSIN_GTAB_KEY" %d %d %s\n", head.keybits, head.maxkey, head.keymap+1);
  }

  while (!feof(fp)) {
    phokey_t phbuf[MAX_PHRASE_LEN];
    u_int phbuf32[MAX_PHRASE_LEN];
    u_int64_t phbuf64[MAX_PHRASE_LEN];

    fread(&clen,1,1,fp);
    fread(&usecount, sizeof(usecount_t), 1,fp);
    if (!pr_usecount)
      usecount = 0;

    if (phsz==2)
      fread(phbuf, sizeof(phokey_t), clen, fp);
    else
    if (phsz==4)
      fread(phbuf32, 4, clen, fp);
    else
    if (phsz==8)
      fread(phbuf64, 8, clen, fp);


    for(i=0;i<clen;i++) {
      char ch[CH_SZ];

      int n = fread(ch, 1, 1, fp);
      if (n<=0)
        goto stop;

      int len=utf8_sz(ch);

      fread(&ch[1], 1, len-1, fp);

      int j;
      for(j=0; j < len; j++)
        fprintf(fp_out, "%c", ch[j]);
    }

    fprintf(fp_out, " ");
    for(i=0;i<clen;i++) {
      if (phsz==2)
        prph2(fp_out, phbuf[i]);
      else {
        u_int64_t k;
        if (phsz==4)
          k = phbuf32[i];
        else
          k = phbuf64[i];

        char tkey[16];
        get_keymap_str(k, head.keymap, head.keybits, tkey);
        fprintf(fp_out, "%s", tkey);
      }

      if (i!=clen-1)
        fprintf(fp_out, " ");
    }

    fprintf(fp_out, " %d\n", usecount);
  }

stop:
  fclose(fp);
  fclose(fp_out);

  exit(0);
}
Exemplo n.º 20
0
int main(int argc, char **argv)
{
  FILE *fp;
  int i;
  char clen;
  usecount_t usecount;
  gboolean pr_usecount = TRUE;
  char *fname;
  char *fname_out = NULL;

  gtk_init(&argc, &argv);

  if (argc <= 1) {
    printf("%s: file name expected\n", argv[0]);
    exit(1);
  }

  init_TableDir();

  gboolean b_pinyin = is_pinyin_kbm();

  for(i=1; i < argc;) {
    if (!strcmp(argv[i], "-nousecount")) {
      i++;
      pr_usecount = FALSE;
      b_pinyin = FALSE;
    } else
    if (!strcmp(argv[i], "-o")) {
      if (i==argc-1)
        p_err("-o need out file name");
        fname_out = argv[i+1];
        i+=2;
    } else
      fname = argv[i++];
  }

  FILE *fp_out;

  if (!fname_out) {
    fp_out = stdout;
  } else {
    dbg("%s use %s\n", argv[0], fname_out);
    fp_out = fopen(fname_out, "w");
    if (!fp_out)
      p_err("cannot create %s\n", fname_out);

  }

  if (b_pinyin)
    fprintf(fp_out, "!!pinyin\n");

  if ((fp=fopen(fname,"rb"))==NULL)
    p_err("Cannot open %s %s", fname, sys_err_strA());


  TSIN_GTAB_HEAD head;
  int phsz = 2;

  fread(&head, sizeof(head), 1, fp);
  if (!strcmp(head.signature, TSIN_GTAB_KEY)) {
    if (head.maxkey * head.keybits > 32)
      phsz = 8;
    else
      phsz = 4;
  } else
    rewind(fp);

  if (phsz > 2) {
    fprintf(stderr, "phsz %d keybits:%d\n", phsz, head.keybits);
    fprintf(stderr, "keymap '%s'\n", head.keymap);
    fprintf(fp_out,TSIN_GTAB_KEY" %d %d %s\n", head.keybits, head.maxkey, head.keymap+1);
  }

  while (!feof(fp)) {
    phokey_t phbuf[MAX_PHRASE_LEN];
    u_int phbuf32[MAX_PHRASE_LEN];
    u_int64_t phbuf64[MAX_PHRASE_LEN];
    gboolean is_deleted = FALSE;

    fread(&clen,1,1,fp);
    if (clen < 0) {
      clen = - clen;
      is_deleted = TRUE;
    }

    fread(&usecount, sizeof(usecount_t), 1,fp);
    if (!pr_usecount)
      usecount = 0;

    if (phsz==2)
      fread(phbuf, sizeof(phokey_t), clen, fp);
    else
    if (phsz==4)
      fread(phbuf32, 4, clen, fp);
    else
    if (phsz==8)
      fread(phbuf64, 8, clen, fp);


    char tt[512];
    int ttlen=0;
    tt[0]=0;
    for(i=0;i<clen;i++) {
      char ch[CH_SZ];

      int n = fread(ch, 1, 1, fp);
      if (n<=0)
        goto stop;

      int len=utf8_sz(ch);

      fread(&ch[1], 1, len-1, fp);

      memcpy(tt+ttlen, ch, len);
      ttlen+=len;
    }
    tt[ttlen]=0;

    if (!tt[0])
      continue;

    if (is_deleted)
      continue;

    fprintf(fp_out, "%s ", tt);

    for(i=0;i<clen;i++) {
      if (phsz==2) {
        if (b_pinyin) {
          char *t = phokey2pinyin(phbuf[i]);
//          dbg("z %s\n", t);
          fprintf(fp_out, "%s", t);
        } else
          prph2(fp_out, phbuf[i]);
      } else {
        u_int64_t k;
        if (phsz==4)
          k = phbuf32[i];
        else
          k = phbuf64[i];

        char tkey[16];
        get_keymap_str(k, head.keymap, head.keybits, tkey);
        fprintf(fp_out, "%s", tkey);
      }

      if (i!=clen-1)
        fprintf(fp_out, " ");
    }

    fprintf(fp_out, " %d\n", usecount);
  }

stop:
  fclose(fp);
  fclose(fp_out);

  exit(0);
}