/*
 * Convert a UCS code point to the internal (EUC/SJIS/UPTEX) code.
 * In UPTEX mode the value goes straight through UCStoUPTEX(); otherwise
 * it is mapped to JIS first, and 0 is returned when UCS2toJIS() yields 0.
 */
long fromUCS(long kcode)
{
    long jis;

    if (is_internalUPTEX()) {
        return UCStoUPTEX(kcode);
    }

    jis = UCS2toJIS(kcode);
    if (jis != 0) {
        return fromJIS(jis);
    }
    return 0;
}
/*
 * Range check for the first byte of a multi-byte character in the
 * current internal encoding.  In UPTEX mode any UTF-8 lead byte of a
 * 2-, 3- or 4-byte sequence qualifies.
 */
boolean iskanji1(int c)
{
    if (!is_internalUPTEX()) {
        /* legacy encodings: SJIS if selected, EUC otherwise */
        return is_internalSJIS() ? isSJISkanji1(c) : isEUCkanji1(c);
    }
    return (isUTF8(2,1,c) || isUTF8(3,1,c) || isUTF8(4,1,c));
}
/*
 * Length of a multi-byte character judged from its first byte only
 * (a deliberately loose range check).  UPTEX mode delegates to
 * UTF8length(); SJIS/EUC give 2 for a kanji lead byte and 1 otherwise.
 */
int multibytelen (int first_byte)
{
    int is_lead;

    if (is_internalUPTEX()) {
        return UTF8length(first_byte);
    }

    if (is_internalSJIS()) {
        is_lead = isSJISkanji1(first_byte);
    } else {
        /* EUC */
        is_lead = isEUCkanji1(first_byte);
    }
    return is_lead ? 2 : 1;
}
/*
 * Decode the character starting at s[pos] (buffer encoding: EUC, SJIS or
 * UTF-8) into the internal (EUC/SJIS/UPTEX) code.
 *
 *   s    buffer holding the encoded text
 *   len  number of valid bytes in s
 *   pos  offset of the character to decode
 *
 * When the bytes at pos do not form a valid multi-byte character the
 * single byte s[pos] is returned unchanged.
 */
long fromBUFF(unsigned char *s, int len, int pos)
{
    unsigned char *p = s + pos;
    int avail = len - pos;

    if (is_internalUPTEX()) {
        if (UTF8Slength(p, avail) >= 0) {
            return UCStoUPTEX(UTF8StoUCS(p));
        }
        return p[0];
    }

    if (avail >= 2) {
        int is_pair;

        if (is_internalSJIS()) {
            is_pair = isSJISkanji1(p[0]) && isSJISkanji2(p[1]);
        } else {
            /* EUC */
            is_pair = isEUCkanji1(p[0]) && isEUCkanji2(p[1]);
        }
        if (is_pair) {
            return HILO(p[0], p[1]);
        }
    }
    return p[0];
}
/*
 * Write the packed multi-byte character c to fp.  BYTE1..BYTE3 are
 * emitted only when non-zero; BYTE4 is always emitted.
 *
 * On WIN32, when sjisterminal is set and fp is stdout/stderr attached to
 * a console, the bytes are converted with MultiByteToWideChar() (code
 * page 65001 in UPTEX mode, 932 otherwise) and written through
 * WriteConsoleW() instead.
 *
 * Returns EOF on failure; otherwise the value of the final putc(), or
 * BYTE4(c) on the console path.
 */
static int put_multibyte(long c, FILE *fp)
{
    int b;

#ifdef WIN32
    if (sjisterminal) {
        const int fd = fileno(fp);

        if ((fd == fileno(stdout) || fd == fileno(stderr)) && _isatty(fd)) {
            HANDLE console;
            DWORD written, nwide;
            UINT codepage;
            wchar_t wide[2];
            char bytes[4];
            int nbytes;

            if (fd == fileno(stdout))
                console = GetStdHandle(STD_OUTPUT_HANDLE);
            else
                console = GetStdHandle(STD_ERROR_HANDLE);

            /* collect the significant bytes, highest first */
            nbytes = 0;
            if (BYTE1(c) != 0) bytes[nbytes++] = BYTE1(c);
            if (BYTE2(c) != 0) bytes[nbytes++] = BYTE2(c);
            if (BYTE3(c) != 0) bytes[nbytes++] = BYTE3(c);
            /* always */       bytes[nbytes++] = BYTE4(c);

#define CP_932     932
#define CP_UTF8    65001

            if (is_internalUPTEX())
                codepage = CP_UTF8;
            else
                codepage = CP_932;

            if (MultiByteToWideChar(codepage, 0, bytes, nbytes, wide, 2) == 0)
                return EOF;

            /* a 4-byte input needs two UTF-16 units (surrogate pair) */
            nwide = nbytes > 3 ? 2 : 1;
            if (WriteConsoleW(console, wide, nwide, &written, NULL) == 0)
                return EOF;

            return BYTE4(c);
        }
    }
#endif

    b = BYTE1(c);
    if (b != 0 && putc(b, fp) == EOF) return EOF;
    b = BYTE2(c);
    if (b != 0 && putc(b, fp) == EOF) return EOF;
    b = BYTE3(c);
    if (b != 0 && putc(b, fp) == EOF) return EOF;

    /* the lowest byte is written unconditionally */
    b = BYTE4(c);
    return putc(b, fp);
}
/*
 * Byte length of the character that starts at s[pos].
 *
 *   s    buffer holding the encoded text
 *   len  number of valid bytes in s
 *   pos  offset of the character to measure
 *
 * Returns 1 whenever the bytes do not form a valid multi-byte character
 * (including when UTF8Slength() reports a negative value).
 */
int multistrlen(unsigned char *s, int len, int pos)
{
    unsigned char *p = s + pos;
    int avail = len - pos;

    if (is_internalUPTEX()) {
        int n = UTF8Slength(p, avail);

        return (n < 0) ? 1 : n;
    }

    if (avail >= 2) {
        if (is_internalSJIS()) {
            if (isSJISkanji1(p[0]) && isSJISkanji2(p[1])) {
                return 2;
            }
        } else if (isEUCkanji1(p[0]) && isEUCkanji2(p[1])) {
            /* EUC */
            return 2;
        }
    }
    return 1;
}
/*
 * Range check: is c acceptable as byte number nth (1-based) of a
 * multi-byte character that is length bytes long?
 *
 * UPTEX mode defers to isUTF8().  In SJIS/EUC mode only 2-byte
 * characters exist, so every valid position of a 3- or 4-byte character
 * is false; any other length/nth combination is reported on stderr and
 * also yields false.
 */
boolean ismultichr (int length, int nth, int c)
{
    if (is_internalUPTEX()) {
        return isUTF8(length, nth, c);
    }

    switch (length) {
    case 2:
        if (nth == 1) {
            return is_internalSJIS() ? isSJISkanji1(c) : isEUCkanji1(c);
        }
        if (nth == 2) {
            return is_internalSJIS() ? isSJISkanji2(c) : isEUCkanji2(c);
        }
        break;
    case 3:
    case 4:
        if (0 < nth && nth <= length) {
            return false;
        }
        break;
    default:
        break;
    }

    fprintf(stderr, "ismultichr: unexpected param length=%d, nth=%d\n",
            length, nth);
    return false;
}
/*
 * Print the two-byte EUC character (a, b) to fp.  In UPTEX mode the
 * character is first converted to UTF-8 (0xffff maps to
 * U_REPLACEMENT_CHARACTER); otherwise the two bytes are written as-is.
 */
static void fprint_euc_char(FILE *fp, const char a, const char b)
{
    unsigned char utf8[5];
    int len = 0;
    int code;

    if (!is_internalUPTEX()) {
        fprintf(fp,"%c%c",a,b);
        return;
    }

    /* convert a character from EUC to UTF8 */
    code = ((unsigned char)a << 8) | (unsigned char)b;
    if (code == 0xffff) {
        code = U_REPLACEMENT_CHARACTER;
    } else {
        code = JIStoUCS2(code & 0x7f7f);
    }
    code = UCStoUTF8(code);

    /* BYTE1 is never set for these characters, so it is not emitted */
    if (BYTE2(code) != 0) utf8[len++] = BYTE2(code);
    if (BYTE3(code) != 0) utf8[len++] = BYTE3(code);
    utf8[len++] = BYTE4(code);
    utf8[len] = '\0';

    fprintf(fp,"%s",utf8);
}
/*
 * Convert an internal (EUC/SJIS/UPTEX) code to UCS.  Non-UPTEX codes are
 * routed through JIS.
 */
long toUCS(long kcode)
{
    if (!is_internalUPTEX()) {
        return JIStoUCS2(toJIS(kcode));
    }
    return UPTEXtoUCS(kcode);
}
/*
 * Convert an internal (EUC/SJIS/UPTEX) code to EUC.  When the internal
 * encoding is already EUC the code is returned untouched.
 */
static long toEUC(long kcode)
{
    if (is_internalUPTEX() || is_internalSJIS()) {
        return JIStoEUC(toJIS(kcode));
    }
    return kcode;
}
/*
 * Convert an EUC code to the internal (EUC/SJIS/UPTEX) code.  When the
 * internal encoding is already EUC the code is returned untouched.
 */
long fromEUC(long kcode)
{
    if (is_internalUPTEX() || is_internalSJIS()) {
        return fromJIS(EUCtoJIS(kcode));
    }
    return kcode;
}
/*
 * Convert an internal (EUC/SJIS/UPTEX) code to JIS, picking the
 * converter that matches the current internal encoding.
 */
long toJIS(long kcode)
{
    long jis;

    if (is_internalUPTEX()) {
        jis = UCS2toJIS(UPTEXtoUCS(kcode));
    } else if (is_internalSJIS()) {
        jis = SJIStoJIS(kcode);
    } else {
        /* EUC */
        jis = EUCtoJIS(kcode);
    }
    return jis;
}
/*
 * Convert a JIS code to the internal (EUC/SJIS/UPTEX) code, picking the
 * converter that matches the current internal encoding.
 */
long fromJIS(long kcode)
{
    long internal;

    if (is_internalUPTEX()) {
        internal = UCStoUPTEX(JIStoUCS2(kcode));
    } else if (is_internalSJIS()) {
        internal = JIStoSJIS(kcode);
    } else {
        /* EUC */
        internal = JIStoEUC(kcode);
    }
    return internal;
}
/* write ind file */ void indwrite(char *filename, struct index *ind, int pagenum) { int i,j,hpoint=0; char datama[2048],lbuff[BUFFERLEN]; FILE *fp; int conv_euc_to_euc; if (filename && kpse_out_name_ok(filename)) fp=fopen(filename,"wb"); else { fp=stdout; #ifdef WIN32 setmode(fileno(fp), _O_BINARY); #endif } conv_euc_to_euc = is_internalUPTEX() ? 1 : 0; if (conv_euc_to_euc) set_enc_string(NULL, "euc"); convert(atama,datama); if (conv_euc_to_euc) set_enc_string(NULL, "uptex"); fputs(preamble,fp); if (fpage>0) { fprintf(fp,"%s%d%s",setpage_prefix,pagenum,setpage_suffix); } for (i=line_length=0;i<lines;i++) { if (i==0) { if (!((alphabet(ind[i].dic[0][0]))||(japanese(ind[i].dic[0])))) { if (lethead_flag) { if (symbol_flag && strlen(symbol)) { fprintf(fp,"%s%s%s",lethead_prefix,symbol,lethead_suffix); } else if (lethead_flag>0) { fprintf(fp,"%s%s%s",lethead_prefix,symhead_positive,lethead_suffix); } else if (lethead_flag<0) { fprintf(fp,"%s%s%s",lethead_prefix,symhead_negative,lethead_suffix); } } SPRINTF(lbuff,"%s%s",item_0,ind[i].idx[0]); } else if (alphabet(ind[i].dic[0][0])) { if (lethead_flag>0) { fprintf(fp,"%s%c%s",lethead_prefix,ind[i].dic[0][0],lethead_suffix); } else if (lethead_flag<0) { fprintf(fp,"%s%c%s",lethead_prefix,ind[i].dic[0][0]+32,lethead_suffix); } SPRINTF(lbuff,"%s%s",item_0,ind[i].idx[0]); } else if (japanese(ind[i].dic[0])) { if (lethead_flag) { fputs(lethead_prefix,fp); for (j=hpoint;j<(strlen(datama)/2);j++) { if ((unsigned char)ind[i].dic[0][1]<(unsigned char)datama[j*2+1]) { fprint_euc_char(fp,atama[(j-1)*2],atama[(j-1)*2+1]); hpoint=j; break; } } if (j==(strlen(datama)/2)) { fprint_euc_char(fp,atama[(j-1)*2],atama[(j-1)*2+1]); } fputs(lethead_suffix,fp); } SPRINTF(lbuff,"%s%s",item_0,ind[i].idx[0]); for (hpoint=0;hpoint<(strlen(datama)/2);hpoint++) { if ((unsigned char)ind[i].dic[0][1]<(unsigned char)datama[hpoint*2+1]) { break; } } } switch (ind[i].words) { case 1: SAPPENDF(lbuff,"%s",delim_0); break; case 2: 
SAPPENDF(lbuff,"%s%s",item_x1,ind[i].idx[1]); SAPPENDF(lbuff,"%s",delim_1); break; case 3: SAPPENDF(lbuff,"%s%s",item_x1,ind[i].idx[1]); SAPPENDF(lbuff,"%s%s",item_x2,ind[i].idx[2]); SAPPENDF(lbuff,"%s",delim_2); break; default: break; } printpage(ind,fp,i,lbuff); } else { if (!((alphabet(ind[i].dic[0][0]))||(japanese(ind[i].dic[0])))) { if ((alphabet(ind[i-1].dic[0][0]))||(japanese(ind[i-1].dic[0]))){ fputs(group_skip,fp); if (lethead_flag && symbol_flag) { fprintf(fp,"%s%s%s",lethead_prefix,symbol,lethead_suffix); } } } else if (alphabet(ind[i].dic[0][0])) { if (ind[i].dic[0][0]!=ind[i-1].dic[0][0]) { fputs(group_skip,fp); if (lethead_flag>0) { fprintf(fp,"%s%c%s",lethead_prefix,ind[i].dic[0][0],lethead_suffix); } else if (lethead_flag<0) { fprintf(fp,"%s%c%s",lethead_prefix,ind[i].dic[0][0]+32,lethead_suffix); } } } else if (japanese(ind[i].dic[0])) { for (j=hpoint;j<(strlen(datama)/2);j++) { if ((unsigned char)(ind[i].dic[0][0]<=(unsigned char)datama[j*2])&&((unsigned char)ind[i].dic[0][1]<(unsigned char)datama[j*2+1])) { break; } } if ((j!=hpoint)||(j==0)) { hpoint=j; fputs(group_skip,fp); if (lethead_flag!=0) { fputs(lethead_prefix,fp); fprint_euc_char(fp,atama[(j-1)*2],atama[(j-1)*2+1]); fputs(lethead_suffix,fp); } } } switch (ind[i].words) { case 1: SAPPENDF(lbuff,"%s%s%s",item_0,ind[i].idx[0],delim_0); break; case 2: if (strcmp(ind[i-1].idx[0],ind[i].idx[0])!=0 || strcmp(ind[i-1].dic[0],ind[i].dic[0])!=0) { SAPPENDF(lbuff,"%s%s%s",item_0,ind[i].idx[0],item_x1); } else { if (ind[i-1].words==1) { SAPPENDF(lbuff,"%s",item_01); } else { SAPPENDF(lbuff,"%s",item_1); } } SAPPENDF(lbuff,"%s",ind[i].idx[1]); SAPPENDF(lbuff,"%s",delim_1); break; case 3: if (strcmp(ind[i-1].idx[0],ind[i].idx[0])!=0 || strcmp(ind[i-1].dic[0],ind[i].dic[0])!=0) { SAPPENDF(lbuff,"%s%s",item_0,ind[i].idx[0]); SAPPENDF(lbuff,"%s%s%s",item_x1,ind[i].idx[1],item_x2); } else if (ind[i-1].words==1) { SAPPENDF(lbuff,"%s%s%s",item_01,ind[i].idx[1],item_x2); } else if 
(strcmp(ind[i-1].idx[1],ind[i].idx[1])!=0 || strcmp(ind[i-1].dic[1],ind[i].dic[1])!=0) { if (ind[i-1].words==2) SAPPENDF(lbuff,"%s%s%s",item_1,ind[i].idx[1],item_12); else SAPPENDF(lbuff,"%s%s%s",item_1,ind[i].idx[1],item_x2); } else { SAPPENDF(lbuff,"%s",item_2); } SAPPENDF(lbuff,"%s%s",ind[i].idx[2],delim_2); break; default: break; } printpage(ind,fp,i,lbuff); } } fputs(postamble,fp); if (filename) fclose(fp); }
/*
 * Convert an internal (EUC/SJIS/UPTEX) code to the buffer encoding
 * (EUC/SJIS/UTF-8).  Only UPTEX codes need converting; every other code
 * is returned unchanged.
 */
long toBUFF(long kcode)
{
    if (!is_internalUPTEX()) {
        return kcode;
    }
    return UCStoUTF8(UPTEXtoUCS(kcode));
}
/* putc() with code conversion */ int putc2(int c, FILE *fp) { static int num[NOFILE]; /* 0 : not in Kanji 1..4 : in JIS Kanji and num[] bytes are in store[][] -1 : in JIS Kanji and store[][] is empty */ static unsigned char store[NOFILE][4]; const int fd = fileno(fp); int ret = c, output_enc; #ifdef WIN32 if ((fp == stdout || fp == stderr) && (_isatty(fd) || !prior_file_enc)) { if (sjisterminal) { if (is_internalUPTEX()) output_enc = ENC_UTF8; else output_enc = ENC_SJIS; } else #else if ((fp == stdout || fp == stderr) && !prior_file_enc) { #endif output_enc = get_terminal_enc(); } else output_enc = get_file_enc(); if (num[fd] > 0) { /* multi-byte char */ if (is_internalUPTEX() && iskanji1(c)) { /* error */ ret = flush(store[fd], num[fd], fp); num[fd] = 0; } store[fd][num[fd]] = c; num[fd]++; if (multistrlen(store[fd], num[fd], 0) == num[fd]) { long i = fromBUFF(store[fd], num[fd], 0); ret = put_multibyte(toENC(i, output_enc), fp); num[fd] = -1; } else if ((is_internalUPTEX() && num[fd] == 4) || (!is_internalUPTEX() && num[fd] == 2)) { /* error */ ret = flush(store[fd], num[fd], fp); num[fd] = -1; } } else if (iskanji1(c)) { /* first multi-byte char */ if (num[fd] == 0 && output_enc == ENC_JIS) { ret = put_multibyte(KANJI_IN, fp); } store[fd][0] = c; num[fd] = 1; } else { /* ASCII */ if (num[fd] < 0 && output_enc == ENC_JIS) { put_multibyte(KANJI_OUT, fp); } ret = putc(c, fp); num[fd] = 0; } return ret; } /* fputs() with code conversion */ int fputs2(const char *s, FILE *fp) { while (*s != '\0') { int ret = putc2((unsigned char)*s, fp); if (ret == EOF) return EOF; s++; } return 1; } static struct unget_st { int size; int buff[4]; } ungetbuff[NOFILE]; static int getc4(FILE *fp) { struct unget_st *p = &ungetbuff[fileno(fp)]; if (p->size == 0) #ifdef WIN32 { const int fd = fileno(fp); HANDLE hStdin; DWORD ret; wchar_t wc[2]; long c; static wchar_t wcbuf = L'\0'; if (!(fd == fileno(stdin) && _isatty(fd) && is_internalUPTEX())) return getc(fp); hStdin = 
GetStdHandle(STD_INPUT_HANDLE); if (wcbuf) { wc[0] = wcbuf; wcbuf = L'\0'; } else if (ReadConsoleW(hStdin, wc, 1, &ret, NULL) == 0) return EOF; if (0xd800<=wc[0] && wc[0]<0xdc00) { if (ReadConsoleW(hStdin, wc+1, 1, &ret, NULL) == 0) return EOF; if (0xdc00<=wc[1] && wc[1]<0xe000) { c = UTF16StoUTF32(wc[0], wc[1]); } else { wcbuf = wc[1]; c = U_REPLACEMENT_CHARACTER; /* illegal upper surrogate pair */ } } else if (0xdc00<=wc[0] && wc[0]<0xe000) { c = U_REPLACEMENT_CHARACTER; /* illegal lower surrogate pair */ } else { c = wc[0]; } c = UCStoUTF8(c); /* always */ p->buff[p->size++]=BYTE4(c); if (BYTE3(c) != 0) p->buff[p->size++]=BYTE3(c); if (BYTE2(c) != 0) p->buff[p->size++]=BYTE2(c); if (BYTE1(c) != 0) p->buff[p->size++]=BYTE1(c); } #else return getc(fp); #endif return p->buff[--p->size]; } static int ungetc4(int c, FILE *fp) { struct unget_st *p = &ungetbuff[fileno(fp)]; if (p->size >= 4) return EOF; return p->buff[p->size++] = c; } static unsigned char *buffer; static long first, last; static boolean combin_voiced_sound(boolean semi) { int i, mblen; mblen = is_internalUPTEX() ? 3 : 2; if (last-mblen < first) return false; if (multistrlen(buffer,last,last-mblen) != mblen) return false; i = toUCS(fromBUFF(buffer,last,last-mblen)); i = get_voiced_sound(i, semi); if (i == 0) return false; i = toBUFF(fromUCS(i)); if (BYTE2(i) != 0) buffer[last-3] = BYTE2(i); /* always */ buffer[last-2] = BYTE3(i); /* always */ buffer[last-1] = BYTE4(i); return true; }
/* putc() with code conversion */ int putc2(int c, FILE *fp) { static int num[NOFILE]; /* 0 : not in Kanji 1..4 : in JIS Kanji and num[] bytes are in store[][] -1 : in JIS Kanji and store[][] is empty */ static unsigned char store[NOFILE][4]; const int fd = fileno(fp); int ret = c, output_enc; #ifdef WIN32 if ((fp == stdout || fp == stderr) && (_isatty(fd) || !prior_file_enc)) { if (sjisterminal) { if (is_internalUPTEX()) output_enc = ENC_UTF8; else output_enc = ENC_SJIS; } else #else if ((fp == stdout || fp == stderr) && !prior_file_enc) { #endif output_enc = get_terminal_enc(); } else output_enc = get_file_enc(); if (num[fd] > 0) { /* multi-byte char */ if (is_internalUPTEX() && iskanji1(c)) { /* error */ ret = flush(store[fd], num[fd], fp); num[fd] = 0; } store[fd][num[fd]] = c; num[fd]++; if (multistrlen(store[fd], num[fd], 0) == num[fd]) { long i = fromBUFF(store[fd], num[fd], 0); ret = put_multibyte(toENC(i, output_enc), fp); num[fd] = -1; } else if ((is_internalUPTEX() && num[fd] == 4) || (!is_internalUPTEX() && num[fd] == 2)) { /* error */ ret = flush(store[fd], num[fd], fp); num[fd] = -1; } } else if (iskanji1(c)) { /* first multi-byte char */ if (num[fd] == 0 && output_enc == ENC_JIS) { ret = put_multibyte(KANJI_IN, fp); } store[fd][0] = c; num[fd] = 1; } else { /* ASCII */ if (num[fd] < 0 && output_enc == ENC_JIS) { put_multibyte(KANJI_OUT, fp); } ret = putc(c, fp); num[fd] = 0; } return ret; } /* fputs() with code conversion */ int fputs2(const char *s, FILE *fp) { while (*s != '\0') { int ret = putc2((unsigned char)*s, fp); if (ret == EOF) return EOF; s++; } return 1; } static struct unget_st { int size; int buff[4]; } ungetbuff[NOFILE]; static int getc4(FILE *fp) { struct unget_st *p = &ungetbuff[fileno(fp)]; if (p->size == 0) return getc(fp); return p->buff[--p->size]; } static int ungetc4(int c, FILE *fp) { struct unget_st *p = &ungetbuff[fileno(fp)]; if (p->size >= 4) return EOF; return p->buff[p->size++] = c; } static unsigned char *buffer; static 
long first, last; static boolean combin_voiced_sound(boolean semi) { int i; if (last-2 < first) return false; if (multistrlen(buffer,last,last-2) != 2) return false; i = toUCS(fromBUFF(buffer,last,last-2)); i = get_voiced_sound(i, semi); if (i == 0) return false; i = toBUFF(fromUCS(i)); buffer[last-2] = HI(i); buffer[last-1] = LO(i); return true; }