/*
 * Return a printable name for the encodings currently in effect.
 *
 * When the file encoding and the internal encoding are the same, the
 * string returned by enc_to_string() for that encoding is passed through
 * unchanged.  Otherwise the result has the form "<file>.<internal>" and
 * lives in a static buffer that is overwritten by the next call, so the
 * caller must copy it if it has to survive a later call.
 */
const_string get_enc_string(void)
{
    static char buffer[20]; /* enough large space */
    const int fenc = get_file_enc();
    const int ienc = get_internal_enc();

    if (fenc == ienc)
        return enc_to_string(fenc);

    /* Bounded formatting: never write past the static buffer, even if a
       longer encoding name is introduced later. */
    snprintf(buffer, sizeof buffer, "%s.%s",
             enc_to_string(fenc), enc_to_string(ienc));
    return buffer;
}
/*
 * Return the encoding to use for terminal output.
 *
 * The answer is computed on the first call and cached in terminal_enc.
 * It is derived from the first of the environment variables LC_ALL,
 * LC_MESSAGES, LANG and LANGUAGE that is set: the text after the last
 * '.' (or the whole value when there is no '.') is compared, ignoring
 * case, with a list of known codeset names.  When nothing matches, the
 * file encoding is used instead.
 */
static int get_terminal_enc(void)
{
    static const char *const env_names[] = {
        "LC_ALL", "LC_MESSAGES", "LANG", "LANGUAGE"
    };
    static const struct {
        const char *name;
        int         enc;
    } codesets[] = {
        { "euc",         ENC_EUC  },
        { "eucJP",       ENC_EUC  },
        { "ujis",        ENC_EUC  },
        { "sjis",        ENC_SJIS },
        { "utf8",        ENC_UTF8 },
        { "UTF-8",       ENC_UTF8 },
        { "jis",         ENC_JIS  },
        { "ISO-2022-JP", ENC_JIS  }
    };
    char lang[16]; /* enough large space */
    const char *value = NULL;
    const char *dot;
    size_t k;

    if (terminal_enc != ENC_UNKNOWN)
        return terminal_enc;        /* already decided */

    /* first variable that is set wins */
    for (k = 0; k < sizeof(env_names) / sizeof(env_names[0]); k++) {
        value = getenv(env_names[k]);
        if (value != NULL)
            break;
    }
    if (value == NULL)
        value = "";

    /* keep only the part following the last '.' */
    dot = strrchr(value, '.');
    if (dot != NULL)
        value = dot + 1;

    strncpy(lang, value, sizeof(lang) - 1);
    lang[sizeof(lang) - 1] = '\0';

    for (k = 0; k < sizeof(codesets) / sizeof(codesets[0]); k++) {
        if (strcasecmp(lang, codesets[k].name) == 0) {
            terminal_enc = codesets[k].enc;
            return terminal_enc;
        }
    }

    /* unknown or missing codeset: fall back to the file encoding */
    terminal_enc = get_file_enc();
    return terminal_enc;
}
/* input line with encoding conversion */ long input_line2(FILE *fp, unsigned char *buff, long pos, const long buffsize, int *lastchar) { long i; static boolean injis = false; const int fd = fileno(fp); if (infile_enc[fd] == ENC_UNKNOWN) { /* just after opened */ ungetbuff[fd].size = 0; if (isUTF8Nstream(fp)) infile_enc[fd] = ENC_UTF8; else infile_enc[fd] = get_file_enc(); } buffer = buff; first = last = pos; while (last < buffsize-30 && (i=getc4(fp)) != EOF && i!='\n' && i!='\r') { /* 30 is enough large size for one char */ /* attention: 4 times of write_hex() eats 16byte */ #ifdef WIN32 if (i == 0x1a && first == last && fd == fileno(stdin) && _isatty(fd)) { /* Ctrl+Z on console */ i = EOF; break; } else #endif if (i == ESC) { if ((i=getc4(fp)) == '$') { /* ESC '$' (Kanji-in) */ i = getc4(fp); if (i == '@' || i == 'B') { injis = true; } else { /* broken Kanji-in */ buffer[last++] = ESC; buffer[last++] = '$'; if (is_tail(&i, fp)) break; buffer[last++] = i; } } else if (i == '(') { /* ESC '(' (Kanji-out) */ i = getc4(fp); if (i == 'J' || i == 'B' || i == 'H') { injis = false; } else { /* broken Kanji-out */ buffer[last++] = ESC; buffer[last++] = '('; if (is_tail(&i, fp)) break; buffer[last++] = i; } } else { /* broken ESC */ buffer[last++] = ESC; if (is_tail(&i, fp)) break; buffer[last++] = i; } } else { /* rather than ESC */ if (injis) { /* in JIS */ long j = getc4(fp); if (is_tail(&j, fp)) { buffer[last++] = i; i = j; break; } else { /* JIS encoding */ i = fromJIS(HILO(i,j)); if (i == 0) i = fromUCS(U_REPLACEMENT_CHARACTER); write_multibyte(toBUFF(i)); } } else { /* normal */ if (infile_enc[fd] == ENC_SJIS && isSJISkanji1(i)) { get_sjis(i, fp); } else if (infile_enc[fd] == ENC_EUC && isEUCkanji1(i)) { get_euc(i, fp); } else if (infile_enc[fd] == ENC_UTF8 && UTF8length(i) > 1) { get_utf8(i, fp); } else { buffer[last++] = i; } } } } buffer[last] = '\0'; if (i == EOF || i == '\n' || i == '\r') injis = false; if (lastchar != NULL) *lastchar = i; return last; }
/*
 * putc() with code conversion
 *
 * Accepts one byte c destined for fp.  Bytes for which iskanji1() is
 * true start a multi-byte character; its bytes are collected per file
 * descriptor in store[fd] until multistrlen() reports a complete
 * character, which is then converted with toENC() to the output
 * encoding and written with put_multibyte().  Other bytes are written
 * directly with putc().
 *
 * Returns c when the byte was only stored, otherwise the result of the
 * last flush(), put_multibyte() or putc() call that was made.
 */
int putc2(int c, FILE *fp)
{
    static int num[NOFILE];
        /* 0    : not in Kanji
           1..4 : in JIS Kanji and num[] bytes are in store[][]
           -1   : in JIS Kanji and store[][] is empty */
    static unsigned char store[NOFILE][4];
    const int fd = fileno(fp);
    int ret = c, output_enc;

    /* Choose the output encoding.  stdout/stderr use the terminal
       encoding unless prior_file_enc is set; everything else uses the
       file encoding.  The opening brace of the `if` comes from whichever
       preprocessor branch is active. */
#ifdef WIN32
    /* On WIN32 a console stream also qualifies, whatever prior_file_enc
       says, and sjisterminal overrides the terminal encoding. */
    if ((fp == stdout || fp == stderr) && (_isatty(fd) || !prior_file_enc)) {
        if (sjisterminal) {
            if (is_internalUPTEX())
                output_enc = ENC_UTF8;
            else
                output_enc = ENC_SJIS;
        } else
#else
    if ((fp == stdout || fp == stderr) && !prior_file_enc) {
#endif
        output_enc = get_terminal_enc();
    } else
        output_enc = get_file_enc();

    if (num[fd] > 0) {        /* multi-byte char */
        if (is_internalUPTEX() && iskanji1(c)) { /* error */
            /* a new leading byte arrived while bytes were still pending:
               write the pending bytes out with flush() and start over */
            ret = flush(store[fd], num[fd], fp);
            num[fd] = 0;
        }
        store[fd][num[fd]] = c;
        num[fd]++;
        if (multistrlen(store[fd], num[fd], 0) == num[fd]) {
            /* the stored bytes now form one complete character */
            long i = fromBUFF(store[fd], num[fd], 0);
            ret = put_multibyte(toENC(i, output_enc), fp);
            num[fd] = -1;
        } else if ((is_internalUPTEX() && num[fd] == 4) ||
                   (!is_internalUPTEX() && num[fd] == 2)) { /* error */
            /* maximum length reached without a valid character */
            ret = flush(store[fd], num[fd], fp);
            num[fd] = -1;
        }
    } else if (iskanji1(c)) { /* first multi-byte char */
        /* num[fd] == 0 means the previous output was not Kanji, so JIS
           output needs the KANJI_IN sequence first */
        if (num[fd] == 0 && output_enc == ENC_JIS) {
            ret = put_multibyte(KANJI_IN, fp);
        }
        store[fd][0] = c;
        num[fd] = 1;
    } else {                  /* ASCII */
        /* leaving Kanji: JIS output needs KANJI_OUT; its return value is
           not checked */
        if (num[fd] < 0 && output_enc == ENC_JIS) {
            put_multibyte(KANJI_OUT, fp);
        }
        ret = putc(c, fp);
        num[fd] = 0;
    }
    return ret;
}

/*
 * fputs() with code conversion
 *
 * Sends every byte of the NUL-terminated string s through putc2().
 * Returns EOF as soon as putc2() returns EOF, otherwise 1.
 */
int fputs2(const char *s, FILE *fp)
{
    while (*s != '\0') {
        /* cast so that bytes >= 0x80 are passed as positive values */
        int ret = putc2((unsigned char)*s, fp);
        if (ret == EOF) return EOF;
        s++;
    }
    return 1;
}

/* Per file descriptor push-back stack used by getc4()/ungetc4():
   `size` entries of `buff` are valid, the most recently pushed last. */
static struct unget_st {
    int size;
    int buff[4];
} ungetbuff[NOFILE];

/*
 * Read one byte from fp, taking pushed-back bytes first.
 *
 * When the push-back stack is empty:
 *  - without WIN32, the byte comes straight from getc();
 *  - with WIN32, getc() is also used unless fp is stdin, stdin is a
 *    console (_isatty) and is_internalUPTEX() is true.  In that case one
 *    character is read from the console with ReadConsoleW(), surrogate
 *    pairs are combined, the code point is converted with UCStoUTF8()
 *    and the resulting bytes are pushed on the stack, from which the
 *    final statement then pops the first one.
 *
 * Returns the byte, or EOF when getc() returns EOF or ReadConsoleW()
 * fails.
 */
static int getc4(FILE *fp)
{
    struct unget_st *p = &ungetbuff[fileno(fp)];

    if (p->size == 0)
#ifdef WIN32
    {
        const int fd = fileno(fp);
        HANDLE hStdin;
        DWORD ret;
        wchar_t wc[2];
        long c;
        /* code unit read ahead while looking for a low surrogate; it is
           consumed at the start of the next console read */
        static wchar_t wcbuf = L'\0';

        if (!(fd == fileno(stdin) && _isatty(fd) && is_internalUPTEX()))
            return getc(fp);

        hStdin = GetStdHandle(STD_INPUT_HANDLE);
        if (wcbuf) {
            wc[0] = wcbuf;
            wcbuf = L'\0';
        }
        else if (ReadConsoleW(hStdin, wc, 1, &ret, NULL) == 0)
            return EOF;
        if (0xd800<=wc[0] && wc[0]<0xdc00) {
            /* high surrogate: a low surrogate must follow */
            if (ReadConsoleW(hStdin, wc+1, 1, &ret, NULL) == 0)
                return EOF;
            if (0xdc00<=wc[1] && wc[1]<0xe000) {
                c = UTF16StoUTF32(wc[0], wc[1]);
            } else {
                /* keep the unexpected unit for the next call */
                wcbuf = wc[1];
                c = U_REPLACEMENT_CHARACTER;  /* illegal upper surrogate pair */
            }
        } else if (0xdc00<=wc[0] && wc[0]<0xe000) {
            c = U_REPLACEMENT_CHARACTER;  /* illegal lower surrogate pair */
        } else {
            c = wc[0];
        }
        c = UCStoUTF8(c);
        /* Push BYTE4 first and BYTE1 last, skipping zero bytes above
           BYTE4; the stack is popped from the top, so the bytes come
           back in BYTE1..BYTE4 order. */
        /* always */       p->buff[p->size++]=BYTE4(c);
        if (BYTE3(c) != 0) p->buff[p->size++]=BYTE3(c);
        if (BYTE2(c) != 0) p->buff[p->size++]=BYTE2(c);
        if (BYTE1(c) != 0) p->buff[p->size++]=BYTE1(c);
    }
#else
        return getc(fp);
#endif
    return p->buff[--p->size];
}

/*
 * Push c back so that the next getc4() on fp returns it.
 * Returns c, or EOF when four values are already pending for fp.
 */
static int ungetc4(int c, FILE *fp)
{
    struct unget_st *p = &ungetbuff[fileno(fp)];

    if (p->size >= 4) return EOF;
    return p->buff[p->size++] = c;
}

/* Output area shared by the line-input helpers: `buffer` is the
   destination, `first` the index where the current line began and
   `last` the index of the next free byte. */
static unsigned char *buffer;
static long first, last;

/*
 * Try to merge a sound mark into the character stored just before
 * `last` in `buffer`.
 *
 * The preceding character must occupy exactly mblen bytes (3 when
 * is_internalUPTEX() is true, otherwise 2) and must not start before
 * `first`.  It is converted with toUCS(), mapped by get_voiced_sound()
 * (semi is passed through to it), and on success the result is written
 * back over the same bytes.
 *
 * Returns true when the character was replaced, false when there is no
 * suitable preceding character or get_voiced_sound() returned 0.
 */
static boolean combin_voiced_sound(boolean semi)
{
    int i, mblen;

    mblen = is_internalUPTEX() ? 3 : 2;
    if (last-mblen < first) return false;
    if (multistrlen(buffer,last,last-mblen) != mblen) return false;
    i = toUCS(fromBUFF(buffer,last,last-mblen));
    i = get_voiced_sound(i, semi);
    if (i == 0) return false;
    i = toBUFF(fromUCS(i));
    /* buffer[last-3] is only written when the result has a third byte */
    if (BYTE2(i) != 0) buffer[last-3] = BYTE2(i);
    /* always */       buffer[last-2] = BYTE3(i);
    /* always */       buffer[last-1] = BYTE4(i);
    return true;
}
/* putc() with code conversion */ int putc2(int c, FILE *fp) { static int num[NOFILE]; /* 0 : not in Kanji 1..4 : in JIS Kanji and num[] bytes are in store[][] -1 : in JIS Kanji and store[][] is empty */ static unsigned char store[NOFILE][4]; const int fd = fileno(fp); int ret = c, output_enc; #ifdef WIN32 if ((fp == stdout || fp == stderr) && (_isatty(fd) || !prior_file_enc)) { if (sjisterminal) { if (is_internalUPTEX()) output_enc = ENC_UTF8; else output_enc = ENC_SJIS; } else #else if ((fp == stdout || fp == stderr) && !prior_file_enc) { #endif output_enc = get_terminal_enc(); } else output_enc = get_file_enc(); if (num[fd] > 0) { /* multi-byte char */ if (is_internalUPTEX() && iskanji1(c)) { /* error */ ret = flush(store[fd], num[fd], fp); num[fd] = 0; } store[fd][num[fd]] = c; num[fd]++; if (multistrlen(store[fd], num[fd], 0) == num[fd]) { long i = fromBUFF(store[fd], num[fd], 0); ret = put_multibyte(toENC(i, output_enc), fp); num[fd] = -1; } else if ((is_internalUPTEX() && num[fd] == 4) || (!is_internalUPTEX() && num[fd] == 2)) { /* error */ ret = flush(store[fd], num[fd], fp); num[fd] = -1; } } else if (iskanji1(c)) { /* first multi-byte char */ if (num[fd] == 0 && output_enc == ENC_JIS) { ret = put_multibyte(KANJI_IN, fp); } store[fd][0] = c; num[fd] = 1; } else { /* ASCII */ if (num[fd] < 0 && output_enc == ENC_JIS) { put_multibyte(KANJI_OUT, fp); } ret = putc(c, fp); num[fd] = 0; } return ret; } /* fputs() with code conversion */ int fputs2(const char *s, FILE *fp) { while (*s != '\0') { int ret = putc2((unsigned char)*s, fp); if (ret == EOF) return EOF; s++; } return 1; } static struct unget_st { int size; int buff[4]; } ungetbuff[NOFILE]; static int getc4(FILE *fp) { struct unget_st *p = &ungetbuff[fileno(fp)]; if (p->size == 0) return getc(fp); return p->buff[--p->size]; } static int ungetc4(int c, FILE *fp) { struct unget_st *p = &ungetbuff[fileno(fp)]; if (p->size >= 4) return EOF; return p->buff[p->size++] = c; } static unsigned char *buffer; static 
long first, last; static boolean combin_voiced_sound(boolean semi) { int i; if (last-2 < first) return false; if (multistrlen(buffer,last,last-2) != 2) return false; i = toUCS(fromBUFF(buffer,last,last-2)); i = get_voiced_sound(i, semi); if (i == 0) return false; i = toBUFF(fromUCS(i)); buffer[last-2] = HI(i); buffer[last-1] = LO(i); return true; }