Exemplo n.º 1
0
/* UCS to internal (EUC/SJIS/UPTEX) code conversion */
long fromUCS(long kcode)
{
    if (is_internalUPTEX()) return UCStoUPTEX(kcode);
    kcode = UCS2toJIS(kcode);
    if (kcode == 0) return 0;
    return fromJIS(kcode);
}
Exemplo n.º 2
0
/* check char range (kanji 1st) */
boolean iskanji1(int c)
{
    if (is_internalUPTEX()) return (isUTF8(2,1,c) ||
                                    isUTF8(3,1,c) ||
                                    isUTF8(4,1,c));
    if (is_internalSJIS()) return isSJISkanji1(c);
    /* EUC */              return isEUCkanji1(c);
}
Exemplo n.º 3
0
/* with not so strict range check */
int multibytelen (int first_byte)
{
    if (is_internalUPTEX()) {
        return UTF8length(first_byte);
    } else if (is_internalSJIS()) {
        if (isSJISkanji1(first_byte)) return 2;
    } else { /* EUC */
        if (isEUCkanji1(first_byte))  return 2;
    }
    return 1;
}
Exemplo n.º 4
0
/* buffer (EUC/SJIS/UTF-8) to internal (EUC/SJIS/UPTEX) code conversion */
long fromBUFF(unsigned char *s, int len, int pos)
{
    s += pos; len -= pos;
    if (is_internalUPTEX()) {
        if (UTF8Slength(s, len) < 0) return s[0];
        return UCStoUPTEX(UTF8StoUCS(s));
    }
    if (len < 2) return s[0];
    if (is_internalSJIS()) {
        if (isSJISkanji1(s[0]) && isSJISkanji2(s[1])) return HILO(s[0], s[1]);
    } else { /* EUC */
        if (isEUCkanji1(s[0])  && isEUCkanji2(s[1]))  return HILO(s[0], s[1]);
    }
    return s[0];
}
Exemplo n.º 5
0
static int put_multibyte(long c, FILE *fp) {
#ifdef WIN32
    if (sjisterminal) {
        const int fd = fileno(fp);

        if ((fd == fileno(stdout) || fd == fileno(stderr)) && _isatty(fd)) {
            HANDLE hStdout;
            DWORD ret, wclen;
            UINT cp;
            wchar_t buff[2];
            char str[4];
            int mblen;

            if (fd == fileno(stdout))
                hStdout = GetStdHandle(STD_OUTPUT_HANDLE);
            else
                hStdout = GetStdHandle(STD_ERROR_HANDLE);

            mblen=0;
            if (BYTE1(c) != 0) str[mblen++]=BYTE1(c);
            if (BYTE2(c) != 0) str[mblen++]=BYTE2(c);
            if (BYTE3(c) != 0) str[mblen++]=BYTE3(c);
            /* always */       str[mblen++]=BYTE4(c);

#define CP_932     932
#define CP_UTF8    65001

            if (is_internalUPTEX())
                cp = CP_UTF8;
            else
                cp = CP_932;
            if (MultiByteToWideChar(cp, 0, str, mblen, buff, 2) == 0)
                return EOF;

            wclen = mblen > 3 ? 2 : 1;
            if (WriteConsoleW(hStdout, buff, wclen, &ret, NULL) == 0)
                return EOF;

            return BYTE4(c);
        }
    }
#endif

    if (BYTE1(c) != 0 && putc(BYTE1(c), fp) == EOF) return EOF;
    if (BYTE2(c) != 0 && putc(BYTE2(c), fp) == EOF) return EOF;
    if (BYTE3(c) != 0 && putc(BYTE3(c), fp) == EOF) return EOF;
    /* always */  return putc(BYTE4(c), fp);
}
Exemplo n.º 6
0
/* multi-byte char length in s[pos] */
int multistrlen(unsigned char *s, int len, int pos)
{
    s += pos; len -= pos;
    if (is_internalUPTEX()) {
        int ret = UTF8Slength(s, len);
        if (ret < 0) return 1;
        return ret;
    }
    if (len < 2) return 1;
    if (is_internalSJIS()) {
        if (isSJISkanji1(s[0]) && isSJISkanji2(s[1])) return 2;
    } else { /* EUC */
        if (isEUCkanji1(s[0])  && isEUCkanji2(s[1]))  return 2;
    }
    return 1;
}
Exemplo n.º 7
0
/* check char range */
boolean ismultichr (int length, int nth, int c)
{
    if (is_internalUPTEX()) return isUTF8(length, nth, c);
    if (length == 2) {
        if (nth == 1) {
            if (is_internalSJIS()) return isSJISkanji1(c);
            /* EUC */              return isEUCkanji1(c);
        } else if (nth == 2) {
            if (is_internalSJIS()) return isSJISkanji2(c);
            /* EUC */              return isEUCkanji2(c);
        }
    }
    if ((length == 3 || length == 4) &&
        (0 < nth && nth <= length)) return false;
    fprintf(stderr, "ismultichr: unexpected param length=%d, nth=%d\n",
            length, nth);
    return false;
}
Exemplo n.º 8
0
static void fprint_euc_char(FILE *fp, const char a, const char b)
{
	if (is_internalUPTEX()) {  /* convert a character from EUC to UTF8 */
		int k = 0;
		unsigned char str[5];
		int chr = (unsigned char)a<<8 | (unsigned char)b;
		chr = (chr==0xffff) ? U_REPLACEMENT_CHARACTER : JIStoUCS2(chr & 0x7f7f);
		chr = UCStoUTF8(chr);
		/* if (BYTE1(chr) != 0) str[k++] = BYTE1(chr); */  /* do not happen */
		if (BYTE2(chr) != 0) str[k++] = BYTE2(chr);
		if (BYTE3(chr) != 0) str[k++] = BYTE3(chr);
		                     str[k++] = BYTE4(chr);
		                     str[k++] = '\0';
		fprintf(fp,"%s",str);
	}
	else
		fprintf(fp,"%c%c",a,b);
}
Exemplo n.º 9
0
/* internal (EUC/SJIS/UPTEX) to UCS code conversion */
long toUCS(long kcode)
{
    if (is_internalUPTEX()) return UPTEXtoUCS(kcode);
    return JIStoUCS2(toJIS(kcode));
}
Exemplo n.º 10
0
/* internal (EUC/SJIS/UPTEX) to EUC code conversion */
static long toEUC(long kcode)
{
    if (!is_internalUPTEX() && !is_internalSJIS()) return kcode;
    return JIStoEUC(toJIS(kcode));
}
Exemplo n.º 11
0
/* EUC to internal (EUC/SJIS/UPTEX) code conversion */
long fromEUC(long kcode)
{
    if (!is_internalUPTEX() && !is_internalSJIS()) return kcode;
    return fromJIS(EUCtoJIS(kcode));
}
Exemplo n.º 12
0
/* internal (EUC/SJIS/UPTEX) to JIS code conversion */
long toJIS(long kcode)
{
    if (is_internalUPTEX()) return UCS2toJIS(UPTEXtoUCS(kcode));
    if (is_internalSJIS())  return SJIStoJIS(kcode);
    /* EUC */               return EUCtoJIS(kcode);
}
Exemplo n.º 13
0
/* JIS to internal (EUC/SJIS/UPTEX) code conversion */
long fromJIS(long kcode)
{
    if (is_internalUPTEX()) return UCStoUPTEX(JIStoUCS2(kcode));
    if (is_internalSJIS())  return JIStoSJIS(kcode);
    /* EUC */               return JIStoEUC(kcode);
}
Exemplo n.º 14
0
/*   write ind file   */
void indwrite(char *filename, struct index *ind, int pagenum)
{
	int i,j,hpoint=0;
	char datama[2048],lbuff[BUFFERLEN];
	FILE *fp;
	int conv_euc_to_euc;

	if (filename && kpse_out_name_ok(filename)) fp=fopen(filename,"wb");
	else {
		fp=stdout;
#ifdef WIN32
		setmode(fileno(fp), _O_BINARY);
#endif
	}

	conv_euc_to_euc = is_internalUPTEX() ? 1 : 0;
	if (conv_euc_to_euc) set_enc_string(NULL, "euc");
	convert(atama,datama);
	if (conv_euc_to_euc) set_enc_string(NULL, "uptex");
	fputs(preamble,fp);

	if (fpage>0) {
		fprintf(fp,"%s%d%s",setpage_prefix,pagenum,setpage_suffix);
	}

	for (i=line_length=0;i<lines;i++) {
		if (i==0) {
			if (!((alphabet(ind[i].dic[0][0]))||(japanese(ind[i].dic[0])))) {
				if (lethead_flag) {
					if (symbol_flag && strlen(symbol)) {
						fprintf(fp,"%s%s%s",lethead_prefix,symbol,lethead_suffix);
					}
					else if (lethead_flag>0) {
						fprintf(fp,"%s%s%s",lethead_prefix,symhead_positive,lethead_suffix);
					}
					else if (lethead_flag<0) {
						fprintf(fp,"%s%s%s",lethead_prefix,symhead_negative,lethead_suffix);
					}
				}
				SPRINTF(lbuff,"%s%s",item_0,ind[i].idx[0]);
			}
			else if (alphabet(ind[i].dic[0][0])) {
				if (lethead_flag>0) {
					fprintf(fp,"%s%c%s",lethead_prefix,ind[i].dic[0][0],lethead_suffix);
				}
				else if (lethead_flag<0) {
					fprintf(fp,"%s%c%s",lethead_prefix,ind[i].dic[0][0]+32,lethead_suffix);
				}
				SPRINTF(lbuff,"%s%s",item_0,ind[i].idx[0]);
			}
			else if (japanese(ind[i].dic[0])) {
				if (lethead_flag) {
					fputs(lethead_prefix,fp);
					for (j=hpoint;j<(strlen(datama)/2);j++) {
						if ((unsigned char)ind[i].dic[0][1]<(unsigned char)datama[j*2+1]) {
							fprint_euc_char(fp,atama[(j-1)*2],atama[(j-1)*2+1]);
							hpoint=j;
							break;
						}
					}
					if (j==(strlen(datama)/2)) {
						fprint_euc_char(fp,atama[(j-1)*2],atama[(j-1)*2+1]);
					}
					fputs(lethead_suffix,fp);
				}
				SPRINTF(lbuff,"%s%s",item_0,ind[i].idx[0]);
				for (hpoint=0;hpoint<(strlen(datama)/2);hpoint++) {
					if ((unsigned char)ind[i].dic[0][1]<(unsigned char)datama[hpoint*2+1]) {
						break;
					}
				}
			}
			switch (ind[i].words) {
			case 1:
				SAPPENDF(lbuff,"%s",delim_0);
				break;

			case 2:
				SAPPENDF(lbuff,"%s%s",item_x1,ind[i].idx[1]);
				SAPPENDF(lbuff,"%s",delim_1);
				break;

			case 3:
				SAPPENDF(lbuff,"%s%s",item_x1,ind[i].idx[1]);
				SAPPENDF(lbuff,"%s%s",item_x2,ind[i].idx[2]);
				SAPPENDF(lbuff,"%s",delim_2);
				break;

			default:
				break;
			}
			printpage(ind,fp,i,lbuff);
		}
		else {
			if (!((alphabet(ind[i].dic[0][0]))||(japanese(ind[i].dic[0])))) {
				if ((alphabet(ind[i-1].dic[0][0]))||(japanese(ind[i-1].dic[0]))){
					fputs(group_skip,fp);
					if (lethead_flag && symbol_flag) {
						fprintf(fp,"%s%s%s",lethead_prefix,symbol,lethead_suffix);
					}
				}
			}
			else if (alphabet(ind[i].dic[0][0])) {
				if (ind[i].dic[0][0]!=ind[i-1].dic[0][0]) {
					fputs(group_skip,fp);
					if (lethead_flag>0) {
						fprintf(fp,"%s%c%s",lethead_prefix,ind[i].dic[0][0],lethead_suffix);
					}
					else if (lethead_flag<0) {
						fprintf(fp,"%s%c%s",lethead_prefix,ind[i].dic[0][0]+32,lethead_suffix);
					}
				}
			}
			else if (japanese(ind[i].dic[0])) {
				for (j=hpoint;j<(strlen(datama)/2);j++) {
					if ((unsigned char)(ind[i].dic[0][0]<=(unsigned char)datama[j*2])&&((unsigned char)ind[i].dic[0][1]<(unsigned char)datama[j*2+1])) {
						break;
					}
				}
				if ((j!=hpoint)||(j==0)) {
					hpoint=j;
					fputs(group_skip,fp);
					if (lethead_flag!=0) {
						fputs(lethead_prefix,fp);
						fprint_euc_char(fp,atama[(j-1)*2],atama[(j-1)*2+1]);
						fputs(lethead_suffix,fp);
					}
				}
			}

			switch (ind[i].words) {
			case 1:
				SAPPENDF(lbuff,"%s%s%s",item_0,ind[i].idx[0],delim_0);
				break;

			case 2:
				if (strcmp(ind[i-1].idx[0],ind[i].idx[0])!=0 || strcmp(ind[i-1].dic[0],ind[i].dic[0])!=0) {
					SAPPENDF(lbuff,"%s%s%s",item_0,ind[i].idx[0],item_x1);
				}
				else {
					if (ind[i-1].words==1) {
						SAPPENDF(lbuff,"%s",item_01);
					}
					else {
						SAPPENDF(lbuff,"%s",item_1);
					}
				}
				SAPPENDF(lbuff,"%s",ind[i].idx[1]);
				SAPPENDF(lbuff,"%s",delim_1);
				break;

			case 3:
				if (strcmp(ind[i-1].idx[0],ind[i].idx[0])!=0 || strcmp(ind[i-1].dic[0],ind[i].dic[0])!=0) {
					SAPPENDF(lbuff,"%s%s",item_0,ind[i].idx[0]);
					SAPPENDF(lbuff,"%s%s%s",item_x1,ind[i].idx[1],item_x2);
				}
				else if (ind[i-1].words==1) {
					SAPPENDF(lbuff,"%s%s%s",item_01,ind[i].idx[1],item_x2);
				}
				else if (strcmp(ind[i-1].idx[1],ind[i].idx[1])!=0 || strcmp(ind[i-1].dic[1],ind[i].dic[1])!=0) {
					if (ind[i-1].words==2) SAPPENDF(lbuff,"%s%s%s",item_1,ind[i].idx[1],item_12);
					else SAPPENDF(lbuff,"%s%s%s",item_1,ind[i].idx[1],item_x2);
				}
				else {
					SAPPENDF(lbuff,"%s",item_2);
				}
				SAPPENDF(lbuff,"%s%s",ind[i].idx[2],delim_2);
				break;

			default:
				break;
			}
			printpage(ind,fp,i,lbuff);
		}
	}
	fputs(postamble,fp);

	if (filename) fclose(fp);
}
Exemplo n.º 15
0
/* internal (EUC/SJIS/UPTEX) to buffer (EUC/SJIS/UTF-8) code conversion */
long toBUFF(long kcode)
{
    if (is_internalUPTEX()) kcode = UCStoUTF8(UPTEXtoUCS(kcode));
    return kcode;
}
Exemplo n.º 16
0
/* putc() with code conversion */
int putc2(int c, FILE *fp)
{
    static int num[NOFILE];
        /* 0    : not in Kanji
           1..4 : in JIS Kanji and num[] bytes are in store[][]
           -1   : in JIS Kanji and store[][] is empty */
    static unsigned char store[NOFILE][4];
    const int fd = fileno(fp);
    int ret = c, output_enc;

#ifdef WIN32
    if ((fp == stdout || fp == stderr) && (_isatty(fd) || !prior_file_enc)) {
        if (sjisterminal) {
            if (is_internalUPTEX())
                output_enc = ENC_UTF8;
            else
                output_enc = ENC_SJIS;
        } else
#else
    if ((fp == stdout || fp == stderr) && !prior_file_enc) {
#endif

        output_enc = get_terminal_enc();
    } else
        output_enc = get_file_enc();

    if (num[fd] > 0) {        /* multi-byte char */
        if (is_internalUPTEX() && iskanji1(c)) { /* error */
            ret = flush(store[fd], num[fd], fp);
            num[fd] = 0;
        }
        store[fd][num[fd]] = c;
        num[fd]++;
        if (multistrlen(store[fd], num[fd], 0) == num[fd]) {
            long i = fromBUFF(store[fd], num[fd], 0);
            ret = put_multibyte(toENC(i, output_enc), fp);
            num[fd] = -1;
        } else if ((is_internalUPTEX() && num[fd] == 4) ||
                   (!is_internalUPTEX() && num[fd] == 2)) { /* error */
            ret = flush(store[fd], num[fd], fp);
            num[fd] = -1;
        }
    } else if (iskanji1(c)) { /* first multi-byte char */
        if (num[fd] == 0 && output_enc == ENC_JIS) {
            ret = put_multibyte(KANJI_IN, fp);
        }
        store[fd][0] = c;
        num[fd] = 1;
    } else {                  /* ASCII */
        if (num[fd] < 0 && output_enc == ENC_JIS) {
            put_multibyte(KANJI_OUT, fp);
        }
        ret = putc(c, fp);
        num[fd] = 0;
    }
    return ret;
}

/* fputs() with code conversion */
int fputs2(const char *s, FILE *fp)
{
    while (*s != '\0') {
        int ret = putc2((unsigned char)*s, fp);
        if (ret == EOF) return EOF;
        s++;
    }
    return 1;
}


static struct unget_st {
    int size;
    int buff[4];
} ungetbuff[NOFILE];

static int getc4(FILE *fp)
{
    struct unget_st *p = &ungetbuff[fileno(fp)];

    if (p->size == 0)
#ifdef WIN32
    {
        const int fd = fileno(fp);
        HANDLE hStdin;
        DWORD ret;
        wchar_t wc[2];
        long c;
        static wchar_t wcbuf = L'\0';

        if (!(fd == fileno(stdin) && _isatty(fd) && is_internalUPTEX()))
            return getc(fp);

        hStdin = GetStdHandle(STD_INPUT_HANDLE);
        if (wcbuf) {
            wc[0] = wcbuf;
            wcbuf = L'\0';
        }
        else if (ReadConsoleW(hStdin, wc, 1, &ret, NULL) == 0)
            return EOF;
        if (0xd800<=wc[0] && wc[0]<0xdc00) {
            if (ReadConsoleW(hStdin, wc+1, 1, &ret, NULL) == 0)
                return EOF;
            if (0xdc00<=wc[1] && wc[1]<0xe000) {
                c = UTF16StoUTF32(wc[0], wc[1]);
            } else {
                wcbuf = wc[1];
                c = U_REPLACEMENT_CHARACTER;  /* illegal upper surrogate pair */
            }
        } else if (0xdc00<=wc[0] && wc[0]<0xe000) {
            c = U_REPLACEMENT_CHARACTER;      /* illegal lower surrogate pair */
        } else {
            c = wc[0];
        }
        c = UCStoUTF8(c);
        /* always */       p->buff[p->size++]=BYTE4(c);
        if (BYTE3(c) != 0) p->buff[p->size++]=BYTE3(c);
        if (BYTE2(c) != 0) p->buff[p->size++]=BYTE2(c);
        if (BYTE1(c) != 0) p->buff[p->size++]=BYTE1(c);
    }
#else
        return getc(fp);
#endif
    return p->buff[--p->size];
}

static int ungetc4(int c, FILE *fp)
{
    struct unget_st *p = &ungetbuff[fileno(fp)];

    if (p->size >= 4) return EOF;
    return p->buff[p->size++] = c;
}


static unsigned char *buffer;
static long first, last;
static boolean combin_voiced_sound(boolean semi)
{
    int i, mblen;

    mblen = is_internalUPTEX() ? 3 : 2;
    if (last-mblen < first) return false;
    if (multistrlen(buffer,last,last-mblen) != mblen) return false;
    i = toUCS(fromBUFF(buffer,last,last-mblen));
    i = get_voiced_sound(i, semi);
    if (i == 0) return false;
    i = toBUFF(fromUCS(i));
    if (BYTE2(i) != 0) buffer[last-3] = BYTE2(i);
    /* always */       buffer[last-2] = BYTE3(i);
    /* always */       buffer[last-1] = BYTE4(i);
    return true;
}
Exemplo n.º 17
0
/* putc() with code conversion */
int putc2(int c, FILE *fp)
{
    static int num[NOFILE];
        /* 0    : not in Kanji
           1..4 : in JIS Kanji and num[] bytes are in store[][]
           -1   : in JIS Kanji and store[][] is empty */
    static unsigned char store[NOFILE][4];
    const int fd = fileno(fp);
    int ret = c, output_enc;

#ifdef WIN32
    if ((fp == stdout || fp == stderr) && (_isatty(fd) || !prior_file_enc)) {
        if (sjisterminal) {
            if (is_internalUPTEX())
                output_enc = ENC_UTF8;
            else
                output_enc = ENC_SJIS;
        } else
#else
    if ((fp == stdout || fp == stderr) && !prior_file_enc) {
#endif

        output_enc = get_terminal_enc();
    } else
        output_enc = get_file_enc();

    if (num[fd] > 0) {        /* multi-byte char */
        if (is_internalUPTEX() && iskanji1(c)) { /* error */
            ret = flush(store[fd], num[fd], fp);
            num[fd] = 0;
        }
        store[fd][num[fd]] = c;
        num[fd]++;
        if (multistrlen(store[fd], num[fd], 0) == num[fd]) {
            long i = fromBUFF(store[fd], num[fd], 0);
            ret = put_multibyte(toENC(i, output_enc), fp);
            num[fd] = -1;
        } else if ((is_internalUPTEX() && num[fd] == 4) ||
                   (!is_internalUPTEX() && num[fd] == 2)) { /* error */
            ret = flush(store[fd], num[fd], fp);
            num[fd] = -1;
        }
    } else if (iskanji1(c)) { /* first multi-byte char */
        if (num[fd] == 0 && output_enc == ENC_JIS) {
            ret = put_multibyte(KANJI_IN, fp);
        }
        store[fd][0] = c;
        num[fd] = 1;
    } else {                  /* ASCII */
        if (num[fd] < 0 && output_enc == ENC_JIS) {
            put_multibyte(KANJI_OUT, fp);
        }
        ret = putc(c, fp);
        num[fd] = 0;
    }
    return ret;
}

/* fputs() with code conversion */
int fputs2(const char *s, FILE *fp)
{
    while (*s != '\0') {
        int ret = putc2((unsigned char)*s, fp);
        if (ret == EOF) return EOF;
        s++;
    }
    return 1;
}


static struct unget_st {
    int size;
    int buff[4];
} ungetbuff[NOFILE];

static int getc4(FILE *fp)
{
    struct unget_st *p = &ungetbuff[fileno(fp)];

    if (p->size == 0) return getc(fp);
    return p->buff[--p->size];
}

static int ungetc4(int c, FILE *fp)
{
    struct unget_st *p = &ungetbuff[fileno(fp)];

    if (p->size >= 4) return EOF;
    return p->buff[p->size++] = c;
}


static unsigned char *buffer;
static long first, last;
static boolean combin_voiced_sound(boolean semi)
{
    int i;

    if (last-2 < first) return false;
    if (multistrlen(buffer,last,last-2) != 2) return false;
    i = toUCS(fromBUFF(buffer,last,last-2));
    i = get_voiced_sound(i, semi);
    if (i == 0) return false;
    i = toBUFF(fromUCS(i));
    buffer[last-2] = HI(i);
    buffer[last-1] = LO(i);
    return true;
}