/*
 * justify - cut a string down to at most len bytes, in place, and append an
 *           ellipsis to mark the cut.
 *
 *  s   : NUL-terminated, writable string (may be NULL)
 *  len : maximum number of bytes to keep before the ellipsis
 *
 * Strings that are NULL, empty, or not longer than len are returned
 * untouched.  Otherwise the first len bytes are scanned; if the last kept
 * byte is one that iskanji1() reports as the first byte of a double-byte
 * character, that byte is dropped too so no half character is left behind.
 * The ellipsis appended is the double-width one when getLanguageCode()
 * returns 0x0411 and three ASCII dots otherwise.
 *
 * NOTE(review): the ellipsis is appended with strcat(), so the caller's
 * buffer must have room for it beyond the truncated text -- confirm at the
 * call sites.
 *
 * Returns s.
 */
char *
justify( char *s, size_t len )
{
    size_t  pos;
    BOOL    pendingLead = FALSE;

    /* nothing to shorten */
    if ( !s || !*s || (strlen( s ) <= len) )
        return ( s );

    /* walk the bytes that are to be kept, tracking lead/trail byte state */
    for ( pos = 0; (pos < len) && s[pos]; pos++ ) {
        if ( !pendingLead && iskanji1( s[pos] ) )
            pendingLead = TRUE;
        else
            pendingLead = FALSE;
    }

    /* a dangling lead byte at the cut point is removed as well */
    if ( pendingLead )
        s[pos - 1] = NUL;
    else
        s[pos] = NUL;

    strcat( s, getLanguageCode() == 0x0411 ? "……" : "..." );

    return ( s );
}
/*
 * normalizeExtended - truncate a string in place to MAX_EXTENDED_LENGTH bytes.
 *
 *  extended : NUL-terminated, writable string (must not be NULL, it is
 *             passed straight to strlen())
 *
 * A string that already fits is left alone.  Otherwise the first
 * MAX_EXTENDED_LENGTH bytes are scanned; when the last of them is a byte
 * that iskanji1() reports as the first half of a double-byte character,
 * the cut is made one byte earlier so the character is not split.
 *
 * Returns extended.
 */
static char *
normalizeExtended( char *extended )
{
    int   pos;
    BOOL  pendingLead = FALSE;

    if ( strlen( extended ) <= MAX_EXTENDED_LENGTH )
        return ( extended );

    /* track whether the byte just visited is an unpaired lead byte */
    for ( pos = 0; (pos < MAX_EXTENDED_LENGTH) && extended[pos]; pos++ ) {
        if ( !pendingLead && iskanji1( extended[pos] ) )
            pendingLead = TRUE;
        else
            pendingLead = FALSE;
    }

    if ( pendingLead )
        extended[pos - 1] = NUL;    /* drop the orphaned lead byte */
    else
        extended[pos] = NUL;

    return ( extended );
}
/*
 * cha_fget_line - read one line with fgets() and pass it through
 *                 cha_jistoeuc().
 *
 *  buffer  : destination for the converted line
 *  bufsize : capacity the caller offers for the read
 *  stream  : input stream
 *
 * The raw line is first read into a static staging buffer of
 * INNER_BUFSIZE bytes, so successive calls share that storage.
 * bufsize is clamped to the size of the staging buffer so fgets() can
 * never write past its end.
 *
 * If iskanji1() reports that the final byte read is the first half of a
 * character that was split by the read limit, the byte is pushed back on
 * the stream with ungetc() and removed from the line.
 *
 * NOTE (kept from the original): the EUC string is shorter than the JIS
 * string.  If another conversion is plugged in here, the output length
 * must be checked against the caller's buffer.
 *
 * Returns buffer, or NULL when fgets() returns NULL.
 */
char *
cha_fget_line(char *buffer, int bufsize, FILE * stream)
{
    static unsigned char tmp_buf[INNER_BUFSIZE];
    int last;

    /* never let fgets() overrun the staging buffer */
    if (bufsize > (int) sizeof(tmp_buf))
        bufsize = (int) sizeof(tmp_buf);

    if (fgets((char *) tmp_buf, bufsize, stream) == NULL)
        return NULL;

    /*
     * remove the last extra character
     * (last is -1 when nothing was stored; do not index with it)
     */
    last = (int) strlen((char *) tmp_buf) - 1;
    if (last >= 0 && iskanji1(tmp_buf, last)) {
        ungetc(tmp_buf[last], stream);
        tmp_buf[last] = 0;
    }

    /* call convertor */
    cha_jistoeuc(tmp_buf, buffer);

    return buffer;
}
/*
 * Split a command line into arguments (response files are not used).
 *
 *  cmdline : NUL-terminated command line text
 *  files   : receives the arguments; any previous content is discarded
 *
 * Arguments are separated by runs of whitespace.  A double quote toggles
 * quoting: inside quotes whitespace is kept as part of the argument, and
 * the quote characters themselves are not copied.  An unterminated quote
 * simply runs to the end of the input.
 * When KANJI is defined, a byte pair accepted by iskanji1()/iskanji2() is
 * copied as a unit.
 */
void split_cmdline(const char *cmdline,std::vector<std::string> &files)
{
    const char *cur = cmdline;
    bool in_quotes = false;

    files.clear();

    for(;;){
        /* skip separators in front of the next argument */
        while(isspace((unsigned char)*cur)){
            ++cur;
        }
        if(*cur == '\0'){
            break;
        }

        std::string token;
        while(*cur != '\0' && (in_quotes || !isspace((unsigned char)*cur))){
            if(*cur == '"'){
                in_quotes = !in_quotes;
                ++cur;
                continue;
            }
#ifdef KANJI
            /* two-byte character: copy the first byte here, the second below */
            if(iskanji1(*cur) && iskanji2(*(cur+1))){
                token.push_back(*cur);
                ++cur;
            }
#endif
            token.push_back(*cur);
            ++cur;
        }
        files.push_back(token);
    }
}
/*
 * adjustStringLength - round the given string (Shift_JIS) down to the
 *                      specified length, in place.
 *
 *  p   : NUL-terminated, writable string
 *  len : maximum number of bytes to keep
 *
 * The first len bytes are scanned.  If the last of them is a byte that
 * iskanji1() reports as the first byte of a double-byte character, the
 * string is cut one byte earlier so that no half character remains;
 * otherwise it is cut at exactly len bytes.
 *
 * A NULL pointer, a negative len, or a string that is already shorter
 * than len is returned unchanged.  The scan stops at the terminator, so
 * memory beyond the end of a short string is neither read nor written.
 *
 * Returns p.
 */
char    *
adjustStringLength( char *p, int len )
{
    int     i;
    BOOL    flag = FALSE;

    if ( !p || (len < 0) )
        return ( p );

    for ( i = 0; i < len; i++ ) {
        /* string ends before len: nothing to cut */
        if ( p[i] == '\0' )
            return ( p );

        if ( (flag == FALSE) && iskanji1(p[i]) )
            flag = TRUE;
        else
            flag = FALSE;
    }

    if ( flag == TRUE )
        p[len - 1] = NUL;   /* do not leave a lone lead byte */
    else
        p[len] = NUL;

    return ( p );
}
/*
 * normalizeTag - truncate a tag in place to at most 20 bytes.
 *
 *  tag : NUL-terminated, writable string (must not be NULL, it is passed
 *        straight to strlen())
 *
 * Tags of 20 bytes or fewer are left alone.  For longer tags the first 20
 * bytes are scanned; if the 20th byte is one that iskanji1() reports as
 * the first half of a double-byte character, the tag is cut after 19
 * bytes instead so the character is not split.
 */
static void
normalizeTag( char *tag )
{
    const int   limit = 20;
    int         pos;
    BOOL        pendingLead = FALSE;

    if ( strlen( tag ) <= 20 )
        return;

    /* track whether the byte just visited is an unpaired lead byte */
    for ( pos = 0; (pos < limit) && tag[pos]; pos++ ) {
        if ( !pendingLead && iskanji1( tag[pos] ) )
            pendingLead = TRUE;
        else
            pendingLead = FALSE;
    }

    if ( pendingLead )
        tag[pos - 1] = NUL;     /* drop the orphaned lead byte */
    else
        tag[pos] = NUL;
}
/*
 * Character I/O helpers with encoding conversion (variant with WIN32
 * console support and is_internalUPTEX() handling).  The code below is
 * kept exactly as found; this header summarises what it does.
 *
 * putc2(c, fp)
 *   Writes one byte through a small state machine kept per file
 *   descriptor (num[] and store[][], both indexed by fileno(fp)).
 *   - Output encoding: get_terminal_enc() when fp is stdout/stderr and
 *     prior_file_enc is unset; get_file_enc() otherwise.  On WIN32 the
 *     terminal branch is also taken when the descriptor is a tty, and
 *     sjisterminal forces ENC_UTF8 (internal upTeX) or ENC_SJIS.
 *   - A byte for which iskanji1() is true starts a multi-byte sequence;
 *     for ENC_JIS output KANJI_IN is written first if the state was 0.
 *   - Following bytes are appended to store[fd].  When multistrlen()
 *     says the sequence is complete it is converted with toENC() and
 *     written with put_multibyte().  If the sequence reaches 4 bytes
 *     (internal upTeX) or 2 bytes (otherwise) without completing, or a
 *     new lead byte arrives while one is pending (internal upTeX only),
 *     the stored bytes are written out with flush().
 *   - Any other byte is written with putc(); for ENC_JIS output
 *     KANJI_OUT is written first when the state was -1.
 *   Returns the result of the last write performed, or c when the byte
 *   was only stored.
 *   NOTE(review): fd indexes arrays of NOFILE entries without a range
 *   check -- confirm fileno(fp) is always within 0..NOFILE-1 here.
 *
 * fputs2(s, fp)
 *   Sends each byte of s to putc2().  Returns EOF as soon as putc2()
 *   returns EOF, otherwise 1.
 *
 * getc4(fp) / ungetc4(c, fp)
 *   A push-back stack of up to 4 values per file descriptor
 *   (ungetbuff[]).  ungetc4() pushes c and returns it, or returns EOF
 *   when the stack is full.  getc4() pops the most recently pushed value;
 *   when the stack is empty it reads with getc().  On WIN32, when fp is
 *   stdin on a tty and is_internalUPTEX() is true, it instead reads
 *   UTF-16 units with ReadConsoleW(), joins surrogate pairs (using
 *   U_REPLACEMENT_CHARACTER for an unpaired surrogate and keeping the
 *   unit that followed an unpaired high surrogate in wcbuf for the next
 *   call), converts with UCStoUTF8() and queues the bytes on the stack
 *   so that they are returned one per call.
 *
 * combin_voiced_sound(semi)
 *   Takes the multi-byte character that starts mblen bytes before
 *   buffer[last] (mblen is 3 for internal upTeX, else 2), maps it with
 *   get_voiced_sound(ucs, semi) and, when the result is non-zero, writes
 *   the re-encoded bytes back over buffer[last-2] and buffer[last-1]
 *   (and buffer[last-3] when BYTE2 of the result is non-zero).
 *   Returns true when a replacement was made, false otherwise.
 *   NOTE(review): semi presumably selects the semi-voiced form -- confirm
 *   against get_voiced_sound().
 */
/* putc() with code conversion */ int putc2(int c, FILE *fp) { static int num[NOFILE]; /* 0 : not in Kanji 1..4 : in JIS Kanji and num[] bytes are in store[][] -1 : in JIS Kanji and store[][] is empty */ static unsigned char store[NOFILE][4]; const int fd = fileno(fp); int ret = c, output_enc; #ifdef WIN32 if ((fp == stdout || fp == stderr) && (_isatty(fd) || !prior_file_enc)) { if (sjisterminal) { if (is_internalUPTEX()) output_enc = ENC_UTF8; else output_enc = ENC_SJIS; } else #else if ((fp == stdout || fp == stderr) && !prior_file_enc) { #endif output_enc = get_terminal_enc(); } else output_enc = get_file_enc(); if (num[fd] > 0) { /* multi-byte char */ if (is_internalUPTEX() && iskanji1(c)) { /* error */ ret = flush(store[fd], num[fd], fp); num[fd] = 0; } store[fd][num[fd]] = c; num[fd]++; if (multistrlen(store[fd], num[fd], 0) == num[fd]) { long i = fromBUFF(store[fd], num[fd], 0); ret = put_multibyte(toENC(i, output_enc), fp); num[fd] = -1; } else if ((is_internalUPTEX() && num[fd] == 4) || (!is_internalUPTEX() && num[fd] == 2)) { /* error */ ret = flush(store[fd], num[fd], fp); num[fd] = -1; } } else if (iskanji1(c)) { /* first multi-byte char */ if (num[fd] == 0 && output_enc == ENC_JIS) { ret = put_multibyte(KANJI_IN, fp); } store[fd][0] = c; num[fd] = 1; } else { /* ASCII */ if (num[fd] < 0 && output_enc == ENC_JIS) { put_multibyte(KANJI_OUT, fp); } ret = putc(c, fp); num[fd] = 0; } return ret; } /* fputs() with code conversion */ int fputs2(const char *s, FILE *fp) { while (*s != '\0') { int ret = putc2((unsigned char)*s, fp); if (ret == EOF) return EOF; s++; } return 1; } static struct unget_st { int size; int buff[4]; } ungetbuff[NOFILE]; static int getc4(FILE *fp) { struct unget_st *p = &ungetbuff[fileno(fp)]; if (p->size == 0) #ifdef WIN32 { const int fd = fileno(fp); HANDLE hStdin; DWORD ret; wchar_t wc[2]; long c; static wchar_t wcbuf = L'\0'; if (!(fd == fileno(stdin) && _isatty(fd) && is_internalUPTEX())) return getc(fp); hStdin = 
GetStdHandle(STD_INPUT_HANDLE); if (wcbuf) { wc[0] = wcbuf; wcbuf = L'\0'; } else if (ReadConsoleW(hStdin, wc, 1, &ret, NULL) == 0) return EOF; if (0xd800<=wc[0] && wc[0]<0xdc00) { if (ReadConsoleW(hStdin, wc+1, 1, &ret, NULL) == 0) return EOF; if (0xdc00<=wc[1] && wc[1]<0xe000) { c = UTF16StoUTF32(wc[0], wc[1]); } else { wcbuf = wc[1]; c = U_REPLACEMENT_CHARACTER; /* illegal upper surrogate pair */ } } else if (0xdc00<=wc[0] && wc[0]<0xe000) { c = U_REPLACEMENT_CHARACTER; /* illegal lower surrogate pair */ } else { c = wc[0]; } c = UCStoUTF8(c); /* always */ p->buff[p->size++]=BYTE4(c); if (BYTE3(c) != 0) p->buff[p->size++]=BYTE3(c); if (BYTE2(c) != 0) p->buff[p->size++]=BYTE2(c); if (BYTE1(c) != 0) p->buff[p->size++]=BYTE1(c); } #else return getc(fp); #endif return p->buff[--p->size]; } static int ungetc4(int c, FILE *fp) { struct unget_st *p = &ungetbuff[fileno(fp)]; if (p->size >= 4) return EOF; return p->buff[p->size++] = c; } static unsigned char *buffer; static long first, last; static boolean combin_voiced_sound(boolean semi) { int i, mblen; mblen = is_internalUPTEX() ? 3 : 2; if (last-mblen < first) return false; if (multistrlen(buffer,last,last-mblen) != mblen) return false; i = toUCS(fromBUFF(buffer,last,last-mblen)); i = get_voiced_sound(i, semi); if (i == 0) return false; i = toBUFF(fromUCS(i)); if (BYTE2(i) != 0) buffer[last-3] = BYTE2(i); /* always */ buffer[last-2] = BYTE3(i); /* always */ buffer[last-1] = BYTE4(i); return true; }
/*
 * SQLexer::Lex - scan and return the next token.
 *
 * Records the current line in _lasttokenline, then loops until CUR_CHAR
 * is SQUIRREL_EOB:
 *   - tab, carriage return and space are skipped;
 *   - a newline increments _currentline, is recorded as the current
 *     token, resets _currentcolumn to 1 and scanning continues;
 *   - after '/', a '*' starts a block comment (LexBlockComment()), a
 *     second '/' skips to the end of the line, '=' gives TK_DIVEQ, '>'
 *     gives TK_ATTR_CLOSE, anything else gives '/';
 *   - one, two and three character operators are recognised by looking
 *     at the characters that follow (TK_EQ, TK_LE, TK_NEWSLOT, TK_SHIFTL,
 *     TK_ATTR_OPEN, TK_GE, TK_SHIFTR, TK_USHIFTR, TK_NE, TK_VARPARAMS,
 *     TK_AND, TK_OR, TK_DOUBLE_COLON, TK_MULEQ, TK_MODEQ, TK_MINUSEQ,
 *     TK_MINUSMINUS, TK_PLUSEQ, TK_PLUSPLUS);
 *   - '@' must be followed by a double quote and reads a string with
 *     ReadString('"', true); a plain double or single quote reads one
 *     with ReadString(CUR_CHAR, false);
 *   - digits go to ReadNumber(), letters and '_' go to ReadID();
 *   - any other character is returned as its own token, after Error()
 *     has been called if it is a control character.
 * Error() is called with "string expected", "error parsing the string",
 * "invalid token '..'" or "unexpected character(control)" in the
 * corresponding failure cases.
 *
 * Builds without _UNICODE and with USESJIS: when the character returned
 * by the default branch satisfies iskanji1(), the iskanji member is set
 * and the next call returns the following character as a token without
 * interpreting it.
 * NOTE(review): iskanji is cleared only on the path where it is already
 * false; if RETURN_TOKEN returns from the function (as its name
 * suggests), the flag stays set after the early return -- confirm.
 *
 * NOTE(review): the text of this function as visible here ends after the
 * switch statement; the end of the while loop and of the function body
 * is not shown.
 */
SQInteger SQLexer::Lex() { _lasttokenline = _currentline; while(CUR_CHAR != SQUIRREL_EOB) { #if !defined(_UNICODE) && defined(USESJIS) if (iskanji) { SQInteger c = CUR_CHAR; NEXT(); RETURN_TOKEN(c); } iskanji = false; #endif switch(CUR_CHAR){ case _SC('\t'): case _SC('\r'): case _SC(' '): NEXT(); continue; case _SC('\n'): _currentline++; _prevtoken=_curtoken; _curtoken=_SC('\n'); NEXT(); _currentcolumn=1; continue; case _SC('/'): NEXT(); switch(CUR_CHAR){ case _SC('*'): NEXT(); LexBlockComment(); continue; case _SC('/'): do { NEXT(); } while (CUR_CHAR != _SC('\n') && (!IS_EOB())); continue; case _SC('='): NEXT(); RETURN_TOKEN(TK_DIVEQ); continue; case _SC('>'): NEXT(); RETURN_TOKEN(TK_ATTR_CLOSE); continue; default: RETURN_TOKEN('/'); } case _SC('='): NEXT(); if (CUR_CHAR != _SC('=')){ RETURN_TOKEN('=') } else { NEXT(); RETURN_TOKEN(TK_EQ); } case _SC('<'): NEXT(); if ( CUR_CHAR == _SC('=') ) { NEXT(); RETURN_TOKEN(TK_LE) } else if ( CUR_CHAR == _SC('-') ) { NEXT(); RETURN_TOKEN(TK_NEWSLOT); } else if ( CUR_CHAR == _SC('<') ) { NEXT(); RETURN_TOKEN(TK_SHIFTL); } else if ( CUR_CHAR == _SC('/') ) { NEXT(); RETURN_TOKEN(TK_ATTR_OPEN); } //else if ( CUR_CHAR == _SC('[') ) { NEXT(); ReadMultilineString(); RETURN_TOKEN(TK_STRING_LITERAL); } else { RETURN_TOKEN('<') } case _SC('>'): NEXT(); if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_GE);} else if(CUR_CHAR == _SC('>')){ NEXT(); if(CUR_CHAR == _SC('>')){ NEXT(); RETURN_TOKEN(TK_USHIFTR); } RETURN_TOKEN(TK_SHIFTR); } else { RETURN_TOKEN('>') } case _SC('!'): NEXT(); if (CUR_CHAR != _SC('=')){ RETURN_TOKEN('!')} else { NEXT(); RETURN_TOKEN(TK_NE); } case _SC('@'): { SQInteger stype; NEXT(); if(CUR_CHAR != _SC('"')) Error(_SC("string expected")); if((stype=ReadString('"',true))!=-1) { RETURN_TOKEN(stype); } Error(_SC("error parsing the string")); } case _SC('"'): case _SC('\''): { SQInteger stype; if((stype=ReadString(CUR_CHAR,false))!=-1){ RETURN_TOKEN(stype); } Error(_SC("error parsing the string")); } case _SC('{'): 
case _SC('}'): case _SC('('): case _SC(')'): case _SC('['): case _SC(']'): case _SC(';'): case _SC(','): case _SC('?'): case _SC('^'): case _SC('~'): {SQInteger ret = CUR_CHAR; NEXT(); RETURN_TOKEN(ret); } case _SC('.'): NEXT(); if (CUR_CHAR != _SC('.')){ RETURN_TOKEN('.') } NEXT(); if (CUR_CHAR != _SC('.')){ Error(_SC("invalid token '..'")); } NEXT(); RETURN_TOKEN(TK_VARPARAMS); case _SC('&'): NEXT(); if (CUR_CHAR != _SC('&')){ RETURN_TOKEN('&') } else { NEXT(); RETURN_TOKEN(TK_AND); } case _SC('|'): NEXT(); if (CUR_CHAR != _SC('|')){ RETURN_TOKEN('|') } else { NEXT(); RETURN_TOKEN(TK_OR); } case _SC(':'): NEXT(); if (CUR_CHAR != _SC(':')){ RETURN_TOKEN(':') } else { NEXT(); RETURN_TOKEN(TK_DOUBLE_COLON); } case _SC('*'): NEXT(); if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_MULEQ);} else RETURN_TOKEN('*'); case _SC('%'): NEXT(); if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_MODEQ);} else RETURN_TOKEN('%'); case _SC('-'): NEXT(); if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_MINUSEQ);} else if (CUR_CHAR == _SC('-')){ NEXT(); RETURN_TOKEN(TK_MINUSMINUS);} else RETURN_TOKEN('-'); case _SC('+'): NEXT(); if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_PLUSEQ);} else if (CUR_CHAR == _SC('+')){ NEXT(); RETURN_TOKEN(TK_PLUSPLUS);} else RETURN_TOKEN('+'); case SQUIRREL_EOB: return 0; default:{ if (scisdigit(CUR_CHAR)) { SQInteger ret = ReadNumber(); RETURN_TOKEN(ret); } else if (scisalpha(CUR_CHAR) || CUR_CHAR == _SC('_')) { SQInteger t = ReadID(); RETURN_TOKEN(t); } else { SQInteger c = CUR_CHAR; if (sciscntrl((int)c)) Error(_SC("unexpected character(control)")); #if !defined(_UNICODE) && defined(USESJIS) iskanji = iskanji1(c); #endif NEXT(); RETURN_TOKEN(c); } RETURN_TOKEN(0); } }
/*
 * Character I/O helpers with encoding conversion (variant whose getc4()
 * has no console path and whose combin_voiced_sound() handles 2-byte
 * characters).  The code below is kept exactly as found; this header
 * summarises what it does.
 *
 * putc2(c, fp)
 *   Writes one byte through a small state machine kept per file
 *   descriptor (num[] and store[][], both indexed by fileno(fp)).
 *   - Output encoding: get_terminal_enc() when fp is stdout/stderr and
 *     prior_file_enc is unset; get_file_enc() otherwise.  On WIN32 the
 *     terminal branch is also taken when the descriptor is a tty, and
 *     sjisterminal forces ENC_UTF8 (internal upTeX) or ENC_SJIS.
 *   - A byte for which iskanji1() is true starts a multi-byte sequence;
 *     for ENC_JIS output KANJI_IN is written first if the state was 0.
 *   - Following bytes are appended to store[fd].  When multistrlen()
 *     says the sequence is complete it is converted with toENC() and
 *     written with put_multibyte().  If the sequence reaches 4 bytes
 *     (internal upTeX) or 2 bytes (otherwise) without completing, or a
 *     new lead byte arrives while one is pending (internal upTeX only),
 *     the stored bytes are written out with flush().
 *   - Any other byte is written with putc(); for ENC_JIS output
 *     KANJI_OUT is written first when the state was -1.
 *   Returns the result of the last write performed, or c when the byte
 *   was only stored.
 *   NOTE(review): fd indexes arrays of NOFILE entries without a range
 *   check -- confirm fileno(fp) is always within 0..NOFILE-1 here.
 *
 * fputs2(s, fp)
 *   Sends each byte of s to putc2().  Returns EOF as soon as putc2()
 *   returns EOF, otherwise 1.
 *
 * getc4(fp) / ungetc4(c, fp)
 *   A push-back stack of up to 4 values per file descriptor
 *   (ungetbuff[]).  ungetc4() pushes c and returns it, or returns EOF
 *   when the stack is full.  getc4() pops the most recently pushed value,
 *   or reads with getc() when the stack is empty.
 *
 * combin_voiced_sound(semi)
 *   Takes the 2-byte character stored at buffer[last-2] and
 *   buffer[last-1], maps it with get_voiced_sound(ucs, semi) and, when
 *   the result is non-zero, overwrites those two bytes with HI()/LO() of
 *   the re-encoded character.  Returns true when a replacement was made;
 *   false when fewer than 2 bytes lie between first and last, when the
 *   bytes are not a 2-byte character according to multistrlen(), or when
 *   get_voiced_sound() returns 0.
 *   NOTE(review): semi presumably selects the semi-voiced form -- confirm
 *   against get_voiced_sound().
 */
/* putc() with code conversion */ int putc2(int c, FILE *fp) { static int num[NOFILE]; /* 0 : not in Kanji 1..4 : in JIS Kanji and num[] bytes are in store[][] -1 : in JIS Kanji and store[][] is empty */ static unsigned char store[NOFILE][4]; const int fd = fileno(fp); int ret = c, output_enc; #ifdef WIN32 if ((fp == stdout || fp == stderr) && (_isatty(fd) || !prior_file_enc)) { if (sjisterminal) { if (is_internalUPTEX()) output_enc = ENC_UTF8; else output_enc = ENC_SJIS; } else #else if ((fp == stdout || fp == stderr) && !prior_file_enc) { #endif output_enc = get_terminal_enc(); } else output_enc = get_file_enc(); if (num[fd] > 0) { /* multi-byte char */ if (is_internalUPTEX() && iskanji1(c)) { /* error */ ret = flush(store[fd], num[fd], fp); num[fd] = 0; } store[fd][num[fd]] = c; num[fd]++; if (multistrlen(store[fd], num[fd], 0) == num[fd]) { long i = fromBUFF(store[fd], num[fd], 0); ret = put_multibyte(toENC(i, output_enc), fp); num[fd] = -1; } else if ((is_internalUPTEX() && num[fd] == 4) || (!is_internalUPTEX() && num[fd] == 2)) { /* error */ ret = flush(store[fd], num[fd], fp); num[fd] = -1; } } else if (iskanji1(c)) { /* first multi-byte char */ if (num[fd] == 0 && output_enc == ENC_JIS) { ret = put_multibyte(KANJI_IN, fp); } store[fd][0] = c; num[fd] = 1; } else { /* ASCII */ if (num[fd] < 0 && output_enc == ENC_JIS) { put_multibyte(KANJI_OUT, fp); } ret = putc(c, fp); num[fd] = 0; } return ret; } /* fputs() with code conversion */ int fputs2(const char *s, FILE *fp) { while (*s != '\0') { int ret = putc2((unsigned char)*s, fp); if (ret == EOF) return EOF; s++; } return 1; } static struct unget_st { int size; int buff[4]; } ungetbuff[NOFILE]; static int getc4(FILE *fp) { struct unget_st *p = &ungetbuff[fileno(fp)]; if (p->size == 0) return getc(fp); return p->buff[--p->size]; } static int ungetc4(int c, FILE *fp) { struct unget_st *p = &ungetbuff[fileno(fp)]; if (p->size >= 4) return EOF; return p->buff[p->size++] = c; } static unsigned char *buffer; static 
long first, last; static boolean combin_voiced_sound(boolean semi) { int i; if (last-2 < first) return false; if (multistrlen(buffer,last,last-2) != 2) return false; i = toUCS(fromBUFF(buffer,last,last-2)); i = get_voiced_sound(i, semi); if (i == 0) return false; i = toBUFF(fromUCS(i)); buffer[last-2] = HI(i); buffer[last-1] = LO(i); return true; }