/*
 * Append one UCS code point to `os`, tracking language-tag state in `st`.
 *
 * Code points in [WC_C_LANGUAGE_TAG0, WC_C_CANCEL_TAG] are never written to
 * `os`; when WcOption.use_language_tag is set they only update st->tag /
 * st->ntag.  Any other code point first finalises a tag that is still being
 * collected (st->tag) into st->ntag, and is then emitted either as a plain
 * byte, or through wtf_push() in tagged form when st->ntag is non-zero.
 */
void
wtf_push_ucs(Str os, wc_uint32 ucs, wc_status *st)
{
    wc_ccs charset;

    /* Tag characters: consume them here, they produce no output. */
    if (ucs >= WC_C_LANGUAGE_TAG0 && ucs <= WC_C_CANCEL_TAG) {
        if (WcOption.use_language_tag) {
            if (ucs == WC_C_LANGUAGE_TAG) {
                /* start collecting a new tag */
                st->tag = Strnew_size(4);
            } else if (ucs == WC_C_CANCEL_TAG) {
                /* drop both the pending tag and the active one */
                st->ntag = 0;
                st->tag = NULL;
            } else if (st->tag && ucs >= WC_C_TAG_SPACE) {
                /* only the low 7 bits of the tag character are stored */
                Strcat_char(st->tag, (char)(ucs & 0x7f));
            }
        }
        return;
    }

    /* First non-tag character after a tag: register the collected tag. */
    if (st->tag) {
        st->ntag = wc_ucs_put_tag(st->tag->ptr);
        st->tag = NULL;
    }

    /* 7-bit range: raw byte unless a tag is active. */
    if (ucs < 0x80) {
        if (st->ntag) {
            wtf_push(os, WC_CCS_UCS_TAG, wc_ucs_to_ucs_tag(ucs, st->ntag));
        } else {
            Strcat_char(os, (char)ucs);
        }
        return;
    }

    charset = wc_ucs_to_ccs(ucs);
    if (st->ntag && ucs <= WC_C_UNICODE_END) {
        /* switch both the charset and the code to their tagged variants */
        charset = wc_ccs_ucs_to_ccs_ucs_tag(charset);
        ucs = wc_ucs_to_ucs_tag(ucs, st->ntag);
    }
    wtf_push(os, charset, ucs);
}
/*
 * Convert the bytes of `is`, encoded in `ces`, into a Str built with
 * wtf_push().
 *
 * `ces` selects a per-encoding C0 table for WC_CES_TCVN_5712,
 * WC_CES_VISCII_11 and WC_CES_VPS.  Bytes with the high bit set are pushed
 * with the charset from gset[1]; bytes below 0x20 that are flagged in the C0
 * table are pushed with the charset from gset[2]; everything else is copied
 * verbatim.
 *
 * Returns `is` itself (no copy) when the detect map built by
 * wc_create_detect_map() flags no byte of the input; otherwise returns a
 * newly created Str.
 *
 * For any other `ces` no C0 table is selected.  In that case control bytes
 * are copied verbatim instead of indexing a NULL table.
 */
Str
wc_conv_from_viet(Str is, wc_ces ces)
{
    Str os;
    wc_uchar *sp = (wc_uchar *)is->ptr;
    wc_uchar *ep = sp + is->length;
    wc_uchar *p;
    wc_ccs ccs1 = WcCesInfo[WC_CCS_INDEX(ces)].gset[1].ccs;
    wc_ccs ccs2 = WcCesInfo[WC_CCS_INDEX(ces)].gset[2].ccs;
    wc_uint8 *map = NULL;

    switch (ces) {
    case WC_CES_TCVN_5712:
        map = wc_c0_tcvn57122_map;
        break;
    case WC_CES_VISCII_11:
        map = wc_c0_viscii112_map;
        break;
    case WC_CES_VPS:
        map = wc_c0_vps2_map;
        break;
    default:
        /* no C0 table for this encoding; map stays NULL */
        break;
    }

    wc_create_detect_map(ces, WC_FALSE);

    /* Skip the leading run of bytes the detect map does not flag. */
    for (p = sp; p < ep && !WC_DETECT_MAP[*p]; p++)
        ;
    if (p == ep)
        return is;

    os = Strnew_size(is->length);
    if (p > sp)
        Strcat_charp_n(os, is->ptr, (int)(p - sp));

    for (; p < ep; p++) {
        if (*p & 0x80) {
            wtf_push(os, ccs1, (wc_uint32)*p);
        } else if (*p < 0x20 && map != NULL && map[*p]) {
            /* guarded: map is NULL when ces matched none of the cases above */
            wtf_push(os, ccs2, (wc_uint32)*p);
        } else {
            Strcat_char(os, (char)*p);
        }
    }
    return os;
}
/*
 * Feed one GB18030 byte `c` into an incremental decoder.
 *
 * The decoder position is kept in st->state; the bytes of a sequence in
 * progress and the output Str live in function-level statics.  A state of -1
 * means "start fresh": a new output Str is created and the state becomes
 * WC_GB18030_NOSTATE.
 *
 * Returns NULL while a multi-byte sequence is still incomplete.  Otherwise
 * it resets st->state to -1 and returns the output Str.  A C1 byte in the
 * initial state, or a byte that does not continue the current sequence,
 * appends nothing to that Str.
 */
Str
wc_char_conv_from_gb18030(wc_uchar c, wc_status *st)
{
    static Str out;
    static wc_uchar seq[4];
    wc_uint32 dbcs;
    wc_wchar_t wch;
#ifdef USE_UNICODE
    wc_uint32 ucs;
#endif

    if (st->state == -1) {
        out = Strnew_size(8);
        st->state = WC_GB18030_NOSTATE;
    }

    switch (st->state) {
    case WC_GB18030_NOSTATE:
        if (WC_GB18030_MAP[c] == UB) {
            /* lead byte: remember it and wait for more input */
            seq[0] = c;
            st->state = WC_GB18030_MBYTE1;
            return NULL;
        }
        if (WC_GB18030_MAP[c] != C1) {
            Strcat_char(out, (char)c);
        }
        break;

    case WC_GB18030_MBYTE1:
        if (WC_GB18030_MAP[c] & LB) {
            /* complete two-byte code */
            dbcs = ((wc_uint32)seq[0] << 8) | c;
            if (wc_gbk_or_gbk_ext(dbcs) == WC_CCS_GBK_EXT) {
                wtf_push(out, WC_CCS_GBK_EXT, dbcs);
            } else if (seq[0] >= 0xA1 && c >= 0xA1) {
                wtf_push(out, wc_gb2312_or_gbk(dbcs), dbcs);
            } else {
                wtf_push(out, WC_CCS_GBK, dbcs);
            }
        } else if (WC_GB18030_MAP[c] == L4) {
            /* second byte of a four-byte code */
            seq[1] = c;
            st->state = WC_GB18030_MBYTE2;
            return NULL;
        }
        break;

    case WC_GB18030_MBYTE2:
        if (WC_GB18030_MAP[c] == UB) {
            seq[2] = c;
            st->state = WC_GB18030_MBYTE3;
            return NULL;
        }
        break;

    case WC_GB18030_MBYTE3:
        if (WC_GB18030_MAP[c] == L4) {
            /* complete four-byte code */
            wch.ccs = WC_CCS_GB18030_W;
            wch.code = ((wc_uint32)seq[0] << 24)
                     | ((wc_uint32)seq[1] << 16)
                     | ((wc_uint32)seq[2] << 8)
                     | c;
#ifdef USE_UNICODE
            if (WcOption.gb18030_as_ucs
                && (ucs = wc_gb18030_to_ucs(wch)) != WC_C_UCS4_ERROR) {
                wtf_push(out,
                         WC_CCS_GB18030 | (wc_ucs_to_ccs(ucs) & ~WC_CCS_A_SET),
                         wch.code);
            } else
#endif
            {
                wtf_push(out, wch.ccs, wch.code);
            }
        }
        break;
    }

    /* Sequence finished (or abandoned): hand the output back and reset. */
    st->state = -1;
    return out;
}
/*
 * Convert a whole GB18030 byte string `is` into a Str built with
 * wtf_push() / wtf_push_unknown().  The `ces` argument is not used here.
 *
 * If every byte of the input is below 0x80 the input Str itself is returned
 * without copying.  Otherwise a new Str is created, the leading 7-bit run is
 * copied verbatim and the remainder is run through a byte-wise state machine
 * (NOSTATE -> MBYTE1 -> MBYTE2 -> MBYTE3).  Bytes that do not form a valid
 * sequence, and a sequence cut off by the end of input, are emitted through
 * wtf_push_unknown().
 */
Str
wc_conv_from_gb18030(Str is, wc_ces ces)
{
    wc_uchar *begin = (wc_uchar *)is->ptr;
    wc_uchar *end = begin + is->length;
    wc_uchar *cur = begin;
    Str out;
    int phase = WC_GB18030_NOSTATE;
    int pending;
    wc_uint32 dbcs;
    wc_wchar_t wch;
#ifdef USE_UNICODE
    wc_uint32 ucs;
#endif

    /* Fast path: nothing to convert when the input is pure 7-bit. */
    while (cur < end && *cur < 0x80) {
        cur++;
    }
    if (cur == end) {
        return is;
    }

    out = Strnew_size(is->length);
    if (cur > begin) {
        Strcat_charp_n(out, (char *)is->ptr, (int)(cur - begin));
    }

    while (cur < end) {
        switch (phase) {
        case WC_GB18030_NOSTATE:
            if (WC_GB18030_MAP[*cur] == UB) {
                phase = WC_GB18030_MBYTE1;
            } else if (WC_GB18030_MAP[*cur] == C1) {
                wtf_push_unknown(out, cur, 1);
            } else {
                Strcat_char(out, (char)*cur);
            }
            break;

        case WC_GB18030_MBYTE1:
            if (WC_GB18030_MAP[*cur] & LB) {
                /* complete two-byte code: previous byte + this one */
                dbcs = ((wc_uint32)cur[-1] << 8) | cur[0];
                if (wc_gbk_or_gbk_ext(dbcs) == WC_CCS_GBK_EXT) {
                    wtf_push(out, WC_CCS_GBK_EXT, dbcs);
                } else if (cur[-1] >= 0xA1 && cur[0] >= 0xA1) {
                    wtf_push(out, wc_gb2312_or_gbk(dbcs), dbcs);
                } else {
                    wtf_push(out, WC_CCS_GBK, dbcs);
                }
                phase = WC_GB18030_NOSTATE;
            } else if (WC_GB18030_MAP[*cur] == L4) {
                phase = WC_GB18030_MBYTE2;
            } else {
                wtf_push_unknown(out, cur - 1, 2);
                phase = WC_GB18030_NOSTATE;
            }
            break;

        case WC_GB18030_MBYTE2:
            if (WC_GB18030_MAP[*cur] == UB) {
                phase = WC_GB18030_MBYTE3;
            } else {
                wtf_push_unknown(out, cur - 2, 3);
                phase = WC_GB18030_NOSTATE;
            }
            break;

        case WC_GB18030_MBYTE3:
            if (WC_GB18030_MAP[*cur] == L4) {
                /* complete four-byte code from the last four bytes */
                wch.ccs = WC_CCS_GB18030_W;
                wch.code = ((wc_uint32)cur[-3] << 24)
                         | ((wc_uint32)cur[-2] << 16)
                         | ((wc_uint32)cur[-1] << 8)
                         | cur[0];
#ifdef USE_UNICODE
                if (WcOption.gb18030_as_ucs
                    && (ucs = wc_gb18030_to_ucs(wch)) != WC_C_UCS4_ERROR) {
                    wtf_push(out,
                             WC_CCS_GB18030 | (wc_ucs_to_ccs(ucs) & ~WC_CCS_A_SET),
                             wch.code);
                } else
#endif
                {
                    wtf_push(out, wch.ccs, wch.code);
                }
            } else {
                wtf_push_unknown(out, cur - 3, 4);
            }
            phase = WC_GB18030_NOSTATE;
            break;
        }
        cur++;
    }

    /* Input ended inside a sequence: flush the bytes collected so far. */
    switch (phase) {
    case WC_GB18030_MBYTE1:
        pending = 1;
        break;
    case WC_GB18030_MBYTE2:
        pending = 2;
        break;
    case WC_GB18030_MBYTE3:
        pending = 3;
        break;
    default:
        pending = 0;
        break;
    }
    if (pending) {
        wtf_push_unknown(out, cur - pending, pending);
    }

    return out;
}
/*
 * wc_conv_from_hz(is, ces): takes the bytes of `is` and, on some paths,
 * creates a new Str `os` with Strnew_size() and appends to it via
 * wtf_push() / wtf_push_unknown() under control of a `state` variable with
 * cases 0..5.  `ces` is not referenced in the body.
 *
 * NOTE(review): this body reads like machine-reconstructed (decompiler-style)
 * output rather than hand-written source, and it does not compile as written.
 * Issues visible in the code below, to be resolved against the real
 * implementation before this is used:
 *   - In `case 0` and `case 1`, an unbraced `if (...) Strgrow( os );` is
 *     followed by several statements and then `else {`, so the `else` has no
 *     matching `if`.
 *   - There is no loop anywhere: `p` advances at most twice and
 *     `switch ( state )` runs at most once with `state` still 0, so cases
 *     1..5 cannot be reached.
 *   - `if ( p == ep )` is nested inside `if ( p < ep )` and so cannot be
 *     true; its body stores `(char*)is` into `is->ptr` and returns `os`
 *     before `os` has been assigned.
 *   - Before the final `return os;`, `os->ptr` is overwritten with
 *     `(char*)os`.
 *   - Several paths (e.g. `p[0] != '~'`) fall off the end of this non-void
 *     function without returning a value.
 *   - `int eax` is declared and never used.
 *   - `p[0] >= 0` / `p[0] < 0` are applied to a `wc_uchar`; presumably that
 *     is an unsigned type, making the tests constant -- TODO confirm.
 *   - Magic numbers: 33089 (0x8141) is passed where wtf_push() takes a
 *     charset argument elsewhere in this file; '@' (0x40) is compared against
 *     WC_ISO_MAP entries; 54 (binary 110110) selects states 1, 2, 4 and 5.
 *     These presumably stand for named constants -- TODO confirm.
 */
Str wc_conv_from_hz( Str is, wc_ces ces ) { int eax; Str os; wc_uchar *sp = (wc_uchar*)is->ptr; wc_uchar *ep = &sp[ is->length ]; wc_uchar *p; int state = 0; p = sp; if ( p < ep ) { if ( p[0] >= 0 ) { if ( p[0] != '~' ) p++; else { if ( p == ep ) { is->ptr = (char*)is; return os; } else { os = Strnew_size( is->length ); if ( sp < p ) Strcat_charp_n( os, is->ptr, p - sp ); p++; if ( p < ep ) { switch ( state ) { default: break; case 0: if ( p[0] == '~' ) state = 1; else { if ( WC_ISO_MAP[ p[0] ] == '@' ) state = 5; else { if ( p[0] < 0 ) { wtf_push_unknown( os, p, 1 ); } if ( os->area_size <= os->length + 1 ) Strgrow( os ); os->ptr[ os->length ] = p[0]; os->length++; os->ptr[ os->length ] = 0; else { os->ptr[ os->length ] = p[0]; os->length++; os->ptr[ os->length ] = 0; } } } break; case 1: if ( p[0] == '{' ) state = 3; else { if ( p[0] == '~' ) { if ( os->area_size <= os->length + 1 ) Strgrow( os ); os->ptr[ os->length ] = p[0]; os->length++; os->ptr[ os->length ] = 0; state = 0; else { os->ptr[ os->length ] = p[0]; os->length++; os->ptr[ os->length ] = 0; state = 0; } } else { if ( p[0] == 10 ) { } else wtf_push_unknown( os, &p[ -1 ], 2 ); state = 0; } } break; case 2: if ( p[0] != '}' ) { if ( p[0] == 10 ) state = 0; else { if ( ( WC_ISO_MAP[ p[0] & 127 ] & 255 ) == 0 ) { wtf_push( os, 33089, p[0] | ( p[ -1 ] << 8 ) ); } wtf_push_unknown( os, &p[ -1 ], 2 ); state = 3; } } else state = 0; break; case 3: if ( p[0] == '~' ) state = 2; else { if ( ( WC_ISO_MAP[ p[0] & 127 ] & 255 ) == 0 ) state = 4; else wtf_push_unknown( os, p, 1 ); } break; case 4: if ( ( WC_ISO_MAP[ p[0] & 127 ] & 255 ) == 0 ) { wtf_push( os, 33089, p[0] | ( p[ -1 ] << 8 ) ); } wtf_push_unknown( os, &p[ -1 ], 2 ); state = 3; break; case 5: if ( WC_ISO_MAP[ p[0] ] == '@' ) { wtf_push( os, 33089, p[0] | ( p[ -1 ] << 8 ) ); } wtf_push_unknown( os, &p[ -1 ], 2 ); state = 0; break; } } else { if ( state <= 5 ) { if ( ( ( 1 << state ) & 54 ) != 0 ) wtf_push_unknown( os, &p[ -1 ], 1 ); os->ptr = 
(char*)os; return os; } } else { } } } } } }