Example #1
0
File: ucs.c Project: phantasea/w3m
/*
 * Append the UCS code point `ucs` to `os`, tracking language-tag state in `st`.
 *
 * Code points in [WC_C_LANGUAGE_TAG0, WC_C_CANCEL_TAG] are never written to
 * `os`.  They are ignored when WcOption.use_language_tag is off; otherwise
 * they only update `st`: WC_C_LANGUAGE_TAG starts a fresh tag buffer,
 * WC_C_CANCEL_TAG clears both the buffer and st->ntag, and values at or above
 * WC_C_TAG_SPACE are appended (low 7 bits) to an already started buffer.
 *
 * For every other code point, a pending tag buffer is first handed to
 * wc_ucs_put_tag() and the result stored in st->ntag.  The code point is then
 * written in its tagged form when st->ntag is non-zero, untagged otherwise.
 */
void
wtf_push_ucs(Str os, wc_uint32 ucs, wc_status *st)
{
    wc_ccs out_ccs;

    /* Tag-range code points only touch the tag state. */
    if (ucs >= WC_C_LANGUAGE_TAG0 && ucs <= WC_C_CANCEL_TAG) {
	if (! WcOption.use_language_tag)
	    return;
	if (ucs == WC_C_LANGUAGE_TAG) {
	    st->tag = Strnew_size(4);
	    return;
	}
	if (ucs == WC_C_CANCEL_TAG) {
	    st->tag = NULL;
	    st->ntag = 0;
	    return;
	}
	if (st->tag && ucs >= WC_C_TAG_SPACE)
	    Strcat_char(st->tag, (char)(ucs & 0x7f));
	return;
    }

    /* First ordinary character after a tag: finalize the collected tag. */
    if (st->tag) {
	st->ntag = wc_ucs_put_tag(st->tag->ptr);
	st->tag = NULL;
    }

    if (ucs >= 0x80) {
	out_ccs = wc_ucs_to_ccs(ucs);
	if (st->ntag && ucs <= WC_C_UNICODE_END) {
	    out_ccs = wc_ccs_ucs_to_ccs_ucs_tag(out_ccs);
	    ucs = wc_ucs_to_ucs_tag(ucs, st->ntag);
	}
	wtf_push(os, out_ccs, ucs);
	return;
    }

    /* Values below 0x80 are stored as a plain byte unless a tag is active. */
    if (st->ntag)
	wtf_push(os, WC_CCS_UCS_TAG,  wc_ucs_to_ucs_tag(ucs, st->ntag));
    else
	Strcat_char(os, (char)ucs);
}
Example #2
0
/*
 * Convert the byte string `is`, encoded in the character encoding scheme
 * `ces`, into a newly allocated WTF string.
 *
 * A per-encoding C0 table is selected for WC_CES_TCVN_5712, WC_CES_VISCII_11
 * and WC_CES_VPS.  For any other `ces` no table is selected and bytes below
 * 0x20 are copied through unchanged (previously this dereferenced a NULL
 * table pointer).
 *
 * Returns `is` itself when no byte is flagged in WC_DETECT_MAP (nothing to
 * convert); otherwise returns a new Str in which
 *   - bytes with the high bit set are pushed with the encoding's gset[1] ccs,
 *   - bytes below 0x20 that are flagged in the C0 table are pushed with the
 *     encoding's gset[2] ccs,
 *   - all remaining bytes are copied verbatim.
 */
Str
wc_conv_from_viet(Str is, wc_ces ces)
{
    Str os;
    wc_uchar *sp = (wc_uchar *)is->ptr;
    wc_uchar *ep = sp + is->length;
    wc_uchar *p;
    wc_ccs ccs1 = WcCesInfo[WC_CCS_INDEX(ces)].gset[1].ccs;
    wc_ccs ccs2 = WcCesInfo[WC_CCS_INDEX(ces)].gset[2].ccs;
    wc_uint8 *map = NULL;

    switch (ces) {
    case WC_CES_TCVN_5712:
	map = wc_c0_tcvn57122_map;
	break;
    case WC_CES_VISCII_11:
	map = wc_c0_viscii112_map;
	break;
    case WC_CES_VPS:
	map = wc_c0_vps2_map;
	break;
    default:
	/* No C0 table for this encoding; `map` stays NULL (guarded below). */
	break;
    }

    /* Skip the leading run of bytes that need no conversion. */
    wc_create_detect_map(ces, WC_FALSE);
    for (p = sp; p < ep && ! WC_DETECT_MAP[*p]; p++)
	;
    if (p == ep)
	return is;
    os = Strnew_size(is->length);
    if (p > sp)
	Strcat_charp_n(os, is->ptr, (int)(p - sp));

    for (; p < ep; p++) {
	if (*p & 0x80)
	    wtf_push(os, ccs1, (wc_uint32)*p);
	else if (*p < 0x20 && map != NULL && map[*p])
	    wtf_push(os, ccs2, (wc_uint32)*p);
	else
	    Strcat_char(os, (char)*p);
    }
    return os;
}
Example #3
0
/*
 * Feed one input byte `c` to the incremental GB18030 decoder.
 *
 * The output buffer and the bytes of a partially read sequence live in
 * function-local statics; the decoder position is kept in st->state.  A state
 * of -1 means "start a new character": the state is reset to
 * WC_GB18030_NOSTATE and a fresh output buffer is allocated.
 *
 * Returns NULL while more bytes are needed to complete a sequence.  Otherwise
 * st->state is set back to -1 and the output buffer is returned; it holds the
 * converted character, or nothing when the byte was a C1 byte or an invalid
 * continuation.
 */
Str
wc_char_conv_from_gb18030(wc_uchar c, wc_status *st)
{
    static Str out;
    static wc_uchar pending[4];
    wc_uint32 dbyte;
    wc_wchar_t wch;
#ifdef USE_UNICODE
    wc_uint32 ucs;
#endif

    if (st->state == -1) {
	st->state = WC_GB18030_NOSTATE;
	out = Strnew_size(8);
    }

    if (st->state == WC_GB18030_NOSTATE) {
	if (WC_GB18030_MAP[c] == UB) {
	    /* Lead byte: remember it and wait for the next one. */
	    pending[0] = c;
	    st->state = WC_GB18030_MBYTE1;
	    return NULL;
	}
	if (WC_GB18030_MAP[c] != C1)
	    Strcat_char(out, (char)c);
    } else if (st->state == WC_GB18030_MBYTE1) {
	if (WC_GB18030_MAP[c] & LB) {
	    /* Complete two-byte code: pick the charset it belongs to. */
	    dbyte = ((wc_uint32)pending[0] << 8) | c;
	    if (wc_gbk_or_gbk_ext(dbyte) == WC_CCS_GBK_EXT) {
		wtf_push(out, WC_CCS_GBK_EXT, dbyte);
	    } else if (pending[0] >= 0xA1 && c >= 0xA1) {
		wtf_push(out, wc_gb2312_or_gbk(dbyte), dbyte);
	    } else {
		wtf_push(out, WC_CCS_GBK, dbyte);
	    }
	} else if (WC_GB18030_MAP[c] == L4) {
	    /* Second byte of a four-byte code. */
	    pending[1] = c;
	    st->state = WC_GB18030_MBYTE2;
	    return NULL;
	}
    } else if (st->state == WC_GB18030_MBYTE2) {
	if (WC_GB18030_MAP[c] == UB) {
	    pending[2] = c;
	    st->state = WC_GB18030_MBYTE3;
	    return NULL;
	}
    } else if (st->state == WC_GB18030_MBYTE3) {
	if (WC_GB18030_MAP[c] == L4) {
	    wch.ccs = WC_CCS_GB18030_W;
	    wch.code = ((wc_uint32)pending[0] << 24)
		     | ((wc_uint32)pending[1] << 16)
		     | ((wc_uint32)pending[2] << 8)
		     | c;
#ifdef USE_UNICODE
	    if (WcOption.gb18030_as_ucs &&
		(ucs = wc_gb18030_to_ucs(wch)) != WC_C_UCS4_ERROR) {
		wtf_push(out, WC_CCS_GB18030 | (wc_ucs_to_ccs(ucs) & ~WC_CCS_A_SET), wch.code);
	    } else
#endif
	    {
		wtf_push(out, wch.ccs, wch.code);
	    }
	}
    }

    /* Sequence finished (or abandoned): the next call starts afresh. */
    st->state = -1;
    return out;
}
Example #4
0
/*
 * Convert the whole GB18030 byte string `is` into a WTF string.
 *
 * Returns `is` itself when every byte is below 0x80.  Otherwise a new Str is
 * returned: the leading run of bytes below 0x80 is copied verbatim and the
 * rest is decoded with a small state machine handling two- and four-byte
 * sequences.  Invalid or truncated sequences are emitted through
 * wtf_push_unknown().  The `ces` argument is not used.
 */
Str
wc_conv_from_gb18030(Str is, wc_ces ces)
{
    wc_uchar *begin = (wc_uchar *)is->ptr;
    wc_uchar *end = begin + is->length;
    wc_uchar *cur = begin;
    Str out;
    int state = WC_GB18030_NOSTATE;
    wc_uint32 dbyte;
    wc_wchar_t wch;
#ifdef USE_UNICODE
    wc_uint32 ucs;
#endif

    /* Fast path: nothing to do for input made only of bytes below 0x80. */
    while (cur < end && *cur < 0x80)
	cur++;
    if (cur == end)
	return is;

    out = Strnew_size(is->length);
    if (cur > begin)
	Strcat_charp_n(out, (char *)is->ptr, (int)(cur - begin));

    for (; cur < end; cur++) {
	switch (state) {
	case WC_GB18030_NOSTATE:
	    if (WC_GB18030_MAP[*cur] == UB) {
		state = WC_GB18030_MBYTE1;
	    } else if (WC_GB18030_MAP[*cur] == C1) {
		wtf_push_unknown(out, cur, 1);
	    } else {
		Strcat_char(out, (char)*cur);
	    }
	    break;

	case WC_GB18030_MBYTE1:
	    if (WC_GB18030_MAP[*cur] & LB) {
		/* Complete two-byte code. */
		dbyte = ((wc_uint32)cur[-1] << 8) | *cur;
		if (wc_gbk_or_gbk_ext(dbyte) == WC_CCS_GBK_EXT) {
		    wtf_push(out, WC_CCS_GBK_EXT, dbyte);
		} else if (cur[-1] >= 0xA1 && *cur >= 0xA1) {
		    wtf_push(out, wc_gb2312_or_gbk(dbyte), dbyte);
		} else {
		    wtf_push(out, WC_CCS_GBK, dbyte);
		}
		state = WC_GB18030_NOSTATE;
	    } else if (WC_GB18030_MAP[*cur] == L4) {
		/* Second byte of a four-byte code. */
		state = WC_GB18030_MBYTE2;
	    } else {
		wtf_push_unknown(out, cur - 1, 2);
		state = WC_GB18030_NOSTATE;
	    }
	    break;

	case WC_GB18030_MBYTE2:
	    if (WC_GB18030_MAP[*cur] == UB) {
		state = WC_GB18030_MBYTE3;
	    } else {
		wtf_push_unknown(out, cur - 2, 3);
		state = WC_GB18030_NOSTATE;
	    }
	    break;

	case WC_GB18030_MBYTE3:
	    if (WC_GB18030_MAP[*cur] == L4) {
		wch.ccs = WC_CCS_GB18030_W;
		wch.code = ((wc_uint32)cur[-3] << 24)
			 | ((wc_uint32)cur[-2] << 16)
			 | ((wc_uint32)cur[-1] << 8)
			 | *cur;
#ifdef USE_UNICODE
		if (WcOption.gb18030_as_ucs &&
		    (ucs = wc_gb18030_to_ucs(wch)) != WC_C_UCS4_ERROR) {
		    wtf_push(out, WC_CCS_GB18030 | (wc_ucs_to_ccs(ucs) & ~WC_CCS_A_SET), wch.code);
		} else
#endif
		{
		    wtf_push(out, wch.ccs, wch.code);
		}
	    } else {
		wtf_push_unknown(out, cur - 3, 4);
	    }
	    state = WC_GB18030_NOSTATE;
	    break;
	}
    }

    /* Input ended in the middle of a sequence: flush the leftover bytes. */
    if (state == WC_GB18030_MBYTE1)
	wtf_push_unknown(out, cur - 1, 1);
    else if (state == WC_GB18030_MBYTE2)
	wtf_push_unknown(out, cur - 2, 2);
    else if (state == WC_GB18030_MBYTE3)
	wtf_push_unknown(out, cur - 3, 3);

    return out;
}
Example #5
0
Str wc_conv_from_hz( Str is, wc_ces ces )
{
  int eax;
  Str os;
  wc_uchar *sp = (wc_uchar*)is->ptr;
  wc_uchar *ep = &sp[ is->length ];
  wc_uchar *p;
  int state = 0;
  p = sp;
  if ( p < ep )
  {
    if ( p[0] >= 0 )
    {
      if ( p[0] != '~' )
        p++;
      else
      {
        if ( p == ep )
        {
          is->ptr = (char*)is;
          return os;
        }
        else
        {
          os = Strnew_size( is->length );
          if ( sp < p )
            Strcat_charp_n( os, is->ptr, p - sp );
            p++;
            if ( p < ep )
            {
              switch ( state )
              {
              default:
                break;
              case 0:
                if ( p[0] == '~' )
                  state = 1;
                else
                {
                  if ( WC_ISO_MAP[ p[0] ] == '@' )
                    state = 5;
                  else
                  {
                    if ( p[0] < 0 )
                    {
                      wtf_push_unknown( os, p, 1 );
                    }
                    if ( os->area_size <= os->length + 1 )
                      Strgrow( os );
                      os->ptr[ os->length ] = p[0];
                      os->length++;
                      os->ptr[ os->length ] = 0;
                    else
                    {
                      os->ptr[ os->length ] = p[0];
                      os->length++;
                      os->ptr[ os->length ] = 0;
                    }
                  }
                }
                break;
              case 1:
                if ( p[0] == '{' )
                  state = 3;
                else
                {
                  if ( p[0] == '~' )
                  {
                    if ( os->area_size <= os->length + 1 )
                      Strgrow( os );
                      os->ptr[ os->length ] = p[0];
                      os->length++;
                      os->ptr[ os->length ] = 0;
                      state = 0;
                    else
                    {
                      os->ptr[ os->length ] = p[0];
                      os->length++;
                      os->ptr[ os->length ] = 0;
                      state = 0;
                    }
                  }
                  else
                  {
                    if ( p[0] == 10 )
                    {
                    }
                    else
                      wtf_push_unknown( os, &p[ -1 ], 2 );
                      state = 0;
                  }
                }
                break;
              case 2:
                if ( p[0] != '}' )
                {
                  if ( p[0] == 10 )
                    state = 0;
                  else
                  {
                    if ( ( WC_ISO_MAP[ p[0] & 127 ] & 255 ) == 0 )
                    {
                      wtf_push( os, 33089, p[0] | ( p[ -1 ] << 8 ) );
                    }
                    wtf_push_unknown( os, &p[ -1 ], 2 );
                    state = 3;
                  }
                }
                else
                  state = 0;
                break;
              case 3:
                if ( p[0] == '~' )
                  state = 2;
                else
                {
                  if ( ( WC_ISO_MAP[ p[0] & 127 ] & 255 ) == 0 )
                    state = 4;
                  else
                    wtf_push_unknown( os, p, 1 );
                }
                break;
              case 4:
                if ( ( WC_ISO_MAP[ p[0] & 127 ] & 255 ) == 0 )
                {
                  wtf_push( os, 33089, p[0] | ( p[ -1 ] << 8 ) );
                }
                wtf_push_unknown( os, &p[ -1 ], 2 );
                state = 3;
                break;
              case 5:
                if ( WC_ISO_MAP[ p[0] ] == '@' )
                {
                  wtf_push( os, 33089, p[0] | ( p[ -1 ] << 8 ) );
                }
                wtf_push_unknown( os, &p[ -1 ], 2 );
                state = 0;
                break;
              }
            }
            else
            {
              if ( state <= 5 )
              {
                if ( ( ( 1 << state ) & 54 ) != 0 )
                  wtf_push_unknown( os, &p[ -1 ], 1 );
                  os->ptr = (char*)os;
                  return os;
              }
            }
          else
          {
          }
        }
      }
    }
  }
}