Exemplo n.º 1
0
/*
 * Decode the GB18030 byte string held in `is`.
 *
 * If every byte of the input is below 0x80 the input Str itself is
 * returned (no copy is made).  Otherwise a new Str sized to the input
 * length is created, the leading run of bytes below 0x80 is copied
 * verbatim, and the remaining bytes are classified through
 * WC_GB18030_MAP by a small state machine:
 *
 *   - a UB byte starts a multi-byte sequence; a C1 byte is pushed as an
 *     unknown byte; anything else is appended unchanged;
 *   - a two-byte sequence is pushed with wtf_push() under
 *     WC_CCS_GBK_EXT, the set chosen by wc_gb2312_or_gbk(), or
 *     WC_CCS_GBK;
 *   - a four-byte sequence is pushed as WC_CCS_GB18030_W, or, when
 *     USE_UNICODE is defined, WcOption.gb18030_as_ucs is set and
 *     wc_gb18030_to_ucs() succeeds, under a set derived from the
 *     resulting UCS value;
 *   - a sequence that breaks off (bad byte or end of input) is handed
 *     to wtf_push_unknown() with the bytes consumed so far.
 *
 * `ces` is not referenced by this function.
 */
Str
wc_conv_from_gb18030(Str is, wc_ces ces)
{
    wc_uchar *head = (wc_uchar *)is->ptr;
    wc_uchar *tail = head + is->length;
    wc_uchar *cur = head;
    Str out;
    int st = WC_GB18030_NOSTATE;
    wc_uint32 pair;
    wc_wchar_t wide;
#ifdef USE_UNICODE
    wc_uint32 ucs;
#endif

    /* Fast path: skip the leading run of bytes below 0x80. */
    while (cur < tail && *cur < 0x80)
        cur++;
    if (cur == tail)
        return is;

    out = Strnew_size(is->length);
    if (cur > head)
        Strcat_charp_n(out, (char *)is->ptr, (int)(cur - head));

    while (cur < tail) {
        if (st == WC_GB18030_NOSTATE) {
            /* Outside any sequence: classify the byte on its own. */
            switch (WC_GB18030_MAP[*cur]) {
            case C1:
                wtf_push_unknown(out, cur, 1);
                break;
            case UB:
                st = WC_GB18030_MBYTE1;
                break;
            default:
                Strcat_char(out, (char)*cur);
                break;
            }
        } else if (st == WC_GB18030_MBYTE1) {
            /* Second byte: completes a pair or continues a four-byte form. */
            if (WC_GB18030_MAP[*cur] & LB) {
                pair = ((wc_uint32)cur[-1] << 8) | cur[0];
                if (wc_gbk_or_gbk_ext(pair) == WC_CCS_GBK_EXT) {
                    wtf_push(out, WC_CCS_GBK_EXT, pair);
                } else if (cur[-1] >= 0xA1 && cur[0] >= 0xA1) {
                    wtf_push(out, wc_gb2312_or_gbk(pair), pair);
                } else {
                    wtf_push(out, WC_CCS_GBK, pair);
                }
                st = WC_GB18030_NOSTATE;
            } else if (WC_GB18030_MAP[*cur] == L4) {
                st = WC_GB18030_MBYTE2;
            } else {
                wtf_push_unknown(out, cur - 1, 2);
                st = WC_GB18030_NOSTATE;
            }
        } else if (st == WC_GB18030_MBYTE2) {
            /* Third byte of a four-byte form must again be a UB byte. */
            if (WC_GB18030_MAP[*cur] == UB) {
                st = WC_GB18030_MBYTE3;
            } else {
                wtf_push_unknown(out, cur - 2, 3);
                st = WC_GB18030_NOSTATE;
            }
        } else if (st == WC_GB18030_MBYTE3) {
            /* Fourth byte: emit the character or reject all four bytes. */
            if (WC_GB18030_MAP[*cur] != L4) {
                wtf_push_unknown(out, cur - 3, 4);
            } else {
                wide.ccs = WC_CCS_GB18030_W;
                wide.code = ((wc_uint32)cur[-3] << 24)
                          | ((wc_uint32)cur[-2] << 16)
                          | ((wc_uint32)cur[-1] << 8)
                          | cur[0];
#ifdef USE_UNICODE
                if (WcOption.gb18030_as_ucs &&
                    (ucs = wc_gb18030_to_ucs(wide)) != WC_C_UCS4_ERROR)
                    wtf_push(out, WC_CCS_GB18030 | (wc_ucs_to_ccs(ucs) & ~WC_CCS_A_SET), wide.code);
                else
#endif
                    wtf_push(out, wide.ccs, wide.code);
            }
            st = WC_GB18030_NOSTATE;
        }
        cur++;
    }

    /* Input ended inside a sequence: flush the dangling bytes as unknown. */
    if (st == WC_GB18030_MBYTE1)
        wtf_push_unknown(out, cur - 1, 1);
    else if (st == WC_GB18030_MBYTE2)
        wtf_push_unknown(out, cur - 2, 2);
    else if (st == WC_GB18030_MBYTE3)
        wtf_push_unknown(out, cur - 3, 3);

    return out;
}
Exemplo n.º 2
0
/*
 * Append one byte to `os`, growing the buffer first when there is no room
 * for the byte plus the terminating NUL, and keep the string terminated.
 */
static void hz_append_byte( Str os, int c )
{
  if ( os->area_size <= os->length + 1 )
    Strgrow( os );
  os->ptr[ os->length ] = (char)c;
  os->length++;
  os->ptr[ os->length ] = 0;
}

/*
 * Decode the HZ-encoded byte string held in `is`.
 *
 * If the input contains no '~' and no byte >= 0x80, the input Str itself
 * is returned (no copy is made).  Otherwise a new Str is built: the
 * leading run of plain bytes is copied verbatim and the remainder is run
 * through a six-state machine:
 *
 *   HZ_PLAIN     single-byte text; '~' starts an escape, a byte whose
 *                WC_ISO_MAP class is HZ_CLASS_HI starts a raw two-byte
 *                pair, any other byte >= 0x80 is pushed as unknown.
 *   HZ_TILDE     after '~' in plain text: "~{" enters two-byte mode,
 *                "~~" emits a literal '~', "~\n" is dropped, anything
 *                else pushes both bytes as unknown.
 *   HZ_MB        two-byte mode, expecting a lead byte or '~'.
 *   HZ_MB_TILDE  after '~' in two-byte mode: '}' or '\n' returns to plain
 *                text; otherwise '~' is treated as the lead byte of a pair.
 *   HZ_MB_TRAIL  second byte of a pair in two-byte mode.
 *   HZ_HI_TRAIL  second byte of a raw high-bit pair started in plain text.
 *
 * Complete pairs go to wtf_push() under HZ_PAIR_CCS; malformed or
 * truncated input goes to wtf_push_unknown().
 *
 * NOTE(review): this body was reconstructed from decompiler output that
 * had lost its loops and else-branches; the numeric constants are taken
 * from that output unchanged and should be replaced by the project's
 * named constants once confirmed.
 *
 * `ces` is not referenced by this function.
 */
Str wc_conv_from_hz( Str is, wc_ces ces )
{
  /* State numbers are kept exactly as they appeared in the original. */
  enum
  {
    HZ_PLAIN = 0,
    HZ_TILDE = 1,
    HZ_MB_TILDE = 2,
    HZ_MB = 3,
    HZ_MB_TRAIL = 4,
    HZ_HI_TRAIL = 5
  };
  enum
  {
    HZ_PAIR_CCS = 33089, /* 0x8141; presumably the GB 2312 set id - TODO confirm */
    HZ_CLASS_HI = '@'    /* WC_ISO_MAP class value 0x40 - meaning TODO confirm */
  };
  Str os;
  wc_uchar *sp = (wc_uchar*)is->ptr;
  wc_uchar *ep = &sp[ is->length ];
  wc_uchar *p;
  int state = HZ_PLAIN;

  /* Fast path: skip leading bytes that need no conversion. */
  for ( p = sp; p < ep && p[0] < 0x80 && p[0] != '~'; p++ )
    ;
  if ( p == ep )
    return is;

  os = Strnew_size( is->length );
  if ( sp < p )
    Strcat_charp_n( os, is->ptr, (int)( p - sp ) );

  for ( ; p < ep; p++ )
  {
    switch ( state )
    {
    case HZ_PLAIN:
      if ( p[0] == '~' )
        state = HZ_TILDE;
      else if ( WC_ISO_MAP[ p[0] ] == HZ_CLASS_HI )
        state = HZ_HI_TRAIL;
      else if ( p[0] & 0x80 )
        wtf_push_unknown( os, p, 1 );
      else
        hz_append_byte( os, p[0] );
      break;

    case HZ_TILDE:
      if ( p[0] == '{' )
      {
        state = HZ_MB;
      }
      else
      {
        if ( p[0] == '~' )
          hz_append_byte( os, p[0] );
        else if ( p[0] != '\n' )
          wtf_push_unknown( os, &p[ -1 ], 2 );
        /* "~\n" produces no output at all. */
        state = HZ_PLAIN;
      }
      break;

    case HZ_MB_TILDE:
      if ( p[0] == '}' || p[0] == '\n' )
      {
        state = HZ_PLAIN;
      }
      else
      {
        /* Not an escape after all: '~' was the lead byte of a pair. */
        if ( ( WC_ISO_MAP[ p[0] & 127 ] & 255 ) == 0 )
          wtf_push( os, HZ_PAIR_CCS, p[0] | ( p[ -1 ] << 8 ) );
        else
          wtf_push_unknown( os, &p[ -1 ], 2 );
        state = HZ_MB;
      }
      break;

    case HZ_MB:
      if ( p[0] == '~' )
        state = HZ_MB_TILDE;
      else if ( ( WC_ISO_MAP[ p[0] & 127 ] & 255 ) == 0 )
        state = HZ_MB_TRAIL;
      else
        wtf_push_unknown( os, p, 1 );
      break;

    case HZ_MB_TRAIL:
      if ( ( WC_ISO_MAP[ p[0] & 127 ] & 255 ) == 0 )
        wtf_push( os, HZ_PAIR_CCS, p[0] | ( p[ -1 ] << 8 ) );
      else
        wtf_push_unknown( os, &p[ -1 ], 2 );
      state = HZ_MB;
      break;

    case HZ_HI_TRAIL:
      if ( WC_ISO_MAP[ p[0] ] == HZ_CLASS_HI )
        wtf_push( os, HZ_PAIR_CCS, p[0] | ( p[ -1 ] << 8 ) );
      else
        wtf_push_unknown( os, &p[ -1 ], 2 );
      state = HZ_PLAIN;
      break;

    default:
      break;
    }
  }

  /*
   * Input ended with one byte still pending (states 1, 2, 4 and 5, i.e.
   * the original bit mask 54): report that last byte as unknown.
   */
  switch ( state )
  {
  case HZ_TILDE:
  case HZ_MB_TILDE:
  case HZ_MB_TRAIL:
  case HZ_HI_TRAIL:
    wtf_push_unknown( os, &p[ -1 ], 1 );
    break;
  default:
    break;
  }
  return os;
}