Exemplo n.º 1
0
/// Converts a single UCS-4 code point to its UTF-8 representation.
///
/// @param ucs4char  the code point to encode.
/// @return the String released from the buffer that the two-argument
///         UCS4toUTF8 overload filled in.
String UCS4toUTF8(UInt32 ucs4char)
{
	// Initial capacity: up to 4 encoded chars plus the terminating null.
	const int initialCapacity = 5;
	StringBuffer utf8(initialCapacity);

	// The buffer-filling overload does the actual encoding work.
	UCS4toUTF8(ucs4char, utf8);

	return utf8.releaseString();
}
Exemplo n.º 2
0
/**
 * Builds a UTF-8 string representation from a raw buffer in encoding t.
 *
 * The buffer is first passed through checkmarks() (not shown here; it is
 * handed bufsize and t and its result is used as the start of the data).
 * The input is then scanned up to the first zero code unit or the end of the
 * buffer to size a scratch buffer, decoded one code point at a time, and
 * re-encoded with UCS4toUTF8().  Whatever part of the scanned input the
 * decoder did not consume is handed to set_remainder().
 *
 * Trailing bytes that do not form a whole UCS-4 / UTF-16 code unit are not
 * scanned and are not part of the remainder.
 *
 * @param xbuf     start of the raw input.
 * @param bufsize  size of the raw input in bytes.
 * @param t        encoding of the raw input.
 * @return the converted representation; an empty one (with an empty
 *         remainder) when there is nothing to convert.
 */
GP<GStringRep>
GStringRep::Unicode::create(
  void const * const xbuf,
  unsigned int bufsize,
  EncodeType t)
{
  // A UCS-4 code unit is exactly four bytes.  `unsigned long` is eight bytes
  // on LP64 platforms, so it must not be used to step through the input.
  typedef unsigned int ucs4_unit;
  typedef char ucs4_unit_must_be_4_bytes[(sizeof(ucs4_unit)==4)?1:-1];
  (void)sizeof(ucs4_unit_must_be_4_bytes);
  typedef unsigned short utf16_unit;

  GP<GStringRep> gretval;
  GStringRep *retval=0;
  void const * const buf=checkmarks(xbuf,bufsize,t);
  if(buf && bufsize)
  {
    unsigned char const *eptr=(unsigned char const *)buf;
    unsigned char const * const xeptr=eptr+bufsize;
    unsigned int maxutf8size=0;

    // Pass 1: find the end of the text (first zero unit or end of buffer)
    // and compute an upper bound for the UTF-8 output size.
    switch(t)
    {
      case XUCS4:
      case XUCS4BE:
      case XUCS4LE:
      case XUCS4_2143:
      case XUCS4_3412:
      {
        ucs4_unit u;
        // Only look at whole units; memcpy avoids unaligned reads.
        while((size_t)(xeptr-eptr)>=sizeof(u))
        {
          memcpy(&u,eptr,sizeof(u));
          if(!u)
            break;
          // The native-order value only predicts the encoded length for
          // native-order input; budget the maximum for swapped layouts.
          maxutf8size+=((t==XUCS4)&&(u<=0x7f))?1:6;
          eptr+=sizeof(u);
        }
        break;
      }
      case XUTF16:
      case XUTF16BE:
      case XUTF16LE:
      {
        utf16_unit u;
        while((size_t)(xeptr-eptr)>=sizeof(u))
        {
          memcpy(&u,eptr,sizeof(u));
          if(!u)
            break;
          maxutf8size+=3;
          eptr+=sizeof(u);
        }
        break;
      }
      case XUTF8:
        for(;(eptr<xeptr)&&*eptr;maxutf8size++,eptr++)
          EMPTY_LOOP;
        break;
      case XEBCDIC:
        for(;(eptr<xeptr)&&*eptr;eptr++)
        {
          maxutf8size+=(*eptr>0x7f)?2:1;
        }
        break;
      default:
        break;
    }

    // Scratch buffer for the converted text; utf8buf is written through
    // right after gutf8buf is constructed with it.
    unsigned char *utf8buf=0;
    GPBuffer<unsigned char> gutf8buf(utf8buf,maxutf8size+1);
    utf8buf[0]=0;
    if (maxutf8size)
    {
      // Pass 2: decode [buf,eptr) and re-encode as UTF-8.
      unsigned char *optr=utf8buf;
      unsigned char const *iptr=(unsigned char const *)buf;
      unsigned short const *sptr=(unsigned short const *)buf;
      unsigned long w;
      switch(t)
      {
        case XUCS4:
        {
          ucs4_unit u;
          while((size_t)(eptr-iptr)>=sizeof(u))
          {
            memcpy(&u,iptr,sizeof(u));
            if(!u)
              break;
            w=u;
            optr=UCS4toUTF8(w,optr);
            iptr+=sizeof(u);
          }
          break;
        }
        case XUCS4BE:
          while((w=UCS4BEtoUCS4(iptr,eptr)))
          {
            optr=UCS4toUTF8(w,optr);
          }
          break;
        case XUCS4LE:
          while((w=UCS4LEtoUCS4(iptr,eptr)))
          {
            optr=UCS4toUTF8(w,optr);
          }
          break;
        case XUCS4_2143:
          while((w=UCS4_2143toUCS4(iptr,eptr)))
          {
            optr=UCS4toUTF8(w,optr);
          }
          break;
        case XUCS4_3412:
          while((w=UCS4_3412toUCS4(iptr,eptr)))
          {
            optr=UCS4toUTF8(w,optr);
          }
          break;
        case XUTF16:
          while((w=xUTF16toUCS4(sptr,eptr)))
          {
            optr=UCS4toUTF8(w,optr);
          }
          // This branch walks sptr rather than iptr; sync iptr so the
          // remainder computed below starts after the consumed input.
          iptr=(unsigned char const *)sptr;
          break;
        case XUTF16BE:
          while((w=UTF16BEtoUCS4(iptr,eptr)))
          {
            optr=UCS4toUTF8(w,optr);
          }
          break;
        case XUTF16LE:
          while((w=UTF16LEtoUCS4(iptr,eptr)))
          {
            optr=UCS4toUTF8(w,optr);
          }
          break;
        case XUTF8:
          while((w=UTF8toUCS4(iptr,eptr)))
          {
            optr=UCS4toUTF8(w,optr);
          }
          break;
        case XEBCDIC:
          while((iptr<eptr)&&(w=*iptr++))
          {
            optr=UCS4toUTF8(w,optr);
          }
          break;
        default:
          break;
      }

      // Copy the converted bytes into a representation of the right size.
      const unsigned int size=(size_t)optr-(size_t)utf8buf;
      if(size)
      {
        retval=(gretval=GStringRep::Unicode::create(size));
        memcpy(retval->data,utf8buf,size);
      }else
      {
        retval=(gretval=GStringRep::Unicode::create(1));
        retval->size=size;
      }
      retval->data[size]=0;
      gutf8buf.resize(0);

      // Record the scanned input that the decoder did not consume.
      const size_t s=(size_t)eptr-(size_t)iptr;
      retval->set_remainder(iptr,s,t);
    }
  }
  if(!retval)
  {
    // Nothing was converted: return an empty string with no remainder.
    retval=(gretval=GStringRep::Unicode::create(1));
    retval->data[0]=0;
    retval->size=0;
    retval->set_remainder(0,0,t);
  }
  return gretval;
}
Exemplo n.º 3
0
/// Converts a single UCS-2 code unit to its UTF-8 representation.
///
/// @param ucs2char  the 16-bit code unit to encode.
/// @return the result of the UCS-4 conversion applied to the same value.
String UCS2toUTF8(UInt16 ucs2char)
{
	// A UCS-2 value is the same code point as its UCS-4 counterpart, just
	// stored in a narrower type, so widen it and reuse the UCS-4 routine.
	const UInt32 widened = ucs2char;
	return UCS4toUTF8(widened);
}