/**
 * Convenience overload: converts one UCS-4 value to UTF-8 and hands the
 * result back as a String.
 *
 * The actual conversion is delegated to the buffer-taking overload of
 * UCS4toUTF8; this function only provides the scratch buffer.
 *
 * @param ucs4char  The UCS-4 value to convert.
 * @return          The String released from the scratch buffer.
 */
String UCS4toUTF8(UInt32 ucs4char)
{
    // Room for at most 4 chars plus the terminating null.
    StringBuffer encoded(5);

    UCS4toUTF8(ucs4char, encoded);

    return encoded.releaseString();
}
/**
 * Build a UTF-8 string representation from a raw text buffer.
 *
 * The work is done in two passes:
 *   1. scan the input up to the first zero code unit (or the end of the
 *      buffer) to find where the text ends and to compute an upper bound on
 *      the UTF-8 size;
 *   2. decode code points one at a time and re-encode them as UTF-8 into a
 *      scratch buffer, which is then copied into the returned object.
 *
 * Whatever lies between the last decoded code point and the end of the
 * scanned text is handed to set_remainder().
 * NOTE(review): presumably this lets a caller resume decoding once more
 * bytes arrive -- confirm against set_remainder().
 *
 * @param xbuf     Start of the raw input.
 * @param bufsize  Size of the raw input in bytes.
 * @param t        Encoding of the input.
 * @return         The new representation.  When there is nothing to convert
 *                 (null/empty buffer, unknown encoding, or text that starts
 *                 with a zero unit) an empty string with no remainder is
 *                 returned.
 */
GP<GStringRep>
GStringRep::Unicode::create(
  void const * const xbuf,
  unsigned int bufsize,
  EncodeType t)
{
  GP<GStringRep> gretval;
  GStringRep *retval=0;
  // NOTE(review): checkmarks() is declared elsewhere; presumably it
  // recognises byte-order marks and may adjust the start, bufsize and t.
  void const * const buf=checkmarks(xbuf,bufsize,t);
  if(buf && bufsize)
  {
    unsigned char const * const bptr=(unsigned char const *)buf;
    unsigned char const *eptr=bptr;
    unsigned int maxutf8size=0;
    void const* const xeptr=(void const *)((size_t)eptr+bufsize);

    // Pass 1: locate the end of the text and bound the UTF-8 output size.
    switch(t)
    {
      case XUCS4:
      case XUCS4BE:
      case XUCS4LE:
      case XUCS4_2143:
      case XUCS4_3412:
      {
        // A UCS-4 unit is 4 bytes.  It must not be read as `unsigned long`,
        // which is 8 bytes on LP64 platforms; `unsigned int` is assumed to be
        // 32 bits here.  A unit is only read when it lies entirely inside
        // the buffer.  The raw (possibly byte-swapped) value is good enough
        // for this pass: zero is zero in any byte order and over-estimating
        // the output size is harmless.
        size_t const unit=sizeof(unsigned int);
        for(unsigned int w;
          (((size_t)xeptr-(size_t)eptr)>=unit)
            &&(w=*(unsigned int const *)eptr);
          eptr+=unit)
        {
          maxutf8size+=(w>0x7f)?6:1;
        }
        if(((size_t)xeptr-(size_t)eptr)<unit)
        {
          // No terminator found: keep a trailing partial unit inside the
          // scanned range so that it ends up in the remainder.
          eptr=(unsigned char const *)xeptr;
        }
        break;
      }
      case XUTF16:
      case XUTF16BE:
      case XUTF16LE:
      {
        // Same bounded scan with 2-byte units.
        size_t const unit=sizeof(unsigned short);
        for(unsigned short w;
          (((size_t)xeptr-(size_t)eptr)>=unit)
            &&(w=*(unsigned short const *)eptr);
          eptr+=unit)
        {
          maxutf8size+=3;
        }
        if(((size_t)xeptr-(size_t)eptr)<unit)
        {
          eptr=(unsigned char const *)xeptr;
        }
        break;
      }
      case XUTF8:
        for(;(eptr<xeptr)&&*eptr;maxutf8size++,eptr++)
          EMPTY_LOOP;
        break;
      case XEBCDIC:
        for(;(eptr<xeptr)&&*eptr;eptr++)
        {
          maxutf8size+=(*eptr>0x7f)?2:1;
        }
        break;
      default:
        break;
    }

    unsigned char *utf8buf=0;
    GPBuffer<unsigned char> gutf8buf(utf8buf,maxutf8size+1);
    utf8buf[0]=0;

    // The scan advanced iff there is something to convert or to carry over.
    // (eptr moves whenever maxutf8size grows; it can additionally move when
    // the input holds nothing but a partial trailing unit.)
    if(eptr!=bptr)
    {
      // Pass 2: decode each code point and append its UTF-8 encoding.
      unsigned char *optr=utf8buf;
      unsigned char const *iptr=bptr;
      unsigned short const *sptr=(unsigned short const *)buf;
      // Kept as unsigned long so the calls to the decoding helpers and to
      // UCS4toUTF8 resolve exactly as before.
      unsigned long w;
      switch(t)
      {
        case XUCS4:
          // Native byte order: read 32-bit units directly, never past eptr.
          while((((size_t)eptr-(size_t)iptr)>=sizeof(unsigned int))
            &&(w=*(unsigned int const *)iptr))
          {
            optr=UCS4toUTF8(w,optr);
            iptr+=sizeof(unsigned int);
          }
          break;
        case XUCS4BE:
          while((w=UCS4BEtoUCS4(iptr,eptr)))
          {
            optr=UCS4toUTF8(w,optr);
          }
          break;
        case XUCS4LE:
          while((w=UCS4LEtoUCS4(iptr,eptr)))
          {
            optr=UCS4toUTF8(w,optr);
          }
          break;
        case XUCS4_2143:
          while((w=UCS4_2143toUCS4(iptr,eptr)))
          {
            optr=UCS4toUTF8(w,optr);
          }
          break;
        case XUCS4_3412:
          while((w=UCS4_3412toUCS4(iptr,eptr)))
          {
            optr=UCS4toUTF8(w,optr);
          }
          break;
        case XUTF16:
          while((w=xUTF16toUCS4(sptr,eptr)))
          {
            optr=UCS4toUTF8(w,optr);
          }
          // This branch consumes input through sptr; bring iptr up to date
          // so the remainder below covers only the unconsumed tail instead
          // of the whole input.
          iptr=(unsigned char const *)sptr;
          break;
        case XUTF16BE:
          while((w=UTF16BEtoUCS4(iptr,eptr)))
          {
            optr=UCS4toUTF8(w,optr);
          }
          break;
        case XUTF16LE:
          while((w=UTF16LEtoUCS4(iptr,eptr)))
          {
            optr=UCS4toUTF8(w,optr);
          }
          break;
        case XUTF8:
          while((w=UTF8toUCS4(iptr,eptr)))
          {
            optr=UCS4toUTF8(w,optr);
          }
          break;
        case XEBCDIC:
          // Each input byte is passed on as a code point of the same value.
          while((iptr<eptr)&&(w=*iptr++))
          {
            optr=UCS4toUTF8(w,optr);
          }
          break;
        default:
          break;
      }

      // Copy the converted bytes into a representation of the exact size.
      const unsigned int size=(size_t)optr-(size_t)utf8buf;
      if(size)
      {
        retval=(gretval=GStringRep::Unicode::create(size));
        memcpy(retval->data,utf8buf,size);
      }else
      {
        retval=(gretval=GStringRep::Unicode::create(1));
        retval->size=size;
      }
      retval->data[size]=0;
      gutf8buf.resize(0);

      // Whatever was scanned but not decoded becomes the remainder.
      const size_t s=(size_t)eptr-(size_t)iptr;
      retval->set_remainder(iptr,s,t);
    }
  }
  if(!retval)
  {
    // Nothing to convert: return an empty string with no remainder.
    retval=(gretval=GStringRep::Unicode::create(1));
    retval->data[0]=0;
    retval->size=0;
    retval->set_remainder(0,0,t);
  }
  return gretval;
}
/**
 * Converts one UCS-2 value to UTF-8 by forwarding it to UCS4toUTF8.
 *
 * @param ucs2char  The UCS-2 value to convert.
 * @return          Whatever UCS4toUTF8 returns for the same value.
 */
String UCS2toUTF8(UInt16 ucs2char)
{
    // UCS2 and UCS4 are the same, only different sizes, so the UCS-4
    // conversion can be reused unchanged.
    String utf8 = UCS4toUTF8(ucs2char);
    return utf8;
}