/*
 * Convert a GBK-encoded byte buffer into an array of 16-bit code units.
 *
 * out  destination array; one element is stored per converted character.
 *      No capacity is passed in, so the caller must provide room for up
 *      to n elements (the worst case is an input with no high-bit bytes).
 * in   input bytes; exactly n bytes are examined, no terminator is needed.
 * n    number of bytes available in 'in'; values <= 0 convert nothing.
 *
 * A byte with the high bit clear is stored unchanged.  A byte with the
 * high bit set is paired with the byte that follows it and the pair is
 * translated by charsets_gbk_to_ucs().  A high-bit byte that is the very
 * last input byte has no partner and is dropped without producing output.
 *
 * Returns the number of elements written to 'out'.
 */
int gbk_to_unicode(uint16* out,const char* in,int n)
{
    int i = 0;
    int j = 0;

    while (i < n) {
        /*
         * Read through an unsigned type: plain char may be signed, and
         * shifting or widening a negative value is what made the previous
         * implementation rely on undefined behaviour.
         * NOTE(review): assumes uint8 is an unsigned 8-bit type - confirm.
         */
        uint8 lead = (uint8)in[i];
        uint8 pair[2];

        if ((lead & 0x80) == 0) {
            /* Single-byte character: copy as-is. */
            out[j] = lead;
            j++;
            i++;
        } else if (n - i > 1) {
            /*
             * Hand the helper the two bytes in stream order (lead byte
             * first).  An explicit byte array keeps that order on every
             * host instead of depending on how a uint16 is laid out in
             * memory.
             */
            pair[0] = lead;
            pair[1] = (uint8)in[i + 1];
            out[j] = charsets_gbk_to_ucs(pair);
            j++;
            i += 2;
        } else {
            /* Lead byte with nothing after it: skip it. */
            i++;
        }
    }

    return j;
}
/* * Convert the src string to UTF8 coding dst string, and cut to length */ int string2utf8(unsigned char *src, unsigned char* dst, unsigned int length) { unsigned char *pt; unsigned char ch; unsigned short ucode; unsigned int type; unsigned int len; len = 0; type = 0; pt = src; while(*pt) { pt = utf8decode(pt, &ucode); if(ucode < 0x4e00) { if(ucode == 0 || ucode > 0x7F) { type = 1; break; } } else if(ucode > 0x9FCF) { type = 1; break; } else len++; if(len >= 3) break; //There is enough UTF8, so it is, to save time(>_*) } if(type == 0) //UTF8 { while(*src) { ch = *src++; *dst++ = ch; if(ch < 0x80) { if(length > 1) length -= 1; else break; } else if (ch < 0xe0) { /* U-00000080 - U-000007FF, 2 bytes */ if(length > 2) length -= 2; else break; *dst++ = *src++; } else if (ch < 0xf0) { /* U-00000800 - U-0000FFFF, 3 bytes */ if(length > 3) length -= 3; else break; *dst++ = *src++; *dst++ = *src++; } else if (ch < 0xf5) { /* U-00010000 - U-001FFFFF, 4 bytes */ if(length > 4) length -= 4; else break; *dst++ = *src++; *dst++ = *src++; *dst++ = *src++; } else { break; } } *dst = '\0'; } else //assume it is GBK code { //GBK to UTF8 while(*src) { ch = *src; if(ch < 0x80) { if(length > 1) length -= 1; else break; *dst++= ch; src ++; } else { ucode = charsets_gbk_to_ucs(src); if (ucode < 0x800) //2 bytes { if(length > 2) length -= 2; else break; *dst++ = 0xC0 | ((ucode >> 6) & 0x1F); *dst++ = 0x80 | (ucode & 0x3F); } else //3 bytes { if(length > 3) length -= 3; else break; *dst++ = 0xE0 | (ucode >> 12); *dst++ = 0x80 | ((ucode >>6) & 0x3F); *dst++ = 0x80 | (ucode & 0x3F); } src += 2; } } *dst = '\0'; }