Example #1
0
/*
 *	Convert a GBK byte buffer into an array of 16-bit code units.
 *
 *	out	destination array, one uint16 written per decoded character.
 *		No bound is checked here; the caller must provide room for
 *		up to n entries.
 *	in	source bytes; exactly n bytes are examined (a NUL byte is not
 *		treated as a terminator).
 *	n	number of bytes available in `in`.
 *
 *	Bytes with the high bit clear are copied through unchanged. A byte with
 *	the high bit set is taken as the lead byte of a two-byte sequence and
 *	converted together with the following byte by charsets_gbk_to_ucs().
 *	A lead byte that is the very last byte of the input is dropped.
 *
 *	Returns the number of code units stored in out.
 */
int gbk_to_unicode(uint16* out,const char* in,int n)
{
	int i = 0;
	int j = 0;
	uint8 pair[2];

	while (i < n) {
		if (!(in[i] & 0x80)) {
			/* Single-byte character: copy through unchanged. */
			out[j] = (uint16)(in[i] & 0x00ff);
			j++;
			i++;
			continue;
		}

		if (n - i < 2) {
			/* Lead byte with no trail byte left in the input: skip it. */
			i++;
			continue;
		}

		/*
		 * Pass the two bytes in stream order (lead byte first) through a
		 * byte array. This avoids packing them into a uint16 and reading
		 * it back as bytes, which only yields this order on little-endian
		 * hosts, and avoids left-shifting a possibly negative char.
		 */
		pair[0] = (uint8)in[i];
		pair[1] = (uint8)in[i + 1];
		out[j] = charsets_gbk_to_ucs(pair);
		j++;
		i += 2;
	}

	return j;
}
Example #2
0
/*
*	Convert the src string to UTF8 coding dst string, and cut to length
*/
int string2utf8(unsigned char *src, unsigned char* dst, unsigned int length)
{
	unsigned char *pt;
	unsigned char ch;
	unsigned short ucode;
	unsigned int type;
	unsigned int len;

	len = 0;
	type = 0;
	pt = src;
	while(*pt)
	{
		pt = utf8decode(pt, &ucode);
		if(ucode < 0x4e00) {
			if(ucode == 0 || ucode > 0x7F) {
				type = 1;
				break;
			}
		} else if(ucode > 0x9FCF) {
			type = 1;
			break;
		}
		else
			len++;

		if(len >= 3) break;	//There is enough UTF8, so it is, to save time(>_*)
	}

	if(type == 0)	//UTF8
	{
		while(*src)
		{
			ch = *src++;
			*dst++ = ch;

			if(ch < 0x80) {
				if(length > 1) length -= 1;
				else break;
			} else if (ch < 0xe0) { /* U-00000080 - U-000007FF, 2 bytes */
				if(length > 2) length -= 2;
				else break;
				*dst++ = *src++;
			} else if (ch < 0xf0) { /* U-00000800 - U-0000FFFF, 3 bytes */
				if(length > 3) length -= 3;
				else break;
				*dst++ = *src++;
				*dst++ = *src++;
			} else if (ch < 0xf5) { /* U-00010000 - U-001FFFFF, 4 bytes */
				if(length > 4) length -= 4;
				else break;
				*dst++ = *src++;
				*dst++ = *src++;
				*dst++ = *src++;
			} else {
				break;
			}
		}
		*dst = '\0';
	}
	else //assume it is GBK code
	{
		//GBK to UTF8
		while(*src)
		{
			ch = *src;
			if(ch < 0x80)
			{
				if(length > 1) length -= 1;
				else break;

				*dst++= ch;
				src ++;
			}
			else
			{
				ucode = charsets_gbk_to_ucs(src);

				if (ucode < 0x800) //2 bytes
				{
					if(length > 2) length -= 2;
					else break;
	
					*dst++ = 0xC0 | ((ucode >> 6) & 0x1F);
					*dst++ = 0x80 | (ucode & 0x3F);
				}
				else //3 bytes
				{
					if(length > 3) length -= 3;
					else break;

					*dst++ = 0xE0 | (ucode >> 12);
					*dst++ = 0x80 | ((ucode >>6) & 0x3F);
					*dst++ = 0x80 | (ucode & 0x3F);
				}

				src += 2;
			}
		}
		*dst = '\0';
	}