Example #1
0
// Converts a DVB text field into a UTF-8 encoded std::string.
//
// Parameters:
//   data     - bytes of the text field; data[0] is inspected as an encoding
//              selector byte.
//   len      - number of bytes in data; 0 yields an empty string.
//   table    - caller-supplied character table; may be replaced by the
//              per-transponder default or by the selector byte (see below).
//              The value 65 is treated as 16-bit big-endian unicode.
//   tsidonid - key into TransponderDefaultMapping / TransponderUseTwoCharMapping;
//              0 skips both lookups.
//
// NOTE(review): this listing is cut off inside the UTF-8 encoding chain, so
// the rest of the loop and the final return statement are not visible here.
std::string convertDVBUTF8(const char *data, int len, int table, int tsidonid)
{
	// Table requested by the selector byte(s); 0 means none was seen.
	int newtable = 0;
	// True when tsidonid is present in TransponderUseTwoCharMapping.
	bool twochar = false;
	if (!len)
		return "";

	// i: read index into data, t: write index into res.
	int i = 0, t = 0;

	if ( tsidonid )
	{
		// A per-transponder default, when present, replaces the caller's table.
		std::map<int, int>::iterator it = TransponderDefaultMapping.find(tsidonid);
		if ( it != TransponderDefaultMapping.end() )
			table = it->second;

		twochar = TransponderUseTwoCharMapping.find(tsidonid) != TransponderUseTwoCharMapping.end();
	}
	
	//printf("table %d tsidonid %04x twochar %d : %20s\n", table, tsidonid, twochar, data);
	
	// Dispatch on the first byte. There is no default label, so a first byte
	// that matches no case leaves i at 0 and is processed as text below.
	switch(data[0])
	{
		// "1 ... 12" is a GNU case-range extension. Selector N requests table N+4.
		case 1 ... 12:
			newtable=data[i++]+4;
			//printf("(1..12)text encoded in ISO-8859-%d\n", table);
			break;
			
		case 0x10:
		{
			//printf("(0x10)text encoded in ISO-8859-%d\n",n);
			// The two bytes after the selector form a big-endian table number.
			// NOTE(review): data[1] and data[2] are read without checking len >= 3.
			// NOTE(review): data is plain char; where char is signed, bytes >= 0x80
			// are sign-extended before the shift/or - confirm inputs stay below 0x80.
			int n=(data[i+1]<<8)|(data[i+2]);
			i += 3;
			switch(n)
			{
			case 12:
			// Empty statement only: falls through, so 12 is stored like any other value.
			{} //eDebug("unsup. ISO8859-12 enc.", n);
			default:
				newtable=n;
				break;
			}
			break;
		}
		
		case 0x11:// Basic Multilingual Plane of ISO/IEC 10646-1 enc (UTF-16... Unicode)
			// Forces the 16-bit path of the loop below and clears tsidonid.
			table = 65;
			tsidonid = 0;
			++i;
			break;
			
		// 0x12..0x14: the selector byte is skipped; neither table nor newtable changes.
		case 0x12:
			++i;
			{} //printf("unsup. KSC 5601 enc.\n");
			break;
			
		case 0x13:
			++i;
			{} //printf("unsup. GB-2312-1980 enc.\n");
			break;
			
		case 0x14:
			++i;
			{} //printf("unsup. Big5 subset of ISO/IEC 10646-1 enc.\n");
			break;
			
		case 0x15: // UTF-8 encoding of ISO/IEC 10646-1
				// Everything after the selector byte is returned unchanged.
				return std::string((char*)data+1, len-1);
				
		case 0x1F:
			{
				// The whole buffer, selector byte included, goes to the decoder;
				// a non-empty result is returned as-is.
				std::string decoded_string = huffmanDecoder.decode((const unsigned char *)data, len);
				if (!decoded_string.empty())
					return decoded_string;
			}
			// Empty decoder result: skip the selector and convert the rest below.
			++i;
			break;
		case 0x0:
		case 0xD ... 0xF:
		case 0x16 ... 0x1E:	  
		{} //printf("reserved %d\n", data[0]);
		++i;
		break;
	}

	// The selector's table is used only when no table is set so far ...
	if(!table)
		table = newtable;
	// ... or when the current table is 64 and the selector named one.
	if(table == 64 && (newtable != 0 ))
	{
		//for ISO6937
		table = newtable;
	}

	//printf("recode:::: tsidonid %X table %d two-char %d len %d\n", tsidonid, table, twochar, len);
	
	// Output buffer for the UTF-8 bytes.
	// NOTE(review): no bounds check on t is visible in this part of the
	// function; confirm the truncated remainder limits t.
	unsigned char res[2048];
	while (i < len)
	{
		// Code point of the current character; 0 means "nothing decoded yet".
		unsigned long code = 0;

		// Two-char mapping: try to combine the next two bytes into one code.
		if ( i + 1 < len && twochar && (code = doVideoTexSuppl(data[i], data[i+1])) ) 
		{
			i += 2;
		}

		if (!code) 
		{
			if (table == 65) 
			{ 
				// unicode
				// NOTE(review): when only one byte is left (i+1 == len) nothing is
				// consumed, code stays 0 and the `continue` below re-enters the
				// loop with the same i - this appears to never terminate.
				// NOTE(review): same signed-char sign-extension concern as in
				// the 0x10 case above.
				if (i+1 < len) 
				{
					code = (data[i] << 8) | data[i+1];
					i += 2;
				}
			}
			else
				code = recode(data[i++], table);
		}
		
		// A zero code produces no output.
		if (!code)
			continue;
		
		// Unicode->UTF8 encoding
		if (code < 0x80) // identity ascii <-> utf8 mapping
			res[t++] = char(code);
		// 0x8A becomes a space for table 5 and a newline for every other table.
		else if((table == 5) && (code == 0x8A))
			res[t++]= 0x20;
		else if ((code == 0x8A))
			res[t++]= '\n'; // 0x8a is vertical tab. Just use newline for now.
		// All remaining codes in 0x80..0x9F are dropped.
		else if((code >= 0x80) && (code <= 0x9F))
			continue;
		else if (code < 0x800) // two byte mapping
		{
			res[t++] = (code>>6)|0xC0;
			res[t++] = (code&0x3F)|0x80;
		} 
		else if (code < 0x10000) // three bytes mapping
Example #2
0
// Converts a DVB text field into a UTF-8 encoded std::string.
//
// Parameters:
//   data     - bytes of the text field; data[0] is inspected as an encoding
//              selector byte.
//   len      - number of bytes in data; 0 yields an empty string.
//   table    - character table to start from; the selector byte may replace
//              it. The value 65 is treated as 16-bit big-endian unicode.
//   tsidonid - passed to encodingHandler for per-transponder settings;
//              0 skips those calls.
//
// NOTE(review): this listing ends inside the UTF-8 encoding chain, so the
// rest of the loop and the final return statement are not visible here.
std::string convertDVBUTF8(const unsigned char *data, int len, int table, int tsidonid)
{
	if (!len)
		return "";

	// i: read index into data, t: write index into res.
	int i=0, t=0;

	// presumably updates table through a reference parameter - verify against
	// the encodingHandler declaration.
	if ( tsidonid )
		encodingHandler.getTransponderDefaultMapping(tsidonid, table);

	// Dispatch on the first byte. There is no default label, so a first byte
	// that matches no case leaves i at 0 and is processed as text below.
	switch(data[0])
	{
		// "1 ... 11" is a GNU case-range extension. Selector N selects table N+4.
		case 1 ... 11:
			// For Thai providers, encoding char is present but faulty.
			if (table != 11)
				table=data[i]+4;
			++i;
//			eDebug("(1..11)text encoded in ISO-8859-%d",table);
			break;
		case 0x10:
		{
			// The two bytes after the selector form a big-endian table number.
			// NOTE(review): data[1] and data[2] are read without checking len >= 3.
			int n=(data[++i]<<8);
			n |= (data[++i]);
//			eDebug("(0x10)text encoded in ISO-8859-%d",n);
			++i;
			switch(n)
			{
				// 12 is only logged; table keeps its previous value.
				case 12:
					eDebug("unsup. ISO8859-12 enc.");
					break;
				default:
					table=n;
					break;
			}
			break;
		}
		case 0x11: //  Basic Multilingual Plane of ISO/IEC 10646-1 enc  (UTF-16... Unicode)
			// Forces the 16-bit path of the loop below and clears tsidonid.
			table = 65;
			tsidonid = 0;
			++i;
			break;
		// 0x12..0x14: logged, selector byte skipped, table left unchanged.
		case 0x12:
			++i;
			eDebug("unsup. KSC 5601 enc.");
			break;
		case 0x13:
			++i;
			eDebug("unsup. GB-2312-1980 enc.");
			break;
		case 0x14:
			++i;
			eDebug("unsup. Big5 subset of ISO/IEC 10646-1 enc.");
			break;
		case 0x15: // UTF-8 encoding of ISO/IEC 10646-1
			// Everything after the selector byte is returned unchanged.
			return std::string((char*)data+1, len-1);
		case 0x1F:
			{
				// Attempt to decode Freesat Huffman encoded string
				// The whole buffer, selector byte included, goes to the decoder.
				unsigned char *temp = (unsigned char *) huffmanDecoder.decode(data, len);
				if (temp)
				{
					// NOTE(review): strlen assumes the decoder returns a
					// NUL-terminated buffer, and free assumes it came from
					// malloc - confirm against huffmanDecoder.
					int newlen = strlen((char*) temp);
					// The decoded bytes are run through this function again
					// with the same table and tsidonid.
					std::string decoded_string = convertDVBUTF8(temp, newlen, table, tsidonid);
					free(temp);
					return decoded_string;
				}
			}
			// Decoder returned null: skip the selector and convert the rest below.
			i++;
			eDebug("failed to decode bbc freesat huffman");
			break;
		case 0x0:
		case 0xC ... 0xF:
		case 0x16 ... 0x1E:
			eDebug("reserved %d", data[0]);
			++i;
			break;
	}

	// Two-char mapping applies when no table is set, or when the transponder
	// (non-zero tsidonid) is flagged for it by encodingHandler.
	bool useTwoCharMapping = !table || (tsidonid && encodingHandler.getTransponderUseTwoCharMapping(tsidonid));

	if (useTwoCharMapping && table == 5) { // hopefully this does not break other transponders which really use ISO8859-5 and two char byte mapping...
//		eDebug("Cyfra / Cyfrowy Polsat HACK... override given ISO8859-5 with ISO6397");
		table = 0;
	}

	// Output buffer for the UTF-8 bytes.
	// NOTE(review): no bounds check on t is visible in this part of the
	// function; confirm the truncated remainder limits t.
	unsigned char res[2048];
	while (i < len)
	{
		// Code point of the current character; 0 means "nothing decoded yet".
		unsigned long code=0;
		// Two-char mapping: try to combine the next two bytes into one code.
		if ( useTwoCharMapping && i+1 < len && (code=doVideoTexSuppl(data[i], data[i+1])) )
			i+=2;
		if (!code) {
			if (table == 65) { // unicode
				// NOTE(review): when only one byte is left (i+1 == len) nothing is
				// consumed, code stays 0 and the `continue` below re-enters the
				// loop with the same i - this appears to never terminate.
				if (i+1 < len) {
					code=(data[i] << 8) | data[i+1];
					i += 2;
				}
			}
			else
				code=recode(data[i++], table);
		}
		// A zero code produces no output.
		if (!code)
			continue;
				// Unicode->UTF8 encoding
		if (code < 0x80) // identity ascii <-> utf8 mapping
			res[t++]=char(code);
		else if (code < 0x800) // two byte mapping
		{
			res[t++]=(code>>6)|0xC0;
			res[t++]=(code&0x3F)|0x80;
		} else if (code < 0x10000) // three bytes mapping