// convertDVBUTF8: converts a DVB text field into a UTF-8 std::string.
//
// NOTE(review): this snippet is collapsed onto one physical line and stops
// right after the "three bytes mapping" condition; the three-byte branch body,
// the rest of the loop and the final return are not visible here.
//
// Parameters:
//   data     - raw bytes; data[0] is inspected as an encoding selector.
//   len      - number of bytes in data; len == 0 returns "".
//   table    - character table id handed to recode(); may be replaced below.
//   tsidonid - when non-zero, passed to encodingHandler lookups
//              (getTransponderDefaultMapping presumably updates `table`
//              through a reference parameter - verify).
//
// Selector handling (switch on data[0]):
//   1..11       table = data[0] + 4; one byte consumed.
//   0x10        the next two bytes form a big-endian 16-bit n and three bytes
//               are consumed; n == 12 only logs, any other n becomes table.
//   0x11        table = 65 (handled as 16-bit code units in the loop) and
//               tsidonid is cleared, so the tsidonid-based two-char lookup
//               below is skipped; one byte consumed.
//   0x12-0x14   logged as unsupported; one byte consumed, table unchanged.
//   0x15        the bytes after the selector are returned unchanged.
//   0x0, 0xC-0xF, 0x16-0x1F   logged as reserved; one byte consumed.
//   anything else: no case matches, nothing is consumed and decoding starts
//               at data[0].
//
// useTwoCharMapping is true when table is 0, or when tsidonid is non-zero and
// getTransponderUseTwoCharMapping(tsidonid) is true (&& binds tighter than ||).
// With two-char mapping active, table 5 is forced to 0.
//
// Decode loop: a byte pair for which doVideoTexSuppl() returns non-zero
// consumes two bytes; otherwise table 65 combines two bytes big-endian and any
// other table sends one byte through recode(). A zero code emits nothing.
// Non-zero codes are written to res as UTF-8 (one byte below 0x80, two bytes
// below 0x800).
//
// NOTE(review): case 0x10 reads data[1] and data[2] without any check that
//   len >= 3 in the visible code.
// NOTE(review): with table == 65 and a single trailing byte (i + 1 == len)
//   neither i nor code changes before `continue`, so the loop as visible would
//   not terminate - confirm against the full source.
// NOTE(review): no bound on t against sizeof(res) is visible in this part;
//   presumably it lives in the truncated tail - verify.
std::string convertDVBUTF8(const unsigned char *data, int len, int table, int tsidonid) { if (!len) return ""; int i=0, t=0; if ( tsidonid ) encodingHandler.getTransponderDefaultMapping(tsidonid, table); switch(data[0]) { case 1 ... 11: table=data[i++]+4; // eDebug("(1..11)text encoded in ISO-8859-%d",table); break; case 0x10: { int n=(data[++i]<<8); n |= (data[++i]); // eDebug("(0x10)text encoded in ISO-8859-%d",n); ++i; switch(n) { case 12: eDebug("unsup. ISO8859-12 enc."); break; default: table=n; break; } break; } case 0x11: // Basic Multilingual Plane of ISO/IEC 10646-1 enc (UTF-16... Unicode) table = 65; tsidonid = 0; ++i; break; case 0x12: ++i; eDebug("unsup. KSC 5601 enc."); break; case 0x13: ++i; eDebug("unsup. GB-2312-1980 enc."); break; case 0x14: ++i; eDebug("unsup. Big5 subset of ISO/IEC 10646-1 enc."); break; case 0x15: // UTF-8 encoding of ISO/IEC 10646-1 return std::string((char*)data+1, len-1); case 0x0: case 0xC ... 0xF: case 0x16 ... 0x1F: eDebug("reserved %d", data[0]); ++i; break; } bool useTwoCharMapping = !table || tsidonid && encodingHandler.getTransponderUseTwoCharMapping(tsidonid); if (useTwoCharMapping && table == 5) { // i hope this dont break other transponders which realy use ISO8859-5 and two char byte mapping... // eDebug("Cyfra / Cyfrowy Polsat HACK... override given ISO8859-5 with ISO6397"); table = 0; } unsigned char res[2048]; while (i < len) { unsigned long code=0; if ( useTwoCharMapping && i+1 < len && (code=doVideoTexSuppl(data[i], data[i+1])) ) i+=2; if (!code) { if (table == 65) { // unicode if (i+1 < len) { code=(data[i] << 8) | data[i+1]; i += 2; } } else code=recode(data[i++], table); } if (!code) continue; // Unicode->UTF8 encoding if (code < 0x80) // identity ascii <-> utf8 mapping res[t++]=char(code); else if (code < 0x800) // two byte mapping { res[t++]=(code>>6)|0xC0; res[t++]=(code&0x3F)|0x80; } else if (code < 0x10000) // three bytes mapping
// convertDVBUTF8: converts a DVB text field into a UTF-8 std::string.
//
// NOTE(review): this snippet is collapsed onto two physical lines and stops
// right after the "three bytes mapping" condition; the three-byte branch body,
// the rest of the loop and the final return are not visible here.
//
// Parameters:
//   data     - raw bytes (plain char); data[0] is inspected as an encoding
//              selector.
//   len      - number of bytes in data; len == 0 returns "".
//   table    - character table id handed to recode(); may be replaced below.
//   tsidonid - when non-zero: a TransponderDefaultMapping entry for it
//              overrides table, and twochar becomes true if it is present in
//              TransponderUseTwoCharMapping.
//
// Selector handling (switch on data[0]); a table announced by the text itself
// is stored in newtable, not directly in table:
//   1..12       newtable = data[0] + 4; one byte consumed.
//   0x10        data[1] and data[2] form a 16-bit n (high byte first), three
//               bytes consumed; `case 12` is empty and falls through into
//               `default`, so newtable = n for every n.
//   0x11        table = 65 (handled as 16-bit code units in the loop);
//               tsidonid is cleared, but twochar was already computed above
//               and tsidonid is not read again in the visible code.
//   0x12-0x14   one byte consumed, nothing else changes.
//   0x15        the bytes after the selector are returned unchanged.
//   0x1F        huffmanDecoder.decode(data, len) is tried; a non-empty result
//               is returned, otherwise one byte is consumed.
//   0x0, 0xD-0xF, 0x16-0x1E   one byte consumed.
//
// After the switch, newtable is used when table is still 0, and also when
// table is 64 and newtable is non-zero.
//
// Decode loop: with twochar set, a byte pair for which doVideoTexSuppl()
// returns non-zero consumes two bytes; otherwise table 65 combines two bytes
// (high byte first) and any other table sends one byte through recode().
// A zero code emits nothing. Output rules, in order:
//   code < 0x80                  one byte
//   table == 5 and code == 0x8A  a space (0x20)
//   code == 0x8A                 '\n'
//   0x80 <= code <= 0x9F         dropped
//   code < 0x800                 two UTF-8 bytes
//
// NOTE(review): data is plain char; if char is signed on the target, bytes
//   >= 0x80 are negative and sign-extend in `(data[i+1]<<8)|(data[i+2])` and
//   in the table-65 combine - confirm intended behaviour.
// NOTE(review): case 0x10 reads data[1] and data[2] without any check that
//   len >= 3 in the visible code.
// NOTE(review): with table == 65 and a single trailing byte (i + 1 == len)
//   neither i nor code changes before `continue`, so the loop as visible would
//   not terminate - confirm against the full source.
// NOTE(review): no bound on t against sizeof(res) is visible in this part;
//   presumably it lives in the truncated tail - verify.
std::string convertDVBUTF8(const char *data, int len, int table, int tsidonid) { int newtable = 0; bool twochar = false; if (!len) return ""; int i = 0, t = 0; if ( tsidonid ) { std::map<int, int>::iterator it = TransponderDefaultMapping.find(tsidonid); if ( it != TransponderDefaultMapping.end() ) table = it->second; twochar = TransponderUseTwoCharMapping.find(tsidonid) != TransponderUseTwoCharMapping.end(); } //printf("table %d tsidonid %04x twochar %d : %20s\n", table, tsidonid, twochar, data); switch(data[0]) { case 1 ... 12: newtable=data[i++]+4; //printf("(1..12)text encoded in ISO-8859-%d\n", table); break; case 0x10: { //printf("(0x10)text encoded in ISO-8859-%d\n",n); int n=(data[i+1]<<8)|(data[i+2]); i += 3; switch(n) { case 12: {} //eDebug("unsup. ISO8859-12 enc.", n); default: newtable=n; break; } break; } case 0x11:// Basic Multilingual Plane of ISO/IEC 10646-1 enc (UTF-16... Unicode) table = 65; tsidonid = 0; ++i; break; case 0x12: ++i; {} //printf("unsup. KSC 5601 enc.\n"); break; case 0x13: ++i; {} //printf("unsup. GB-2312-1980 enc.\n"); break; case 0x14: ++i; {} //printf("unsup. Big5 subset of ISO/IEC 10646-1 enc.\n"); break; case 0x15: // UTF-8 encoding of ISO/IEC 10646-1 return std::string((char*)data+1, len-1); case 0x1F: { std::string decoded_string = huffmanDecoder.decode((const unsigned char *)data, len); if (!decoded_string.empty()) return decoded_string; } ++i; break; case 0x0: case 0xD ... 0xF: case 0x16 ... 
0x1E: {} //printf("reserved %d\n", data[0]); ++i; break; } if(!table) table = newtable; if(table == 64 && (newtable != 0 )) { //for ISO6937 table = newtable; } //printf("recode:::: tsidonid %X table %d two-char %d len %d\n", tsidonid, table, twochar, len); unsigned char res[2048]; while (i < len) { unsigned long code = 0; if ( i + 1 < len && twochar && (code = doVideoTexSuppl(data[i], data[i+1])) ) { i += 2; } if (!code) { if (table == 65) { // unicode if (i+1 < len) { code = (data[i] << 8) | data[i+1]; i += 2; } } else code = recode(data[i++], table); } if (!code) continue; // Unicode->UTF8 encoding if (code < 0x80) // identity ascii <-> utf8 mapping res[t++] = char(code); else if((table == 5) && (code == 0x8A)) res[t++]= 0x20; else if ((code == 0x8A)) res[t++]= '\n'; // 0x8a is vertical tab. Just use newline for now. else if((code >= 0x80) && (code <= 0x9F)) continue; else if (code < 0x800) // two byte mapping { res[t++] = (code>>6)|0xC0; res[t++] = (code&0x3F)|0x80; } else if (code < 0x10000) // three bytes mapping
// convertDVBUTF8: converts a DVB text field into a UTF-8 eString.
//
// NOTE(review): this snippet is collapsed onto one physical line and stops
// right after the "three bytes mapping" condition; the three-byte branch body,
// the rest of the loop and the final return are not visible here.
//
// Parameters:
//   data     - raw bytes; data[0] is inspected as an encoding selector.
//   len      - number of bytes in data; len == 0 returns "".
//   table    - character table id handed to recode(); may be replaced below.
//   tsidonid - when non-zero, an eString::TransponderDefaultMapping entry for
//              it overrides table; it is also the key for the per-iteration
//              eString::TransponderUseTwoCharMapping lookup in the loop.
//
// Selector handling (switch on data[0]):
//   1..12       table = data[0] + 4; one byte consumed.
//   0x10        two following bytes are combined into n and three bytes are
//               consumed in total; `case 12` logs and then falls through into
//               `default`, so table = n for every n.
//   0x11-0x14   logged as unsupported; one byte consumed, table unchanged.
//   0x0, 0xD-0xF, 0x15-0x1F   logged as reserved; one byte consumed.
//
// Decode loop: when at least two bytes remain, tsidonid is non-zero and is
// present in TransponderUseTwoCharMapping, a pair for which doVideoTexSuppl()
// returns non-zero consumes two bytes; otherwise one byte goes through
// recode(). A zero code emits nothing. Non-zero codes are written to res as
// UTF-8 (one byte below 0x80, two bytes below 0x800).
//
// NOTE(review): `(data[++i]<<8)|(data[++i])` modifies i twice as operands of
//   `|`, which are unsequenced relative to each other, so which byte ends up
//   in the high half is not guaranteed by the language.
// NOTE(review): `eDebug("unsup. ISO8859-12 enc.", n)` passes n although the
//   format string has no conversion specifier.
// NOTE(review): case 0x10 reads two bytes after the selector without any
//   check that len >= 3 in the visible code.
// NOTE(review): no bound on t against sizeof(res) is visible in this part;
//   presumably it lives in the truncated tail - verify.
eString convertDVBUTF8(const unsigned char *data, int len, int table, int tsidonid) { if (!len) return ""; int i=0, t=0; if ( tsidonid ) { std::map<int, int>::iterator it = eString::TransponderDefaultMapping.find(tsidonid); if ( it != eString::TransponderDefaultMapping.end() ) table = it->second; } switch(data[0]) { case 1 ... 12: table=data[i++]+4; // eDebug("(1..12)text encoded in ISO-8859-%d",table); break; case 0x10: { // eDebug("(0x10)text encoded in ISO-8859-%d",n); int n=(data[++i]<<8)|(data[++i]); ++i; switch(n) { case 12: eDebug("unsup. ISO8859-12 enc.", n); default: table=n; break; } break; } case 0x11: eDebug("unsup. Basic Multilingual Plane of ISO/IEC 10646-1 enc."); ++i; break; case 0x12: ++i; eDebug("unsup. KSC 5601 enc."); break; case 0x13: ++i; eDebug("unsup. GB-2312-1980 enc."); break; case 0x14: ++i; eDebug("unsup. Big5 subset of ISO/IEC 10646-1 enc."); break; case 0x0: case 0xD ... 0xF: case 0x15 ... 0x1F: eDebug("reserved %d", data[0]); ++i; break; } unsigned char res[2048]; while (i < len) { unsigned long code=0; if ( i+1 < len && tsidonid && eString::TransponderUseTwoCharMapping.find(tsidonid) != eString::TransponderUseTwoCharMapping.end() && (code=doVideoTexSuppl(data[i], data[i+1])) ) i+=2; if (!code) code=recode(data[i++], table); if (!code) continue; // Unicode->UTF8 encoding if (code < 0x80) // identity ascii <-> utf8 mapping res[t++]=char(code); else if (code < 0x800) // two byte mapping { res[t++]=(code>>6)|0xC0; res[t++]=(code&0x3F)|0x80; } else if (code < 0x10000) // three bytes mapping
// convertDVBUTF8: converts a DVB text field into a UTF-8 std::string.
//
// NOTE(review): this snippet is collapsed onto one physical line and stops
// right after the "three bytes mapping" condition; the three-byte branch body,
// the rest of the loop and the final return are not visible here.
//
// Parameters:
//   data     - raw bytes (plain char); data[0] is inspected as an encoding
//              selector.
//   len      - number of bytes in data; len == 0 returns "".
//   table    - character table id handed to recode(); may be replaced below.
//   tsidonid - when non-zero: a TransponderDefaultMapping entry for it
//              overrides table, and twochar becomes true if it is present in
//              TransponderUseTwoCharMapping.
//
// Selector handling (switch on data[0]); a table announced by the text itself
// is stored in newtable, not directly in table:
//   1..12       newtable = data[0] + 4; one byte consumed.
//   0x10        data[1] and data[2] form a 16-bit n (high byte first), three
//               bytes consumed; `case 12` is empty and falls through into
//               `default`, so newtable = n for every n.
//   0x11-0x14   one byte consumed, nothing else changes.
//   0x0, 0xD-0xF, 0x15-0x1F   one byte consumed.
//
// After the switch, newtable is used only when table is still 0, so a caller-
// or transponder-supplied non-zero table takes precedence over the selector.
//
// Decode loop: with twochar set, a byte pair for which doVideoTexSuppl()
// returns non-zero consumes two bytes; otherwise one byte goes through
// recode(). A zero code emits nothing. Output rules, in order:
//   code < 0x80                  one byte
//   table == 5 and code == 0x8A  a space (0x20)
//   code < 0x800                 two UTF-8 bytes
//
// NOTE(review): data is plain char; if char is signed on the target, bytes
//   >= 0x80 are negative and sign-extend in `(data[i+1]<<8)|(data[i+2])` -
//   confirm intended behaviour.
// NOTE(review): case 0x10 reads data[1] and data[2] without any check that
//   len >= 3 in the visible code.
// NOTE(review): no bound on t against sizeof(res) is visible in this part;
//   presumably it lives in the truncated tail - verify.
std::string convertDVBUTF8(const char *data, int len, int table, int tsidonid) { int newtable = 0; bool twochar = false; if (!len) return ""; int i=0, t=0; if ( tsidonid ) { std::map<int, int>::iterator it = TransponderDefaultMapping.find(tsidonid); if ( it != TransponderDefaultMapping.end() ) table = it->second; twochar = TransponderUseTwoCharMapping.find(tsidonid) != TransponderUseTwoCharMapping.end(); } //printf("table %d tsidonid %04x twochar %d : %20s\n", table, tsidonid, twochar, data); switch(data[0]) { case 1 ... 12: newtable=data[i++]+4; // eDebug("(1..12)text encoded in ISO-8859-%d",table); break; case 0x10: { // eDebug("(0x10)text encoded in ISO-8859-%d",n); int n=(data[i+1]<<8)|(data[i+2]); i += 3; switch(n) { case 12: {} //eDebug("unsup. ISO8859-12 enc.", n); default: newtable=n; break; } break; } case 0x11: {} //eDebug("unsup. Basic Multilingual Plane of ISO/IEC 10646-1 enc."); ++i; break; case 0x12: ++i; {} //eDebug("unsup. KSC 5601 enc."); break; case 0x13: ++i; {} //eDebug("unsup. GB-2312-1980 enc."); break; case 0x14: ++i; {} //eDebug("unsup. Big5 subset of ISO/IEC 10646-1 enc."); break; case 0x0: case 0xD ... 0xF: case 0x15 ... 0x1F: {} //eDebug("reserved %d", data[0]); ++i; break; } if(!table) table = newtable; //dprintf("recode:::: tsidonid %X table %d two-char %d len %d\n", tsidonid, table, twochar, len); unsigned char res[2048]; while (i < len) { unsigned long code=0; if ( i+1 < len && twochar && (code=doVideoTexSuppl(data[i], data[i+1])) ) { i+=2; //dprintf("recode:::: doVideoTexSuppl code %lX\n", code); } if (!code) code=recode(data[i++], table); if (!code) continue; // Unicode->UTF8 encoding if (code < 0x80) // identity ascii <-> utf8 mapping res[t++]=char(code); else if((table == 5) && (code == 0x8A)) res[t++]= 0x20; else if (code < 0x800) // two byte mapping { res[t++]=(code>>6)|0xC0; res[t++]=(code&0x3F)|0x80; } else if (code < 0x10000) // three bytes mapping