/**
 * Reads a length-prefixed byte string from @p input and converts it to an
 * RVNGString according to the code page reported by getCodePage().
 *
 * The 32-bit length prefix is clamped to the number of bytes still available
 * in the stream, so a corrupt prefix cannot cause an over-read.
 *
 * Code page handling:
 *  - 65001: the bytes are taken as UTF-8 and appended as-is (up to the first
 *    NUL byte, since they are appended as a C string).
 *  - 1252:  the bytes are converted with ICU's "windows-1252" converter.
 *  - any other value: no converter is opened and an empty string is returned.
 *
 * @param input stream positioned at the 32-bit length prefix.
 * @return the decoded string; empty if the length is zero or the code page
 *         is not handled.
 */
librevenge::RVNGString libvisio::VSDMetaData::readCodePageString(librevenge::RVNGInputStream *input)
{
  uint32_t size = readU32(input);
  const auto remaining = getRemainingLength(input);
  if (size > remaining)
    size = remaining;

  if (size == 0)
    return librevenge::RVNGString();

  std::vector<unsigned char> characters;
  // size is already clamped to the remaining stream length, so this is bounded.
  characters.reserve(size + 1);
  for (uint32_t i = 0; i < size; ++i)
    characters.push_back(readU8(input));

  const uint32_t codepage = getCodePage();
  librevenge::RVNGString string;

  if (codepage == 65001)
  {
    // http://msdn.microsoft.com/en-us/library/windows/desktop/dd374130%28v=vs.85%29.aspx
    // says this is UTF-8.
    characters.push_back(0);
    string.append(reinterpret_cast<const char *>(characters.data()));
  }
  else
  {
    UErrorCode status = U_ZERO_ERROR;
    UConverter *conv = nullptr;

    switch (codepage)
    {
    case 1252:
      // http://msdn.microsoft.com/en-us/goglobal/bb964654
      conv = ucnv_open("windows-1252", &status);
      break;
    default:
      break;
    }

    if (U_SUCCESS(status) && conv)
    {
      assert(!characters.empty());
      const char *src = reinterpret_cast<const char *>(characters.data());
      const char *const srcLimit = src + characters.size();
      while (src < srcLimit)
      {
        const UChar32 ucs4Character = ucnv_getNextUChar(conv, &src, srcLimit, &status);
        // Once status holds a failure, ICU calls return immediately without
        // consuming input, so continuing could loop forever on the same bytes.
        if (U_FAILURE(status))
          break;
        if (U_IS_UNICODE_CHAR(ucs4Character))
          appendUCS4(string, ucs4Character);
      }
    }

    if (conv)
      ucnv_close(conv);
  }

  return string;
}
/*
 * Per-line callback for the name-alias input.
 *
 * Field 0 is parsed as a hexadecimal code point and field 1 as the alias
 * name (trimmed by getName()). A valid entry is appended to cpNameAliases[]
 * with a terminating NUL and the alias is handed to parseName().
 *
 * Any of the following aborts the tool via exit(U_PARSE_ERROR) after
 * printing a diagnostic and setting *pErrorCode:
 *  - the alias is empty or does not fit the nameAlias buffer,
 *  - the code point is not a Unicode character,
 *  - a non-zero code point is not strictly greater than the previous one,
 *  - cpNameAliases[] is already full.
 *
 * context and fieldCount are not used. Does nothing if *pErrorCode already
 * indicates a failure.
 */
static void U_CALLCONV
nameAliasesLineFn(void *context,
                  char *fields[][2], int32_t fieldCount,
                  UErrorCode *pErrorCode) {
    /* code point of the previously accepted line; persists across calls */
    static uint32_t lastCodePoint=0;
    uint32_t codePoint;
    char *alias;
    int16_t aliasLength;

    if(U_FAILURE(*pErrorCode)) {
        return;
    }

    /* field 0: the code point, in hexadecimal */
    codePoint=uprv_strtoul(fields[0][0], NULL, 16);

    /* field 1: the alias itself */
    alias=fields[1][0];
    aliasLength=getName(&alias, fields[1][1]);

    /* the alias must be non-empty and leave room for the terminating NUL */
    if(!(aliasLength!=0 && aliasLength<sizeof(cpNameAliases[cpNameAliasesTop].nameAlias))) {
        fprintf(stderr, "gennames: error - name alias %s empty or too long for code point U+%04lx\n",
                alias, (unsigned long)codePoint);
        *pErrorCode=U_PARSE_ERROR;
        exit(U_PARSE_ERROR);
    }

    /* aliases are only allowed on real characters */
    if(!U_IS_UNICODE_CHAR(codePoint)) {
        fprintf(stderr, "gennames: error - name alias for non-character code point U+%04lx\n",
                (unsigned long)codePoint);
        *pErrorCode=U_PARSE_ERROR;
        exit(U_PARSE_ERROR);
    }

    /* entries must arrive in ascending order (code point 0 is exempt) */
    if(codePoint>0 && codePoint<=lastCodePoint) {
        fprintf(stderr, "gennames: error - NameAliases entries out of order, U+%04lx after U+%04lx\n",
                (unsigned long)codePoint, (unsigned long)lastCodePoint);
        *pErrorCode=U_PARSE_ERROR;
        exit(U_PARSE_ERROR);
    }
    lastCodePoint=codePoint;

    /* make sure there is still a free slot in the table */
    if(cpNameAliasesTop>=LENGTHOF(cpNameAliases)) {
        fprintf(stderr, "gennames: error - too many name aliases\n");
        *pErrorCode=U_PARSE_ERROR;
        exit(U_PARSE_ERROR);
    }

    /* record the alias as a NUL-terminated string in the next slot */
    cpNameAliases[cpNameAliasesTop].code=codePoint;
    uprv_memcpy(cpNameAliases[cpNameAliasesTop].nameAlias, alias, aliasLength);
    cpNameAliases[cpNameAliasesTop].nameAlias[aliasLength]=0;
    ++cpNameAliasesTop;

    parseName(alias, aliasLength);
}
bool readUTFChar(const char* str, int* begin, int length, unsigned* codePointOut) { int codePoint; // Avoids warning when U8_NEXT writes -1 to it. U8_NEXT(str, *begin, length, codePoint); *codePointOut = static_cast<unsigned>(codePoint); // The ICU macro above moves to the next char, we want to point to the last // char consumed. (*begin)--; // Validate the decoded value. if (U_IS_UNICODE_CHAR(codePoint)) return true; *codePointOut = kUnicodeReplacementCharacter; return false; }
bool readUTFChar(const UChar* str, int* begin, int length, unsigned* codePoint) { if (U16_IS_SURROGATE(str[*begin])) { if (!U16_IS_SURROGATE_LEAD(str[*begin]) || *begin + 1 >= length || !U16_IS_TRAIL(str[*begin + 1])) { // Invalid surrogate pair. *codePoint = kUnicodeReplacementCharacter; return false; } // Valid surrogate pair. *codePoint = U16_GET_SUPPLEMENTARY(str[*begin], str[*begin + 1]); (*begin)++; } else { // Not a surrogate, just one 16-bit word. *codePoint = str[*begin]; } if (U_IS_UNICODE_CHAR(*codePoint)) return true; // Invalid code point. *codePoint = kUnicodeReplacementCharacter; return false; }
static void U_CALLCONV lineFn(void *context, char *fields[][2], int32_t fieldCount, UErrorCode *pErrorCode) { Options *storeOptions=(Options *)context; char *names[4]; int16_t lengths[4]={ 0, 0, 0, 0 }; static uint32_t prevCode=0; uint32_t code=0; if(U_FAILURE(*pErrorCode)) { return; } /* get the character code */ code=uprv_strtoul(fields[0][0], NULL, 16); /* get the character name */ if(storeOptions->storeNames) { names[0]=fields[1][0]; lengths[0]=getName(names+0, fields[1][1]); if(names[0][0]=='<') { /* do not store pseudo-names in <> brackets */ lengths[0]=0; } } /* store 1.0 names */ /* get the second character name, the one from Unicode 1.0 */ if(storeOptions->store10Names) { names[1]=fields[10][0]; lengths[1]=getName(names+1, fields[10][1]); if(names[1][0]=='<') { /* do not store pseudo-names in <> brackets */ lengths[1]=0; } } /* get the ISO 10646 comment */ if(storeOptions->storeISOComments) { names[2]=fields[11][0]; lengths[2]=getName(names+2, fields[11][1]); } if(lengths[0]+lengths[1]+lengths[2]==0) { return; } /* check for non-character code points */ if(!U_IS_UNICODE_CHAR(code)) { fprintf(stderr, "gennames: error - properties for non-character code point U+%04lx\n", (unsigned long)code); *pErrorCode=U_PARSE_ERROR; exit(U_PARSE_ERROR); } /* check that the code points (code) are in ascending order */ if(code<=prevCode && code>0) { fprintf(stderr, "gennames: error - UnicodeData entries out of order, U+%04lx after U+%04lx\n", (unsigned long)code, (unsigned long)prevCode); *pErrorCode=U_PARSE_ERROR; exit(U_PARSE_ERROR); } prevCode=code; parseName(names[0], lengths[0]); parseName(names[1], lengths[1]); parseName(names[2], lengths[2]); if(cpNameAliasesIndex<cpNameAliasesTop && code>=cpNameAliases[cpNameAliasesIndex].code) { if(code==cpNameAliases[cpNameAliasesIndex].code) { names[3]=cpNameAliases[cpNameAliasesIndex].nameAlias; lengths[3]=(int16_t)uprv_strlen(cpNameAliases[cpNameAliasesIndex].nameAlias); ++cpNameAliasesIndex; } else { fprintf(stderr, "gennames: 
error - NameAlias but no UnicodeData entry for U+%04lx\n", (unsigned long)code); *pErrorCode=U_PARSE_ERROR; exit(U_PARSE_ERROR); } } /* * set the count argument to * 1: only store regular names, or only store ISO 10646 comments * 2: store regular and 1.0 names * 3: store names and ISO 10646 comment * 4: also store name alias * * addLine() will ignore empty trailing names */ if(storeOptions->storeNames) { /* store names and comments as parsed according to storeOptions */ addLine(code, names, lengths, LENGTHOF(names)); } else { /* store only ISO 10646 comments */ addLine(code, names+2, lengths+2, 1); } }