示例#1
0
librevenge::RVNGString libvisio::VSDMetaData::readCodePageString(librevenge::RVNGInputStream *input)
{
  uint32_t size = readU32(input);
  if (size > getRemainingLength(input))
    size = getRemainingLength(input);

  if (size == 0)
    return librevenge::RVNGString();

  std::vector<unsigned char> characters;
  for (uint32_t i = 0; i < size; ++i)
    characters.push_back(readU8(input));

  uint32_t codepage = getCodePage();
  librevenge::RVNGString string;

  if (codepage == 65001)
  {
    // http://msdn.microsoft.com/en-us/library/windows/desktop/dd374130%28v=vs.85%29.aspx
    // says this is UTF-8.
    characters.push_back(0);
    string.append(reinterpret_cast<const char *>(characters.data()));
  }
  else
  {
    UErrorCode status = U_ZERO_ERROR;
    UConverter *conv = nullptr;

    switch (codepage)
    {
    case 1252:
      // http://msdn.microsoft.com/en-us/goglobal/bb964654
      conv = ucnv_open("windows-1252", &status);
      break;
    }

    if (U_SUCCESS(status) && conv)
    {
      assert(!characters.empty());
      const auto *src = (const char *)&characters[0];
      const char *srcLimit = (const char *)src + characters.size();
      while (src < srcLimit)
      {
        UChar32 ucs4Character = ucnv_getNextUChar(conv, &src, srcLimit, &status);
        if (U_SUCCESS(status) && U_IS_UNICODE_CHAR(ucs4Character))
          appendUCS4(string, ucs4Character);
      }
    }

    if (conv)
      ucnv_close(conv);
  }

  return string;
}
示例#2
0
static void U_CALLCONV
nameAliasesLineFn(void *context,
       char *fields[][2], int32_t fieldCount,
       UErrorCode *pErrorCode) {
    char *name;
    int16_t length=0;
    static uint32_t prevCode=0;
    uint32_t code=0;

    if(U_FAILURE(*pErrorCode)) {
        return;
    }
    /* get the character code */
    code=uprv_strtoul(fields[0][0], NULL, 16);

    /* get the character name */
    name=fields[1][0];
    length=getName(&name, fields[1][1]);
    if(length==0 || length>=sizeof(cpNameAliases[cpNameAliasesTop].nameAlias)) {
        fprintf(stderr, "gennames: error - name alias %s empty or too long for code point U+%04lx\n",
                name, (unsigned long)code);
        *pErrorCode=U_PARSE_ERROR;
        exit(U_PARSE_ERROR);
    }

    /* check for non-character code points */
    if(!U_IS_UNICODE_CHAR(code)) {
        fprintf(stderr, "gennames: error - name alias for non-character code point U+%04lx\n",
                (unsigned long)code);
        *pErrorCode=U_PARSE_ERROR;
        exit(U_PARSE_ERROR);
    }

    /* check that the code points (code) are in ascending order */
    if(code<=prevCode && code>0) {
        fprintf(stderr, "gennames: error - NameAliases entries out of order, U+%04lx after U+%04lx\n",
                (unsigned long)code, (unsigned long)prevCode);
        *pErrorCode=U_PARSE_ERROR;
        exit(U_PARSE_ERROR);
    }
    prevCode=code;

    if(cpNameAliasesTop>=LENGTHOF(cpNameAliases)) {
        fprintf(stderr, "gennames: error - too many name aliases\n");
        *pErrorCode=U_PARSE_ERROR;
        exit(U_PARSE_ERROR);
    }
    cpNameAliases[cpNameAliasesTop].code=code;
    uprv_memcpy(cpNameAliases[cpNameAliasesTop].nameAlias, name, length);
    cpNameAliases[cpNameAliasesTop].nameAlias[length]=0;
    ++cpNameAliasesTop;

    parseName(name, length);
}
示例#3
0
bool readUTFChar(const char* str, int* begin, int length, unsigned* codePointOut)
{
    int codePoint; // Avoids warning when U8_NEXT writes -1 to it.
    U8_NEXT(str, *begin, length, codePoint);
    *codePointOut = static_cast<unsigned>(codePoint);

    // The ICU macro above moves to the next char, we want to point to the last
    // char consumed.
    (*begin)--;

    // Validate the decoded value.
    if (U_IS_UNICODE_CHAR(codePoint))
        return true;
    *codePointOut = kUnicodeReplacementCharacter;
    return false;
}
示例#4
0
bool readUTFChar(const UChar* str, int* begin, int length, unsigned* codePoint)
{
    if (U16_IS_SURROGATE(str[*begin])) {
        if (!U16_IS_SURROGATE_LEAD(str[*begin]) || *begin + 1 >= length || !U16_IS_TRAIL(str[*begin + 1])) {
            // Invalid surrogate pair.
            *codePoint = kUnicodeReplacementCharacter;
            return false;
        }

        // Valid surrogate pair.
        *codePoint = U16_GET_SUPPLEMENTARY(str[*begin], str[*begin + 1]);
        (*begin)++;
    } else {
        // Not a surrogate, just one 16-bit word.
        *codePoint = str[*begin];
    }

    if (U_IS_UNICODE_CHAR(*codePoint))
        return true;

    // Invalid code point.
    *codePoint = kUnicodeReplacementCharacter;
    return false;
}
示例#5
0
static void U_CALLCONV
lineFn(void *context,
       char *fields[][2], int32_t fieldCount,
       UErrorCode *pErrorCode) {
    Options *storeOptions=(Options *)context;
    char *names[4];
    int16_t lengths[4]={ 0, 0, 0, 0 };
    static uint32_t prevCode=0;
    uint32_t code=0;

    if(U_FAILURE(*pErrorCode)) {
        return;
    }
    /* get the character code */
    code=uprv_strtoul(fields[0][0], NULL, 16);

    /* get the character name */
    if(storeOptions->storeNames) {
        names[0]=fields[1][0];
        lengths[0]=getName(names+0, fields[1][1]);
        if(names[0][0]=='<') {
            /* do not store pseudo-names in <> brackets */
            lengths[0]=0;
        }
    }

    /* store 1.0 names */
    /* get the second character name, the one from Unicode 1.0 */
    if(storeOptions->store10Names) {
        names[1]=fields[10][0];
        lengths[1]=getName(names+1, fields[10][1]);
        if(names[1][0]=='<') {
            /* do not store pseudo-names in <> brackets */
            lengths[1]=0;
        }
    }

    /* get the ISO 10646 comment */
    if(storeOptions->storeISOComments) {
        names[2]=fields[11][0];
        lengths[2]=getName(names+2, fields[11][1]);
    }

    if(lengths[0]+lengths[1]+lengths[2]==0) {
        return;
    }

    /* check for non-character code points */
    if(!U_IS_UNICODE_CHAR(code)) {
        fprintf(stderr, "gennames: error - properties for non-character code point U+%04lx\n",
                (unsigned long)code);
        *pErrorCode=U_PARSE_ERROR;
        exit(U_PARSE_ERROR);
    }

    /* check that the code points (code) are in ascending order */
    if(code<=prevCode && code>0) {
        fprintf(stderr, "gennames: error - UnicodeData entries out of order, U+%04lx after U+%04lx\n",
                (unsigned long)code, (unsigned long)prevCode);
        *pErrorCode=U_PARSE_ERROR;
        exit(U_PARSE_ERROR);
    }
    prevCode=code;

    parseName(names[0], lengths[0]);
    parseName(names[1], lengths[1]);
    parseName(names[2], lengths[2]);

    if(cpNameAliasesIndex<cpNameAliasesTop && code>=cpNameAliases[cpNameAliasesIndex].code) {
        if(code==cpNameAliases[cpNameAliasesIndex].code) {
            names[3]=cpNameAliases[cpNameAliasesIndex].nameAlias;
            lengths[3]=(int16_t)uprv_strlen(cpNameAliases[cpNameAliasesIndex].nameAlias);
            ++cpNameAliasesIndex;
        } else {
            fprintf(stderr, "gennames: error - NameAlias but no UnicodeData entry for U+%04lx\n",
                    (unsigned long)code);
            *pErrorCode=U_PARSE_ERROR;
            exit(U_PARSE_ERROR);
        }
    }

    /*
     * set the count argument to
     * 1: only store regular names, or only store ISO 10646 comments
     * 2: store regular and 1.0 names
     * 3: store names and ISO 10646 comment
     * 4: also store name alias
     *
     * addLine() will ignore empty trailing names
     */
    if(storeOptions->storeNames) {
        /* store names and comments as parsed according to storeOptions */
        addLine(code, names, lengths, LENGTHOF(names));
    } else {
        /* store only ISO 10646 comments */
        addLine(code, names+2, lengths+2, 1);
    }
}