Пример #1
0
        bool IsIdContinue(codepoint_t ch)
        {
#ifdef HAS_REAL_ICU
            if (u_hasBinaryProperty(ch, UCHAR_ID_CONTINUE) == 1)
            {
                return true;
            }
#endif
            // Following codepoints are treated as part of ID_Continue
            // for backwards compatibility as per section 2.5 of the Unicode 8 spec
            // See http://www.unicode.org/reports/tr31/tr31-23.html#Backward_Compatibility
            // The exact list is in PropList.txt in the Unicode database
            switch (ch)
            {
            case 0x00B7: return true; // MIDDLE DOT
            case 0x0387: return true; // GREEK ANO TELEIA
            case 0x1369: return true; // ETHIOPIC DIGIT ONE
            case 0x136A: return true; // ETHIOPIC DIGIT TWO
            case 0x136B: return true; // ETHIOPIC DIGIT THREE
            case 0x136C: return true; // ETHIOPIC DIGIT FOUR
            case 0x136D: return true; // ETHIOPIC DIGIT FIVE
            case 0x136E: return true; // ETHIOPIC DIGIT SIX
            case 0x136F: return true; // ETHIOPIC DIGIT SEVEN
            case 0x1370: return true; // ETHIOPIC DIGIT EIGHT
            case 0x1371: return true; // ETHIOPIC DIGIT NINE
            case 0x19DA: return true; // NEW TAI LUE THAM DIGIT ONE
            default: return false;
            }
        }
bool SmallCapsIterator::consume(unsigned *capsLimit, SmallCapsBehavior* smallCapsBehavior)
{
    if (m_atEnd)
        return false;

    while (m_utf16Iterator->consume(m_nextUChar32)) {
        m_previousSmallCapsBehavior = m_currentSmallCapsBehavior;
        // Skipping over combining marks, as these combine with the small-caps
        // uppercased text as well and we do not need to split by their
        // individual case-ness.
        if (!u_getCombiningClass(m_nextUChar32)) {
            m_currentSmallCapsBehavior = u_hasBinaryProperty(m_nextUChar32, UCHAR_CHANGES_WHEN_UPPERCASED)
                ? SmallCapsUppercaseNeeded
                : SmallCapsSameCase;
        }

        if (m_previousSmallCapsBehavior != m_currentSmallCapsBehavior
            && m_previousSmallCapsBehavior != SmallCapsInvalid) {
            *capsLimit = m_utf16Iterator->offset();
            *smallCapsBehavior = m_previousSmallCapsBehavior;
            return true;
        }
        m_utf16Iterator->advance();
    }
    *capsLimit = m_bufferSize;
    *smallCapsBehavior = m_currentSmallCapsBehavior;
    m_atEnd = true;
    return true;
}
bool SimpleFontData::canRenderCombiningCharacterSequence(const UChar* characters, size_t length) const
{
    if (!m_combiningCharacterSequenceSupport)
        m_combiningCharacterSequenceSupport = adoptPtr(new HashMap<String, bool>);

    WTF::HashMap<String, bool>::AddResult addResult = m_combiningCharacterSequenceSupport->add(String(characters, length), false);
    if (!addResult.isNewEntry)
        return addResult.iterator->value;

    UErrorCode error = U_ZERO_ERROR;
    Vector<UChar, 4> normalizedCharacters(length);
    int32_t normalizedLength = unorm_normalize(characters, length, UNORM_NFC, UNORM_UNICODE_3_2, &normalizedCharacters[0], length, &error);
    if (U_FAILURE(error))
        return false;

    int position = 0;
    while (position < normalizedLength) {
        UChar32 character;
        int nextPosition = position;
        U16_NEXT(normalizedCharacters, nextPosition, normalizedLength, character);

        if (!u_hasBinaryProperty(character, UCHAR_DEFAULT_IGNORABLE_CODE_POINT)) {
            FS_USHORT glyph = FS_map_char(m_platformData.font(), static_cast<FS_ULONG>(character));
            if (!glyph)
                return false;
        }

        position = nextPosition;
    }

    addResult.iterator->value = true;
    return true;
}
Пример #4
0
 static bool match( Input& in ) noexcept( noexcept( Peek::peek( in ) ) )
 {
    if( const auto r = Peek::peek( in ) ) {
       if( u_hasBinaryProperty( r.data, P ) == V ) {
          in.bump( r.size );
          return true;
       }
    }
    return false;
 }
Пример #5
0
Validator::CharClass ValidateGrapheme::UnicodeToCharClass(char32 ch) const {
  if (IsVedicAccent(ch)) return CharClass::kVedicMark;
  // The ZeroWidth[Non]Joiner characters are mapped to kCombiner as they
  // always combine with the previous character.
  if (u_hasBinaryProperty(ch, UCHAR_GRAPHEME_LINK)) return CharClass::kVirama;
  if (u_isUWhiteSpace(ch)) return CharClass::kWhitespace;
  int char_type = u_charType(ch);
  if (char_type == U_NON_SPACING_MARK || char_type == U_ENCLOSING_MARK ||
      char_type == U_COMBINING_SPACING_MARK || ch == kZeroWidthNonJoiner ||
      ch == kZeroWidthJoiner)
    return CharClass::kCombiner;
  return CharClass::kOther;
}
Пример #6
0
//------------------------------------------------------------------------------
//
//  stripRules    Return a rules string without extra spaces.
//                (Comments are removed separately, during rule parsing.)
//
//------------------------------------------------------------------------------
UnicodeString RBBIRuleScanner::stripRules(const UnicodeString &rules) {
    UnicodeString strippedRules;
    int32_t rulesLength = rules.length();
    bool skippingSpaces = false;

    for (int32_t idx=0; idx<rulesLength; idx = rules.moveIndex32(idx, 1)) {
        UChar32 cp = rules.char32At(idx);
        bool whiteSpace = u_hasBinaryProperty(cp, UCHAR_PATTERN_WHITE_SPACE);
        if (skippingSpaces && whiteSpace) {
            continue;
        }
        strippedRules.append(cp);
        skippingSpaces = whiteSpace;
    }
    return strippedRules;
}
Пример #7
0
HqBool unicode_has_binary_property(UTF32 c,
                                   UTF32_Property which,
                                   HqBool *error_occured )
{
  HQASSERT(error_occured != NULL, "error_occured is NULL") ;
  HQASSERT((int)UCHAR_BINARY_START == (int)UTF32_BINARY_START,
           "icu and hqn enumerations appear to be out of synch") ;
  HQASSERT((int)UCHAR_STRING_LIMIT == (int)UTF32_STRING_LIMIT,
           "icu and hqn enumerations appear to be out of synch") ;

  *error_occured = FALSE ;

  if ( !unicode_icu_ready() ) {
    *error_occured = TRUE ;
    return FALSE ;
  }

  return (HqBool)u_hasBinaryProperty((UChar32)c, (UProperty)which) ;
}
Пример #8
0
static int
u_iscclass(PARROT_INTERP, UINTVAL codepoint, INTVAL flags)
{
    ASSERT_ARGS(u_iscclass)
#if PARROT_HAS_ICU
    UNUSED(interp);
            /* XXX which one
               return u_charDigitValue(codepoint);
               */
    if ((flags & enum_cclass_uppercase)    && u_isupper(codepoint))  return 1;
    if ((flags & enum_cclass_lowercase)    && u_islower(codepoint))  return 1;
    if ((flags & enum_cclass_alphabetic)   && u_isalpha(codepoint))  return 1;
    if ((flags & enum_cclass_numeric)      && u_isdigit(codepoint))  return 1;
    if ((flags & enum_cclass_hexadecimal)  && u_isxdigit(codepoint)) return 1;
    if ((flags & enum_cclass_whitespace)   && u_isspace(codepoint))  return 1;
    if ((flags & enum_cclass_printing)     && u_isprint(codepoint))  return 1;
    if ((flags & enum_cclass_graphical)    && u_isgraph(codepoint))  return 1;
    if ((flags & enum_cclass_blank)        && u_isblank(codepoint))  return 1;
    if ((flags & enum_cclass_control)      && u_iscntrl(codepoint))  return 1;
    if ((flags & enum_cclass_alphanumeric) && u_isalnum(codepoint))  return 1;
    if ((flags & enum_cclass_word)         &&
        (u_isalnum(codepoint) || codepoint == '_'))                  return 1;
    if ((flags & enum_cclass_newline)      &&
        (codepoint == 0x2028 || codepoint == 0x2029 ||
         u_hasBinaryProperty(codepoint, UCHAR_LINE_BREAK)))          return 1;

    return 0;
#else
    if (codepoint < 256)
        return (Parrot_iso_8859_1_typetable[codepoint] & flags) ? 1 : 0;

    if (flags == enum_cclass_any)
        return 1;

    /* All codepoints from u+0100 to u+02af are alphabetic, so we
     * cheat on the WORD and ALPHABETIC properties to include these
     * (and incorrectly exclude all others).  This is a stopgap until
     * ICU is everywhere, or we have better non-ICU unicode support. */
    if (flags == enum_cclass_word || flags == enum_cclass_alphabetic)
        return (codepoint < 0x2b0);

    if (flags & enum_cclass_whitespace) {
        /* from http://www.unicode.org/Public/UNIDATA/PropList.txt */
        switch (codepoint) {
          case 0x1680: case 0x180e: case 0x2000: case 0x2001:
          case 0x2002: case 0x2003: case 0x2004: case 0x2005:
          case 0x2006: case 0x2007: case 0x2008: case 0x2009:
          case 0x200a: case 0x2028: case 0x2029: case 0x202f:
          case 0x205f: case 0x3000:
            return 1;
          default:
            break;
        }
    }

    if (flags & enum_cclass_numeric) {
        /* from http://www.unicode.org/Public/UNIDATA/UnicodeData.txt */
        if (codepoint >= 0x0660 && codepoint <= 0x0669) return 1;
        if (codepoint >= 0x06f0 && codepoint <= 0x06f9) return 1;
        if (codepoint >= 0x07c0 && codepoint <= 0x07c9) return 1;
        if (codepoint >= 0x0966 && codepoint <= 0x096f) return 1;
        if (codepoint >= 0x09e6 && codepoint <= 0x09ef) return 1;
        if (codepoint >= 0x0a66 && codepoint <= 0x0a6f) return 1;
        if (codepoint >= 0x0ae6 && codepoint <= 0x0aef) return 1;
        if (codepoint >= 0x0b66 && codepoint <= 0x0b6f) return 1;
        if (codepoint >= 0x0be6 && codepoint <= 0x0bef) return 1;
        if (codepoint >= 0x0c66 && codepoint <= 0x0c6f) return 1;
        if (codepoint >= 0x0ce6 && codepoint <= 0x0cef) return 1;
        if (codepoint >= 0x0d66 && codepoint <= 0x0d6f) return 1;
        if (codepoint >= 0x0e50 && codepoint <= 0x0e59) return 1;
        if (codepoint >= 0x0ed0 && codepoint <= 0x0ed9) return 1;
        if (codepoint >= 0x0f20 && codepoint <= 0x0f29) return 1;
        if (codepoint >= 0x1040 && codepoint <= 0x1049) return 1;
        if (codepoint >= 0x17e0 && codepoint <= 0x17e9) return 1;
        if (codepoint >= 0x1810 && codepoint <= 0x1819) return 1;
        if (codepoint >= 0x1946 && codepoint <= 0x194f) return 1;
        if (codepoint >= 0x19d0 && codepoint <= 0x19d9) return 1;
        if (codepoint >= 0x1b50 && codepoint <= 0x1b59) return 1;
        if (codepoint >= 0xff10 && codepoint <= 0xff19) return 1;
    }

    if (flags & enum_cclass_newline) {
        /* from http://www.unicode.org/Public/UNIDATA/extracted/DerivedLineBreak.txt
         * Line_Break=Mandatory_Break*/
        if (codepoint == 0x2028 || codepoint == 0x2029) return 1;
    }

    if (flags & ~(enum_cclass_whitespace | enum_cclass_numeric | enum_cclass_newline))
        Parrot_ex_throw_from_c_noargs(interp, EXCEPTION_LIBRARY_ERROR,
            "no ICU lib loaded");

    return 0;
#endif
}
Пример #9
0
Variant HHVM_STATIC_METHOD(IntlChar, hasBinaryProperty,
                           const Variant& arg, int64_t prop) {
  GETCP(arg, cp);
  return (bool)u_hasBinaryProperty(cp, (UProperty)prop);
}
Пример #10
0
void UnicodeTest::TestAdditionalProperties() {
#if !UCONFIG_NO_NORMALIZATION
    // test DerivedCoreProperties.txt and DerivedNormalizationProps.txt
    if(LENGTHOF(derivedProps)<LENGTHOF(derivedPropsNames)) {
        errln("error: UnicodeTest::derivedProps[] too short, need at least %d UnicodeSets\n",
              LENGTHOF(derivedPropsNames));
        return;
    }
    if(LENGTHOF(derivedPropsIndex)!=LENGTHOF(derivedPropsNames)) {
        errln("error in ucdtest.cpp: LENGTHOF(derivedPropsIndex)!=LENGTHOF(derivedPropsNames)\n");
        return;
    }

    char newPath[256];
    char backupPath[256];
    char *fields[2][2];
    UErrorCode errorCode=U_ZERO_ERROR;

    /* Look inside ICU_DATA first */
    strcpy(newPath, pathToDataDirectory());
    strcat(newPath, "unidata" U_FILE_SEP_STRING "DerivedCoreProperties.txt");

    // As a fallback, try to guess where the source data was located
    // at the time ICU was built, and look there.
#   ifdef U_TOPSRCDIR
        strcpy(backupPath, U_TOPSRCDIR  U_FILE_SEP_STRING "data");
#   else
        strcpy(backupPath, loadTestData(errorCode));
        strcat(backupPath, U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING "data");
#   endif
    strcat(backupPath, U_FILE_SEP_STRING);
    strcat(backupPath, "unidata" U_FILE_SEP_STRING "DerivedCoreProperties.txt");

    char *path=newPath;
    u_parseDelimitedFile(newPath, ';', fields, 2, derivedPropsLineFn, this, &errorCode);

    if(errorCode==U_FILE_ACCESS_ERROR) {
        errorCode=U_ZERO_ERROR;
        path=backupPath;
        u_parseDelimitedFile(backupPath, ';', fields, 2, derivedPropsLineFn, this, &errorCode);
    }
    if(U_FAILURE(errorCode)) {
        errln("error parsing DerivedCoreProperties.txt: %s\n", u_errorName(errorCode));
        return;
    }
    char *basename=path+strlen(path)-strlen("DerivedCoreProperties.txt");
    strcpy(basename, "DerivedNormalizationProps.txt");
    u_parseDelimitedFile(path, ';', fields, 2, derivedPropsLineFn, this, &errorCode);
    if(U_FAILURE(errorCode)) {
        errln("error parsing DerivedNormalizationProps.txt: %s\n", u_errorName(errorCode));
        return;
    }

    // now we have all derived core properties in the UnicodeSets
    // run them all through the API
    int32_t rangeCount, range;
    uint32_t i;
    UChar32 start, end;

    // test all TRUE properties
    for(i=0; i<LENGTHOF(derivedPropsNames); ++i) {
        rangeCount=derivedProps[i].getRangeCount();
        for(range=0; range<rangeCount && numErrors[i]<MAX_ERRORS; ++range) {
            start=derivedProps[i].getRangeStart(range);
            end=derivedProps[i].getRangeEnd(range);
            for(; start<=end; ++start) {
                if(!u_hasBinaryProperty(start, derivedPropsIndex[i])) {
                    dataerrln("UnicodeTest error: u_hasBinaryProperty(U+%04lx, %s)==FALSE is wrong", start, derivedPropsNames[i]);
                    if(++numErrors[i]>=MAX_ERRORS) {
                      dataerrln("Too many errors, moving to the next test");
                      break;
                    }
                }
            }
        }
    }

    // invert all properties
    for(i=0; i<LENGTHOF(derivedPropsNames); ++i) {
        derivedProps[i].complement();
    }

    // test all FALSE properties
    for(i=0; i<LENGTHOF(derivedPropsNames); ++i) {
        rangeCount=derivedProps[i].getRangeCount();
        for(range=0; range<rangeCount && numErrors[i]<MAX_ERRORS; ++range) {
            start=derivedProps[i].getRangeStart(range);
            end=derivedProps[i].getRangeEnd(range);
            for(; start<=end; ++start) {
                if(u_hasBinaryProperty(start, derivedPropsIndex[i])) {
                    errln("UnicodeTest error: u_hasBinaryProperty(U+%04lx, %s)==TRUE is wrong\n", start, derivedPropsNames[i]);
                    if(++numErrors[i]>=MAX_ERRORS) {
                      errln("Too many errors, moving to the next test");
                      break;
                    }
                }
            }
        }
    }
#endif /* !UCONFIG_NO_NORMALIZATION */
}
Пример #11
0
U_CAPI int32_t U_EXPORT2
u_getIntPropertyValue(UChar32 c, UProperty which) {
    UErrorCode errorCode;

    if(which<UCHAR_BINARY_START) {
        return 0; /* undefined */
    } else if(which<UCHAR_BINARY_LIMIT) {
        return (int32_t)u_hasBinaryProperty(c, which);
    } else if(which<UCHAR_INT_START) {
        return 0; /* undefined */
    } else if(which<UCHAR_INT_LIMIT) {
        switch(which) {
        case UCHAR_BIDI_CLASS:
            return (int32_t)u_charDirection(c);
        case UCHAR_BLOCK:
            return (int32_t)ublock_getCode(c);
#if !UCONFIG_NO_NORMALIZATION
        case UCHAR_CANONICAL_COMBINING_CLASS:
            return u_getCombiningClass(c);
#endif
        case UCHAR_DECOMPOSITION_TYPE:
            return (int32_t)(u_getUnicodeProperties(c, 2)&UPROPS_DT_MASK);
        case UCHAR_EAST_ASIAN_WIDTH:
            return (int32_t)(u_getUnicodeProperties(c, 0)&UPROPS_EA_MASK)>>UPROPS_EA_SHIFT;
        case UCHAR_GENERAL_CATEGORY:
            return (int32_t)u_charType(c);
        case UCHAR_JOINING_GROUP:
            return ubidi_getJoiningGroup(GET_BIDI_PROPS(), c);
        case UCHAR_JOINING_TYPE:
            return ubidi_getJoiningType(GET_BIDI_PROPS(), c);
        case UCHAR_LINE_BREAK:
            return (int32_t)(u_getUnicodeProperties(c, UPROPS_LB_VWORD)&UPROPS_LB_MASK)>>UPROPS_LB_SHIFT;
        case UCHAR_NUMERIC_TYPE: {
            int32_t ntv=(int32_t)GET_NUMERIC_TYPE_VALUE(u_getUnicodeProperties(c, -1));
            return UPROPS_NTV_GET_TYPE(ntv);
        }
        case UCHAR_SCRIPT:
            errorCode=U_ZERO_ERROR;
            return (int32_t)uscript_getScript(c, &errorCode);
        case UCHAR_HANGUL_SYLLABLE_TYPE: {
            /* see comments on gcbToHst[] above */
            int32_t gcb=(int32_t)(u_getUnicodeProperties(c, 2)&UPROPS_GCB_MASK)>>UPROPS_GCB_SHIFT;
            if(gcb<LENGTHOF(gcbToHst)) {
                return gcbToHst[gcb];
            } else {
                return U_HST_NOT_APPLICABLE;
            }
        }
#if !UCONFIG_NO_NORMALIZATION
        case UCHAR_NFD_QUICK_CHECK:
        case UCHAR_NFKD_QUICK_CHECK:
        case UCHAR_NFC_QUICK_CHECK:
        case UCHAR_NFKC_QUICK_CHECK:
            return (int32_t)unorm_getQuickCheck(c, (UNormalizationMode)(which-UCHAR_NFD_QUICK_CHECK+UNORM_NFD));
        case UCHAR_LEAD_CANONICAL_COMBINING_CLASS:
            return getFCD16(c)>>8;
        case UCHAR_TRAIL_CANONICAL_COMBINING_CLASS:
            return getFCD16(c)&0xff;
#endif
        case UCHAR_GRAPHEME_CLUSTER_BREAK:
            return (int32_t)(u_getUnicodeProperties(c, 2)&UPROPS_GCB_MASK)>>UPROPS_GCB_SHIFT;
        case UCHAR_SENTENCE_BREAK:
            return (int32_t)(u_getUnicodeProperties(c, 2)&UPROPS_SB_MASK)>>UPROPS_SB_SHIFT;
        case UCHAR_WORD_BREAK:
            return (int32_t)(u_getUnicodeProperties(c, 2)&UPROPS_WB_MASK)>>UPROPS_WB_SHIFT;
        default:
            return 0; /* undefined */
        }
    } else if(which==UCHAR_GENERAL_CATEGORY_MASK) {
Пример #12
0
static inline bool isIllegalCombiningDotLeadCharacterNoLookup(UChar32 cp) {
    return cp == u'i' || cp == u'j' || cp == u'ı' || cp == u'ȷ' || cp == u'l' ||
           u_hasBinaryProperty(cp, UCHAR_SOFT_DOTTED);
}
Пример #13
0
/*
 * imp: common/uprops.cpp
 * hdr: common/unicode/uchar.h
 * @stable ICU 2.1
 */
U_CAPI UBool U_EXPORT2
u_hasBinaryProperty_4_0(UChar32 c, UProperty which)
{
    return u_hasBinaryProperty(c, which);
}
Пример #14
0
    bool CheckString(CatalogItemPtr item, const wxString& source, const wxString& translation) override
    {
        const UChar32 s_last = source.Last();
        const UChar32 t_last = translation.Last();
        const bool s_punct = u_ispunct(s_last);
        const bool t_punct = u_ispunct(t_last);

        if (u_getIntPropertyValue(s_last, UCHAR_BIDI_PAIRED_BRACKET_TYPE) == U_BPT_CLOSE ||
            u_getIntPropertyValue(t_last, UCHAR_BIDI_PAIRED_BRACKET_TYPE) == U_BPT_CLOSE)
        {
            // too many reordering related false positives for brackets
            // e.g. "your {site} account" -> "váš účet na {site}"
            if ((wchar_t)u_getBidiPairedBracket(s_last) != (wchar_t)source[0])
            {
                return false;
            }
            else
            {
                // OTOH, it's desirable to check strings fully enclosed in brackets like "(unsaved)"
                if (source.find_first_of((wchar_t)s_last, 1) != source.size() - 1)
                {
                    // it's more complicated, possibly something like "your {foo} on {bar}"
                    return false;
                }
            }
        }

        if (u_hasBinaryProperty(s_last, UCHAR_QUOTATION_MARK) || (!s_punct && u_hasBinaryProperty(t_last, UCHAR_QUOTATION_MARK)))
        {
            // quoted fragments can move around, e.g., so ignore quotes in reporting:
            //      >> Invalid value for ‘{fieldName}’​ field
            //      >> Valor inválido para el campo ‘{fieldName}’
            // TODO: count quote characters to check if used correctly in translation; don't check position
            return false;
        }

        if (s_punct && !t_punct)
        {
            item->SetIssue(CatalogItem::Issue::Warning,
                           wxString::Format(_(L"The translation should end with “%s”."), wxString(wxUniChar(s_last))));
            return true;
        }
        else if (!s_punct && t_punct)
        {
            item->SetIssue(CatalogItem::Issue::Warning,
                           wxString::Format(_(L"The translation should not end with “%s”."), wxString(wxUniChar(t_last))));
            return true;
        }
        else if (s_punct && t_punct && s_last != t_last)
        {
            if (t_last == L'…' && source.EndsWith("..."))
            {
                // as a special case, allow translating ... (3 dots) as … (ellipsis)
            }
            else if (u_hasBinaryProperty(s_last, UCHAR_QUOTATION_MARK) && u_hasBinaryProperty(t_last, UCHAR_QUOTATION_MARK))
            {
                // don't check for correct quotes for now, accept any quotations marks as equal
            }
            else if (IsEquivalent(s_last, t_last))
            {
                // some characters are mostly equivalent and we shouldn't warn about them
            }
            else
            {
                item->SetIssue(CatalogItem::Issue::Warning,
                               wxString::Format(_(L"The translation ends with “%s”, but the source text ends with “%s”."),
                                                wxString(wxUniChar(t_last)), wxString(wxUniChar(s_last))));
                return true;
            }
        }

        return false;
    }
Пример #15
0
swift::__swift_stdlib_UBool
swift::__swift_stdlib_u_hasBinaryProperty(__swift_stdlib_UChar32 c,
                                          __swift_stdlib_UProperty p) {
  return u_hasBinaryProperty(c, static_cast<UProperty>(p));
}
Пример #16
0
void UnicodeTest::TestAdditionalProperties() {
#if !UCONFIG_NO_NORMALIZATION
    // test DerivedCoreProperties.txt and DerivedNormalizationProps.txt
    if(UPRV_LENGTHOF(derivedProps)<UPRV_LENGTHOF(derivedPropsNames)) {
        errln("error: UnicodeTest::derivedProps[] too short, need at least %d UnicodeSets\n",
              UPRV_LENGTHOF(derivedPropsNames));
        return;
    }
    if(UPRV_LENGTHOF(derivedPropsIndex)!=UPRV_LENGTHOF(derivedPropsNames)) {
        errln("error in ucdtest.cpp: UPRV_LENGTHOF(derivedPropsIndex)!=UPRV_LENGTHOF(derivedPropsNames)\n");
        return;
    }

    char path[500];
    if(getUnidataPath(path) == NULL) {
        errln("unable to find path to source/data/unidata/");
        return;
    }
    char *basename=strchr(path, 0);
    strcpy(basename, "DerivedCoreProperties.txt");

    char *fields[2][2];
    UErrorCode errorCode=U_ZERO_ERROR;
    u_parseDelimitedFile(path, ';', fields, 2, derivedPropsLineFn, this, &errorCode);
    if(U_FAILURE(errorCode)) {
        errln("error parsing DerivedCoreProperties.txt: %s\n", u_errorName(errorCode));
        return;
    }

    strcpy(basename, "DerivedNormalizationProps.txt");
    u_parseDelimitedFile(path, ';', fields, 2, derivedPropsLineFn, this, &errorCode);
    if(U_FAILURE(errorCode)) {
        errln("error parsing DerivedNormalizationProps.txt: %s\n", u_errorName(errorCode));
        return;
    }

    // now we have all derived core properties in the UnicodeSets
    // run them all through the API
    int32_t rangeCount, range;
    uint32_t i;
    UChar32 start, end;

    // test all TRUE properties
    for(i=0; i<UPRV_LENGTHOF(derivedPropsNames); ++i) {
        rangeCount=derivedProps[i].getRangeCount();
        for(range=0; range<rangeCount && numErrors[i]<MAX_ERRORS; ++range) {
            start=derivedProps[i].getRangeStart(range);
            end=derivedProps[i].getRangeEnd(range);
            for(; start<=end; ++start) {
                if(!u_hasBinaryProperty(start, derivedPropsIndex[i])) {
                    dataerrln("UnicodeTest error: u_hasBinaryProperty(U+%04lx, %s)==FALSE is wrong", start, derivedPropsNames[i]);
                    if(++numErrors[i]>=MAX_ERRORS) {
                      dataerrln("Too many errors, moving to the next test");
                      break;
                    }
                }
            }
        }
    }

    // invert all properties
    for(i=0; i<UPRV_LENGTHOF(derivedPropsNames); ++i) {
        derivedProps[i].complement();
    }

    // test all FALSE properties
    for(i=0; i<UPRV_LENGTHOF(derivedPropsNames); ++i) {
        rangeCount=derivedProps[i].getRangeCount();
        for(range=0; range<rangeCount && numErrors[i]<MAX_ERRORS; ++range) {
            start=derivedProps[i].getRangeStart(range);
            end=derivedProps[i].getRangeEnd(range);
            for(; start<=end; ++start) {
                if(u_hasBinaryProperty(start, derivedPropsIndex[i])) {
                    errln("UnicodeTest error: u_hasBinaryProperty(U+%04lx, %s)==TRUE is wrong\n", start, derivedPropsNames[i]);
                    if(++numErrors[i]>=MAX_ERRORS) {
                      errln("Too many errors, moving to the next test");
                      break;
                    }
                }
            }
        }
    }
#endif /* !UCONFIG_NO_NORMALIZATION */
}