/**
 * Checks that normalizing s in the given mode yields exp.
 *
 * The string is normalized three ways with the legacy API
 * (Normalizer::normalize(), then iterativeNorm() with +1 and with -1) and each
 * result is compared against exp via assertEqual(). If norm2 is non-null and
 * options is 0, the UTF-8 path Normalizer2::normalizeUTF8() is checked as well;
 * for NFC and NFKC an Edits object is recorded and verified for consistency
 * with the input and output lengths.
 *
 * @param mode    Normalization mode; also indexes kModeStrings and kMessages.
 * @param options Options passed through to Normalizer::normalize() and iterativeNorm().
 * @param norm2   Normalizer2 instance for the UTF-8 checks, or nullptr to skip them.
 * @param s       Input string.
 * @param exp     Expected normalized form of s.
 * @param field   Value formatted into the failure message via kMessages[mode].
 * @return TRUE if every check that ran passed; FALSE at the first failure
 *         (the Edits assertions are all evaluated before returning).
 */
UBool NormalizerConformanceTest::checkNorm(UNormalizationMode mode, int32_t options,
                                           const Normalizer2 *norm2,
                                           const UnicodeString &s, const UnicodeString &exp,
                                           int32_t field) {
    const char *modeString = kModeStrings[mode];
    char msg[20];
    snprintf(msg, sizeof(msg), kMessages[mode], field);

    UnicodeString out;
    UErrorCode errorCode = U_ZERO_ERROR;
    Normalizer::normalize(s, mode, options, out, errorCode);
    if (U_FAILURE(errorCode)) {
        dataerrln("Error running normalize UNORM_NF%s: %s", modeString, u_errorName(errorCode));
        return FALSE;
    }
    if (!assertEqual(modeString, "", s, out, exp, msg)) {
        return FALSE;
    }

    iterativeNorm(s, mode, options, out, +1);
    if (!assertEqual(modeString, "(+1)", s, out, exp, msg)) {
        return FALSE;
    }

    iterativeNorm(s, mode, options, out, -1);
    if (!assertEqual(modeString, "(-1)", s, out, exp, msg)) {
        return FALSE;
    }

    // The UTF-8 checks only run with a Normalizer2 instance and default options.
    if (norm2 == nullptr || options != 0) {
        return TRUE;
    }

    std::string s8;
    s.toUTF8String(s8);
    std::string exp8;
    exp.toUTF8String(exp8);
    std::string out8;
    Edits edits;
    // Edits are only requested for the composing modes.
    Edits *editsPtr = (mode == UNORM_NFC || mode == UNORM_NFKC) ? &edits : nullptr;
    StringByteSink<std::string> sink(&out8, static_cast<int32_t>(exp8.length()));
    norm2->normalizeUTF8(0, s8, sink, editsPtr, errorCode);
    if (U_FAILURE(errorCode)) {
        errln("Normalizer2.%s.normalizeUTF8(%s) failed: %s",
              modeString, s8.c_str(), u_errorName(errorCode));
        return FALSE;
    }
    if (out8 != exp8) {
        errln("Normalizer2.%s.normalizeUTF8(%s)=%s != %s",
              modeString, s8.c_str(), out8.c_str(), exp8.c_str());
        return FALSE;
    }
    if (editsPtr == nullptr) {
        return TRUE;
    }

    // Do the Edits cover the entire input & output?
    UBool pass = TRUE;
    pass &= assertEquals("edits.hasChanges()",
                         static_cast<UBool>(s8 != out8), edits.hasChanges());
    pass &= assertEquals("edits.lengthDelta()",
                         static_cast<int32_t>(out8.length() - s8.length()),
                         edits.lengthDelta());
    Edits::Iterator iter = edits.getCoarseIterator();
    // Advance to the end so the final indexes can be compared with the string lengths.
    while (iter.next(errorCode)) {}
    if (U_FAILURE(errorCode)) {
        // Report an iteration failure explicitly instead of letting it surface
        // only as a length mismatch in the assertions below.
        errln("Normalizer2.%s.normalizeUTF8(%s) Edits iteration failed: %s",
              modeString, s8.c_str(), u_errorName(errorCode));
        return FALSE;
    }
    pass &= assertEquals("edits source length",
                         static_cast<int32_t>(s8.length()), iter.sourceIndex());
    pass &= assertEquals("edits destination length",
                         static_cast<int32_t>(out8.length()), iter.destinationIndex());
    return pass;
}
/**
 * Runs the conformance checks for one record of test data.
 *
 * For every field, the string is normalized (directly and through
 * iterativeNorm() in both directions) and compared with the expected column:
 * field[1] for NFC, field[2] for NFD, field[3] for NFKC and field[4] for NFKD.
 * The NFC/NFD checks are applied to the first three fields only. Afterwards
 * the quick-check, isNormalized, FCD and case-insensitive compare APIs are
 * exercised on the same record.
 *
 * @param field   Array of FIELD_COUNT strings; field[0] is treated as the
 *                original string in the error messages.
 * @param line    Text printed in the "FAIL" message if any check fails.
 * @param options Normalization options passed to every Normalizer call.
 * @param status  Caller-supplied error code handed to the ICU calls; it is
 *                reset here only before the final case-insensitive compare.
 * @return TRUE if all checks passed, FALSE otherwise.
 */
UBool NormalizerConformanceTest::checkConformance(const UnicodeString* field,
                                                  const char *line,
                                                  int32_t options,
                                                  UErrorCode &status) {
    // One row per normalization form, listed in the order the checks run.
    struct FormCheck {
        UNormalizationMode mode;
        int32_t expectedIndex;      // index into field[] holding the expected result
        UBool firstThreeOnly;       // TRUE: apply only to field[0..2]
        const char *failureFormat;  // dataerrln() format used when normalize() fails
        const char *opDirect;
        const char *opForward;
        const char *opBackward;
        const char *mismatchTag;
    };
    static const FormCheck kFormChecks[] = {
        { UNORM_NFC,  1, TRUE,  "Error running normalize UNORM_NFC: %s",
          "C",  "C(+1)",  "C(-1)",  "c2!=C(c" },
        { UNORM_NFD,  2, TRUE,  "Error running normalize UNORM_NFD: %s",
          "D",  "D(+1)",  "D(-1)",  "c3!=D(c" },
        { UNORM_NFKC, 3, FALSE, "Error running normalize UNORM_NFKC: %s",
          "KC", "KC(+1)", "KC(-1)", "c4!=KC(c" },
        { UNORM_NFKD, 4, FALSE, "Error running normalize UNORM_NFKD: %s",
          "KD", "KD(+1)", "KD(-1)", "c5!=KD(c" }
    };

    UBool pass = TRUE;
    UnicodeString out, fcd;

    for (int32_t i = 0; i < FIELD_COUNT; ++i) {
        const UnicodeString &source = field[i];
        const int32_t fieldNum = i + 1;

        for (const FormCheck &check : kFormChecks) {
            if (check.firstThreeOnly && i >= 3) {
                continue;
            }
            const UnicodeString &expected = field[check.expectedIndex];

            Normalizer::normalize(source, check.mode, options, out, status);
            if (U_FAILURE(status)) {
                dataerrln(check.failureFormat, u_errorName(status));
                continue;
            }
            pass &= assertEqual(check.opDirect, source, out, expected,
                                check.mismatchTag, fieldNum);
            iterativeNorm(source, check.mode, options, out, +1);
            pass &= assertEqual(check.opForward, source, out, expected,
                                check.mismatchTag, fieldNum);
            iterativeNorm(source, check.mode, options, out, -1);
            pass &= assertEqual(check.opBackward, source, out, expected,
                                check.mismatchTag, fieldNum);
        }
    }

    compare(field[1], field[2]);
    compare(field[0], field[1]);

    // test quick checks
    if (Normalizer::quickCheck(field[1], UNORM_NFC, options, status) == UNORM_NO) {
        errln("Normalizer error: quickCheck(NFC(s), UNORM_NFC) is UNORM_NO");
        pass = FALSE;
    }
    if (Normalizer::quickCheck(field[2], UNORM_NFD, options, status) == UNORM_NO) {
        errln("Normalizer error: quickCheck(NFD(s), UNORM_NFD) is UNORM_NO");
        pass = FALSE;
    }
    if (Normalizer::quickCheck(field[3], UNORM_NFKC, options, status) == UNORM_NO) {
        errln("Normalizer error: quickCheck(NFKC(s), UNORM_NFKC) is UNORM_NO");
        pass = FALSE;
    }
    if (Normalizer::quickCheck(field[4], UNORM_NFKD, options, status) == UNORM_NO) {
        errln("Normalizer error: quickCheck(NFKD(s), UNORM_NFKD) is UNORM_NO");
        pass = FALSE;
    }

    // branch on options==0 for better code coverage
    const UBool nfcIsNormalized =
        (options == 0)
            ? Normalizer::isNormalized(field[1], UNORM_NFC, status)
            : Normalizer::isNormalized(field[1], UNORM_NFC, options, status);
    if (!nfcIsNormalized) {
        dataerrln("Normalizer error: isNormalized(NFC(s), UNORM_NFC) is FALSE");
        pass = FALSE;
    }
    if (field[0] != field[1] &&
            Normalizer::isNormalized(field[0], UNORM_NFC, options, status)) {
        errln("Normalizer error: isNormalized(s, UNORM_NFC) is TRUE");
        pass = FALSE;
    }
    if (!Normalizer::isNormalized(field[3], UNORM_NFKC, options, status)) {
        dataerrln("Normalizer error: isNormalized(NFKC(s), UNORM_NFKC) is FALSE");
        pass = FALSE;
    }
    if (field[0] != field[3] &&
            Normalizer::isNormalized(field[0], UNORM_NFKC, options, status)) {
        errln("Normalizer error: isNormalized(s, UNORM_NFKC) is TRUE");
        pass = FALSE;
    }

    // test FCD quick check and "makeFCD"
    Normalizer::normalize(field[0], UNORM_FCD, options, fcd, status);
    if (Normalizer::quickCheck(fcd, UNORM_FCD, options, status) == UNORM_NO) {
        errln("Normalizer error: quickCheck(FCD(s), UNORM_FCD) is UNORM_NO");
        pass = FALSE;
    }
    if (Normalizer::quickCheck(field[2], UNORM_FCD, options, status) == UNORM_NO) {
        errln("Normalizer error: quickCheck(NFD(s), UNORM_FCD) is UNORM_NO");
        pass = FALSE;
    }
    if (Normalizer::quickCheck(field[4], UNORM_FCD, options, status) == UNORM_NO) {
        errln("Normalizer error: quickCheck(NFKD(s), UNORM_FCD) is UNORM_NO");
        pass = FALSE;
    }

    Normalizer::normalize(fcd, UNORM_NFD, options, out, status);
    if (out != field[2]) {
        dataerrln("Normalizer error: NFD(FCD(s))!=NFD(s)");
        pass = FALSE;
    }

    if (U_FAILURE(status)) {
        dataerrln("Normalizer::normalize returned error status: %s", u_errorName(status));
        pass = FALSE;
    }

    if (field[0] != field[2]) {
        // two strings that are canonically equivalent must test
        // equal under a canonical caseless match
        // see UAX #21 Case Mappings and Jitterbug 2021 and
        // Unicode Technical Committee meeting consensus 92-C31
        status = U_ZERO_ERROR;
        const int32_t rc = Normalizer::compare(
            field[0], field[2],
            (options << UNORM_COMPARE_NORM_OPTIONS_SHIFT) | U_COMPARE_IGNORE_CASE,
            status);
        if (U_FAILURE(status)) {
            dataerrln("Normalizer::compare(case-insensitive) sets %s", u_errorName(status));
            pass = FALSE;
        } else if (rc != 0) {
            errln("Normalizer::compare(original, NFD, case-insensitive) returned %d instead of 0 for equal", rc);
            pass = FALSE;
        }
    }

    if (!pass) {
        dataerrln("FAIL: %s", line);
    }
    return pass;
}