Пример #1
0
/**
 * Runs _internal_toUnicode on src using the RFC 3491 Nameprep profile.
 *
 * The profile is opened with usprep_openByType(USPREP_RFC3491_NAMEPREP) for
 * the duration of the call and closed again before returning.
 *
 * @param src          Input text; must not be NULL.
 * @param srcLength    Length of src; values below -1 are rejected.
 * @param dest         Output buffer; may be NULL only when destCapacity is 0.
 * @param destCapacity Capacity of dest; must not be negative.
 * @param options      Passed through to _internal_toUnicode.
 * @param parseError   Passed through to _internal_toUnicode.
 * @param status       In/out error code. Set to U_ILLEGAL_ARGUMENT_ERROR when
 *                     the argument checks fail.
 * @return 0 when status is NULL or already a failure, or when the arguments
 *         are invalid; -1 when the profile cannot be opened; otherwise the
 *         value returned by _internal_toUnicode.
 */
U_CAPI int32_t U_EXPORT2
uidna_toUnicode(const UChar* src, int32_t srcLength,
                UChar* dest, int32_t destCapacity,
                int32_t options,
                UParseError* parseError,
                UErrorCode* status){
    UStringPrepProfile* profile;
    int32_t resultLength;

    /* A missing or already-failed status means there is nothing to do. */
    if(status == NULL){
        return 0;
    }
    if(U_FAILURE(*status)){
        return 0;
    }

    /* Argument validation: source present, sane lengths, and a real
     * destination whenever a non-zero capacity is claimed. */
    if(src == NULL || srcLength < -1 || destCapacity < 0 ||
       (dest == NULL && destCapacity > 0)){
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    profile = usprep_openByType(USPREP_RFC3491_NAMEPREP, status);
    if(U_FAILURE(*status)){
        return -1;
    }

    resultLength = _internal_toUnicode(src, srcLength,
                                       dest, destCapacity,
                                       options, profile,
                                       parseError, status);
    usprep_close(profile);

    return resultLength;
}
Пример #2
0
/*
 * Apply SASLPrep (ICU profile USPREP_RFC4013_SASLPREP) to a UTF-8 string.
 *
 * name        - label substituted for the "%s" in every error message.
 * in_utf8     - input text, handed to u_strFromUTF8 as UTF-8.
 * in_utf8_len - length argument handed to u_strFromUTF8 for in_utf8.
 * err         - filled in through bson_set_error when a step fails.
 *
 * Returns a buffer obtained from bson_malloc holding the prepared string as
 * UTF-8 (the caller releases it with bson_free), or NULL with err set.
 *
 * Each conversion is done in two passes: a "preflight" call with a NULL,
 * zero-capacity destination to learn the required length, followed by the
 * real call into a buffer one unit larger so there is room for a terminator.
 */
char *
_mongoc_sasl_prep_impl (const char *name,
                        const char *in_utf8,
                        int in_utf8_len,
                        bson_error_t *err)
{
   /* The flow is in_utf8 -> in_utf16 -> SASLPrep -> out_utf16 -> out_utf8. */
   UChar *in_utf16 = NULL, *out_utf16 = NULL;
   char *out_utf8 = NULL;
   int32_t in_utf16_len = 0, out_utf16_len = 0, out_utf8_len = 0;
   UErrorCode error_code = U_ZERO_ERROR;
   UStringPrepProfile *prep;

#define SASL_PREP_ERR_RETURN(msg)                        \
   do {                                                  \
      bson_set_error (err,                               \
                      MONGOC_ERROR_SCRAM,                \
                      MONGOC_ERROR_SCRAM_PROTOCOL_ERROR, \
                      (msg),                             \
                      name);                             \
      return NULL;                                       \
   } while (0)

/* A preflight into a zero-capacity buffer normally ends with
 * U_BUFFER_OVERFLOW_ERROR. When the required length is itself zero, ICU
 * reports a (negative, non-failure) warning instead, so only a genuine
 * failure other than the expected overflow counts as a preflight error. */
#define SASL_PREP_PREFLIGHT_FAILED(code) \
   ((code) > U_ZERO_ERROR && (code) != U_BUFFER_OVERFLOW_ERROR)

   /* 1. convert str to UTF-16. */
   /* preflight to get the destination length. */
   (void) u_strFromUTF8 (
      NULL, 0, &in_utf16_len, in_utf8, in_utf8_len, &error_code);
   if (SASL_PREP_PREFLIGHT_FAILED (error_code)) {
      SASL_PREP_ERR_RETURN ("could not calculate UTF-16 length of %s");
   }

   /* convert to UTF-16. */
   error_code = U_ZERO_ERROR;
   in_utf16 = bson_malloc (sizeof (UChar) *
                           (in_utf16_len + 1)); /* add one for null byte. */
   (void) u_strFromUTF8 (
      in_utf16, in_utf16_len + 1, NULL, in_utf8, in_utf8_len, &error_code);
   /* Any non-zero code, warnings included, is rejected here: a warning would
    * mean the buffer did not come back cleanly terminated. */
   if (error_code) {
      bson_free (in_utf16);
      SASL_PREP_ERR_RETURN ("could not convert %s to UTF-16");
   }

   /* 2. perform SASLPrep. */
   prep = usprep_openByType (USPREP_RFC4013_SASLPREP, &error_code);
   if (error_code) {
      bson_free (in_utf16);
      SASL_PREP_ERR_RETURN ("could not start SASLPrep for %s");
   }
   /* preflight. */
   out_utf16_len = usprep_prepare (
      prep, in_utf16, in_utf16_len, NULL, 0, USPREP_DEFAULT, NULL, &error_code);
   if (SASL_PREP_PREFLIGHT_FAILED (error_code)) {
      bson_free (in_utf16);
      usprep_close (prep);
      SASL_PREP_ERR_RETURN ("could not calculate SASLPrep length of %s");
   }

   /* convert. */
   error_code = U_ZERO_ERROR;
   out_utf16 = bson_malloc (sizeof (UChar) * (out_utf16_len + 1));
   (void) usprep_prepare (prep,
                          in_utf16,
                          in_utf16_len,
                          out_utf16,
                          out_utf16_len + 1,
                          USPREP_DEFAULT,
                          NULL,
                          &error_code);
   if (error_code) {
      bson_free (in_utf16);
      bson_free (out_utf16);
      usprep_close (prep);
      SASL_PREP_ERR_RETURN ("could not execute SASLPrep for %s");
   }
   bson_free (in_utf16);
   usprep_close (prep);

   /* 3. convert back to UTF-8. */
   /* preflight. */
   error_code = U_ZERO_ERROR;
   (void) u_strToUTF8 (
      NULL, 0, &out_utf8_len, out_utf16, out_utf16_len, &error_code);
   if (SASL_PREP_PREFLIGHT_FAILED (error_code)) {
      bson_free (out_utf16);
      SASL_PREP_ERR_RETURN ("could not calculate UTF-8 length of %s");
   }

   /* convert. */
   error_code = U_ZERO_ERROR;
   out_utf8 = (char *) bson_malloc (
      sizeof (char) * (out_utf8_len + 1)); /* add one for null byte. */
   (void) u_strToUTF8 (
      out_utf8, out_utf8_len + 1, NULL, out_utf16, out_utf16_len, &error_code);
   if (error_code) {
      bson_free (out_utf8);
      bson_free (out_utf16);
      SASL_PREP_ERR_RETURN ("could not convert %s back to UTF-8");
   }
   bson_free (out_utf16);
   return out_utf8;
#undef SASL_PREP_PREFLIGHT_FAILED
#undef SASL_PREP_ERR_RETURN
}
Пример #3
0
/**
 * Converts a whole domain name label by label with _internal_toUnicode.
 *
 * The input is split with getNextSeparator. Each label is converted using the
 * RFC 3491 Nameprep profile and written to dest; between labels the separator
 * character found in the source is copied through unchanged. Once dest runs
 * out of room the loop keeps going so that the full required length is still
 * counted.
 *
 * @param src          Input text; must not be NULL.
 * @param srcLength    Length of src; values below -1 are rejected.
 * @param dest         Output buffer; may be NULL only when destCapacity is 0.
 * @param destCapacity Capacity of dest; must not be negative.
 * @param options      Passed through to _internal_toUnicode.
 * @param parseError   Passed through to _internal_toUnicode.
 * @param status       In/out error code. Set to U_ILLEGAL_ARGUMENT_ERROR for
 *                     bad arguments and to U_IDNA_DOMAIN_NAME_TOO_LONG_ERROR
 *                     when the total exceeds MAX_DOMAIN_NAME_LENGTH.
 * @return 0 on the early-exit paths (bad status, bad arguments, profile not
 *         opened); otherwise the result of u_terminateUChars for the
 *         accumulated length.
 */
U_CAPI int32_t U_EXPORT2
uidna_IDNToUnicode(  const UChar* src, int32_t srcLength,
                     UChar* dest, int32_t destCapacity,
                     int32_t options,
                     UParseError* parseError,
                     UErrorCode* status){

    if(status == NULL || U_FAILURE(*status)){
        return 0;
    }
    if(src == NULL || srcLength < -1 || destCapacity < 0 ||
       (dest == NULL && destCapacity > 0)){
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    UStringPrepProfile* profile = usprep_openByType(USPREP_RFC3491_NAMEPREP, status);
    if(U_FAILURE(*status)){
        return 0;
    }

    // running total of UChars the complete result needs
    int32_t totalLength = 0;

    // cursors over the source and the destination
    UChar *label = (UChar*)src;       // start of the label being processed
    UChar *afterLabel = (UChar*)src;  // set by getNextSeparator
    UChar *out = (UChar*)dest;
    int32_t srcLeft = srcLength;
    int32_t outLeft = destCapacity;
    UBool lastLabel = FALSE;

    for(;;){
        int32_t labelLength = getNextSeparator(label, srcLeft, &afterLabel, &lastLabel);

        // Per the RFC, ToUnicode never fails: when a step fails the original
        // input is returned. An empty label is therefore not rejected here;
        // _internal_toUnicode copies the label.
        int32_t written = _internal_toUnicode(label, labelLength,
                                              out, outLeft,
                                              options, profile,
                                              parseError, status);

        // Overflow is not fatal: stop writing but keep counting.
        if(*status == U_BUFFER_OVERFLOW_ERROR){
            *status = U_ZERO_ERROR;
            outLeft = 0;
        }

        if(U_FAILURE(*status)){
            break;
        }

        totalLength += written;

        // advance the output cursor past what was just written
        if(written < outLeft){
            out += written;
            outLeft -= written;
        }else{
            // should never occur
            outLeft = 0;
        }

        if(lastLabel == TRUE){
            break;
        }

        // Copy the separator exactly as it appears in the source; unlike the
        // ToASCII operation the label separators are not normalized.
        if(outLeft > 0){
            *out++ = label[labelLength];
            outLeft--;
        }
        totalLength++;

        label = afterLabel;
        if(srcLeft > 0){
            srcLeft = (int32_t)(srcLength - (afterLabel - src));
        }
    }

    if(totalLength > MAX_DOMAIN_NAME_LENGTH){
        *status = U_IDNA_DOMAIN_NAME_TOO_LONG_ERROR;
    }

    usprep_close(profile);

    return u_terminateUChars(dest, destCapacity, totalLength, status);
}
Пример #4
0
/**
 * Converts a whole domain name label by label with _internal_toASCII.
 *
 * The input is split with getNextSeparator. Each label is converted using the
 * RFC 3491 Nameprep profile and written to dest, and a FULL_STOP is written
 * between labels. An empty final label (the root label after a trailing
 * separator) is not passed to _internal_toASCII. Once dest runs out of room
 * the loop keeps going so that the full required length is still counted.
 *
 * @param src          Input text; must not be NULL.
 * @param srcLength    Length of src; values below -1 are rejected.
 * @param dest         Output buffer; may be NULL only when destCapacity is 0.
 * @param destCapacity Capacity of dest; must not be negative.
 * @param options      Passed through to _internal_toASCII.
 * @param parseError   Passed through to _internal_toASCII.
 * @param status       In/out error code. Set to U_ILLEGAL_ARGUMENT_ERROR for
 *                     bad arguments and to U_IDNA_DOMAIN_NAME_TOO_LONG_ERROR
 *                     when the total exceeds MAX_DOMAIN_NAME_LENGTH.
 * @return 0 on the early-exit paths (bad status, bad arguments, profile not
 *         opened); otherwise the result of u_terminateUChars for the
 *         accumulated length.
 */
U_CAPI int32_t U_EXPORT2
uidna_IDNToASCII(  const UChar *src, int32_t srcLength,
                   UChar* dest, int32_t destCapacity,
                   int32_t options,
                   UParseError *parseError,
                   UErrorCode *status){

    if(status == NULL || U_FAILURE(*status)){
        return 0;
    }
    if(src == NULL || srcLength < -1 || destCapacity < 0 ||
       (dest == NULL && destCapacity > 0)){
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    UStringPrepProfile* profile = usprep_openByType(USPREP_RFC3491_NAMEPREP, status);
    if(U_FAILURE(*status)){
        return 0;
    }

    // running total of UChars the complete result needs
    int32_t totalLength = 0;

    // cursors over the source and the destination
    UChar *label = (UChar*)src;       // start of the label being processed
    UChar *afterLabel = (UChar*)src;  // set by getNextSeparator
    UChar *out = (UChar*)dest;
    int32_t srcLeft = srcLength;
    int32_t outLeft = destCapacity;
    UBool lastLabel = FALSE;

    for(;;){
        int32_t labelLength = getNextSeparator(label, srcLeft, &afterLabel, &lastLabel);
        int32_t written = 0;

        // Convert everything except an empty final label, which is the root
        // label separator and contributes nothing.
        if(labelLength != 0 || !lastLabel){
            written = _internal_toASCII(label, labelLength,
                                        out, outLeft,
                                        options, profile,
                                        parseError, status);

            // Overflow is not fatal: stop writing but keep counting.
            if(*status == U_BUFFER_OVERFLOW_ERROR){
                *status = U_ZERO_ERROR;
                outLeft = 0;
            }
        }

        if(U_FAILURE(*status)){
            break;
        }

        totalLength += written;

        // advance the output cursor past what was just written
        if(written < outLeft){
            out += written;
            outLeft -= written;
        }else{
            // should never occur
            outLeft = 0;
        }

        if(lastLabel == TRUE){
            break;
        }

        // emit the label separator
        if(outLeft > 0){
            *out++ = FULL_STOP;
            outLeft--;
        }
        totalLength++;

        label = afterLabel;
        if(srcLeft > 0){
            srcLeft = (int32_t)(srcLength - (afterLabel - src));
        }
    }

    if(totalLength > MAX_DOMAIN_NAME_LENGTH){
        *status = U_IDNA_DOMAIN_NAME_TOO_LONG_ERROR;
    }

    usprep_close(profile);

    return u_terminateUChars(dest, destCapacity, totalLength, status);
}
Пример #5
0
/**
 * Loads the RFC 3491 Nameprep profile, parses the first mapping file listed
 * in fileNames and, if that succeeds, runs testAllCodepoints.
 *
 * Side effects visible in this function: the file-level variables profile,
 * idnTrie, indexes, mappingData and pTestIDNA are set for the duration of the
 * call; profile is closed and pTestIDNA is reset to NULL before returning on
 * every path taken after the profile was opened.
 *
 * The file name is assembled as
 *   [./]<data directory><separator>SPREP_DIR<separator>fileNames[0]
 * where "./" is prepended only when the data directory contains no separator.
 *
 * @param test  Test object used for error reporting and handed to the helpers.
 * @return The final UErrorCode value (U_ZERO_ERROR on success).
 */
extern int
testData(TestIDNA& test) {
    UErrorCode errorCode=U_ZERO_ERROR;

    profile = usprep_openByType(USPREP_RFC3491_NAMEPREP, &errorCode);
    if(U_FAILURE(errorCode)){
        test.errcheckln(errorCode, "Failed to load IDNA data file. " + UnicodeString(u_errorName(errorCode)));
        return errorCode;
    }

    //TODO get the srcDir dynamically
    const char *srcDir=IntlTest::pathToDataDirectory();

    // Size the buffer for the longest name that can be built below:
    // "./" (2) + srcDir + separator (1) + SPREP_DIR + separator (1)
    // + fileNames[0] + terminating NUL (1).
    size_t capacity = uprv_strlen(srcDir) + uprv_strlen(SPREP_DIR)
                      + uprv_strlen(fileNames[0]) + 5;
    char* filename = (char*) malloc(capacity);
    if(filename == NULL){
        test.errln("Could not allocate memory for the data file name");
        usprep_close(profile);
        return U_MEMORY_ALLOCATION_ERROR;
    }

    idnTrie     = &profile->sprepTrie;
    indexes     = profile->indexes;
    mappingData = profile->mappingData;

    //initialize
    pTestIDNA = &test;

    /* prepare the filename beginning with the source dir */
    if(uprv_strchr(srcDir,U_FILE_SEP_CHAR) == NULL){
        filename[0] = 0x2E;
        filename[1] = U_FILE_SEP_CHAR;
        uprv_strcpy(filename+2,srcDir);
    }else{
        uprv_strcpy(filename, srcDir);
    }

    /* Make sure the directory part ends with a separator. This is done once
     * only: the write replaces the NUL terminator, so the buffer must not be
     * measured with strlen again until the next strcpy re-terminates it. */
    char *basename=filename+uprv_strlen(filename);
    if(basename>filename && *(basename-1)!=U_FILE_SEP_CHAR) {
        *basename++=U_FILE_SEP_CHAR;
    }

    /* first copy misc directory */
    uprv_strcpy(basename,SPREP_DIR);
    basename = basename + uprv_strlen(SPREP_DIR);
    *basename++=U_FILE_SEP_CHAR;

    /* process unassigned */
    uprv_strcpy(basename,fileNames[0]);
    parseMappings(filename,TRUE, test,&errorCode);
    if(U_FAILURE(errorCode)) {
        test.errln( "Could not open file %s for reading \n", filename);
    } else {
        testAllCodepoints(test);
    }

    /* Shared cleanup for both the success and the failure path. */
    usprep_close(profile);
    pTestIDNA = NULL;
    free(filename);
    return errorCode;
}
Пример #6
0
// Entry point of the generator (the error-code object is labelled "genuts46").
//
// Overview of what the code below does:
//   1. Builds a number of base UnicodeSets from property patterns.
//   2. Runs every non-surrogate code point through toIDNA2003() and collects
//      those whose result disagrees with the NFKC_Casefold-based mapping into
//      baseExclusionSet.
//   3. Derives disallowedSet, ignoredSet, validSet and the final mappedSet,
//      repeatedly demoting characters whose decomposition/mapping is not
//      wholly valid until nothing changes any more.
//   4. Walks the code point space from 0 upwards and calls printLine() once
//      per maximal range that shares status, mapping and character age.
//
// argc/argv are not used. The function returns 0 when it reaches the end.
// NOTE(review): errorCode.assertSuccess() presumably terminates the program on
// failure (the type is called ExitingErrorCode) - confirm against its
// definition, which is not part of this chunk.
extern int
main(int argc, const char *argv[]) {
    ExitingErrorCode errorCode("genuts46");

    // predefined base sets
    icu::UnicodeSet unassignedSet(UNICODE_STRING_SIMPLE("[:Cn:]"), errorCode);

    icu::UnicodeSet labelSeparators(
        UNICODE_STRING_SIMPLE("[\\u002E\\u3002\\uFF0E\\uFF61]"), errorCode);

    icu::UnicodeSet mappedSet(
        UNICODE_STRING_SIMPLE("[:Changes_When_NFKC_Casefolded:]"), errorCode);
    mappedSet.removeAll(labelSeparators);  // simplifies checking of mapped characters

    icu::UnicodeSet baseValidSet(icu::UnicodeString(
        "[[[[:^Changes_When_NFKC_Casefolded:]"
        "-[:C:]-[:Z:]"
        "-[:Block=Ideographic_Description_Characters:]]"
        "[:ascii:]]-[.]]", -1, US_INV), errorCode);

    // Characters that are disallowed when STD3 rules are applied,
    // but valid when STD3 rules are not applied.
    icu::UnicodeSet disallowedSTD3Set(icu::UnicodeString(
        "[[:ascii:]-[\\u002D.a-zA-Z0-9]]", -1, US_INV), errorCode);

    icu::UnicodeSet deviationSet(
        UNICODE_STRING_SIMPLE("[\\u00DF\\u03C2\\u200C\\u200D]"), errorCode);
    errorCode.assertSuccess();

    // derived sets
    icu::LocalUStringPrepProfilePointer namePrep(usprep_openByType(USPREP_RFC3491_NAMEPREP, errorCode));
    const icu::Normalizer2 *nfkc_cf=
        icu::Normalizer2::getInstance(NULL, "nfkc_cf", UNORM2_COMPOSE, errorCode);
    errorCode.assertSuccess();

    // HACK: The StringPrep API performs a BiDi check according to the data.
    // We need to override that for this data generation, by resetting an internal flag.
    namePrep->checkBiDi=FALSE;

    // Collect the code points whose toIDNA2003() outcome does not match the
    // base mapping computed here.
    icu::UnicodeSet baseExclusionSet;
    icu::UnicodeString cString, mapping, namePrepResult;
    for(UChar32 c=0; c<=0x10ffff; ++c) {
        // Jump over U+D800..U+DFFF.
        if(c==0xd800) {
            c=0xe000;
        }
        // A result of 0 means "nothing to examine" for this loop; positive and
        // negative results are distinguished in the condition below.
        int namePrepStatus=toIDNA2003(namePrep.getAlias(), c, namePrepResult);
        if(namePrepStatus!=0) {
            // get the UTS #46 base mapping value
            switch(c) {
            case 0xff0e:
            case 0x3002:
            case 0xff61:
                mapping.setTo(0x2e);
                break;
            default:
                cString.setTo(c);
                nfkc_cf->normalize(cString, mapping, errorCode);
                break;
            }
            if(
                namePrepStatus>0 ?
                    // c is valid or mapped in IDNA2003
                    !labelSeparators.contains(c) && namePrepResult!=mapping :
                    // namePrepStatus<0: c is prohibited in IDNA2003
                    baseValidSet.contains(c) || (cString!=mapping && baseValidSet.containsAll(mapping))
            ) {
                baseExclusionSet.add(c);
            }
        }
    }

    // Start from "everything", take out every category that has its own
    // status, then put back the exclusions and the unassigned code points.
    icu::UnicodeSet disallowedSet(0, 0x10ffff);
    disallowedSet.
        removeAll(labelSeparators).
        removeAll(deviationSet).
        removeAll(mappedSet).
        removeAll(baseValidSet).
        addAll(baseExclusionSet).
        addAll(unassignedSet);

    // The "nfc" data requested in decompose mode; used below through the
    // variable name nfd.
    const icu::Normalizer2 *nfd=
        icu::Normalizer2::getInstance(NULL, "nfc", UNORM2_DECOMPOSE, errorCode);
    errorCode.assertSuccess();

    icu::UnicodeSet ignoredSet;  // will be a subset of mappedSet
    icu::UnicodeSet removeSet;
    icu::UnicodeString nfdString;
    {
        // First pass over mappedSet: a mapping that leaves baseValidSet makes
        // the character disallowed; an empty mapping makes it ignored.
        // Removals are collected in removeSet and applied after the loop so
        // the set is not modified while it is being iterated.
        icu::UnicodeSetIterator iter(mappedSet);
        while(iter.next()) {
            UChar32 c=iter.getCodepoint();
            cString.setTo(c);
            nfkc_cf->normalize(cString, mapping, errorCode);
            if(!baseValidSet.containsAll(mapping)) {
                fprintf(stderr, "U+%04lX mapped -> disallowed: mapping not wholly in base valid set\n", (long)c);
                disallowedSet.add(c);
                removeSet.add(c);
            } else if(mapping.isEmpty()) {
                ignoredSet.add(c);
            }
        }
        mappedSet.removeAll(removeSet);
    }
    errorCode.assertSuccess();

    icu::UnicodeSet validSet(baseValidSet);
    validSet.
        removeAll(labelSeparators).  // non-ASCII label separators will be mapped in the end
        removeAll(deviationSet).
        removeAll(disallowedSet).
        removeAll(mappedSet).
        add(0x2e);  // not mapped, simply valid
    // Iterate to a fixed point: demoting one character can invalidate the
    // decomposition or mapping of another, so repeat until a full round makes
    // no change.
    UBool madeChange;
    do {
        madeChange=FALSE;
        {
            // Valid characters whose decomposition is not wholly valid.
            removeSet.clear();
            icu::UnicodeSetIterator iter(validSet);
            while(iter.next()) {
                UChar32 c=iter.getCodepoint();
                if(nfd->getDecomposition(c, nfdString) && !validSet.containsAll(nfdString)) {
                    fprintf(stderr, "U+%04lX valid -> disallowed: NFD not wholly valid\n", (long)c);
                    disallowedSet.add(c);
                    removeSet.add(c);
                    madeChange=TRUE;
                }
            }
            validSet.removeAll(removeSet);
        }
        {
            // Mapped characters whose decomposed mapping is not wholly valid.
            removeSet.clear();
            icu::UnicodeSetIterator iter(mappedSet);
            while(iter.next()) {
                UChar32 c=iter.getCodepoint();
                cString.setTo(c);
                nfkc_cf->normalize(cString, mapping, errorCode);
                nfd->normalize(mapping, nfdString, errorCode);
                if(!validSet.containsAll(nfdString)) {
                    fprintf(stderr, "U+%04lX mapped -> disallowed: NFD of mapping not wholly valid\n", (long)c);
                    disallowedSet.add(c);
                    removeSet.add(c);
                    madeChange=TRUE;
                }
            }
            mappedSet.removeAll(removeSet);
        }
    } while(madeChange);
    errorCode.assertSuccess();

    // finish up
    labelSeparators.remove(0x2e).freeze();  // U+002E is simply valid
    deviationSet.freeze();
    ignoredSet.freeze();
    validSet.freeze();
    mappedSet.freeze();
    disallowedSTD3Set.freeze();

    // output
    // c is the next code point to classify; prevStart/prevStatus/prevMapping/
    // prevAge describe the range that is currently open and not yet printed.
    UChar32 prevStart=0, c=0;
    Status prevStatus=DISALLOWED_STD3_VALID, status;
    icu::UnicodeString prevMapping;
    UVersionInfo prevAge={ 1, 1, 0, 0 }, age;

    // Each iteration handles the gap of non-disallowed code points before the
    // next disallowed range, and then that disallowed range itself.
    icu::UnicodeSetIterator iter(disallowedSet);
    while(iter.nextRange()) {
        UChar32 start=iter.getCodepoint();
        while(c<start) {
            mapping.remove();
            if(labelSeparators.contains(c)) {
                status=MAPPED;
                mapping.setTo(0x2e);
            } else if(deviationSet.contains(c)) {
                status=DEVIATION;
                cString.setTo(c);
                nfkc_cf->normalize(cString, mapping, errorCode);
            } else if(ignoredSet.contains(c)) {
                status=IGNORED;
            } else if(validSet.contains(c)) {
                if(disallowedSTD3Set.contains(c)) {
                    fprintf(stderr, "U+%04lX valid -> disallowed_STD3_valid: itself not STD3\n", (long)c);
                    status=DISALLOWED_STD3_VALID;
                } else if( nfd->getDecomposition(c, nfdString) &&
                    disallowedSTD3Set.containsSome(nfdString)
                ) {
                    fprintf(stderr, "U+%04lX valid -> disallowed_STD3_valid: NFD not wholly STD3\n", (long)c);
                    status=DISALLOWED_STD3_VALID;
                } else {
                    status=VALID;
                }
            } else if(mappedSet.contains(c)) {
                cString.setTo(c);
                nfkc_cf->normalize(cString, mapping, errorCode);
                if(disallowedSTD3Set.containsSome(mapping)) {
                    fprintf(stderr, "U+%04lX mapped -> disallowed_STD3_mapped\n", (long)c);
                    status=DISALLOWED_STD3_MAPPED;
                } else {
                    status=MAPPED;
                }
            } else {
                // NOTE(review): status is not assigned on this path. It keeps
                // the value from the previous code point, or is uninitialized
                // if this is the first one classified, and is still compared
                // below.
                fprintf(stderr, "*** undetermined status of U+%04lX\n", (long)c);
            }
            // Print a new line where the status, the mapping or
            // the character age change.
            getAgeIfAssigned(c, age);
            if( prevStart<c &&
                (status!=prevStatus || mapping!=prevMapping || 0!=memcmp(prevAge, age, 4))
            ) {
                printLine(prevStart, c-1, prevStatus, prevMapping);
                prevStart=c;
                prevStatus=status;
                prevMapping=mapping;
                memcpy(prevAge, age, 4);
            }
            ++c;
        }
        // c==start is disallowed
        // Flush whatever range was open before the disallowed range begins.
        if(prevStart<c) {
            printLine(prevStart, c-1, prevStatus, prevMapping);
        }
        prevStart=c;
        prevStatus=DISALLOWED;
        prevMapping.remove();
        getAgeIfAssigned(c, prevAge);
        // Inside a disallowed range only a change of age starts a new line.
        UChar32 end=iter.getCodepointEnd();
        while(++c<=end) {
            getAgeIfAssigned(c, age);
            if(prevStart<c && 0!=memcmp(prevAge, age, 4)) {
                printLine(prevStart, c-1, prevStatus, prevMapping);
                prevStart=c;
                memcpy(prevAge, age, 4);
            }
        }
    }
    // Flush the range that is still open after the last disallowed range.
    if(prevStart<c) {
        printLine(prevStart, c-1, prevStatus, prevMapping);
    }
    return 0;
}