Exemplo n.º 1
0
static void TestAppend() {
    static const UChar32 codePoints[]={
        0x61, 0xdf, 0x901, 0x3040,
        0xac00, 0xd800, 0xdbff, 0xdcde,
        0xdffd, 0xe000, 0xffff, 0x10000,
        0x12345, 0xe0021, 0x10ffff, 0x110000,
        0x234567, 0x7fffffff, -1, -1000,
        0, 0x400
    };
    static const UChar expectUnsafe[]={
        0x61, 0xdf, 0x901, 0x3040,
        0xac00, 0xd800, 0xdbff, 0xdcde,
        0xdffd, 0xe000, 0xffff, 0xd800, 0xdc00,
        0xd848, 0xdf45, 0xdb40, 0xdc21, 0xdbff, 0xdfff, /* not 0x110000 */
        /* none from this line */
        0, 0x400
    }, expectSafe[]={
        0x61, 0xdf, 0x901, 0x3040,
        0xac00, 0xd800, 0xdbff, 0xdcde,
        0xdffd, 0xe000, 0xffff, 0xd800, 0xdc00,
        0xd848, 0xdf45, 0xdb40, 0xdc21, 0xdbff, 0xdfff, /* not 0x110000 */
        /* none from this line */
        0, 0x400
    };

    UChar buffer[100];
    UChar32 c;
    int32_t i, length;
    UBool isError, expectIsError, wrongIsError;

    length=0;
    for(i=0; i<LENGTHOF(codePoints); ++i) {
        c=codePoints[i];
        if(c<0 || 0x10ffff<c) {
            continue; /* skip non-code points for U16_APPEND_UNSAFE */
        }

        U16_APPEND_UNSAFE(buffer, length, c);
    }
    if(length!=LENGTHOF(expectUnsafe) || 0!=memcmp(buffer, expectUnsafe, length*U_SIZEOF_UCHAR)) {
        log_err("U16_APPEND_UNSAFE did not generate the expected output\n");
    }

    length=0;
    wrongIsError=FALSE;
    for(i=0; i<LENGTHOF(codePoints); ++i) {
        c=codePoints[i];
        expectIsError= c<0 || 0x10ffff<c || U_IS_SURROGATE(c);
        isError=FALSE;

        U16_APPEND(buffer, length, LENGTHOF(buffer), c, isError);
        wrongIsError|= isError!=expectIsError;
    }
    if(wrongIsError) {
        log_err("U16_APPEND did not set isError correctly\n");
    }
    if(length!=LENGTHOF(expectSafe) || 0!=memcmp(buffer, expectSafe, length*U_SIZEOF_UCHAR)) {
        log_err("U16_APPEND did not generate the expected output\n");
    }
}
Exemplo n.º 2
0
 // Appends code point x to fBuffer as UTF-16. When fewer than two free
 // slots remain, the buffered units are first handed to fDest->addLiteral()
 // and the buffer index is reset.
 inline void append(UChar32 x) {
     const bool hasRoomForPair = fIdx < UPRV_LENGTHOF(fBuffer) - 1;
     if (!hasRoomForPair) {
         fDest->addLiteral(fBuffer, 0, fIdx);
         fIdx = 0;
     }
     U16_APPEND_UNSAFE(fBuffer, fIdx, x);
 }
Exemplo n.º 3
0
/*
 * Parse a whitespace-separated list of hexadecimal code points, ending at
 * ';' or the end of the string, and store them as UTF-16 in
 * dest[destCapacity].
 *
 * @param s            input text; must not be NULL
 * @param dest         output buffer; may be NULL only if destCapacity is 0
 * @param destCapacity capacity of dest in UChars; must not be negative
 * @param pFirst       if not NULL, receives the first parsed code point,
 *                     or 0xffffffff when the list is empty
 * @param pErrorCode   in/out error code:
 *                     U_ILLEGAL_ARGUMENT_ERROR for bad arguments,
 *                     U_PARSE_ERROR for a malformed or out-of-range number,
 *                     U_STRING_NOT_TERMINATED_WARNING if the output exactly
 *                     fills dest, U_BUFFER_OVERFLOW_ERROR if it does not fit
 * @return The length of the string in numbers of UChars (also when it did
 *         not fit), or 0 on argument/parse errors.
 */
U_CAPI int32_t U_EXPORT2
u_parseString(const char *s,
              UChar *dest, int32_t destCapacity,
              uint32_t *pFirst,
              UErrorCode *pErrorCode) {
    char *end;
    unsigned long parsed;
    uint32_t value;
    int32_t destLength;

    if(U_FAILURE(*pErrorCode)) {
        return 0;
    }
    if(s==NULL || destCapacity<0 || (destCapacity>0 && dest==NULL)) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    if(pFirst!=NULL) {
        *pFirst=0xffffffff;
    }

    destLength=0;
    for(;;) {
        s=u_skipWhitespace(s);
        if(*s==';' || *s==0) {
            /* end of the list: terminate if there is room, else report why not */
            if(destLength<destCapacity) {
                dest[destLength]=0;
            } else if(destLength==destCapacity) {
                *pErrorCode=U_STRING_NOT_TERMINATED_WARNING;
            } else {
                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
            }
            return destLength;
        }

        /*
         * read one code point
         * Range-check the full-width result before narrowing it: where
         * unsigned long is wider than 32 bits, casting first would let a
         * value such as 0x100000041 wrap to U+0041 and pass the check.
         */
        parsed=uprv_strtoul(s, &end, 16);
        if(end<=s || (!U_IS_INV_WHITESPACE(*end) && *end!=';' && *end!=0) || parsed>=0x110000) {
            *pErrorCode=U_PARSE_ERROR;
            return 0;
        }
        value=(uint32_t)parsed;

        /* store the first code point */
        if(pFirst!=NULL) {
            *pFirst=value;
            pFirst=NULL; /* only the first one is reported */
        }

        /* append it to the destination array, or just count it (preflight) */
        if((destLength+U16_LENGTH(value))<=destCapacity) {
            U16_APPEND_UNSAFE(dest, destLength, value);
        } else {
            destLength+=U16_LENGTH(value);
        }

        /* go to the following characters */
        s=end;
    }
}
Exemplo n.º 4
0
/*
 * Invariant-character conversion of a char* string to UChar*, resolving
 * backslash escapes on the way.
 *
 * Runs of ordinary characters are copied with _appendUChars(); each escape is
 * decoded with u_unescapeAt() and appended as UTF-16. When dest is NULL or too
 * small, the output is only counted. Returns the number of UChars needed; on a
 * malformed escape, returns 0 and (if possible) stores an empty string.
 */
U_CAPI int32_t U_EXPORT2
u_unescape(const char *src, UChar *dest, int32_t destCapacity) {
    const char *literalStart = src;
    int32_t out = 0;

    for (;;) {
        int32_t escapeLen;
        int32_t units;
        UChar32 cp;
        char ch = *src;

        if (ch == 0) {
            break;
        }
        /* '\\' intentionally written as compiler-specific
         * character constant to correspond to compiler-specific
         * char* constants. */
        if (ch != '\\') {
            ++src;
            continue;
        }

        /* flush the literal run that precedes the backslash */
        if (src != literalStart) {
            if (dest != NULL) {
                _appendUChars(dest + out, destCapacity - out,
                              literalStart, (int32_t)(src - literalStart));
            }
            out += (int32_t)(src - literalStart);
        }

        ++src; /* step over the backslash */
        escapeLen = 0;
        cp = (UChar32)u_unescapeAt(_charPtr_charAt, &escapeLen, (int32_t)uprv_strlen(src), (void*)src);
        if (escapeLen == 0) {
            /* malformed escape: hand back an empty result */
            if (dest != NULL && destCapacity > 0) {
                *dest = 0;
            }
            return 0;
        }
        src += escapeLen; /* step over the escape sequence */

        units = U16_LENGTH(cp);
        if (dest != NULL && units <= (destCapacity - out)) {
            U16_APPEND_UNSAFE(dest, out, cp);
        } else {
            out += units;
        }
        literalStart = src;
    }

    /* flush the trailing literal run */
    if (src != literalStart) {
        if (dest != NULL) {
            _appendUChars(dest + out, destCapacity - out,
                          literalStart, (int32_t)(src - literalStart));
        }
        out += (int32_t)(src - literalStart);
    }
    if (dest != NULL && out < destCapacity) {
        dest[out] = 0;
    }
    return out;
}
Exemplo n.º 5
0
// Glyph lookup callback: encodes `unicode` as UTF-16, asks CoreText for the
// matching glyphs and stores the first one in *glyph. Returns false when
// CoreText cannot map the character. hbFont, variationSelector and userData
// are not used.
static hb_bool_t getGlyph(hb_font_t* hbFont, void* fontData, hb_codepoint_t unicode, hb_codepoint_t variationSelector, hb_codepoint_t* glyph, void* userData)
{
    FontPlatformData* platformData = reinterpret_cast<FontPlatformData*>(fontData);
    CTFontRef font = platformData->ctFont();

    UniChar utf16[4];
    CGGlyph glyphs[4];
    size_t unitCount = 0;
    U16_APPEND_UNSAFE(utf16, unitCount, unicode);

    if (CTFontGetGlyphsForCharacters(font, utf16, glyphs, unitCount)) {
        *glyph = glyphs[0];
        return true;
    }
    return false;
}
Exemplo n.º 6
0
// Builds the name "a-<c>" and checks it with testIsPotentialCustomElementName.
// Values up to 0xFF go through an 8-bit string; larger ones are encoded as
// UTF-16 (one or two units) into a 16-bit string.
static void testIsPotentialCustomElementNameChar(UChar32 c, bool expected) {
  AtomicString candidate;
  if (c > 0xFF) {
    // Two spare slots: room for a surrogate pair plus the terminator.
    UChar wide[] = {'a', '-', 'X', '\0', '\0'};
    size_t end = 2;
    U16_APPEND_UNSAFE(wide, end, c);
    wide[end] = 0;
    candidate = wide;
  } else {
    LChar narrow[] = "a-X";
    narrow[2] = c;
    candidate = narrow;
  }
  testIsPotentialCustomElementName(candidate, expected);
}
Exemplo n.º 7
0
/*
 * Prepare the sorted mapping table for building the fromUnicode extension
 * table:
 * - clear the MBCS_FROM_U_EXT_FLAG bits of non-negative flags
 * - keep only mappings with flag 0, 1, 4, or flag 2 with a single byte
 *   (<subchar1>), dropping toUnicode fallbacks and other SUB mappings
 * - overwrite table->reverseMap with the indexes of the kept mappings
 *   (this destroys the previous reverseMap contents)
 * - for multi-code point mappings, leave the first code point alone and
 *   recode the following ones in place as 16-bit Unicode units
 *
 * Returns the number of indexes written to the reverseMap.
 */
static int32_t
prepareFromUMappings(UCMTable *table) {
    UCMapping *entry;
    int32_t *indexMap;
    int32_t src, kept, total;
    int8_t f;

    /*
     * walk the mappings array directly rather than through the map:
     * the mappings are sorted lexically
     */
    entry=table->mappings;
    indexMap=table->reverseMap;
    total=table->mappingsLength;

    kept=0;
    for(src=0; src<total; ++src, ++entry) {
        f=entry->f;
        if(f>=0) {
            f&=MBCS_FROM_U_EXT_MASK;
            entry->f=f;
        }
        if(!(f==0 || f==1 || (f==2 && entry->bLen==1) || f==4)) {
            continue; /* not relevant for fromUnicode */
        }

        indexMap[kept++]=src;

        if(entry->uLen>1) {
            /* recode all but the first code point to 16-bit Unicode */
            UChar32 *cps=UCM_GET_CODE_POINTS(table, entry);
            UChar *units=(UChar *)cps; /* destructive in-place recoding */
            UChar32 cp;
            int32_t cpIndex;
            int32_t unitIndex=2; /* the first code point occupies 16-bit indexes 0..1 */

            for(cpIndex=1; cpIndex<entry->uLen; ++cpIndex) {
                cp=cps[cpIndex];
                U16_APPEND_UNSAFE(units, unitIndex, cp);
            }

            /* the new length counts the first code point as 2 units */
            entry->uLen=(int8_t)unitIndex;
        }
    }

    return kept;
}
Exemplo n.º 8
0
	// Lower-cases a UTF-16 string in place: each code point is read with
	// U16_NEXT, mapped through u_tolower and written back over the same
	// buffer. The string length is not adjusted afterwards.
	void ICUUnicodeSupport::_toLowerCase<2>(StringHolder<2> _str)
	{
		if(_str.empty())
			return;

		uint16_t* units = &_str[0];
		const int32_t total = _str.length();
		int32_t readPos = 0;
		int32_t writePos = 0;
		while(readPos != total)
		{
			UChar32 cp;
			U16_NEXT(units, readPos, total, cp);
			const UChar32 lowered = u_tolower(cp);
			U16_APPEND_UNSAFE(units, writePos, lowered);
		}
	}
Exemplo n.º 9
0
/**
 * Decode a BOCU-1 byte sequence to a UTF-16 string.
 * Does not check for overflows of the output array, but otherwise useful
 * function: the caller must supply an array that is large enough.
 *
 * @param p pointer to input BOCU-1 bytes
 * @param length number of input bytes
 * @param s point to output UTF-16 string array
 * @return number of UChar code units output, or -1 if decodeBocu1()
 *         reported an encoding error (a result below -1)
 */
static int32_t
readString(const uint8_t *p, int32_t length, UChar *s) {
    Bocu1Rx rx={ 0, 0, 0 };
    int32_t c, i, sLength;

    i=sLength=0;
    while(i<length) {
        c=decodeBocu1(&rx, p[i++]);
        if(c<-1) {
            /* %ld needs a long argument; int32_t is not long on every platform */
            log_err("error: readString detects encoding error at string index %ld\n", (long)i);
            return -1;
        }
        /* negative results that are not errors produce no output for this byte */
        if(c>=0) {
            U16_APPEND_UNSAFE(s, sLength, c);
        }
    }
    return sLength;
}
Exemplo n.º 10
0
// Writes the sample character of `script` (the low 21 bits of its script
// properties) to dest as UTF-16 and terminates the result through
// u_terminateUChars(). A sample value of 0 yields an empty string. If the
// character does not fit into `capacity`, nothing is written but the required
// length is still passed on.
U_CAPI int32_t U_EXPORT2
uscript_getSampleString(UScriptCode script, UChar *dest, int32_t capacity, UErrorCode *pErrorCode) {
    if(U_FAILURE(*pErrorCode)) {
        return 0;
    }
    if((dest == NULL && capacity > 0) || capacity < 0) {
        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    const int32_t sample = getScriptProps(script) & 0x1fffff;
    int32_t units = 0;
    if(sample != 0) {
        units = U16_LENGTH(sample);
        if(units <= capacity) {
            int32_t pos = 0;
            U16_APPEND_UNSAFE(dest, pos, sample);
        }
    }
    return u_terminateUChars(dest, capacity, units, pErrorCode);
}
Exemplo n.º 11
0
// Converts the case of a null-terminated UTF-16 string in place by applying
// caseFunction to each code point.
//
// The contract is inherently limited: a case mapping may need more code units
// than the original text, and there is nowhere to put them. When a BMP
// character maps to a supplementary one and the write position is directly
// behind the read position, conversion stops there and the string is
// truncated at that point.
static void
doCaseConvert(
            XMLCh*          convertString,
            FunctionType    caseFunction)
{
    const unsigned int  len =
            XMLString::stringLen(convertString);

    size_t  src = 0;
    size_t  dst = 0;

    while(src < len)
    {
        // Fetch the next code point and map it.
        UChar32     before;
        U16_NEXT_UNSAFE(convertString, src, before);

        const UChar32   after = caseFunction(before);

        // Writing two units here would clobber the next unread character.
        const bool  wouldOverrun =
                !U_IS_BMP(after) && U_IS_BMP(before) && src - dst == 1;
        if (wouldOverrun)
        {
            break;
        }

        U16_APPEND_UNSAFE(convertString, dst, after);
    }

    convertString[dst] = 0;
}
Exemplo n.º 12
0
/**
 * Performs character mirroring.
 *
 * Copies the source text to the destination, replacing every code point
 * whose resolved level (from <code>ubidi_getLevelAt()</code>) is odd with
 * its <code>u_charMirror()</code> counterpart. Does nothing when
 * <code>UBIDI_DO_MIRRORING</code> is not set in the reordering options.
 * An empty source is handled without touching either buffer.
 *
 * @param pTransform Pointer to the <code>UBiDiTransform</code> structure.
 * @param pErrorCode Pointer to the error code value; set to
 * <code>U_BUFFER_OVERFLOW_ERROR</code> when the destination is smaller
 * than the source.
 *
 * @return Whether or not this function modifies the text. Besides the return
 * value, the caller should also check <code>U_SUCCESS(*pErrorCode)</code>.
 */
static UBool
action_mirror(UBiDiTransform *pTransform, UErrorCode *pErrorCode)
{
    UChar32 c;
    uint32_t i = 0, j = 0;
    if (0 == (pTransform->reorderingOptions & UBIDI_DO_MIRRORING)) {
        return FALSE;
    }
    if (pTransform->destSize < pTransform->srcLength) {
        *pErrorCode = U_BUFFER_OVERFLOW_ERROR;
        return FALSE;
    }
    /*
     * Test the length before the first read: a do/while would fetch src[0]
     * and store dest[0] even for an empty source.
     */
    while (i < pTransform->srcLength) {
        /* the level must be looked up before U16_NEXT advances i */
        UBool isOdd = ubidi_getLevelAt(pTransform->pBidi, i) & 1;
        U16_NEXT(pTransform->src, i, pTransform->srcLength, c);
        /* mirror once up front; the append macro evaluates its argument repeatedly */
        if (isOdd) {
            c = u_charMirror(c);
        }
        U16_APPEND_UNSAFE(pTransform->dest, j, c);
    }

    *pTransform->pDestLength = pTransform->srcLength;
    pTransform->reorderingOptions = UBIDI_REORDER_DEFAULT;
    return TRUE;
}
Exemplo n.º 13
0
/*
 * Append one row to the global unfold[] table: the case folding string s
 * (zero-padded to UGENCASE_UNFOLD_STRING_WIDTH units) followed by the code
 * point c in UTF-16. Each row is UGENCASE_UNFOLD_WIDTH units wide.
 * Exits the program if the string is too long or the table is full.
 *
 * @param c      code point stored after the string part of the row
 * @param s      case folding string
 * @param length number of UChars in s
 */
static void
addUnfolding(UChar32 c, const UChar *s, int32_t length) {
    int32_t i;

    if(length>UGENCASE_UNFOLD_STRING_WIDTH) {
        fprintf(stderr, "gencase error: case folding too long (length=%ld>%d=UGENCASE_UNFOLD_STRING_WIDTH)\n",
                (long)length, UGENCASE_UNFOLD_STRING_WIDTH);
        exit(U_INTERNAL_PROGRAM_ERROR);
    }
    /*
     * A whole row of UGENCASE_UNFOLD_WIDTH units is written below, so that is
     * the amount of free space that must remain, not just the string part.
     */
    if(unfoldTop > (LENGTHOF(unfold) - UGENCASE_UNFOLD_WIDTH)) {
        fprintf(stderr, "gencase error: too many multi-character case foldings\n");
        exit(U_BUFFER_OVERFLOW_ERROR);
    }
    /* clear the row first so that a short string is zero-padded */
    u_memset(unfold+unfoldTop, 0, UGENCASE_UNFOLD_WIDTH);
    u_memcpy(unfold+unfoldTop, s, length);

    /* the code point goes right after the fixed-width string field */
    i=unfoldTop+UGENCASE_UNFOLD_STRING_WIDTH;
    U16_APPEND_UNSAFE(unfold, i, c);

    ++unfoldRows;
    unfoldTop+=UGENCASE_UNFOLD_WIDTH;
}
Exemplo n.º 14
0
static int
toIDNA2003(const UStringPrepProfile *prep, UChar32 c, icu::UnicodeString &destString) {
    UChar src[2];
    int32_t srcLength=0;
    U16_APPEND_UNSAFE(src, srcLength, c);
    UChar *dest;
    int32_t destLength;
    dest=destString.getBuffer(32);
    if(dest==NULL) {
        return FALSE;
    }
    UErrorCode errorCode=U_ZERO_ERROR;
    destLength=usprep_prepare(prep, src, srcLength,
                              dest, destString.getCapacity(),
                              USPREP_DEFAULT, NULL, &errorCode);
    destString.releaseBuffer(destLength);
    if(errorCode==U_STRINGPREP_PROHIBITED_ERROR) {
        return -1;
    } else {
        // Returns FALSE=0 for U_STRINGPREP_UNASSIGNED_ERROR and processing errors,
        // TRUE=1 if c is valid or mapped.
        return U_SUCCESS(errorCode);
    }
}
Exemplo n.º 15
0
/*
 * Tests the string iteration macros of a UTrie.
 *
 * Builds a UTF-16 string from the last code point of each check range
 * (limit-1, skipping ranges whose limit is 0), remembers the expected value
 * of each, then walks the string forward with UTRIE_NEXT16/32 and backward
 * with UTRIE_PREVIOUS16/32 and compares the values and code units that the
 * macros deliver. For surrogate pairs the forward pass also cross-checks the
 * lead/trail and pair lookup macros. Mismatches are reported with log_err().
 *
 * @param testName         name used in error messages
 * @param trie             trie under test; data32!=NULL selects the 32-bit macros
 * @param checkRanges      ranges supplying code points and expected values
 * @param countCheckRanges number of entries in checkRanges
 *
 * NOTE(review): s[100] and values[30] are filled without bounds checks, so
 * this appears to rely on callers passing at most 30 ranges - confirm.
 */
static void
testTrieIteration(const char *testName,
                  const UTrie *trie,
                  const CheckRange checkRanges[], int32_t countCheckRanges) {
    UChar s[100];
    uint32_t values[30];

    const UChar *p, *limit;

    uint32_t value;
    UChar32 c;
    int32_t i, length, countValues;
    UChar c2;

    /* write a string */
    length=countValues=0;
    for(i=0; i<countCheckRanges; ++i) {
        c=checkRanges[i].limit;
        if(c!=0) {
            /* use the last code point inside the range */
            --c;
            U16_APPEND_UNSAFE(s, length, c);
            values[countValues++]=checkRanges[i].value;
        }
    }
    limit=s+length;

    /* try forward */
    p=s;
    i=0;
    while(p<limit) {
        /* preset c and c2 to a dummy value so stale results are noticeable */
        c=c2=0x33;
        if(trie->data32!=NULL) {
            UTRIE_NEXT32(trie, p, limit, c, c2, value);
        } else {
            UTRIE_NEXT16(trie, p, limit, c, c2, value);
        }
        if(value!=values[i]) {
            log_err("error: wrong value from UTRIE_NEXT(%s)(U+%04lx, U+%04lx): 0x%lx instead of 0x%lx\n",
                    testName, c, c2, value, values[i]);
        }
        /*
         * c2==0 is treated as "single code unit": c must equal the unit just
         * passed; otherwise (c, c2) must be the surrogate pair just passed
         */
        if(
            c2==0 ?
                c!=*(p-1) :
                !U16_IS_LEAD(c) || !U16_IS_TRAIL(c2) || c!=*(p-2) || c2!=*(p-1)
        ) {
            log_err("error: wrong (c, c2) from UTRIE_NEXT(%s): (U+%04lx, U+%04lx)\n",
                    testName, c, c2);
            /* NOTE(review): this continue skips the ++i at the end of the loop body - confirm intended */
            continue;
        }
        if(c2!=0) {
            /* repeat the lookup in two steps: lead surrogate, then trail via the folding offset */
            int32_t offset;

            if(trie->data32==NULL) {
                value=UTRIE_GET16_FROM_LEAD(trie, c);
                offset=trie->getFoldingOffset(value);
                if(offset>0) {
                    value=UTRIE_GET16_FROM_OFFSET_TRAIL(trie, offset, c2);
                } else {
                    value=trie->initialValue;
                }
            } else {
                value=UTRIE_GET32_FROM_LEAD(trie, c);
                offset=trie->getFoldingOffset(value);
                if(offset>0) {
                    value=UTRIE_GET32_FROM_OFFSET_TRAIL(trie, offset, c2);
                } else {
                    value=trie->initialValue;
                }
            }
            if(value!=values[i]) {
                log_err("error: wrong value from UTRIE_GETXX_FROM_OFFSET_TRAIL(%s)(U+%04lx, U+%04lx): 0x%lx instead of 0x%lx\n",
                        testName, c, c2, value, values[i]);
            }
        }
        if(c2!=0) {
            /* and once more with the single pair lookup macro */
            value=0x44;
            if(trie->data32==NULL) {
                UTRIE_GET16_FROM_PAIR(trie, c, c2, value);
            } else {
                UTRIE_GET32_FROM_PAIR(trie, c, c2, value);
            }
            if(value!=values[i]) {
                log_err("error: wrong value from UTRIE_GETXX_FROM_PAIR(%s)(U+%04lx, U+%04lx): 0x%lx instead of 0x%lx\n",
                        testName, c, c2, value, values[i]);
            }
        }
        ++i;
    }

    /* try backward */
    p=limit;
    i=countValues;
    while(s<p) {
        --i;
        c=c2=0x33;
        if(trie->data32!=NULL) {
            UTRIE_PREVIOUS32(trie, s, p, c, c2, value);
        } else {
            UTRIE_PREVIOUS16(trie, s, p, c, c2, value);
        }
        if(value!=values[i]) {
            log_err("error: wrong value from UTRIE_PREVIOUS(%s)(U+%04lx, U+%04lx): 0x%lx instead of 0x%lx\n",
                    testName, c, c2, value, values[i]);
        }
        /* going backward, p is expected to point at the unit(s) just delivered */
        if(
            c2==0 ?
                c!=*p:
                !U16_IS_LEAD(c) || !U16_IS_TRAIL(c2) || c!=*p || c2!=*(p+1)
        ) {
            log_err("error: wrong (c, c2) from UTRIE_PREVIOUS(%s): (U+%04lx, U+%04lx)\n",
                    testName, c, c2);
        }
    }
}
Exemplo n.º 16
0
/* internal function */
/*
 * Compare two UTF-16 strings after case folding, without building the folded
 * strings: code units are compared directly, and only where they differ is
 * the code point looked up with ucase_toFullFolding(). A folding result is
 * placed into a small per-string buffer (fold1/fold2) which then becomes the
 * current "level" to read from; when it is exhausted, reading resumes in the
 * original string.
 *
 * @param s1, s2           the strings to compare
 * @param length1, length2 their lengths; -1 means NUL-terminated
 * @param options          passed on to ucase_toFullFolding(); this function
 *                         itself tests _STRNCMP_STYLE (a NUL also ends a
 *                         string that has an explicit length) and
 *                         U_COMPARE_CODE_POINT_ORDER (surrogate fix-up of
 *                         the final difference)
 * @param pErrorCode       only tested here: if it already indicates a
 *                         failure the function returns 0
 * @return 0 if the strings are equal under folding, -1 if string 1 ends
 *         first, 1 if string 2 ends first, otherwise the difference of the
 *         first differing code units (after the optional fix-up)
 */
U_CFUNC int32_t
u_strcmpFold(const UChar *s1, int32_t length1,
             const UChar *s2, int32_t length2,
             uint32_t options,
             UErrorCode *pErrorCode) {
    const UCaseProps *csp;

    /* current-level start/limit - s1/s2 as current */
    const UChar *start1, *start2, *limit1, *limit2;

    /* case folding variables */
    const UChar *p;
    int32_t length;

    /* stacks of previous-level start/current/limit */
    CmpEquivLevel stack1[2], stack2[2];

    /* case folding buffers, only use current-level start/limit */
    UChar fold1[UCASE_MAX_STRING_LENGTH+1], fold2[UCASE_MAX_STRING_LENGTH+1];

    /* track which is the current level per string */
    int32_t level1, level2;

    /* current code units, and code points for lookups */
    UChar32 c1, c2, cp1, cp2;

    /* no argument error checking because this itself is not an API */

    /*
     * assume that at least the option U_COMPARE_IGNORE_CASE is set
     * otherwise this function would have to behave exactly as uprv_strCompare()
     */
    csp=ucase_getSingleton();
    if(U_FAILURE(*pErrorCode)) {
        return 0;
    }

    /* initialize */
    start1=s1;
    if(length1==-1) {
        limit1=NULL;
    } else {
        limit1=s1+length1;
    }

    start2=s2;
    if(length2==-1) {
        limit2=NULL;
    } else {
        limit2=s2+length2;
    }

    level1=level2=0;
    c1=c2=-1;

    /* comparison loop */
    for(;;) {
        /*
         * here a code unit value of -1 means "get another code unit"
         * below it will mean "this source is finished"
         */

        if(c1<0) {
            /* get next code unit from string 1, post-increment */
            for(;;) {
                if(s1==limit1 || ((c1=*s1)==0 && (limit1==NULL || (options&_STRNCMP_STYLE)))) {
                    if(level1==0) {
                        c1=-1;
                        break;
                    }
                } else {
                    ++s1;
                    break;
                }

                /* reached end of level buffer, pop one level */
                do {
                    --level1;
                    start1=stack1[level1].start;
                } while(start1==NULL);
                s1=stack1[level1].s;
                limit1=stack1[level1].limit;
            }
        }

        if(c2<0) {
            /* get next code unit from string 2, post-increment */
            for(;;) {
                if(s2==limit2 || ((c2=*s2)==0 && (limit2==NULL || (options&_STRNCMP_STYLE)))) {
                    if(level2==0) {
                        c2=-1;
                        break;
                    }
                } else {
                    ++s2;
                    break;
                }

                /* reached end of level buffer, pop one level */
                do {
                    --level2;
                    start2=stack2[level2].start;
                } while(start2==NULL);
                s2=stack2[level2].s;
                limit2=stack2[level2].limit;
            }
        }

        /*
         * compare c1 and c2
         * either variable c1, c2 is -1 only if the corresponding string is finished
         */
        if(c1==c2) {
            if(c1<0) {
                return 0;   /* c1==c2==-1 indicating end of strings */
            }
            c1=c2=-1;       /* make us fetch new code units */
            continue;
        } else if(c1<0) {
            return -1;      /* string 1 ends before string 2 */
        } else if(c2<0) {
            return 1;       /* string 2 ends before string 1 */
        }
        /* c1!=c2 && c1>=0 && c2>=0 */

        /* get complete code points for c1, c2 for lookups if either is a surrogate */
        cp1=c1;
        if(U_IS_SURROGATE(c1)) {
            UChar c;

            if(U_IS_SURROGATE_LEAD(c1)) {
                if(s1!=limit1 && U16_IS_TRAIL(c=*s1)) {
                    /* advance ++s1; only below if cp1 decomposes/case-folds */
                    cp1=U16_GET_SUPPLEMENTARY(c1, c);
                }
            } else /* isTrail(c1) */ {
                /* s1 was already incremented past c1, so the preceding unit is at s1-2 */
                if(start1<=(s1-2) && U16_IS_LEAD(c=*(s1-2))) {
                    cp1=U16_GET_SUPPLEMENTARY(c, c1);
                }
            }
        }

        cp2=c2;
        if(U_IS_SURROGATE(c2)) {
            UChar c;

            if(U_IS_SURROGATE_LEAD(c2)) {
                if(s2!=limit2 && U16_IS_TRAIL(c=*s2)) {
                    /* advance ++s2; only below if cp2 decomposes/case-folds */
                    cp2=U16_GET_SUPPLEMENTARY(c2, c);
                }
            } else /* isTrail(c2) */ {
                /* s2 was already incremented past c2, so the preceding unit is at s2-2 */
                if(start2<=(s2-2) && U16_IS_LEAD(c=*(s2-2))) {
                    cp2=U16_GET_SUPPLEMENTARY(c, c2);
                }
            }
        }

        /*
         * go down one level for each string
         * continue with the main loop as soon as there is a real change
         */

        /* a negative result from ucase_toFullFolding() is taken to mean "no folding" */
        if( level1==0 &&
            (length=ucase_toFullFolding(csp, (UChar32)cp1, &p, options))>=0
        ) {
            /* cp1 case-folds to the code point "length" or to p[length] */
            if(U_IS_SURROGATE(c1)) {
                if(U_IS_SURROGATE_LEAD(c1)) {
                    /* advance beyond source surrogate pair if it case-folds */
                    ++s1;
                } else /* isTrail(c1) */ {
                    /*
                     * we got a supplementary code point when hitting its trail surrogate,
                     * therefore the lead surrogate must have been the same as in the other string;
                     * compare this decomposition with the lead surrogate in the other string
                     * remember that this simulates bulk text replacement:
                     * the decomposition would replace the entire code point
                     */
                    --s2;
                    c2=*(s2-1);
                }
            }

            /* push current level pointers */
            stack1[0].start=start1;
            stack1[0].s=s1;
            stack1[0].limit=limit1;
            ++level1;

            /* copy the folding result to fold1[] */
            if(length<=UCASE_MAX_STRING_LENGTH) {
                u_memcpy(fold1, p, length);
            } else {
                /* here "length" is itself the folded code point; encode it and take its UTF-16 length */
                int32_t i=0;
                U16_APPEND_UNSAFE(fold1, i, length);
                length=i;
            }

            /* set next level pointers to case folding */
            start1=s1=fold1;
            limit1=fold1+length;

            /* get ready to read from decomposition, continue with loop */
            c1=-1;
            continue;
        }

        if( level2==0 &&
            (length=ucase_toFullFolding(csp, (UChar32)cp2, &p, options))>=0
        ) {
            /* cp2 case-folds to the code point "length" or to p[length] */
            if(U_IS_SURROGATE(c2)) {
                if(U_IS_SURROGATE_LEAD(c2)) {
                    /* advance beyond source surrogate pair if it case-folds */
                    ++s2;
                } else /* isTrail(c2) */ {
                    /*
                     * we got a supplementary code point when hitting its trail surrogate,
                     * therefore the lead surrogate must have been the same as in the other string;
                     * compare this decomposition with the lead surrogate in the other string
                     * remember that this simulates bulk text replacement:
                     * the decomposition would replace the entire code point
                     */
                    --s1;
                    c1=*(s1-1);
                }
            }

            /* push current level pointers */
            stack2[0].start=start2;
            stack2[0].s=s2;
            stack2[0].limit=limit2;
            ++level2;

            /* copy the folding result to fold2[] */
            if(length<=UCASE_MAX_STRING_LENGTH) {
                u_memcpy(fold2, p, length);
            } else {
                /* here "length" is itself the folded code point; encode it and take its UTF-16 length */
                int32_t i=0;
                U16_APPEND_UNSAFE(fold2, i, length);
                length=i;
            }

            /* set next level pointers to case folding */
            start2=s2=fold2;
            limit2=fold2+length;

            /* get ready to read from decomposition, continue with loop */
            c2=-1;
            continue;
        }

        /*
         * no decomposition/case folding, max level for both sides:
         * return difference result
         *
         * code point order comparison must not just return cp1-cp2
         * because when single surrogates are present then the surrogate pairs
         * that formed cp1 and cp2 may be from different string indexes
         *
         * example: { d800 d800 dc01 } vs. { d800 dc00 }, compare at second code units
         * c1=d800 cp1=10001 c2=dc00 cp2=10000
         * cp1-cp2>0 but c1-c2<0 and in fact in UTF-32 it is { d800 10001 } < { 10000 }
         *
         * therefore, use same fix-up as in ustring.c/uprv_strCompare()
         * except: uprv_strCompare() fetches c=*s while this functions fetches c=*s++
         * so we have slightly different pointer/start/limit comparisons here
         */

        if(c1>=0xd800 && c2>=0xd800 && (options&U_COMPARE_CODE_POINT_ORDER)) {
            /* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */
            if(
                (c1<=0xdbff && s1!=limit1 && U16_IS_TRAIL(*s1)) ||
                (U16_IS_TRAIL(c1) && start1!=(s1-1) && U16_IS_LEAD(*(s1-2)))
            ) {
                /* part of a surrogate pair, leave >=d800 */
            } else {
                /* BMP code point - may be surrogate code point - make <d800 */
                c1-=0x2800;
            }

            if(
                (c2<=0xdbff && s2!=limit2 && U16_IS_TRAIL(*s2)) ||
                (U16_IS_TRAIL(c2) && start2!=(s2-1) && U16_IS_LEAD(*(s2-2)))
            ) {
                /* part of a surrogate pair, leave >=d800 */
            } else {
                /* BMP code point - may be surrogate code point - make <d800 */
                c2-=0x2800;
            }
        }

        return c1-c2;
    }
}
Exemplo n.º 17
0
/*
 * Record the mapping of one code point for the StringPrep data.
 *
 * Depending on the UTF-16 length of the mapping, the result is stored
 * - directly in the trie (sprepTrie) as a marker word when the mapping is
 *   empty,
 * - directly in the trie as a delta when the mapping is a single code unit
 *   and the delta fits the SPREP_DELTA_RANGE limits and the type threshold,
 * - otherwise as a heap-allocated UTF-16 string in the global hash table,
 *   keyed by the code point.
 *
 * Also updates the globals mappingDataCapacity and maxLength, and lazily
 * creates the global hashTable. Any failure (conflicting type, trie or hash
 * table error, out of memory) prints a message to stderr and exits.
 *
 * @param codepoint the code point being mapped
 * @param mapping   the code points it maps to
 * @param length    number of entries in mapping
 * @param type      stored with the mapping in the hash table entry
 * @param status    ICU error code, passed to the uhash calls
 */
extern void
storeMapping(uint32_t codepoint, uint32_t* mapping,int32_t length,
             UStringPrepType type, UErrorCode* status){


    UChar* map = NULL;
    int16_t adjustedLen=0, i, j;
    uint16_t trieWord = 0;
    ValueStruct *value = NULL;
    uint32_t savedTrieWord = 0;

    /* initialize the hashtable */
    if(hashTable==NULL){
        hashTable = uhash_open(hashEntry, compareEntries, NULL, status);
        /* do not touch the table if it could not be created */
        if(hashTable==NULL || U_FAILURE(*status)){
            fprintf(stderr, "Failed to open the hashtable. Error: %s\n", u_errorName(*status));
            exit(U_FAILURE(*status) ? *status : U_MEMORY_ALLOCATION_ERROR);
        }
        uhash_setValueDeleter(hashTable, valueDeleter);
    }

    /* figure out if the code point has type already stored */
    savedTrieWord= utrie_get32(sprepTrie,codepoint,NULL);
    if(savedTrieWord!=0){
        if((savedTrieWord- _SPREP_TYPE_THRESHOLD) == USPREP_PROHIBITED){
            /* turn on the first bit in trie word */
            /* NOTE(review): every later use of trieWord assigns it afresh, so this bit looks unused - confirm */
            trieWord += 0x01;
        }else{
            /*
             * the codepoint has value something other than prohibited
             * and a mapping .. error!
             */
            fprintf(stderr,"Type for codepoint \\U%08X already set!.\n", (int)codepoint);
            exit(U_ILLEGAL_ARGUMENT_ERROR);
        }
    }

    /* figure out the real length */
    for(i=0; i<length; i++){
        adjustedLen += U16_LENGTH(mapping[i]);
    }

    if(adjustedLen == 0){
        /* empty mapping: store the maximum index value as a marker */
        trieWord = (uint16_t)(_SPREP_MAX_INDEX_VALUE << 2);
        /* make sure that the value of trieWord is less than the threshold */
        if(trieWord < _SPREP_TYPE_THRESHOLD){
            /* now set the value in the trie */
            if(!utrie_set32(sprepTrie,codepoint,trieWord)){
                fprintf(stderr,"Could not set the value for code point.\n");
                exit(U_ILLEGAL_ARGUMENT_ERROR);
            }
            /* value is set so just return */
            return;
        }else{
            fprintf(stderr,"trieWord cannot contain value greater than threshold 0x%04X.\n",_SPREP_TYPE_THRESHOLD);
            exit(U_ILLEGAL_CHAR_FOUND);
        }
    }

    if(adjustedLen == 1){
        /* calculate the delta */
        int16_t delta = (int16_t)((int32_t)codepoint - (int16_t) mapping[0]);
        if(delta >= SPREP_DELTA_RANGE_NEGATIVE_LIMIT && delta <= SPREP_DELTA_RANGE_POSITIVE_LIMIT){

            trieWord = delta << 2;


            /* make sure that the second bit is OFF */
            if((trieWord & 0x02) != 0 ){
                fprintf(stderr,"The second bit in the trie word is not zero while storing a delta.\n");
                exit(U_INTERNAL_PROGRAM_ERROR);
            }
            /* make sure that the value of trieWord is less than the threshold */
            if(trieWord < _SPREP_TYPE_THRESHOLD){
                /* now set the value in the trie */
                if(!utrie_set32(sprepTrie,codepoint,trieWord)){
                    fprintf(stderr,"Could not set the value for code point.\n");
                    exit(U_ILLEGAL_ARGUMENT_ERROR);
                }
                /* value is set so just return */
                return;
            }
        }
        /*
         * if the delta is not in the given range or if the trieWord is larger than the threshold
         * just fall through for storing the mapping in the mapping table
         */
    }

    /* one extra unit so that the stored string stays zero-terminated */
    map = (UChar*) uprv_calloc(adjustedLen + 1, U_SIZEOF_UCHAR);
    if(map == NULL){
        fprintf(stderr, "Could not allocate memory for the mapping string.\n");
        exit(U_MEMORY_ALLOCATION_ERROR);
    }

    for (i=0, j=0; i<length; i++) {
        U16_APPEND_UNSAFE(map, j, mapping[i]);
    }

    value = (ValueStruct*) uprv_malloc(sizeof(ValueStruct));
    if(value == NULL){
        fprintf(stderr, "Could not allocate memory for the mapping entry.\n");
        exit(U_MEMORY_ALLOCATION_ERROR);
    }
    value->mapping = map;
    value->type    = type;
    value->length  = adjustedLen;
    /* long mappings take one additional unit of mapping data */
    if(value->length > _SPREP_MAX_INDEX_TOP_LENGTH){
        mappingDataCapacity++;
    }
    if(maxLength < value->length){
        maxLength = value->length;
    }
    uhash_iput(hashTable,codepoint,value,status);
    mappingDataCapacity += adjustedLen;

    if(U_FAILURE(*status)){
        fprintf(stderr, "Failed to put entries into the hastable. Error: %s\n", u_errorName(*status));
        exit(*status);
    }
}
Exemplo n.º 18
0
/*
 * Write one run to the destination in forward (source) order.
 *
 * Besides plain LTR runs this also handles RTL runs when
 * UBIDI_OUTPUT_REVERSE is set on ubidi_writeReordered(): semantically such
 * a run is written in reverse and later reversed once more, so it is
 * simply written forward right away. If that run was to be mirrored, the
 * mirroring has to happen here because the implicit second reversal must
 * not do it. Mirroring in what looks like LTR output is therefore only a
 * consequence of writing RTL output in reverse.
 *
 * Only UBIDI_REMOVE_BIDI_CONTROLS and UBIDI_DO_MIRRORING are examined in
 * options; each combination has its own specialized loop.
 *
 * Return value:
 * - without UBIDI_REMOVE_BIDI_CONTROLS: srcLength
 * - with UBIDI_REMOVE_BIDI_CONTROLS only: the number of code units kept
 * - with both options: the destination index after the last append
 * If dest is too small, *pErrorCode is set to U_BUFFER_OVERFLOW_ERROR and
 * the required length is returned instead (srcLength, or a preflighted
 * count of the code units that are not BiDi controls).
 *
 * NOTE(review): every loop reads src[0] before it tests the length, so
 * callers presumably never pass an empty run - confirm against callers.
 */
static int32_t
doWriteForward(const UChar *src, int32_t srcLength,
               UChar *dest, int32_t destSize,
               uint16_t options,
               UErrorCode *pErrorCode) {
    /* the two option bits that select the copy strategy */
    const int32_t mode=options&(UBIDI_REMOVE_BIDI_CONTROLS|UBIDI_DO_MIRRORING);

    if(mode==0) {
        /* plain copy: output and input have the same number of code units */
        int32_t pos=0;

        if(srcLength>destSize) {
            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
            return srcLength;
        }
        do {
            dest[pos]=src[pos];
            ++pos;
        } while(pos<srcLength);
        return srcLength;
    } else if(mode==UBIDI_DO_MIRRORING) {
        /* copy code point by code point, replacing each with its mirror image */
        int32_t readPos=0, writePos=0;
        UChar32 c;

        if(srcLength>destSize) {
            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
            return srcLength;
        }
        do {
            U16_NEXT(src, readPos, srcLength, c);
            c=u_charMirror(c);
            U16_APPEND_UNSAFE(dest, writePos, c);
        } while(readPos<srcLength);
        return srcLength;
    } else if(mode==UBIDI_REMOVE_BIDI_CONTROLS) {
        /* copy code units, dropping BiDi control characters */
        int32_t kept=0; /* code units that are not BiDi controls, so far */
        UChar unit;

        do {
            unit=*src++;
            if(!IS_BIDI_CONTROL_CHAR(unit)) {
                if(kept>=destSize) {
                    *pErrorCode=U_BUFFER_OVERFLOW_ERROR;

                    /* count the unit that did not fit, then preflight the rest */
                    ++kept;
                    while(--srcLength>0) {
                        unit=*src++;
                        if(!IS_BIDI_CONTROL_CHAR(unit)) {
                            ++kept;
                        }
                    }
                    return kept;
                }
                dest[kept++]=unit;
            }
        } while(--srcLength>0);
        return kept;
    } else {
        /* both options: drop BiDi control characters and mirror the rest */
        int32_t needed=0; /* source code units of all kept code points, so far */
        int32_t unitCount, writePos=0;
        UChar32 c;

        do {
            /* decode one code point and step past it */
            unitCount=0;
            U16_NEXT(src, unitCount, srcLength, c);
            src+=unitCount;
            srcLength-=unitCount;

            if(!IS_BIDI_CONTROL_CHAR(c)) {
                needed+=unitCount;
                if(needed>destSize) {
                    *pErrorCode=U_BUFFER_OVERFLOW_ERROR;

                    /* preflight the length: count remaining non-control code units */
                    for(; srcLength>0; --srcLength) {
                        c=*src++;
                        if(!IS_BIDI_CONTROL_CHAR(c)) {
                            ++needed;
                        }
                    }
                    return needed;
                }
                c=u_charMirror(c);
                U16_APPEND_UNSAFE(dest, writePos, c);
            }
        } while(srcLength>0);
        return writePos;
    }
}
Exemplo n.º 19
0
/*
 * Write one RTL run to the destination in reverse order.
 *
 * The run is reversed by code points, not by code units, so that Unicode
 * characters stay intact. The source is read backward; the code units of
 * one code point (and, if requested, of the combining characters that
 * follow it) are collected as a segment, and each segment is copied to the
 * destination in ascending order.
 *
 * options control whether combining characters stay behind their base
 * character (UBIDI_KEEP_BASE_COMBINING), whether BiDi control characters
 * are dropped (UBIDI_REMOVE_BIDI_CONTROLS), and whether characters are
 * replaced by their mirror images (UBIDI_DO_MIRRORING).
 *
 * Returns the destination length of the run: srcLength, or the number of
 * code units that are not BiDi controls when those are removed. If
 * destSize is smaller than that, *pErrorCode is set to
 * U_BUFFER_OVERFLOW_ERROR, nothing is written, and the same length is
 * returned.
 */
static int32_t
doWriteReverse(const UChar *src, int32_t srcLength,
               UChar *dest, int32_t destSize,
               uint16_t options,
               UErrorCode *pErrorCode) {
    int32_t segmentLimit, pos, destLength;
    UChar32 c;

    /* specialized loops for the simple option combinations */
    switch(options&(UBIDI_REMOVE_BIDI_CONTROLS|UBIDI_DO_MIRRORING|UBIDI_KEEP_BASE_COMBINING)) {
    case 0:
        /*
         * No removal, no mirroring, no grouping of combining characters:
         * the output is exactly as long as the input.
         */
        destLength=srcLength;
        if(destLength>destSize) {
            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
            return destLength;
        }

        do {
            /* segmentLimit is just past the last code unit of this segment */
            segmentLimit=srcLength;

            /* step back over one code point; at least one code unit */
            U16_BACK_1(src, 0, srcLength);

            /* copy its code units in ascending order */
            for(pos=srcLength; pos<segmentLimit; ++pos) {
                *dest++=src[pos];
            }
        } while(srcLength>0);
        break;
    case UBIDI_KEEP_BASE_COMBINING:
        /*
         * Same output length as the input and no mirroring, but combining
         * characters must stay with their base character.
         */
        destLength=srcLength;
        if(destLength>destSize) {
            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
            return destLength;
        }

        do {
            /* segmentLimit is just past the last code unit of this segment */
            segmentLimit=srcLength;

            /* step back until a non-combining character (or the run start) is reached */
            do {
                U16_PREV(src, 0, srcLength, c);
            } while(srcLength>0 && IS_COMBINING(u_charType(c)));

            /* copy the whole "user character" in ascending order */
            for(pos=srcLength; pos<segmentLimit; ++pos) {
                *dest++=src[pos];
            }
        } while(srcLength>0);
        break;
    default: {
        /*
         * Most general and slowest path: mirroring, removal of BiDi
         * controls and grouping of combining characters, as requested.
         */
        const int32_t removeControls=options&UBIDI_REMOVE_BIDI_CONTROLS;

        if(removeControls) {
            /* the destination length excludes the BiDi control characters */
            int32_t left=srcLength;
            UChar unit;

            destLength=0;
            do {
                unit=*src++;
                if(!IS_BIDI_CONTROL_CHAR(unit)) {
                    ++destLength;
                }
            } while(--left>0);
            /* rewind src after the counting pass */
            src-=srcLength;
        } else {
            destLength=srcLength;
        }

        if(destLength>destSize) {
            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
            return destLength;
        }

        do {
            /* segmentLimit is just past the last code unit of this segment */
            segmentLimit=srcLength;

            /* step back over one code point */
            U16_PREV(src, 0, srcLength, c);
            if(options&UBIDI_KEEP_BASE_COMBINING) {
                /* keep stepping back while c is a combining character */
                while(srcLength>0 && IS_COMBINING(u_charType(c))) {
                    U16_PREV(src, 0, srcLength, c);
                }
            }

            /* a BiDi control that is to be removed is simply not copied */
            if(!(removeControls && IS_BIDI_CONTROL_CHAR(c))) {
                pos=srcLength;
                if(options&UBIDI_DO_MIRRORING) {
                    /* mirror only the base character, then skip it in the source */
                    int32_t mirroredUnits=0;
                    c=u_charMirror(c);
                    U16_APPEND_UNSAFE(dest, mirroredUnits, c);
                    dest+=mirroredUnits;
                    pos+=mirroredUnits;
                }
                /* copy whatever is left of this "user character" */
                while(pos<segmentLimit) {
                    *dest++=src[pos++];
                }
            }
        } while(srcLength>0);
        break;
    }
    } /* end of switch */

    return destLength;
}