Beispiel #1
0
static void TestCodeUnitValues()
{
    static const uint8_t codeunit[]={0x00, 0x65, 0x7e, 0x7f, 0xc0, 0xc4, 0xf0, 0xfd, 0x80, 0x81, 0xbc, 0xbe,};
    
    int16_t i;
    for(i=0; i<sizeof(codeunit)/sizeof(codeunit[0]); i++){
        uint8_t c=codeunit[i];
        log_verbose("Testing code unit value of %x\n", c);
        if(i<4){
            if(!UTF8_IS_SINGLE(c) || UTF8_IS_LEAD(c) || UTF8_IS_TRAIL(c) || !U8_IS_SINGLE(c) || U8_IS_LEAD(c) || U8_IS_TRAIL(c)){
                log_err("ERROR: 0x%02x is a single byte but results in single: %c lead: %c trail: %c\n",
                    c, UTF8_IS_SINGLE(c) ? 'y' : 'n', UTF8_IS_LEAD(c) ? 'y' : 'n', UTF8_IS_TRAIL(c) ? 'y' : 'n');
            }
        } else if(i< 8){
            if(!UTF8_IS_LEAD(c) || UTF8_IS_SINGLE(c) || UTF8_IS_TRAIL(c) || !U8_IS_LEAD(c) || U8_IS_SINGLE(c) || U8_IS_TRAIL(c)){
                log_err("ERROR: 0x%02x is a lead byte but results in single: %c lead: %c trail: %c\n",
                    c, UTF8_IS_SINGLE(c) ? 'y' : 'n', UTF8_IS_LEAD(c) ? 'y' : 'n', UTF8_IS_TRAIL(c) ? 'y' : 'n');
            }
        } else if(i< 12){
            if(!UTF8_IS_TRAIL(c) || UTF8_IS_SINGLE(c) || UTF8_IS_LEAD(c) || !U8_IS_TRAIL(c) || U8_IS_SINGLE(c) || U8_IS_LEAD(c)){
                log_err("ERROR: 0x%02x is a trail byte but results in single: %c lead: %c trail: %c\n",
                    c, UTF8_IS_SINGLE(c) ? 'y' : 'n', UTF8_IS_LEAD(c) ? 'y' : 'n', UTF8_IS_TRAIL(c) ? 'y' : 'n');
            }
        }
    }
}
Beispiel #2
0
static int utf8_is_valid(const unsigned char *buf, size_t bufsize) {
    unsigned int i = 0;
    while (i < bufsize) {
        unsigned char c = buf[i];
        if (c > 127) {
            if (!U8_IS_LEAD(c)) {
                return 0;
            }
            i++;
            unsigned int char_length;

			char_length = U8_COUNT_TRAIL_BYTES(c);
            if (char_length > (bufsize - i) - 1) {
                return 0;
            }
            while (char_length > 0) {
                assert(i < bufsize);
                if (!U8_IS_TRAIL(buf[i])) {
                    return 0;
                }
                char_length--;
                i++;
            }
            continue;
        }
        i++;
    }
    return utf8_iterate_codepoints(buf, bufsize,
        &utf8_codepoint_is_categorized);
}
Beispiel #3
0
static void
str_write_java(const UChar *src, int32_t srcLen, UBool printEndLine, UErrorCode *status) {

    uint32_t length = srcLen*8;
    uint32_t bufLen = 0;
    uint32_t columnCount;
    char* buf = (char*) malloc(sizeof(char)*length);

    if(buf == NULL) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }

    columnCount = getColumnCount(srcLen);
    memset(buf,0,length);

    bufLen = uCharsToChars(buf,length,src,srcLen,status);
    // buflen accounts for extra bytes added due to multi byte encoding of
    //        non ASCII characters
    if(printEndLine)
        write_tabs(out);

    if(U_FAILURE(*status)){
        uprv_free(buf);
        return;
    }

    if(bufLen+(tabCount*4) > columnCount  ){
        uint32_t len = 0;
        char* current = buf;
        uint32_t add;
        while(len < bufLen){
            add = columnCount-(tabCount*4)-5/* for ", +\n */;
            current = buf +len;
            if (add < (bufLen-len)) {
                uint32_t idx = strrch(current,add,'\\');
                if (idx > add) {
                    idx = add;
                } else {
                    int32_t num =idx-1;
                    uint32_t seqLen;
                    while(num>0){
                        if(current[num]=='\\'){
                            num--;
                        }else{
                            break;
                        }
                    }
                    if ((idx-num)%2==0) {
                        idx--;
                    }
                    seqLen = (current[idx+1]=='u') ? 6 : 2;
                    if ((add-idx) < seqLen) {
                        add = idx + seqLen;
                    }
                }
            }
            T_FileStream_write(out,"\"",1);
            uint32_t byteIndex = 0;
            uint32_t trailBytes = 0;
            if(len+add<bufLen){
                // check the trail bytes to be added to the output line
                while (byteIndex < add) {
                    if (U8_IS_LEAD(*(current + byteIndex))) {
                        trailBytes = U8_COUNT_TRAIL_BYTES(*(current + byteIndex));
                        add += trailBytes;
                    }
                    byteIndex++;
                }
                T_FileStream_write(out,current,add);
                if (len + add < bufLen) {
                    T_FileStream_write(out,"\" +\n",4);
                    write_tabs(out);
                }
            }else{
                T_FileStream_write(out,current,bufLen-len);
            }
            len+=add;
        }
    }else{
        T_FileStream_write(out,"\"",1);
        T_FileStream_write(out, buf,bufLen);
    }
    if(printEndLine){
        T_FileStream_write(out,"\",\n",3);
    }else{
        T_FileStream_write(out,"\"",1);
    }
    uprv_free(buf);
}