Esempio n. 1
0
/* Decodes all the buffers we have, and returns a string of all decoded chars.
 * There may still be more to read after this, due to incomplete multi-byte
 * or multi-codepoint sequences that are not yet completely processed. */
MVMString * MVM_string_decodestream_get_available(MVMThreadContext *tc, MVMDecodeStream *ds) {
    if (ds->bytes_head) {
        ds->result_size_guess = ds->bytes_head->length;
        run_decode(tc, ds, NULL, NULL, DECODE_NOT_EOF);
    }
    return get_all_in_buffer(tc, ds);
}
Esempio n. 2
0
MVMString * MVM_string_decodestream_get_until_sep(MVMThreadContext *tc, MVMDecodeStream *ds,
                                                  MVMDecodeStreamSeparators *sep_spec, MVMint32 chomp) {
    MVMint32 sep_loc, sep_length;

    /* Look for separator, trying more decoding if it fails. We get the place
     * just beyond the separator, so can use take_chars to get what's need.
     * Note that decoders are only responsible for finding the final char of
     * the separator, so we may need to loop a few times around this. */
    sep_loc = find_separator(tc, ds, sep_spec, &sep_length, 0);
    while (!sep_loc) {
        MVMuint32 decode_outcome = run_decode(tc, ds, NULL, sep_spec, DECODE_NOT_EOF);
        if (decode_outcome == RUN_DECODE_NOTHING_DECODED)
            break;
        if (decode_outcome == RUN_DECODE_STOPPER_REACHED)
            sep_loc = find_separator(tc, ds, sep_spec, &sep_length, 0);
    }
    if (sep_loc) {
        /* Use this line length as a guesstimate of the next, unless it's tiny
         * in which case we treat it as an outlier (probably an empty line or
         * some such). Also round up and to a nice power of 2. */
        if (sep_loc > 32)
            ds->result_size_guess = (sep_loc << 1) & ~0xF;
        return take_chars(tc, ds, sep_loc, chomp ? sep_length : 0);
    }
    else {
        return NULL;
    }
}
Esempio n. 3
0
MVMString * MVM_string_decodestream_get_chars(MVMThreadContext *tc, MVMDecodeStream *ds,
                                              MVMint32 chars, MVMint64 eof) {
    MVMint32 missing;

    /* If we request nothing, give empty string. */
    if (chars == 0)
        return tc->instance->str_consts.empty;

    /* If we don't already have enough chars, try and decode more. */
    missing = missing_chars(tc, ds, chars);
    ds->result_size_guess = missing;
    if (missing)
        run_decode(tc, ds, &missing, NULL, DECODE_NOT_EOF);

    /* If we've got enough, assemble a string. Otherwise, flag EOF and retry,
     * falling back to returning what's available. */
    if (missing_chars(tc, ds, chars) == 0) {
        return take_chars(tc, ds, chars, 0);
    }
    else if (eof) {
        reached_eof(tc, ds);
        return missing_chars(tc, ds, chars) == 0
            ? take_chars(tc, ds, chars, 0)
            : MVM_string_decodestream_get_all(tc, ds);
    }
    else {
        return NULL;
    }
}
Esempio n. 4
0
MVMString * MVM_string_decodestream_get_until_sep(MVMThreadContext *tc, MVMDecodeStream *ds, MVMGrapheme32 sep) {
    MVMint32 sep_loc;

    /* Look for separator, trying more decoding if it fails. We get the place
     * just beyond the separator, so can use take_chars to get what's need. */
    sep_loc = find_separator(tc, ds, sep);
    if (!sep_loc) {
        run_decode(tc, ds, NULL, &sep);
        sep_loc = find_separator(tc, ds, sep);
    }
    if (sep_loc)
        return take_chars(tc, ds, sep_loc);
    else
        return NULL;
}
Esempio n. 5
0
/* In situations where we have hit EOF, we need to decode what's left and flush
 * the normalization buffer also. */
static void reached_eof(MVMThreadContext *tc, MVMDecodeStream *ds) {
    /* Decode all the things. */
    if (ds->bytes_head)
        run_decode(tc, ds, NULL, NULL);

    /* If there's some things left in the normalization buffer, take them. */
    MVM_unicode_normalizer_eof(tc, &(ds->norm));
    if (MVM_unicode_normalizer_available(tc, &(ds->norm))) {
        MVMint32 ready = MVM_unicode_normalizer_available(tc, &(ds->norm));
        MVMGrapheme32 *buffer = MVM_malloc(ready * sizeof(MVMGrapheme32));
        MVMint32 count = 0;
        while (ready--)
            buffer[count++] = MVM_unicode_normalizer_get_grapheme(tc, &(ds->norm));
        MVM_string_decodestream_add_chars(tc, ds, buffer, count);
    }
}
Esempio n. 6
0
MVMString * MVM_string_decodestream_get_chars(MVMThreadContext *tc, MVMDecodeStream *ds, MVMint32 chars) {
    MVMint32 missing;

    /* If we request nothing, give empty string. */
    if (chars == 0)
        return tc->instance->str_consts.empty;

    /* If we don't already have enough chars, try and decode more. */
    missing = missing_chars(tc, ds, chars);
    if (missing)
        run_decode(tc, ds, &missing, NULL);

    /* If we've got enough, assemble a string. Otherwise, give up. */
    if (missing_chars(tc, ds, chars) == 0)
        return take_chars(tc, ds, chars, 0);
    else
        return NULL;
}
Esempio n. 7
0
MVMString * MVM_string_decodestream_get_until_sep(MVMThreadContext *tc, MVMDecodeStream *ds,
                                                  MVMDecodeStreamSeparators *sep_spec, MVMint32 chomp) {
    MVMint32 sep_loc, sep_length;

    /* Look for separator, trying more decoding if it fails. We get the place
     * just beyond the separator, so can use take_chars to get what's need.
     * Note that decoders are only responsible for finding the final char of
     * the separator, so we may need to loop a few times around this. */
    sep_loc = find_separator(tc, ds, sep_spec, &sep_length);
    while (!sep_loc) {
        if (!run_decode(tc, ds, NULL, sep_spec))
            break;
        sep_loc = find_separator(tc, ds, sep_spec, &sep_length);
    }
    if (sep_loc)
        return take_chars(tc, ds, sep_loc, chomp ? sep_length : 0);
    else
        return NULL;
}
Esempio n. 8
0
    void run_benchmark()
    {
        gauge::config_set cs = get_current_configuration();

        std::string type = cs.get_value<std::string>("type");

        if (type == "encoder")
        {
            run_encode();
        }
        else if (type == "decoder")
        {
            run_decode();
        }
        else
        {
            assert(0);
        }
    }
Esempio n. 9
0
/* Decodes all the buffers, producing a string containing all the decoded
 * characters. */
MVMString * MVM_string_decodestream_get_all(MVMThreadContext *tc, MVMDecodeStream *ds) {
    MVMString *result = (MVMString *)MVM_repr_alloc_init(tc, tc->instance->VMString);
    result->body.storage_type = MVM_STRING_GRAPHEME_32;

    /* Decode all the things. */
    run_decode(tc, ds, NULL, NULL);

    /* If there's some things left in the normalization buffer, take them. */
    MVM_unicode_normalizer_eof(tc, &(ds->norm));
    if (MVM_unicode_normalizer_available(tc, &(ds->norm))) {
        MVMint32 ready = MVM_unicode_normalizer_available(tc, &(ds->norm));
        MVMGrapheme32 *buffer = MVM_malloc(ready * sizeof(MVMGrapheme32));
        MVMint32 count = 0;
        while (ready--)
            buffer[count++] = MVM_unicode_normalizer_get_grapheme(tc, &(ds->norm));
        MVM_string_decodestream_add_chars(tc, ds, buffer, count);
    }

    /* If there's no codepoint buffer, then return the empty string. */
    if (!ds->chars_head) {
        result->body.storage.blob_32 = NULL;
        result->body.num_graphs      = 0;
    }

    /* If there's exactly one resulting codepoint buffer and we swallowed none
     * of it, just use it. */
    else if (ds->chars_head == ds->chars_tail && ds->chars_head_pos == 0) {
        /* Set up result string. */
        result->body.storage.blob_32 = ds->chars_head->chars;
        result->body.num_graphs      = ds->chars_head->length;

        /* Don't free the buffer's memory itself, just the holder, as we
         * stole that for the buffer into the string above. */
        MVM_free(ds->chars_head);
        ds->chars_head = ds->chars_tail = NULL;
    }

    /* Otherwise, need to assemble all the things. */
    else {
        /* Calculate length. */
        MVMint32 length = 0, pos = 0;
        MVMDecodeStreamChars *cur_chars = ds->chars_head;
        while (cur_chars) {
            if (cur_chars == ds->chars_head)
                length += cur_chars->length - ds->chars_head_pos;
            else
                length += cur_chars->length;
            cur_chars = cur_chars->next;
        }

        /* Allocate a result buffer of the right size. */
        result->body.storage.blob_32 = MVM_malloc(length * sizeof(MVMGrapheme32));
        result->body.num_graphs      = length;

        /* Copy all the things into the target, freeing as we go. */
        cur_chars = ds->chars_head;
        while (cur_chars) {
            if (cur_chars == ds->chars_head) {
                MVMint32 to_copy = ds->chars_head->length - ds->chars_head_pos;
                memcpy(result->body.storage.blob_32 + pos, cur_chars->chars + ds->chars_head_pos,
                    cur_chars->length * sizeof(MVMGrapheme32));
                pos += to_copy;
            }
            else {
                memcpy(result->body.storage.blob_32 + pos, cur_chars->chars,
                    cur_chars->length * sizeof(MVMGrapheme32));
                pos += cur_chars->length;
            }
            cur_chars = cur_chars->next;
        }
        ds->chars_head = ds->chars_tail = NULL;
    }

    return result;
}