/* Runs the decoder over any byte buffers currently queued on the stream and
 * returns a string built from everything sitting in the decoded-chars buffer.
 * Further input may still be outstanding afterwards, because incomplete
 * multi-byte or multi-codepoint sequences are not fully processed yet. */
MVMString * MVM_string_decodestream_get_available(MVMThreadContext *tc, MVMDecodeStream *ds) {
    if (ds->bytes_head != NULL) {
        /* Seed the result size guess from the first pending byte buffer
         * before decoding in non-EOF mode. */
        ds->result_size_guess = ds->bytes_head->length;
        run_decode(tc, ds, NULL, NULL, DECODE_NOT_EOF);
    }

    return get_all_in_buffer(tc, ds);
}
/* Returns the decoded characters up to and including the next separator
 * described by sep_spec, or NULL when no separator can be located yet. When
 * chomp is non-zero the separator's length is passed to take_chars as the
 * number of chars to strip. */
MVMString * MVM_string_decodestream_get_until_sep(MVMThreadContext *tc, MVMDecodeStream *ds,
                                                  MVMDecodeStreamSeparators *sep_spec,
                                                  MVMint32 chomp) {
    MVMint32 sep_length;
    /* find_separator yields the position just past the separator (0 when
     * there is none), which is exactly the count take_chars needs. */
    MVMint32 end_pos = find_separator(tc, ds, sep_spec, &sep_length, 0);

    /* Decoders only promise to stop on the final char of a separator, so a
     * stop does not guarantee a full match; keep decoding and re-checking
     * until we find one or the decoder makes no further progress. */
    while (end_pos == 0) {
        MVMuint32 outcome = run_decode(tc, ds, NULL, sep_spec, DECODE_NOT_EOF);
        if (outcome == RUN_DECODE_NOTHING_DECODED)
            break;
        if (outcome == RUN_DECODE_STOPPER_REACHED)
            end_pos = find_separator(tc, ds, sep_spec, &sep_length, 0);
    }

    if (end_pos == 0)
        return NULL;

    /* Use this line's length as the size guess for the next result, ignoring
     * short lines (32 chars or fewer) as outliers. The guess is twice the
     * length with the low four bits cleared, i.e. a multiple of 16. */
    if (end_pos > 32)
        ds->result_size_guess = (end_pos << 1) & ~0xF;

    return take_chars(tc, ds, end_pos, chomp ? sep_length : 0);
}
/* Tries to produce a string of exactly `chars` characters from the stream.
 * Returns the empty string constant when zero chars are requested. If not
 * enough can be decoded and `eof` is zero, returns NULL. If `eof` is
 * non-zero, the stream is flushed as at end of input and either the
 * requested amount or, failing that, everything remaining is returned. */
MVMString * MVM_string_decodestream_get_chars(MVMThreadContext *tc, MVMDecodeStream *ds,
                                              MVMint32 chars, MVMint64 eof) {
    MVMint32 shortfall;

    /* Nothing requested: hand back the shared empty string. */
    if (chars == 0)
        return tc->instance->str_consts.empty;

    /* Work out how many chars we are short, record that as the size guess,
     * and decode more only if something is actually missing. */
    shortfall = missing_chars(tc, ds, chars);
    ds->result_size_guess = shortfall;
    if (shortfall != 0)
        run_decode(tc, ds, &shortfall, NULL, DECODE_NOT_EOF);

    /* Enough is buffered now: assemble and return it. */
    if (missing_chars(tc, ds, chars) == 0)
        return take_chars(tc, ds, chars, 0);

    /* Still short and the caller has not signalled EOF: give up for now. */
    if (!eof)
        return NULL;

    /* At EOF: flush what is left, then retry, falling back to returning
     * whatever is available. */
    reached_eof(tc, ds);
    if (missing_chars(tc, ds, chars) == 0)
        return take_chars(tc, ds, chars, 0);
    return MVM_string_decodestream_get_all(tc, ds);
}
/* Returns the decoded characters up to the position find_separator reports
 * for the grapheme `sep`, or NULL if it still cannot be found after one
 * further decoding attempt. */
MVMString * MVM_string_decodestream_get_until_sep(MVMThreadContext *tc, MVMDecodeStream *ds,
                                                  MVMGrapheme32 sep) {
    /* find_separator gives the place just beyond the separator, or 0 when it
     * is absent, so the value can be fed straight to take_chars. */
    MVMint32 end_pos = find_separator(tc, ds, sep);

    if (end_pos == 0) {
        /* Not in what we have decoded so far; decode more and look again. */
        run_decode(tc, ds, NULL, &sep);
        end_pos = find_separator(tc, ds, sep);
    }

    return end_pos ? take_chars(tc, ds, end_pos) : NULL;
}
/* Called once EOF has been hit: decodes any bytes still queued, then drains
 * the normalization buffer into the stream's decoded chars. */
static void reached_eof(MVMThreadContext *tc, MVMDecodeStream *ds) {
    MVMint32 total;

    /* Decode whatever bytes remain. */
    if (ds->bytes_head)
        run_decode(tc, ds, NULL, NULL);

    /* Tell the normalizer no more input is coming, then see how many
     * graphemes it has ready for us. */
    MVM_unicode_normalizer_eof(tc, &(ds->norm));
    total = MVM_unicode_normalizer_available(tc, &(ds->norm));

    if (total) {
        MVMGrapheme32 *flushed = MVM_malloc(total * sizeof(MVMGrapheme32));
        MVMint32 i;

        for (i = 0; i < total; i++)
            flushed[i] = MVM_unicode_normalizer_get_grapheme(tc, &(ds->norm));

        /* Pass the freshly filled buffer and its grapheme count on to the
         * stream's decoded chars. */
        MVM_string_decodestream_add_chars(tc, ds, flushed, total);
    }
}
/* Tries to produce a string of exactly `chars` characters from the stream.
 * Returns the empty string constant when zero chars are requested, and NULL
 * when not enough characters can be decoded at this point. */
MVMString * MVM_string_decodestream_get_chars(MVMThreadContext *tc, MVMDecodeStream *ds,
                                              MVMint32 chars) {
    MVMint32 shortfall;

    /* Nothing requested: hand back the shared empty string. */
    if (chars == 0)
        return tc->instance->str_consts.empty;

    /* Decode more only when the buffered chars fall short of the request. */
    shortfall = missing_chars(tc, ds, chars);
    if (shortfall != 0)
        run_decode(tc, ds, &shortfall, NULL);

    /* Still not enough after decoding: give up. */
    if (missing_chars(tc, ds, chars) != 0)
        return NULL;

    return take_chars(tc, ds, chars, 0);
}
/* Returns the decoded characters up to and including the next separator
 * described by sep_spec, or NULL when no separator can be located yet. When
 * chomp is non-zero the separator's length is passed to take_chars as the
 * number of chars to strip. */
MVMString * MVM_string_decodestream_get_until_sep(MVMThreadContext *tc, MVMDecodeStream *ds,
                                                  MVMDecodeStreamSeparators *sep_spec,
                                                  MVMint32 chomp) {
    MVMint32 sep_length;
    /* find_separator yields the position just past the separator (0 when
     * there is none), which is exactly the count take_chars needs. */
    MVMint32 end_pos = find_separator(tc, ds, sep_spec, &sep_length);

    /* Decoders are only responsible for spotting the final char of a
     * separator, so several rounds may be needed. Stop as soon as a separator
     * is found or run_decode reports a zero result. */
    while (end_pos == 0 && run_decode(tc, ds, NULL, sep_spec))
        end_pos = find_separator(tc, ds, sep_spec, &sep_length);

    if (end_pos == 0)
        return NULL;

    return take_chars(tc, ds, end_pos, chomp ? sep_length : 0);
}
/// Dispatches the benchmark according to the "type" value of the current
/// configuration: "encoder" runs run_encode(), "decoder" runs run_decode().
/// Any other value trips assert(0).
void run_benchmark()
{
    gauge::config_set cs = get_current_configuration();
    std::string type = cs.get_value<std::string>("type");

    if (type == "encoder")
    {
        run_encode();
        return;
    }

    if (type == "decoder")
    {
        run_decode();
        return;
    }

    // Unknown benchmark type.
    assert(0);
}
/* Decodes all the buffers, producing a string containing all the decoded
 * characters. After decoding, the normalization buffer is flushed as at end
 * of input, and every decoded-chars buffer held by the stream is consumed:
 * on return the stream's chars list is empty. The result is a newly
 * allocated MVMString using 32-bit grapheme storage. */
MVMString * MVM_string_decodestream_get_all(MVMThreadContext *tc, MVMDecodeStream *ds) {
    MVMString *result = (MVMString *)MVM_repr_alloc_init(tc, tc->instance->VMString);
    result->body.storage_type = MVM_STRING_GRAPHEME_32;

    /* Decode all the things. */
    run_decode(tc, ds, NULL, NULL);

    /* If there's some things left in the normalization buffer, take them. */
    MVM_unicode_normalizer_eof(tc, &(ds->norm));
    if (MVM_unicode_normalizer_available(tc, &(ds->norm))) {
        MVMint32 ready = MVM_unicode_normalizer_available(tc, &(ds->norm));
        MVMGrapheme32 *buffer = MVM_malloc(ready * sizeof(MVMGrapheme32));
        MVMint32 count = 0;
        while (ready--)
            buffer[count++] = MVM_unicode_normalizer_get_grapheme(tc, &(ds->norm));
        MVM_string_decodestream_add_chars(tc, ds, buffer, count);
    }

    /* If there's no codepoint buffer, then return the empty string. */
    if (!ds->chars_head) {
        result->body.storage.blob_32 = NULL;
        result->body.num_graphs = 0;
    }

    /* If there's exactly one resulting codepoint buffer and we swallowed none
     * of it, just use it. */
    else if (ds->chars_head == ds->chars_tail && ds->chars_head_pos == 0) {
        /* Set up result string. */
        result->body.storage.blob_32 = ds->chars_head->chars;
        result->body.num_graphs = ds->chars_head->length;

        /* Don't free the buffer's memory itself, just the holder, as we
         * stole that for the buffer into the string above. */
        MVM_free(ds->chars_head);
        ds->chars_head = ds->chars_tail = NULL;
    }

    /* Otherwise, need to assemble all the things. */
    else {
        MVMint32 length = 0, pos = 0;
        /* Only the head buffer can be partially consumed; `skip` carries its
         * already-taken prefix and is zeroed once the head is handled. */
        MVMint32 skip = ds->chars_head_pos;
        MVMDecodeStreamChars *cur_chars = ds->chars_head;

        /* Calculate length. */
        while (cur_chars) {
            length += cur_chars->length - skip;
            skip = 0;
            cur_chars = cur_chars->next;
        }

        /* Allocate a result buffer of the right size. */
        result->body.storage.blob_32 = MVM_malloc(length * sizeof(MVMGrapheme32));
        result->body.num_graphs = length;

        /* Copy all the things into the target, freeing as we go. */
        skip = ds->chars_head_pos;
        cur_chars = ds->chars_head;
        while (cur_chars) {
            /* Grab the link first, since the node is freed below. */
            MVMDecodeStreamChars *next_chars = cur_chars->next;
            /* Copy only the unconsumed part; copying the full length from an
             * offset would run past the end of both buffers. */
            MVMint32 to_copy = cur_chars->length - skip;

            memcpy(result->body.storage.blob_32 + pos, cur_chars->chars + skip,
                to_copy * sizeof(MVMGrapheme32));
            pos += to_copy;
            skip = 0;

            /* The graphemes now live in the result, so release both the
             * grapheme array and its holder. */
            MVM_free(cur_chars->chars);
            MVM_free(cur_chars);
            cur_chars = next_chars;
        }

        /* The list is empty now; the head offset must not outlive it. */
        ds->chars_head = ds->chars_tail = NULL;
        ds->chars_head_pos = 0;
    }

    return result;
}