Ejemplo n.º 1
0
/* Assumes that we are holding the lock that serializes updates, and already
 * checked that the synthetic does not exist. Adds it to the lookup trie and
 * synthetics table, making sure to do enough copy/free-at-safe-point work to
 * not upset other threads possibly doing concurrent reads. */
static MVMGrapheme32 add_synthetic(MVMThreadContext *tc, MVMCodepoint *codes, MVMint32 num_codes, MVMint32 utf8_c8) {
    MVMNFGState     *nfg = tc->instance->nfg;
    MVMNFGSynthetic *synth;
    MVMGrapheme32    result;
    size_t           comb_size;

    /* Grow the synthetics table if needed. */
    if (nfg->num_synthetics % MVM_SYNTHETIC_GROW_ELEMS == 0) {
        size_t orig_size = nfg->num_synthetics * sizeof(MVMNFGSynthetic);
        size_t new_size  = (nfg->num_synthetics + MVM_SYNTHETIC_GROW_ELEMS) * sizeof(MVMNFGSynthetic);
        MVMNFGSynthetic *new_synthetics = MVM_fixed_size_alloc(tc, tc->instance->fsa, new_size);
        if (orig_size) {
            memcpy(new_synthetics, nfg->synthetics, orig_size);
            MVM_fixed_size_free_at_safepoint(tc, tc->instance->fsa, orig_size, nfg->synthetics);
        }
        nfg->synthetics = new_synthetics;
    }

    /* Set up the new synthetic entry. */
    synth            = &(nfg->synthetics[nfg->num_synthetics]);
    synth->base      = *codes;
    synth->num_combs = num_codes - 1;
    comb_size        = synth->num_combs * sizeof(MVMCodepoint);
    synth->combs     = MVM_fixed_size_alloc(tc, tc->instance->fsa, comb_size);
    memcpy(synth->combs, codes + 1, comb_size);
    synth->case_uc    = 0;
    synth->case_lc    = 0;
    synth->case_tc    = 0;
    synth->case_fc    = 0;
    synth->is_utf8_c8 = utf8_c8;

    /* Memory barrier to make sure the synthetic is fully in place before we
     * bump the count. */
    MVM_barrier();
    nfg->num_synthetics++;

    /* Give the synthetic an ID by negating the new number of synthetics. */
    result = -nfg->num_synthetics;

    /* Make an entry in the lookup trie for the new synthetic, so we can use
     * it in the future when seeing the same codepoint sequence. */
    add_synthetic_to_trie(tc, codes, num_codes, result);

    return result;
}
Ejemplo n.º 2
0
/* Assumes that we are holding the lock that serializes updates, and already
 * checked that the synthetic does not exist. Adds it to the lookup trie and
 * synthetics table, making sure to do enough copy/free-at-safe-point work to
 * not upset other threads possibly doing concurrent reads. */
static MVMGrapheme32 add_synthetic(MVMThreadContext *tc, MVMCodepoint *codes, MVMint32 num_codes, MVMint32 utf8_c8) {
    MVMNFGState     *nfg = tc->instance->nfg;
    MVMNFGSynthetic *synth;
    MVMGrapheme32    result;

    /* Grow the synthetics table if needed. */
    if (nfg->num_synthetics % MVM_SYNTHETIC_GROW_ELEMS == 0) {
        size_t orig_size = nfg->num_synthetics * sizeof(MVMNFGSynthetic);
        size_t new_size  = (nfg->num_synthetics + MVM_SYNTHETIC_GROW_ELEMS) * sizeof(MVMNFGSynthetic);
        MVMNFGSynthetic *new_synthetics = MVM_fixed_size_alloc(tc, tc->instance->fsa, new_size);
        if (orig_size) {
            memcpy(new_synthetics, nfg->synthetics, orig_size);
            MVM_fixed_size_free_at_safepoint(tc, tc->instance->fsa, orig_size, nfg->synthetics);
        }
        nfg->synthetics = new_synthetics;
    }

    /* Set up the new synthetic entry. */
    synth            = &(nfg->synthetics[nfg->num_synthetics]);
    synth->num_codes = num_codes;
    /* Find which codepoint is the base codepoint. It is always index 0 unless
     * there are Prepend codepoints */
    if (!utf8_c8 && MVM_unicode_codepoint_get_property_int(tc, codes[0], MVM_UNICODE_PROPERTY_GRAPHEME_CLUSTER_BREAK)
        == MVM_UNICODE_PVALUE_GCB_PREPEND) {
        MVMint64 i = 0;
        MVMCodepoint cached = codes[i++];
        MVMint64 cached_GCB = MVM_UNICODE_PVALUE_GCB_PREPEND;
        while (i < num_codes) {
            /* If it's the same codepoint as before, don't need to request
             * the property value again */
            if (cached == codes[i] || MVM_UNICODE_PVALUE_GCB_PREPEND ==
                (cached_GCB = MVM_unicode_codepoint_get_property_int(tc, (cached = codes[i]),
                    MVM_UNICODE_PROPERTY_GRAPHEME_CLUSTER_BREAK))) {
            }
            else {
                /* If we see an Extend then this is a degenerate without any
                 * base character, so set i to num_codes so base_index gets set
                 * to 0 */
                if (cached_GCB == MVM_UNICODE_PVALUE_GCB_EXTEND)
                    i = num_codes;
                break;
            }
            i++;
        }
        /* If all the codepoints were prepend then we need to set it to 0 */
        synth->base_index = num_codes == i ? 0 : i;

    }
    else {
        synth->base_index = 0;
    }


    synth->codes     = MVM_fixed_size_alloc(tc, tc->instance->fsa,
        num_codes * sizeof(MVMCodepoint));
    memcpy(synth->codes, codes, (synth->num_codes * sizeof(MVMCodepoint)));
    synth->case_uc    = 0;
    synth->case_lc    = 0;
    synth->case_tc    = 0;
    synth->case_fc    = 0;
    synth->is_utf8_c8 = utf8_c8;

    /* Memory barrier to make sure the synthetic is fully in place before we
     * bump the count. */
    MVM_barrier();
    nfg->num_synthetics++;

    /* Give the synthetic an ID by negating the new number of synthetics. */
    result = -(nfg->num_synthetics);

    /* Make an entry in the lookup trie for the new synthetic, so we can use
     * it in the future when seeing the same codepoint sequence. */
    add_synthetic_to_trie(tc, codes, num_codes, result);

    return result;
}