/* Fast-path method lookup for specialized code. Tries the cache-only lookup
 * and, on a hit, records the (STable, method) pair in the frame's spesh
 * slots at ss_idx / ss_idx + 1 so later lookups can go monomorphic.
 * Returns 0 with the method in res->o on a cache hit; otherwise delegates to
 * the fully late-bound MVM_6model_find_method (which receives res) and
 * returns 1. */
MVMint32 MVM_6model_find_method_spesh(MVMThreadContext *tc, MVMObject *obj, MVMString *name, MVMint32 ss_idx, MVMRegister *res) {
    /* Missed mono-morph; try cache-only lookup. */
    MVMObject *meth = MVM_6model_find_method_cache_only(tc, obj, name);
    if (!MVM_is_null(tc, meth)) {
        /* Got it; cache. Must be careful due to threads
         * reading, races, etc. */
        MVMStaticFrame *sf = tc->cur_frame->static_info;
        uv_mutex_lock(&tc->instance->mutex_spesh_install);
        /* Only install if nobody beat us to filling the method slot. */
        if (!tc->cur_frame->effective_spesh_slots[ss_idx + 1]) {
            /* Write the method slot first, then barrier, then the STable
             * slot: a concurrent reader that observes the STable is thus
             * guaranteed to also observe the method. */
            MVM_ASSIGN_REF(tc, &(sf->common.header),
                tc->cur_frame->effective_spesh_slots[ss_idx + 1],
                (MVMCollectable *)meth);
            MVM_barrier();
            MVM_ASSIGN_REF(tc, &(sf->common.header),
                tc->cur_frame->effective_spesh_slots[ss_idx],
                (MVMCollectable *)STABLE(obj));
        }
        uv_mutex_unlock(&tc->instance->mutex_spesh_install);
        res->o = meth;
        return 0;
    }
    else {
        /* Fully late-bound. */
        MVM_6model_find_method(tc, obj, name, res);
        return 1;
    }
}
/* Assumes that we are holding the lock that serializes updates, and already
 * checked that the synthetic does not exist. Adds it to the lookup trie and
 * synthetics table, making sure to do enough copy/free-at-safe-point work to
 * not upset other threads possibly doing concurrent reads.
 * Returns the synthetic's grapheme ID: the negated, 1-based index into the
 * synthetics table. */
static MVMGrapheme32 add_synthetic(MVMThreadContext *tc, MVMCodepoint *codes, MVMint32 num_codes, MVMint32 utf8_c8) {
    MVMNFGState *nfg = tc->instance->nfg;
    MVMNFGSynthetic *synth;
    MVMGrapheme32 result;
    size_t comb_size;

    /* Grow the synthetics table if needed. num_synthetics only grows (under
     * the lock), so the modulo test fires exactly when the current
     * allocation, always a multiple of MVM_SYNTHETIC_GROW_ELEMS, is full.
     * The old table is freed at a safe point so concurrent readers still
     * holding the old pointer remain valid. */
    if (nfg->num_synthetics % MVM_SYNTHETIC_GROW_ELEMS == 0) {
        size_t orig_size = nfg->num_synthetics * sizeof(MVMNFGSynthetic);
        size_t new_size = (nfg->num_synthetics + MVM_SYNTHETIC_GROW_ELEMS) * sizeof(MVMNFGSynthetic);
        MVMNFGSynthetic *new_synthetics = MVM_fixed_size_alloc(tc, tc->instance->fsa, new_size);
        if (orig_size) {
            memcpy(new_synthetics, nfg->synthetics, orig_size);
            MVM_fixed_size_free_at_safepoint(tc, tc->instance->fsa, orig_size, nfg->synthetics);
        }
        nfg->synthetics = new_synthetics;
    }

    /* Set up the new synthetic entry: codes[0] is the base codepoint and the
     * remaining num_codes - 1 codepoints are its combiners. */
    synth = &(nfg->synthetics[nfg->num_synthetics]);
    synth->base = *codes;
    synth->num_combs = num_codes - 1;
    comb_size = synth->num_combs * sizeof(MVMCodepoint);
    synth->combs = MVM_fixed_size_alloc(tc, tc->instance->fsa, comb_size);
    memcpy(synth->combs, codes + 1, comb_size);
    /* Case variants start out zeroed; presumably they are filled in lazily
     * elsewhere — TODO confirm against the case-change code. */
    synth->case_uc = 0;
    synth->case_lc = 0;
    synth->case_tc = 0;
    synth->case_fc = 0;
    synth->is_utf8_c8 = utf8_c8;

    /* Memory barrier to make sure the synthetic is fully in place before we
     * bump the count. */
    MVM_barrier();
    nfg->num_synthetics++;

    /* Give the synthetic an ID by negating the new number of synthetics. */
    result = -nfg->num_synthetics;

    /* Make an entry in the lookup trie for the new synthetic, so we can use
     * it in the future when seeing the same codepoint sequence. */
    add_synthetic_to_trie(tc, codes, num_codes, result);

    return result;
}
/* Produces and installs a specialized version of the code, according to the
 * specified plan. The new candidate is published for lock-free readers:
 * guards first, then a barrier, then the candidate count bump. */
void MVM_spesh_candidate_add(MVMThreadContext *tc, MVMSpeshPlanned *p) {
    MVMSpeshGraph *sg;
    MVMSpeshCode *sc;
    MVMSpeshCandidate *candidate;
    MVMSpeshCandidate **new_candidate_list;
    MVMStaticFrameSpesh *spesh;
    MVMuint64 start_time, spesh_time, jit_time, end_time;

    /* If we've reached our specialization limit, don't continue. */
    MVMint32 spesh_produced = ++tc->instance->spesh_produced;
    if (tc->instance->spesh_limit)
        if (spesh_produced > tc->instance->spesh_limit)
            return;

    /* Produce the specialization graph and, if we're logging, dump it out
     * pre-transformation. */
#if MVM_GC_DEBUG
    tc->in_spesh = 1;
#endif
    sg = MVM_spesh_graph_create(tc, p->sf, 0, 1);
    if (MVM_spesh_debug_enabled(tc)) {
        char *c_name = MVM_string_utf8_encode_C_string(tc, p->sf->body.name);
        char *c_cuid = MVM_string_utf8_encode_C_string(tc, p->sf->body.cuuid);
        MVMSpeshFacts **facts = sg->facts;
        char *before;
        /* Temporarily hide the facts so the pre-optimization dump does not
         * include them. */
        sg->facts = NULL;
        before = MVM_spesh_dump(tc, sg);
        sg->facts = facts;
        MVM_spesh_debug_printf(tc, "Specialization of '%s' (cuid: %s)\n\n", c_name, c_cuid);
        MVM_spesh_debug_printf(tc, "Before:\n%s", before);
        MVM_free(c_name);
        MVM_free(c_cuid);
        MVM_free(before);
        fflush(tc->instance->spesh_log_fh);
        start_time = uv_hrtime();
    }

    /* Attach the graph so we will be able to mark it during optimization,
     * allowing us to stick GC sync points at various places and so not let
     * the optimization work block GC for too long. */
    tc->spesh_active_graph = sg;
    spesh_gc_point(tc);

    /* Perform the optimization and, if we're logging, dump out the result. */
    if (p->cs_stats->cs)
        MVM_spesh_args(tc, sg, p->cs_stats->cs, p->type_tuple);
    spesh_gc_point(tc);
    MVM_spesh_facts_discover(tc, sg, p, 0);
    spesh_gc_point(tc);
    MVM_spesh_optimize(tc, sg, p);
    spesh_gc_point(tc);

    /* Clear active graph; beyond this point, no more GC syncs. */
    tc->spesh_active_graph = NULL;

    if (MVM_spesh_debug_enabled(tc))
        spesh_time = uv_hrtime();

    /* Generate code and install it into the candidate. */
    sc = MVM_spesh_codegen(tc, sg);
    candidate = MVM_calloc(1, sizeof(MVMSpeshCandidate));
    candidate->bytecode = sc->bytecode;
    candidate->bytecode_size = sc->bytecode_size;
    candidate->handlers = sc->handlers;
    candidate->deopt_usage_info = sc->deopt_usage_info;
    candidate->num_handlers = sg->num_handlers;
    candidate->num_deopts = sg->num_deopt_addrs;
    candidate->deopts = sg->deopt_addrs;
    candidate->deopt_named_used_bit_field = sg->deopt_named_used_bit_field;
    candidate->deopt_pea = sg->deopt_pea;
    candidate->num_locals = sg->num_locals;
    candidate->num_lexicals = sg->num_lexicals;
    candidate->num_inlines = sg->num_inlines;
    candidate->inlines = sg->inlines;
    candidate->local_types = sg->local_types;
    candidate->lexical_types = sg->lexical_types;
    MVM_free(sc);

    /* Try to JIT compile the optimised graph. The JIT graph hangs from
     * the spesh graph and can safely be deleted with it. */
    if (tc->instance->jit_enabled) {
        MVMJitGraph *jg;
        if (MVM_spesh_debug_enabled(tc))
            jit_time = uv_hrtime();
        jg = MVM_jit_try_make_graph(tc, sg);
        if (jg != NULL) {
            candidate->jitcode = MVM_jit_compile_graph(tc, jg);
            MVM_jit_graph_destroy(tc, jg);
        }
    }

    if (MVM_spesh_debug_enabled(tc)) {
        char *after = MVM_spesh_dump(tc, sg);
        end_time = uv_hrtime();
        MVM_spesh_debug_printf(tc, "After:\n%s", after);
        MVM_spesh_debug_printf(tc, "Specialization took %" PRIu64 "us (total %" PRIu64"us)\n",
            (spesh_time - start_time) / 1000,
            (end_time - start_time) / 1000);
        if (tc->instance->jit_enabled) {
            MVM_spesh_debug_printf(tc, "JIT was %ssuccessful and compilation took %" PRIu64 "us\n",
                candidate->jitcode ? "" : "not ", (end_time - jit_time) / 1000);
            if (candidate->jitcode) {
                MVM_spesh_debug_printf(tc, " Bytecode size: %" PRIu64 " byte\n",
                    candidate->jitcode->size);
            }
        }
        MVM_spesh_debug_printf(tc, "\n========\n\n");
        MVM_free(after);
        fflush(tc->instance->spesh_log_fh);
    }

    /* calculate work environment taking JIT spill area into account */
    calculate_work_env_sizes(tc, sg->sf, candidate);

    /* Update spesh slots. */
    candidate->num_spesh_slots = sg->num_spesh_slots;
    candidate->spesh_slots = sg->spesh_slots;

    /* Claim ownership of allocated memory assigned to the candidate */
    sg->cand = candidate;
    MVM_spesh_graph_destroy(tc, sg);

    /* Create a new candidate list and copy any existing ones. Free memory
     * using the FSA safepoint mechanism. */
    spesh = p->sf->body.spesh;
    new_candidate_list = MVM_fixed_size_alloc(tc, tc->instance->fsa,
        (spesh->body.num_spesh_candidates + 1) * sizeof(MVMSpeshCandidate *));
    if (spesh->body.num_spesh_candidates) {
        size_t orig_size = spesh->body.num_spesh_candidates * sizeof(MVMSpeshCandidate *);
        memcpy(new_candidate_list, spesh->body.spesh_candidates, orig_size);
        MVM_fixed_size_free_at_safepoint(tc, tc->instance->fsa, orig_size,
            spesh->body.spesh_candidates);
    }
    new_candidate_list[spesh->body.num_spesh_candidates] = candidate;
    spesh->body.spesh_candidates = new_candidate_list;

    /* May now be referencing nursery objects, so barrier just in case. */
    if (spesh->common.header.flags & MVM_CF_SECOND_GEN)
        MVM_gc_write_barrier_hit(tc, (MVMCollectable *)spesh);

    /* Update the guards, and bump the candidate count. This means there is a
     * period when we can read, in another thread, a candidate ahead of the
     * count being updated. Since we set it up above, that's fine enough. The
     * updating of the count *after* this, plus the barrier, is to make sure
     * the guards are in place before the count is bumped, since OSR will
     * watch the number of candidates to see if there's one for it to try and
     * jump in to, and if the guards aren't in place first will see there is
     * not, and not bother checking again. */
    MVM_spesh_arg_guard_add(tc, &(spesh->body.spesh_arg_guard),
        p->cs_stats->cs, p->type_tuple, spesh->body.num_spesh_candidates);
    MVM_barrier();
    spesh->body.num_spesh_candidates++;

    /* If we're logging, dump the updated arg guards also. */
    if (MVM_spesh_debug_enabled(tc)) {
        char *guard_dump = MVM_spesh_dump_arg_guard(tc, p->sf);
        MVM_spesh_debug_printf(tc, "%s========\n\n", guard_dump);
        fflush(tc->instance->spesh_log_fh);
        MVM_free(guard_dump);
    }

#if MVM_GC_DEBUG
    tc->in_spesh = 0;
#endif
}
/* Assumes that we are holding the lock that serializes updates, and already
 * checked that the synthetic does not exist. Adds it to the lookup trie and
 * synthetics table, making sure to do enough copy/free-at-safe-point work to
 * not upset other threads possibly doing concurrent reads.
 * Returns the synthetic's grapheme ID: the negated, 1-based index into the
 * synthetics table. */
static MVMGrapheme32 add_synthetic(MVMThreadContext *tc, MVMCodepoint *codes, MVMint32 num_codes, MVMint32 utf8_c8) {
    MVMNFGState *nfg = tc->instance->nfg;
    MVMNFGSynthetic *synth;
    MVMGrapheme32 result;

    /* Grow the synthetics table if needed. num_synthetics only grows (under
     * the lock), so the modulo test fires exactly when the current
     * allocation, always a multiple of MVM_SYNTHETIC_GROW_ELEMS, is full.
     * The old table is freed at a safe point so concurrent readers still
     * holding the old pointer remain valid. */
    if (nfg->num_synthetics % MVM_SYNTHETIC_GROW_ELEMS == 0) {
        size_t orig_size = nfg->num_synthetics * sizeof(MVMNFGSynthetic);
        size_t new_size = (nfg->num_synthetics + MVM_SYNTHETIC_GROW_ELEMS) * sizeof(MVMNFGSynthetic);
        MVMNFGSynthetic *new_synthetics = MVM_fixed_size_alloc(tc, tc->instance->fsa, new_size);
        if (orig_size) {
            memcpy(new_synthetics, nfg->synthetics, orig_size);
            MVM_fixed_size_free_at_safepoint(tc, tc->instance->fsa, orig_size, nfg->synthetics);
        }
        nfg->synthetics = new_synthetics;
    }

    /* Set up the new synthetic entry. */
    synth = &(nfg->synthetics[nfg->num_synthetics]);
    synth->num_codes = num_codes;

    /* Find which codepoint is the base codepoint. It is always index 0 unless
     * there are Prepend codepoints */
    if (!utf8_c8 && MVM_unicode_codepoint_get_property_int(tc, codes[0],
            MVM_UNICODE_PROPERTY_GRAPHEME_CLUSTER_BREAK) == MVM_UNICODE_PVALUE_GCB_PREPEND) {
        MVMint64 i = 0;
        MVMCodepoint cached = codes[i++];
        MVMint64 cached_GCB = MVM_UNICODE_PVALUE_GCB_PREPEND;
        /* Scan forward past the run of Prepend codepoints; the first
         * non-Prepend codepoint is the base. */
        while (i < num_codes) {
            /* If it's the same codepoint as before, don't need to request
             * the property value again */
            if (cached == codes[i]
                    || MVM_UNICODE_PVALUE_GCB_PREPEND == (cached_GCB =
                        MVM_unicode_codepoint_get_property_int(tc, (cached = codes[i]),
                            MVM_UNICODE_PROPERTY_GRAPHEME_CLUSTER_BREAK))) {
            }
            else {
                /* If we see an Extend then this is a degenerate without any
                 * base character, so set i to num_codes so base_index gets set
                 * to 0 */
                if (cached_GCB == MVM_UNICODE_PVALUE_GCB_EXTEND)
                    i = num_codes;
                break;
            }
            i++;
        }
        /* If all the codepoints were prepend then we need to set it to 0 */
        synth->base_index = num_codes == i ? 0 : i;
    }
    else {
        synth->base_index = 0;
    }

    /* Store a private copy of the full codepoint sequence. */
    synth->codes = MVM_fixed_size_alloc(tc, tc->instance->fsa,
        num_codes * sizeof(MVMCodepoint));
    memcpy(synth->codes, codes, (synth->num_codes * sizeof(MVMCodepoint)));
    /* Case variants start out zeroed; presumably they are filled in lazily
     * elsewhere — TODO confirm against the case-change code. */
    synth->case_uc = 0;
    synth->case_lc = 0;
    synth->case_tc = 0;
    synth->case_fc = 0;
    synth->is_utf8_c8 = utf8_c8;

    /* Memory barrier to make sure the synthetic is fully in place before we
     * bump the count. */
    MVM_barrier();
    nfg->num_synthetics++;

    /* Give the synthetic an ID by negating the new number of synthetics. */
    result = -(nfg->num_synthetics);

    /* Make an entry in the lookup trie for the new synthetic, so we can use
     * it in the future when seeing the same codepoint sequence. */
    add_synthetic_to_trie(tc, codes, num_codes, result);

    return result;
}
/* Records the codepoint sequence -> synthetic mapping in the grapheme lookup
 * trie (presumably called under the same update-serializing lock as
 * add_synthetic — confirm at call sites). Builds an updated trie first and
 * only then publishes it, so concurrent readers never observe a
 * half-modified structure. */
static void add_synthetic_to_trie(MVMThreadContext *tc, MVMCodepoint *codes,
        MVMint32 num_codes, MVMGrapheme32 synthetic) {
    MVMNFGState *nfg = tc->instance->nfg;

    /* Produce the updated trie off to the side. */
    MVMNFGTrieNode *updated = twiddle_trie_node(tc, nfg->grapheme_lookup,
        codes, num_codes, synthetic);

    /* Barrier, then swap in the new root, so all node writes are visible
     * before any reader can reach them through the new pointer. */
    MVM_barrier();
    nfg->grapheme_lookup = updated;
}
/* Tries to generate a specialization of the bytecode, for the given callsite
 * and argument tuple. Returns the freshly installed candidate, the existing
 * equivalent one if another thread beat us to installing it, or NULL when
 * the per-frame specialization limit has been reached. */
MVMSpeshCandidate * MVM_spesh_candidate_generate(MVMThreadContext *tc, MVMStaticFrame *static_frame,
        MVMCallsite *callsite, MVMRegister *args) {
    MVMSpeshCandidate *result;
    MVMSpeshGuard *guards;
    MVMSpeshCode *sc;
    MVMint32 num_spesh_slots, num_guards, *deopts, num_deopts;
    MVMCollectable **spesh_slots;
    /* Track whether the freshly generated code was actually installed, so we
     * can release it otherwise (previously the beaten path leaked it). */
    MVMint32 installed = 0;
    /* NULL-initialized so the unconditional free() at the end is safe when
     * logging is off (free(NULL) is a no-op), and so the dump strings are
     * not leaked when another thread beat us to the install. */
    char *before = NULL;
    char *after = NULL;

    /* Generate the specialization. */
    MVMSpeshGraph *sg = MVM_spesh_graph_create(tc, static_frame);
    if (tc->instance->spesh_log_fh)
        before = MVM_spesh_dump(tc, sg);
    MVM_spesh_args(tc, sg, callsite, args);
    MVM_spesh_facts_discover(tc, sg);
    MVM_spesh_optimize(tc, sg);
    if (tc->instance->spesh_log_fh)
        after = MVM_spesh_dump(tc, sg);
    sc = MVM_spesh_codegen(tc, sg);
    num_spesh_slots = sg->num_spesh_slots;
    spesh_slots     = sg->spesh_slots;
    num_guards      = sg->num_guards;
    guards          = sg->guards;
    num_deopts      = sg->num_deopt_addrs;
    deopts          = sg->deopt_addrs;
    MVM_spesh_graph_destroy(tc, sg);

    /* Now try to add it. Note there's a slim chance another thread beat us
     * to doing so. Also other threads can read the specializations without
     * lock, so make absolutely sure we increment the count of them after we
     * add the new one. */
    result = NULL;
    uv_mutex_lock(&tc->instance->mutex_spesh_install);
    if (static_frame->body.num_spesh_candidates < MVM_SPESH_LIMIT) {
        MVMint32 num_spesh = static_frame->body.num_spesh_candidates;
        MVMint32 i;
        /* See if an equivalent candidate (same callsite and guard set) was
         * already installed by another thread. */
        for (i = 0; i < num_spesh; i++) {
            MVMSpeshCandidate *compare = &static_frame->body.spesh_candidates[i];
            if (compare->cs == callsite && compare->num_guards == num_guards &&
                    memcmp(compare->guards, guards, num_guards * sizeof(MVMSpeshGuard)) == 0) {
                /* Beaten!
                 * NOTE(review): guards, spesh_slots and deopts taken from the
                 * destroyed graph are orphaned on this path; confirm their
                 * allocator before freeing them here. */
                result = &static_frame->body.spesh_candidates[i];
                break;
            }
        }
        if (!result) {
            /* First candidate for this frame? Allocate the full table up
             * front so candidate pointers stay stable for lock-free readers.
             * NOTE(review): malloc result is unchecked — confirm the
             * project's OOM policy. */
            if (!static_frame->body.spesh_candidates)
                static_frame->body.spesh_candidates = malloc(
                    MVM_SPESH_LIMIT * sizeof(MVMSpeshCandidate));
            result = &static_frame->body.spesh_candidates[num_spesh];
            result->cs              = callsite;
            result->num_guards      = num_guards;
            result->guards          = guards;
            result->bytecode        = sc->bytecode;
            result->bytecode_size   = sc->bytecode_size;
            result->handlers        = sc->handlers;
            result->num_spesh_slots = num_spesh_slots;
            result->spesh_slots     = spesh_slots;
            result->num_deopts      = num_deopts;
            result->deopts          = deopts;
            /* Barrier so lock-free readers never observe the bumped count
             * before the candidate's fields are fully in place. */
            MVM_barrier();
            static_frame->body.num_spesh_candidates++;
            installed = 1;
            if (static_frame->common.header.flags & MVM_CF_SECOND_GEN)
                if (!(static_frame->common.header.flags & MVM_CF_IN_GEN2_ROOT_LIST))
                    MVM_gc_root_gen2_add(tc, (MVMCollectable *)static_frame);
            if (tc->instance->spesh_log_fh) {
                char *c_name = MVM_string_utf8_encode_C_string(tc, static_frame->body.name);
                char *c_cuid = MVM_string_utf8_encode_C_string(tc, static_frame->body.cuuid);
                fprintf(tc->instance->spesh_log_fh,
                    "Specialized '%s' (cuid: %s)\n\n", c_name, c_cuid);
                fprintf(tc->instance->spesh_log_fh,
                    "Before:\n%s\nAfter:\n%s\n\n========\n\n", before, after);
                free(c_name);
                free(c_cuid);
            }
        }
    }
    if (!installed) {
        /* Either beaten or at the limit: the freshly generated code was not
         * installed, so release it. (Previously this only ran on the limit
         * path, leaking bytecode and handlers when beaten.) */
        free(sc->bytecode);
        free(sc->handlers);
    }
    uv_mutex_unlock(&tc->instance->mutex_spesh_install);
    free(sc);
    /* Dump strings are freed on every path; both are NULL unless logging
     * was enabled when they were produced. */
    free(before);
    free(after);
    return result;
}