/* Allocate a piece of memory from the spesh graph's buffer. Deallocated when
 * the spesh graph is.
 *
 * tc    - the current thread context (used for graph teardown and throwing).
 * g     - the spesh graph whose chain of memory blocks is allocated from.
 * bytes - the number of bytes wanted. On platforms lacking unaligned 64-bit
 *         access this is rounded up to the next multiple of 8.
 *
 * Returns a pointer into the graph's most recent memory block; a new block
 * (obtained with MVM_calloc) is pushed onto the chain when the current one
 * cannot hold the request. A request larger than MVM_SPESH_MEMBLOCK_SIZE
 * destroys the graph and throws an ad-hoc exception. */
void * MVM_spesh_alloc(MVMThreadContext *tc, MVMSpeshGraph *g, size_t bytes) {
    MVMSpeshMemBlock *block;
    char             *result;
    size_t            needed = bytes;

#if !defined(MVM_CAN_UNALIGNED_INT64) || !defined(MVM_CAN_UNALIGNED_NUM64)
    /* Round up size to next multiple of 8, to ensure alignment. The mask is
     * built in size_t so it is as wide as the value it is applied to. */
    needed = (bytes + 7) & ~(size_t)7;
#endif

    /* A request that cannot fit in even a fresh block can never be satisfied,
     * so reject it before allocating anything. `needed < bytes` catches the
     * rounding above wrapping around for absurdly large requests. */
    if (needed < bytes || needed > MVM_SPESH_MEMBLOCK_SIZE) {
        MVM_spesh_graph_destroy(tc, g);
        MVM_exception_throw_adhoc(tc, "MVM_spesh_alloc: requested oversized block");
    }

    /* Try the current block first. Compare against the space remaining
     * rather than forming `alloc + needed`, which could point beyond the end
     * of the buffer (undefined behaviour). An exact fit is accepted, matching
     * what a fresh block allows. */
    block = g->mem_block;
    if (block && needed <= (size_t)(block->limit - block->alloc)) {
        result        = block->alloc;
        block->alloc += needed;
        return result;
    }

    /* No block, or block was full. Add another. */
    block         = MVM_malloc(sizeof(MVMSpeshMemBlock));
    block->buffer = MVM_calloc(MVM_SPESH_MEMBLOCK_SIZE, 1);
    block->alloc  = block->buffer;
    block->limit  = block->buffer + MVM_SPESH_MEMBLOCK_SIZE;
    block->prev   = g->mem_block;
    g->mem_block  = block;

    /* Now allocate out of it. */
    result        = block->alloc;
    block->alloc += needed;
    return result;
}
/* Adds instrumented versions of the unspecialized bytecode.
 *
 * Builds a spesh graph for the static frame, runs instrument_graph over it,
 * generates code from the result, and records both the instrumented and the
 * original (uninstrumented) bytecode, handlers and bytecode size on the
 * frame body. The graph and the codegen result wrapper are released before
 * returning; the generated bytecode and handlers stay referenced by sf. */
static void add_instrumentation(MVMThreadContext *tc, MVMStaticFrame *sf) {
    MVMSpeshGraph *graph = MVM_spesh_graph_create(tc, sf, 1);
    MVMSpeshCode  *code;

    instrument_graph(tc, graph);
    code = MVM_spesh_codegen(tc, graph);

    /* Keep hold of what the frame is currently running. */
    sf->body.uninstrumented_bytecode      = sf->body.bytecode;
    sf->body.uninstrumented_handlers      = sf->body.handlers;
    sf->body.uninstrumented_bytecode_size = sf->body.bytecode_size;

    /* Stash the freshly generated, instrumented code next to it. */
    sf->body.instrumented_bytecode      = code->bytecode;
    sf->body.instrumented_handlers      = code->handlers;
    sf->body.instrumented_bytecode_size = code->bytecode_size;

    MVM_spesh_graph_destroy(tc, graph);
    MVM_free(code);
}
/* Adds instrumented version of the unspecialized bytecode.
 *
 * want_coverage selects the instrumentation pass: non-zero runs
 * instrument_graph, zero runs instrument_graph_with_breakpoints. The
 * generated code, together with the frame's current bytecode, handlers and
 * bytecode size, is stored in sf->body.instrumentation, which is allocated
 * here if the frame does not have one yet. */
static void add_instrumentation(MVMThreadContext *tc, MVMStaticFrame *sf, MVMuint8 want_coverage) {
    MVMSpeshGraph                 *graph = MVM_spesh_graph_create(tc, sf, 1, 0);
    MVMStaticFrameInstrumentation *record;
    MVMSpeshCode                  *code;

    /* Pick the instrumentation pass. */
    if (!want_coverage)
        instrument_graph_with_breakpoints(tc, graph);
    else
        instrument_graph(tc, graph);

    code = MVM_spesh_codegen(tc, graph);

    /* Reuse an existing instrumentation record, or make a zeroed one. */
    record = sf->body.instrumentation;
    if (record == NULL)
        record = MVM_calloc(1, sizeof(MVMStaticFrameInstrumentation));

    /* What the frame currently runs. */
    record->uninstrumented_bytecode      = sf->body.bytecode;
    record->uninstrumented_handlers      = sf->body.handlers;
    record->uninstrumented_bytecode_size = sf->body.bytecode_size;

    /* What we just generated. */
    record->instrumented_bytecode      = code->bytecode;
    record->instrumented_handlers      = code->handlers;
    record->instrumented_bytecode_size = code->bytecode_size;

    sf->body.instrumentation = record;

    MVM_spesh_graph_destroy(tc, graph);
    MVM_free(code);
}
/* Produces and installs a specialized version of the code, according to the
 * specified plan.
 *
 * tc - the current thread context.
 * p  - the plan; the code reads p->sf (the static frame to specialize),
 *      p->cs_stats->cs (the callsite, which may be NULL) and p->type_tuple.
 *
 * Every call bumps tc->instance->spesh_produced. If a non-zero spesh_limit
 * is configured and has been exceeded, the function returns without
 * producing anything. Otherwise it builds a spesh graph, optimizes it,
 * generates code (and attempts JIT compilation if enabled), then appends the
 * new candidate to the frame's candidate list, adds an argument guard for
 * it and finally increments the candidate count. */
void MVM_spesh_candidate_add(MVMThreadContext *tc, MVMSpeshPlanned *p) {
    MVMSpeshGraph *sg;
    MVMSpeshCode *sc;
    MVMSpeshCandidate *candidate;
    MVMSpeshCandidate **new_candidate_list;
    MVMStaticFrameSpesh *spesh;
    /* Timestamps for the debug log. They are only assigned, and only read,
     * on paths guarded by MVM_spesh_debug_enabled (jit_time additionally by
     * jit_enabled). */
    MVMuint64 start_time, spesh_time, jit_time, end_time;

    /* If we've reached our specialization limit, don't continue. Note the
     * counter is incremented even when we bail out. */
    MVMint32 spesh_produced = ++tc->instance->spesh_produced;
    if (tc->instance->spesh_limit)
        if (spesh_produced > tc->instance->spesh_limit)
            return;

    /* Produce the specialization graph and, if we're logging, dump it out
     * pre-transformation. */
#if MVM_GC_DEBUG
    tc->in_spesh = 1;
#endif
    sg = MVM_spesh_graph_create(tc, p->sf, 0, 1);
    if (MVM_spesh_debug_enabled(tc)) {
        char *c_name = MVM_string_utf8_encode_C_string(tc, p->sf->body.name);
        char *c_cuid = MVM_string_utf8_encode_C_string(tc, p->sf->body.cuuid);
        MVMSpeshFacts **facts = sg->facts;
        char *before;
        /* Temporarily detach the facts while producing the "before" dump,
         * then put them back. */
        sg->facts = NULL;
        before = MVM_spesh_dump(tc, sg);
        sg->facts = facts;
        MVM_spesh_debug_printf(tc, "Specialization of '%s' (cuid: %s)\n\n", c_name, c_cuid);
        MVM_spesh_debug_printf(tc, "Before:\n%s", before);
        MVM_free(c_name);
        MVM_free(c_cuid);
        MVM_free(before);
        fflush(tc->instance->spesh_log_fh);
        start_time = uv_hrtime();
    }

    /* Attach the graph so we will be able to mark it during optimization,
     * allowing us to stick GC sync points at various places and so not let
     * the optimization work block GC for too long. */
    tc->spesh_active_graph = sg;
    spesh_gc_point(tc);

    /* Perform the optimization and, if we're logging, dump out the result.
     * Argument specialization is skipped when the plan has no callsite. */
    if (p->cs_stats->cs)
        MVM_spesh_args(tc, sg, p->cs_stats->cs, p->type_tuple);
    spesh_gc_point(tc);
    MVM_spesh_facts_discover(tc, sg, p, 0);
    spesh_gc_point(tc);
    MVM_spesh_optimize(tc, sg, p);
    spesh_gc_point(tc);

    /* Clear active graph; beyond this point, no more GC syncs. */
    tc->spesh_active_graph = NULL;

    if (MVM_spesh_debug_enabled(tc))
        spesh_time = uv_hrtime();

    /* Generate code and install it into the candidate. The candidate takes
     * over the pointers held by the codegen result and by the graph; only
     * the codegen result wrapper itself is freed here. */
    sc = MVM_spesh_codegen(tc, sg);
    candidate = MVM_calloc(1, sizeof(MVMSpeshCandidate));
    candidate->bytecode = sc->bytecode;
    candidate->bytecode_size = sc->bytecode_size;
    candidate->handlers = sc->handlers;
    candidate->deopt_usage_info = sc->deopt_usage_info;
    candidate->num_handlers = sg->num_handlers;
    candidate->num_deopts = sg->num_deopt_addrs;
    candidate->deopts = sg->deopt_addrs;
    candidate->deopt_named_used_bit_field = sg->deopt_named_used_bit_field;
    candidate->deopt_pea = sg->deopt_pea;
    candidate->num_locals = sg->num_locals;
    candidate->num_lexicals = sg->num_lexicals;
    candidate->num_inlines = sg->num_inlines;
    candidate->inlines = sg->inlines;
    candidate->local_types = sg->local_types;
    candidate->lexical_types = sg->lexical_types;
    MVM_free(sc);

    /* Try to JIT compile the optimised graph. The JIT graph hangs from
     * the spesh graph and can safely be deleted with it. */
    if (tc->instance->jit_enabled) {
        MVMJitGraph *jg;
        if (MVM_spesh_debug_enabled(tc))
            jit_time = uv_hrtime();
        jg = MVM_jit_try_make_graph(tc, sg);
        /* A NULL graph leaves candidate->jitcode at the zero it got from
         * MVM_calloc above. */
        if (jg != NULL) {
            candidate->jitcode = MVM_jit_compile_graph(tc, jg);
            MVM_jit_graph_destroy(tc, jg);
        }
    }

    if (MVM_spesh_debug_enabled(tc)) {
        char *after = MVM_spesh_dump(tc, sg);
        end_time = uv_hrtime();
        MVM_spesh_debug_printf(tc, "After:\n%s", after);
        /* uv_hrtime values are divided by 1000 for the "us" figures. */
        MVM_spesh_debug_printf(tc, "Specialization took %" PRIu64 "us (total %" PRIu64"us)\n",
            (spesh_time - start_time) / 1000,
            (end_time - start_time) / 1000);

        if (tc->instance->jit_enabled) {
            MVM_spesh_debug_printf(tc,
                "JIT was %ssuccessful and compilation took %" PRIu64 "us\n",
                candidate->jitcode ? "" : "not ", (end_time - jit_time) / 1000);
            if (candidate->jitcode) {
                /* NOTE(review): PRIu64 assumes jitcode->size is a 64-bit
                 * unsigned value - confirm against its declaration. */
                MVM_spesh_debug_printf(tc, " Bytecode size: %" PRIu64 " byte\n",
                    candidate->jitcode->size);
            }
        }
        MVM_spesh_debug_printf(tc, "\n========\n\n");
        MVM_free(after);
        fflush(tc->instance->spesh_log_fh);
    }

    /* Calculate work environment taking JIT spill area into account. */
    calculate_work_env_sizes(tc, sg->sf, candidate);

    /* Update spesh slots. */
    candidate->num_spesh_slots = sg->num_spesh_slots;
    candidate->spesh_slots = sg->spesh_slots;

    /* Claim ownership of allocated memory assigned to the candidate. The
     * candidate is attached to the graph before the graph is destroyed. */
    sg->cand = candidate;
    MVM_spesh_graph_destroy(tc, sg);

    /* Create a new candidate list and copy any existing ones. Free memory
     * using the FSA safepoint mechanism. */
    spesh = p->sf->body.spesh;
    new_candidate_list = MVM_fixed_size_alloc(tc, tc->instance->fsa,
        (spesh->body.num_spesh_candidates + 1) * sizeof(MVMSpeshCandidate *));
    if (spesh->body.num_spesh_candidates) {
        size_t orig_size = spesh->body.num_spesh_candidates * sizeof(MVMSpeshCandidate *);
        memcpy(new_candidate_list, spesh->body.spesh_candidates, orig_size);
        MVM_fixed_size_free_at_safepoint(tc, tc->instance->fsa, orig_size,
            spesh->body.spesh_candidates);
    }
    /* The new candidate goes in the slot just past the current count; the
     * count itself is only bumped further down. */
    new_candidate_list[spesh->body.num_spesh_candidates] = candidate;
    spesh->body.spesh_candidates = new_candidate_list;

    /* May now be referencing nursery objects, so barrier just in case. */
    if (spesh->common.header.flags & MVM_CF_SECOND_GEN)
        MVM_gc_write_barrier_hit(tc, (MVMCollectable *)spesh);

    /* Update the guards, and bump the candidate count. This means there is a
     * period when we can read, in another thread, a candidate ahead of the
     * count being updated. Since we set it up above, that's fine enough. The
     * updating of the count *after* this, plus the barrier, is to make sure
     * the guards are in place before the count is bumped, since OSR will
     * watch the number of candidates to see if there's one for it to try and
     * jump in to, and if the guards aren't in place first will see there is
     * not, and not bother checking again. */
    MVM_spesh_arg_guard_add(tc, &(spesh->body.spesh_arg_guard),
        p->cs_stats->cs, p->type_tuple, spesh->body.num_spesh_candidates);
    MVM_barrier();
    spesh->body.num_spesh_candidates++;

    /* If we're logging, dump the updated arg guards also. */
    if (MVM_spesh_debug_enabled(tc)) {
        char *guard_dump = MVM_spesh_dump_arg_guard(tc, p->sf);
        MVM_spesh_debug_printf(tc, "%s========\n\n", guard_dump);
        fflush(tc->instance->spesh_log_fh);
        MVM_free(guard_dump);
    }

#if MVM_GC_DEBUG
    tc->in_spesh = 0;
#endif
}
/* Sees if it will be possible to inline the target code ref, given we could * already identify a spesh candidate. Returns NULL if no inlining is possible * or a graph ready to be merged if it will be possible. */ MVMSpeshGraph * MVM_spesh_inline_try_get_graph(MVMThreadContext *tc, MVMSpeshGraph *inliner, MVMCode *target, MVMSpeshCandidate *cand) { MVMSpeshGraph *ig; MVMSpeshBB *bb; /* Check inlining is enabled. */ if (!tc->instance->spesh_inline_enabled) return NULL; /* Check bytecode size is within the inline limit. */ if (cand->bytecode_size > MVM_SPESH_MAX_INLINE_SIZE) return NULL; /* Ensure that this isn't a recursive inlining. */ if (target->body.sf == inliner->sf) return NULL; /* Ensure the candidate isn't still logging. */ if (cand->sg) return NULL; /* Build graph from the already-specialized bytecode. */ ig = MVM_spesh_graph_create_from_cand(tc, target->body.sf, cand, 0); /* Traverse graph, looking for anything that might prevent inlining and * also building usage counts up. */ bb = ig->entry; while (bb) { MVMSpeshIns *ins = bb->first_ins; while (ins) { /* Track usages. */ MVMint32 opcode = ins->info->opcode; MVMint32 is_phi = opcode == MVM_SSA_PHI; MVMuint8 i; for (i = 0; i < ins->info->num_operands; i++) if ((is_phi && i > 0) || (!is_phi && (ins->info->operands[i] & MVM_operand_rw_mask) == MVM_operand_read_reg)) ig->facts[ins->operands[i].reg.orig][ins->operands[i].reg.i].usages++; if (opcode == MVM_OP_inc_i || opcode == MVM_OP_inc_u || opcode == MVM_OP_dec_i || opcode == MVM_OP_dec_u) ig->facts[ins->operands[0].reg.orig][ins->operands[0].reg.i - 1].usages++; /* Instruction may be marked directly as not being inlinable, in * which case we're done. */ if (!is_phi && ins->info->no_inline) goto not_inlinable; /* If we have lexical access, make sure it's within the frame. 
*/ if (ins->info->opcode == MVM_OP_getlex) { if (ins->operands[1].lex.outers > 0) goto not_inlinable; } else if (ins->info->opcode == MVM_OP_bindlex) { if (ins->operands[0].lex.outers > 0) goto not_inlinable; } /* Check we don't have too many args for inlining to work out. */ if (ins->info->opcode == MVM_OP_sp_getarg_o || ins->info->opcode == MVM_OP_sp_getarg_i || ins->info->opcode == MVM_OP_sp_getarg_n || ins->info->opcode == MVM_OP_sp_getarg_s) { if (ins->operands[1].lit_i16 >= MAX_ARGS_FOR_OPT) goto not_inlinable; } /* Ext-ops need special care in inter-comp-unit inlines. */ if (ins->info->opcode == (MVMuint16)-1) { MVMCompUnit *target_cu = inliner->sf->body.cu; MVMCompUnit *source_cu = target->body.sf->body.cu; if (source_cu != target_cu) demand_extop(tc, target_cu, source_cu, ins->info); } ins = ins->next; } bb = bb->linear_next; } /* If we found nothing we can't inline, inlining is fine. */ return ig; /* If we can't find a way to inline, we end up here. */ not_inlinable: MVM_spesh_graph_destroy(tc, ig); return NULL; }
static void build_cfg(MVMThreadContext *tc, MVMSpeshGraph *g, MVMStaticFrame *sf, MVMint32 *existing_deopts, MVMint32 num_existing_deopts) { MVMSpeshBB *cur_bb, *prev_bb; MVMSpeshIns *last_ins; MVMint64 i; MVMint32 bb_idx; /* Temporary array of all MVMSpeshIns we create (one per instruction). * Overestimate at size. Has the flat view, matching the bytecode. */ MVMSpeshIns **ins_flat = MVM_calloc(g->bytecode_size / 2, sizeof(MVMSpeshIns *)); /* Temporary array where each byte in the input bytecode gets a 32-bit * integer. This is used for two things: * A) When we make the MVMSpeshIns for an instruction starting at the * byte, we put the instruction index (into ins_flat) in the slot, * shifting it by 2 bits to the left. We will use this to do fixups. * B) The first bit is "I have an incoming branch" - that is, start of * a basic block. The second bit is "I can branch" - that is, end of * a basic block. It's possible to have both bits set. * Anything that's just a zero has no instruction starting there. */ MVMuint32 *byte_to_ins_flags = MVM_calloc(g->bytecode_size, sizeof(MVMuint32)); /* Instruction to basic block mapping. Initialized later. */ MVMSpeshBB **ins_to_bb = NULL; /* Make first pass through the bytecode. In this pass, we make MVMSpeshIns * nodes for each instruction and set the start/end of block bits. Also * set handler targets as basic block starters. */ MVMCompUnit *cu = sf->body.cu; MVMuint8 *pc = g->bytecode; MVMuint8 *end = g->bytecode + g->bytecode_size; MVMuint32 ins_idx = 0; MVMuint8 next_bbs = 1; /* Next iteration (here, first) starts a BB. */ for (i = 0; i < g->num_handlers; i++) byte_to_ins_flags[g->handlers[i].goto_offset] |= MVM_CFG_BB_START; while (pc < end) { /* Look up op info. */ MVMuint16 opcode = *(MVMuint16 *)pc; MVMuint8 *args = pc + 2; MVMuint8 arg_size = 0; const MVMOpInfo *info = get_op_info(tc, cu, opcode); /* Create an instruction node, add it, and record its position. 
*/ MVMSpeshIns *ins_node = MVM_spesh_alloc(tc, g, sizeof(MVMSpeshIns)); ins_flat[ins_idx] = ins_node; byte_to_ins_flags[pc - g->bytecode] |= ins_idx << 2; /* Did previous instruction end a basic block? */ if (next_bbs) { byte_to_ins_flags[pc - g->bytecode] |= MVM_CFG_BB_START; next_bbs = 0; } /* Also check we're not already a BB start due to being a branch * target, in which case we should ensure our prior is marked as * a BB end. */ else { if (byte_to_ins_flags[pc - g->bytecode] & MVM_CFG_BB_START) { MVMuint32 hunt = pc - g->bytecode; while (!byte_to_ins_flags[--hunt]); byte_to_ins_flags[hunt] |= MVM_CFG_BB_END; } } /* Store opcode */ ins_node->info = info; /* Go over operands. */ ins_node->operands = MVM_spesh_alloc(tc, g, info->num_operands * sizeof(MVMSpeshOperand)); for (i = 0; i < info->num_operands; i++) { MVMuint8 flags = info->operands[i]; MVMuint8 rw = flags & MVM_operand_rw_mask; switch (rw) { case MVM_operand_read_reg: case MVM_operand_write_reg: ins_node->operands[i].reg.orig = GET_UI16(args, arg_size); arg_size += 2; break; case MVM_operand_read_lex: case MVM_operand_write_lex: ins_node->operands[i].lex.idx = GET_UI16(args, arg_size); ins_node->operands[i].lex.outers = GET_UI16(args, arg_size + 2); arg_size += 4; break; case MVM_operand_literal: { MVMuint32 type = flags & MVM_operand_type_mask; switch (type) { case MVM_operand_int8: ins_node->operands[i].lit_i8 = GET_I8(args, arg_size); arg_size += 1; break; case MVM_operand_int16: ins_node->operands[i].lit_i16 = GET_I16(args, arg_size); arg_size += 2; break; case MVM_operand_int32: ins_node->operands[i].lit_i32 = GET_I32(args, arg_size); arg_size += 4; break; case MVM_operand_int64: ins_node->operands[i].lit_i64 = MVM_BC_get_I64(args, arg_size); arg_size += 8; break; case MVM_operand_num32: ins_node->operands[i].lit_n32 = GET_N32(args, arg_size); arg_size += 4; break; case MVM_operand_num64: ins_node->operands[i].lit_n64 = MVM_BC_get_N64(args, arg_size); arg_size += 8; break; case 
MVM_operand_callsite: ins_node->operands[i].callsite_idx = GET_UI16(args, arg_size); arg_size += 2; break; case MVM_operand_coderef: ins_node->operands[i].coderef_idx = GET_UI16(args, arg_size); arg_size += 2; break; case MVM_operand_str: ins_node->operands[i].lit_str_idx = GET_UI32(args, arg_size); arg_size += 4; break; case MVM_operand_ins: { /* Stash instruction offset. */ MVMuint32 target = GET_UI32(args, arg_size); ins_node->operands[i].ins_offset = target; /* This is a branching instruction, so it's a BB end. */ byte_to_ins_flags[pc - g->bytecode] |= MVM_CFG_BB_END; /* Its target is a BB start, and any previous instruction * we already passed needs marking as a BB end. */ byte_to_ins_flags[target] |= MVM_CFG_BB_START; if (target > 0 && target < pc - g->bytecode) { while (!byte_to_ins_flags[--target]); byte_to_ins_flags[target] |= MVM_CFG_BB_END; } /* Next instruction is also a BB start. */ next_bbs = 1; arg_size += 4; break; } case MVM_operand_spesh_slot: ins_node->operands[i].lit_i16 = GET_I16(args, arg_size); arg_size += 2; break; default: MVM_exception_throw_adhoc(tc, "Spesh: unknown operand type %d in graph building (op %s)", (int)type, ins_node->info->name); } } break; } } /* We specially handle the jumplist case, which needs to mark all of * the possible places we could jump to in the following instructions * as starts of basic blocks. It is, in itself, the end of one. Note * we jump to the instruction after the n jump points if none match, * so that is marked too. */ if (opcode == MVM_OP_jumplist) { MVMint64 n = MVM_BC_get_I64(args, 0); for (i = 0; i <= n; i++) byte_to_ins_flags[(pc - g->bytecode) + 12 + i * 6] |= MVM_CFG_BB_START; byte_to_ins_flags[pc - g->bytecode] |= MVM_CFG_BB_END; } /* Invocations, returns, and throws are basic block ends. 
*/ switch (opcode) { case MVM_OP_invoke_v: case MVM_OP_invoke_i: case MVM_OP_invoke_n: case MVM_OP_invoke_s: case MVM_OP_invoke_o: case MVM_OP_return_i: case MVM_OP_return_n: case MVM_OP_return_s: case MVM_OP_return_o: case MVM_OP_return: case MVM_OP_throwdyn: case MVM_OP_throwlex: case MVM_OP_throwlexotic: case MVM_OP_throwcatdyn: case MVM_OP_throwcatlex: case MVM_OP_throwcatlexotic: case MVM_OP_die: case MVM_OP_rethrow: case MVM_OP_resume: byte_to_ins_flags[pc - g->bytecode] |= MVM_CFG_BB_END; next_bbs = 1; break; } /* Final instruction is basic block end. */ if (pc + 2 + arg_size == end) byte_to_ins_flags[pc - g->bytecode] |= MVM_CFG_BB_END; /* Caculate next instruction's PC. */ pc += 2 + arg_size; /* If this is a deopt point opcode... */ if (!existing_deopts && (info->deopt_point & MVM_DEOPT_MARK_ONE)) add_deopt_annotation(tc, g, ins_node, pc, MVM_SPESH_ANN_DEOPT_ONE_INS); if (!existing_deopts && (info->deopt_point & MVM_DEOPT_MARK_ALL)) add_deopt_annotation(tc, g, ins_node, pc, MVM_SPESH_ANN_DEOPT_ALL_INS); if (!existing_deopts && (info->deopt_point & MVM_DEOPT_MARK_OSR)) add_deopt_annotation(tc, g, ins_node, pc, MVM_SPESH_ANN_DEOPT_OSR); /* Go to next instruction. */ ins_idx++; } /* Annotate instructions that are handler-significant. 
*/ for (i = 0; i < g->num_handlers; i++) { MVMSpeshIns *start_ins = ins_flat[byte_to_ins_flags[g->handlers[i].start_offset] >> 2]; MVMSpeshIns *end_ins = ins_flat[byte_to_ins_flags[g->handlers[i].end_offset] >> 2]; MVMSpeshIns *goto_ins = ins_flat[byte_to_ins_flags[g->handlers[i].goto_offset] >> 2]; MVMSpeshAnn *start_ann = MVM_spesh_alloc(tc, g, sizeof(MVMSpeshAnn)); MVMSpeshAnn *end_ann = MVM_spesh_alloc(tc, g, sizeof(MVMSpeshAnn)); MVMSpeshAnn *goto_ann = MVM_spesh_alloc(tc, g, sizeof(MVMSpeshAnn)); start_ann->next = start_ins->annotations; start_ann->type = MVM_SPESH_ANN_FH_START; start_ann->data.frame_handler_index = i; start_ins->annotations = start_ann; end_ann->next = end_ins->annotations; end_ann->type = MVM_SPESH_ANN_FH_END; end_ann->data.frame_handler_index = i; end_ins->annotations = end_ann; goto_ann->next = goto_ins->annotations; goto_ann->type = MVM_SPESH_ANN_FH_GOTO; goto_ann->data.frame_handler_index = i; goto_ins->annotations = goto_ann; } /* Annotate instructions that are inline start/end points. */ for (i = 0; i < g->num_inlines; i++) { MVMSpeshIns *start_ins = ins_flat[byte_to_ins_flags[g->inlines[i].start] >> 2]; MVMSpeshIns *end_ins = ins_flat[byte_to_ins_flags[g->inlines[i].end] >> 2]; MVMSpeshAnn *start_ann = MVM_spesh_alloc(tc, g, sizeof(MVMSpeshAnn)); MVMSpeshAnn *end_ann = MVM_spesh_alloc(tc, g, sizeof(MVMSpeshAnn)); start_ann->next = start_ins->annotations; start_ann->type = MVM_SPESH_ANN_INLINE_START; start_ann->data.inline_idx = i; start_ins->annotations = start_ann; end_ann->next = end_ins->annotations; end_ann->type = MVM_SPESH_ANN_INLINE_END; end_ann->data.inline_idx = i; end_ins->annotations = end_ann; } /* Now for the second pass, where we assemble the basic blocks. Also we * build a lookup table of instructions that start a basic block to that * basic block, for the final CFG construction. 
We make the entry block a * special one, containing a noop; it will have any exception handler * targets linked from it, so they show up in the graph. */ g->entry = MVM_spesh_alloc(tc, g, sizeof(MVMSpeshBB)); g->entry->first_ins = MVM_spesh_alloc(tc, g, sizeof(MVMSpeshIns)); g->entry->first_ins->info = get_op_info(tc, cu, 0); g->entry->last_ins = g->entry->first_ins; g->entry->idx = 0; cur_bb = NULL; prev_bb = g->entry; last_ins = NULL; ins_to_bb = MVM_calloc(ins_idx, sizeof(MVMSpeshBB *)); ins_idx = 0; bb_idx = 1; for (i = 0; i < g->bytecode_size; i++) { MVMSpeshIns *cur_ins; /* Skip zeros; no instruction here. */ if (!byte_to_ins_flags[i]) continue; /* Get current instruction. */ cur_ins = ins_flat[byte_to_ins_flags[i] >> 2]; /* Start of a basic block? */ if (byte_to_ins_flags[i] & MVM_CFG_BB_START) { /* Should not already be in a basic block. */ if (cur_bb) { MVM_spesh_graph_destroy(tc, g); MVM_exception_throw_adhoc(tc, "Spesh: confused during basic block analysis (in block)"); } /* Create it, and set first instruction and index. */ cur_bb = MVM_spesh_alloc(tc, g, sizeof(MVMSpeshBB)); cur_bb->first_ins = cur_ins; cur_bb->idx = bb_idx; bb_idx++; /* Record instruction -> BB start mapping. */ ins_to_bb[ins_idx] = cur_bb; /* Link it to the previous one. */ prev_bb->linear_next = cur_bb; } /* Should always be in a BB at this point. */ if (!cur_bb) { MVM_spesh_graph_destroy(tc, g); MVM_exception_throw_adhoc(tc, "Spesh: confused during basic block analysis (no block)"); } /* Add instruction into double-linked per-block instruction list. */ if (last_ins) { last_ins->next = cur_ins; cur_ins->prev = last_ins; } last_ins = cur_ins; /* End of a basic block? */ if (byte_to_ins_flags[i] & MVM_CFG_BB_END) { cur_bb->last_ins = cur_ins; prev_bb = cur_bb; cur_bb = NULL; last_ins = NULL; } ins_idx++; } g->num_bbs = bb_idx; /* Finally, link the basic blocks up to form a CFG. Along the way, any of * the instruction operands get the target BB stored. 
*/ cur_bb = g->entry; while (cur_bb) { /* If it's the first block, it's a special case; successors are the * real successor and all exception handlers. */ if (cur_bb == g->entry) { cur_bb->num_succ = 1 + g->num_handlers; cur_bb->succ = MVM_spesh_alloc(tc, g, cur_bb->num_succ * sizeof(MVMSpeshBB *)); cur_bb->succ[0] = cur_bb->linear_next; for (i = 0; i < g->num_handlers; i++) { MVMuint32 offset = g->handlers[i].goto_offset; cur_bb->succ[i + 1] = ins_to_bb[byte_to_ins_flags[offset] >> 2]; } } /* Otherwise, consider the last instruction, to see how we leave the BB. */ else { switch (cur_bb->last_ins->info->opcode) {
/* Tries to generate a specialization of the bytecode, for the given callsite
 * and argument tuple.
 *
 * tc           - the current thread context.
 * static_frame - the frame to specialize; the candidate is installed into
 *                static_frame->body.spesh_candidates.
 * callsite     - the callsite the specialization is for.
 * args         - the arguments used to drive argument specialization.
 *
 * Returns the installed candidate, or an already-installed equivalent one
 * (same callsite and identical guards) if another thread got there first.
 * Returns NULL if the frame already holds MVM_SPESH_LIMIT candidates or the
 * candidate table could not be allocated.
 *
 * Whenever the freshly generated code is NOT installed, its bytecode and
 * handlers are released here. The debug dump strings are always released.
 * NOTE(review): the guards, spesh slots and deopt table taken from the graph
 * are not released on the not-installed paths - confirm whether
 * MVM_spesh_graph_destroy owns them. */
MVMSpeshCandidate * MVM_spesh_candidate_generate(MVMThreadContext *tc,
        MVMStaticFrame *static_frame, MVMCallsite *callsite, MVMRegister *args) {
    MVMSpeshCandidate *result = NULL;
    MVMSpeshGuard *guards;
    MVMSpeshCode *sc;
    MVMint32 num_spesh_slots, num_guards, *deopts, num_deopts;
    MVMCollectable **spesh_slots;
    char *before = NULL, *after = NULL;
    MVMint32 installed = 0;   /* Set once our own code has been installed. */

    /* Generate the specialization. */
    MVMSpeshGraph *sg = MVM_spesh_graph_create(tc, static_frame);
    if (tc->instance->spesh_log_fh)
        before = MVM_spesh_dump(tc, sg);
    MVM_spesh_args(tc, sg, callsite, args);
    MVM_spesh_facts_discover(tc, sg);
    MVM_spesh_optimize(tc, sg);
    if (tc->instance->spesh_log_fh)
        after = MVM_spesh_dump(tc, sg);
    sc = MVM_spesh_codegen(tc, sg);

    /* Pull what we need out of the graph before it goes away. */
    num_spesh_slots = sg->num_spesh_slots;
    spesh_slots     = sg->spesh_slots;
    num_guards      = sg->num_guards;
    guards          = sg->guards;
    num_deopts      = sg->num_deopt_addrs;
    deopts          = sg->deopt_addrs;
    MVM_spesh_graph_destroy(tc, sg);

    /* Now try to add it. Note there's a slim chance another thread beat us
     * to doing so. Also other threads can read the specializations without
     * lock, so make absolutely sure we increment the count of them after we
     * add the new one. */
    uv_mutex_lock(&tc->instance->mutex_spesh_install);
    if (static_frame->body.num_spesh_candidates < MVM_SPESH_LIMIT) {
        MVMint32 num_spesh = static_frame->body.num_spesh_candidates;
        MVMint32 i;

        /* Look for an equivalent candidate that is already installed. */
        for (i = 0; i < num_spesh; i++) {
            MVMSpeshCandidate *compare = &static_frame->body.spesh_candidates[i];
            if (compare->cs == callsite && compare->num_guards == num_guards &&
                memcmp(compare->guards, guards, num_guards * sizeof(MVMSpeshGuard)) == 0) {
                /* Beaten! */
                result = compare;
                break;
            }
        }

        if (!result) {
            /* Lazily create the candidate table on first use. */
            if (!static_frame->body.spesh_candidates)
                static_frame->body.spesh_candidates = malloc(
                    MVM_SPESH_LIMIT * sizeof(MVMSpeshCandidate));

            /* Only install if we actually have a table; on allocation
             * failure we fall through and report "no candidate". */
            if (static_frame->body.spesh_candidates) {
                result                  = &static_frame->body.spesh_candidates[num_spesh];
                result->cs              = callsite;
                result->num_guards      = num_guards;
                result->guards          = guards;
                result->bytecode        = sc->bytecode;
                result->bytecode_size   = sc->bytecode_size;
                result->handlers        = sc->handlers;
                result->num_spesh_slots = num_spesh_slots;
                result->spesh_slots     = spesh_slots;
                result->num_deopts      = num_deopts;
                result->deopts          = deopts;

                /* Publish the entry before making it visible via the count. */
                MVM_barrier();
                static_frame->body.num_spesh_candidates++;
                installed = 1;

                if (static_frame->common.header.flags & MVM_CF_SECOND_GEN)
                    if (!(static_frame->common.header.flags & MVM_CF_IN_GEN2_ROOT_LIST))
                        MVM_gc_root_gen2_add(tc, (MVMCollectable *)static_frame);

                if (tc->instance->spesh_log_fh && before && after) {
                    char *c_name = MVM_string_utf8_encode_C_string(tc, static_frame->body.name);
                    char *c_cuid = MVM_string_utf8_encode_C_string(tc, static_frame->body.cuuid);
                    fprintf(tc->instance->spesh_log_fh,
                        "Specialized '%s' (cuid: %s)\n\n", c_name, c_cuid);
                    fprintf(tc->instance->spesh_log_fh,
                        "Before:\n%s\nAfter:\n%s\n\n========\n\n", before, after);
                    free(c_name);
                    free(c_cuid);
                }
            }
        }
    }

    /* If our generated code was not installed - limit reached, allocation
     * failure, or another thread beat us to it - nothing references it, so
     * release it. Testing `result` alone would miss the "beaten" case, where
     * result points at the other thread's candidate. */
    if (!installed) {
        free(sc->bytecode);
        free(sc->handlers);
    }
    uv_mutex_unlock(&tc->instance->mutex_spesh_install);

    /* The dumps are only needed for logging; release them on every path. */
    free(before);
    free(after);
    free(sc);
    return result;
}