// Builds the "finish" block that terminates this stage with a return.
//
// If the return value is overridden, the block holds the stage's return block
// followed by a return of the override value. Otherwise the block computes
// (num_outputs^2 - num_outputs) / 2, stores that into the return index and
// returns the return index.
//
// num_inputs is not read by this function.
MBlock *ComparisonStageIR::finish_stage(MVar *num_inputs, MVar *num_outputs) {
    MBlock *finish = new MBlock("finish");
    finish->register_for_delete();

    if (should_override_ret_val()) {
        // Overridden: emit the return block, then return the override value.
        finish->add_expr(get_ret_block());
        MRetVal *override_ret = new MRetVal(get_override_ret_val());
        override_ret->register_for_delete();
        finish->add_expr(override_ret);
        return finish;
    }

    // (n^2 - n) / 2, with n = num_outputs
    MMul *squared = new MMul(num_outputs, num_outputs);
    squared->register_for_delete();
    MSub *difference = new MSub(squared->get_result(), num_outputs);
    difference->register_for_delete();
    MDiv *halved = new MDiv(difference->get_result(), MVar::create_constant<long>(2));
    halved->register_for_delete();
    finish->add_expr(squared);
    finish->add_expr(difference);
    finish->add_expr(halved);

    // return_idx = (n^2 - n) / 2
    MStatement *store_return_idx = new MStatement(get_return_idx(), halved->get_result());
    store_return_idx->register_for_delete();
    finish->add_expr(store_return_idx);

    // return the return_idx
    MRetVal *ret = new MRetVal(get_return_idx());
    ret->register_for_delete();
    finish->add_expr(ret);

    return finish;
}
// Appends to `block` the expressions computing
//     left_idx * right_bound + right_idx
// and returns the variable holding the result of the addition.
MVar *ComparisonStageIR::compute_linear_index(MVar *left_idx, MVar *right_bound, MVar *right_idx,
                                              MBlock *block) const {
    // scaled = left_idx * right_bound
    MMul *scaled = new MMul(left_idx, right_bound);
    scaled->register_for_delete();
    block->add_expr(scaled);

    // linear = scaled + right_idx
    MAdd *linear = new MAdd(scaled->get_result(), right_idx);
    linear->register_for_delete();
    block->add_expr(linear);

    return linear->get_result();
}
// Convert all components of a linear sum *except* its constant to a definition, // adding any necessary instructions to the end of block. static inline MDefinition * ConvertLinearSum(MBasicBlock *block, const LinearSum &sum) { MDefinition *def = NULL; for (size_t i = 0; i < sum.numTerms(); i++) { LinearTerm term = sum.term(i); JS_ASSERT(!term.term->isConstant()); if (term.scale == 1) { if (def) { def = MAdd::New(def, term.term); def->toAdd()->setInt32(); block->insertBefore(block->lastIns(), def->toInstruction()); } else { def = term.term; } } else { if (!def) { def = MConstant::New(Int32Value(0)); block->insertBefore(block->lastIns(), def->toInstruction()); } if (term.scale == -1) { def = MSub::New(def, term.term); def->toSub()->setInt32(); block->insertBefore(block->lastIns(), def->toInstruction()); } else { MConstant *factor = MConstant::New(Int32Value(term.scale)); block->insertBefore(block->lastIns(), factor); MMul *mul = MMul::New(term.term, factor); mul->setInt32(); block->insertBefore(block->lastIns(), mul); def = MAdd::New(def, mul); def->toAdd()->setInt32(); block->insertBefore(block->lastIns(), def->toInstruction()); } } } if (!def) { def = MConstant::New(Int32Value(0)); block->insertBefore(block->lastIns(), def->toInstruction()); } return def; }
// Emit code for an int32 multiplication whose result replaces the lhs
// register.
//
// Register/memory rhs: a plain imull, followed by an overflow bailout and an
// out-of-line negative zero check when the MIR node says either can happen.
//
// Constant rhs: the negative zero bailout is emitted before the multiplication,
// and the constants -1, 0, 1, 2 and (when overflow is impossible) positive
// powers of two get cheaper instruction sequences than imull.
//
// Returns false when a bailout or the out-of-line code could not be
// registered, true otherwise.
bool
CodeGeneratorX86Shared::visitMulI(LMulI *ins)
{
    const LAllocation *lhs = ins->lhs();
    const LAllocation *rhs = ins->rhs();
    MMul *mul = ins->mir();
    JS_ASSERT_IF(mul->mode() == MMul::Integer, !mul->canBeNegativeZero() && !mul->canOverflow());

    if (!rhs->isConstant()) {
        masm.imull(ToOperand(rhs), ToRegister(lhs));

        // Bailout on overflow
        if (mul->canOverflow() && !bailoutIf(Assembler::Overflow, ins->snapshot()))
            return false;

        if (!mul->canBeNegativeZero())
            return true;

        // Jump to an OOL path if the result is 0.
        MulNegativeZeroCheck *zeroCheck = new MulNegativeZeroCheck(ins);
        if (!addOutOfLineCode(zeroCheck))
            return false;

        masm.testl(ToRegister(lhs), ToRegister(lhs));
        masm.j(Assembler::Zero, zeroCheck->entry());
        masm.bind(zeroCheck->rejoin());
        return true;
    }

    int32_t factor = ToInt32(rhs);

    // Bailout on -0.0: with a zero factor bail out when lhs has its sign bit
    // set, with a negative factor bail out when lhs is zero.
    if (mul->canBeNegativeZero() && factor <= 0) {
        Assembler::Condition bailoutCond;
        if (factor == 0)
            bailoutCond = Assembler::Signed;
        else
            bailoutCond = Assembler::Equal;

        masm.testl(ToRegister(lhs), ToRegister(lhs));
        if (!bailoutIf(bailoutCond, ins->snapshot()))
            return false;
    }

    if (factor == 0) {
        masm.xorl(ToOperand(lhs), ToRegister(lhs));
        return true; // escape overflow check
    }

    if (factor == 1) {
        // nop
        return true; // escape overflow check
    }

    if (factor == -1) {
        masm.negl(ToOperand(lhs));
    } else if (factor == 2) {
        masm.addl(ToOperand(lhs), ToRegister(lhs));
    } else {
        if (!mul->canOverflow() && factor > 0) {
            // Use shift if cannot overflow and constant is power of 2
            int32_t shift = FloorLog2(factor);
            if ((1 << shift) == factor) {
                masm.shll(Imm32(shift), ToRegister(lhs));
                return true;
            }
        }
        masm.imull(Imm32(ToInt32(rhs)), ToRegister(lhs));
    }

    // Bailout on overflow
    if (mul->canOverflow() && !bailoutIf(Assembler::Overflow, ins->snapshot()))
        return false;

    return true;
}
static inline bool NeedNegativeZeroCheck(MDefinition *def) { // Test if all uses have the same symantic for -0 and 0 for (MUseIterator use = def->usesBegin(); use != def->usesEnd(); use++) { if (use->node()->isResumePoint()) return true; MDefinition *use_def = use->node()->toDefinition(); switch (use_def->op()) { case MDefinition::Op_Add: { // x + y gives -0, when both x and y are -0 // - When other operand can't produce -0 (i.e. all opcodes, except Mul/Div/ToInt32) // Remove negative zero check on this operand // - When both operands can produce -0 (both Mul/Div/ToInt32 opcode) // We can remove the check eagerly on this operand. MDefinition *operand = use_def->getOperand(0); if (operand == def) { operand = use_def->getOperand(1); // Don't remove check when both operands are same definition // As removing it from one operand, will remove it from both. if (operand == def) return true; } // Check if check is possibly eagerly removed on other operand // and don't remove check eagerly on this operand in that case. 
if (operand->isMul()) { MMul *mul = operand->toMul(); if (!mul->canBeNegativeZero()) return true; } else if (operand->isDiv()) { MDiv *div = operand->toDiv(); if (!div->canBeNegativeZero()) return true; } else if (operand->isToInt32()) { MToInt32 *int32 = operand->toToInt32(); if (!int32->canBeNegativeZero()) return true; } else if (operand->isPhi()) { return true; } break; } case MDefinition::Op_StoreElement: case MDefinition::Op_StoreElementHole: case MDefinition::Op_LoadElement: case MDefinition::Op_LoadElementHole: case MDefinition::Op_LoadTypedArrayElement: case MDefinition::Op_LoadTypedArrayElementHole: case MDefinition::Op_CharCodeAt: case MDefinition::Op_Mod: case MDefinition::Op_Sub: // Only allowed to remove check when definition is the second operand if (use_def->getOperand(0) == def) return true; if (use_def->numOperands() > 2) { for (size_t i = 2; i < use_def->numOperands(); i++) { if (use_def->getOperand(i) == def) return true; } } break; case MDefinition::Op_BoundsCheck: // Only allowed to remove check when definition is the first operand if (use_def->getOperand(1) == def) return true; break; case MDefinition::Op_ToString: case MDefinition::Op_FromCharCode: case MDefinition::Op_TableSwitch: case MDefinition::Op_Compare: case MDefinition::Op_BitAnd: case MDefinition::Op_BitOr: case MDefinition::Op_BitXor: case MDefinition::Op_Abs: // Always allowed to remove check. No matter which operand. break; default: return true; } } return false; }
std::vector<MVar *> ComparisonStageIR::create_stage_inputs(MBlock *pipeline, MVar *prev_stage_output_elements, MVar *prev_stage_result) { std::vector<MVar *> calling_args; MVar *left_num_inputs = nullptr; MVar *right_num_inputs = nullptr; if (!left_input && !right_input) { // take the outputs from the previous stage in the pipeline and duplicate them // left calling_args.push_back(prev_stage_output_elements); // these are now the input elements calling_args.push_back(prev_stage_result); // in the normal case, this is how many output elements were processed in the previous stage left_num_inputs = prev_stage_result; // right calling_args.push_back(prev_stage_output_elements); calling_args.push_back(prev_stage_result); right_num_inputs = prev_stage_result; } else if (left_input && !right_input) { // take the user inputs for left and previous stage outputs for the right // left left_num_inputs = determine_inputs(left_input, &calling_args); // right calling_args.push_back(prev_stage_output_elements); calling_args.push_back(prev_stage_result); right_num_inputs = prev_stage_result; } else if (!left_input && right_input) { // take the previous stage outputs for the left and the user inputs for the right // left calling_args.push_back(prev_stage_output_elements); calling_args.push_back(prev_stage_result); left_num_inputs = prev_stage_result; // right right_num_inputs = determine_inputs(right_input, &calling_args); } else { // take the user inputs for left and right // left left_num_inputs = determine_inputs(left_input, &calling_args); // right right_num_inputs = determine_inputs(right_input, &calling_args); } if (compareVIO) { // this type has an output element // create the new output elements MVar *created_elements = new MVar(create_type<MElementType **>()); created_elements->register_for_delete(); // figure out how many outputs there are MMul *mmul = new MMul(left_num_inputs, right_num_inputs); mmul->register_for_delete(); pipeline->add_expr(mmul); // 
calling_args.push_back(created_elements); MVar *num_outputs_created; if ((left_input || right_input) && !_force_commutative) { // left * right (O(nm)) created_elements->set_size(mmul->get_result()); // calling_args.push_back(mmul->get_result()); num_outputs_created = mmul->get_result(); } else { // assumes that left and right are the same // (n^2-n)/2 since we assume commutative (O(n^2)) MSub *msub = new MSub(mmul->get_result(), left_num_inputs); msub->register_for_delete(); MDiv *mdiv = new MDiv(msub->get_result(), MVar::create_constant<long>(2)); mdiv->register_for_delete(); pipeline->add_expr(msub); pipeline->add_expr(mdiv); created_elements->set_size(mdiv->get_result()); // number to create // calling_args.push_back(mdiv->get_result()); // the number created above num_outputs_created = mdiv->get_result(); } MStatement *statement = new MStatement(created_elements, nullptr); statement->register_for_delete(); pipeline->add_expr(statement); // create ElementColl * MSub *sub = new MSub(num_outputs_created, MVar::create_constant<long>(1)); sub->register_for_delete(); pipeline->add_expr(sub); MVar *element_coll = new MVar(create_type<MElementCollType *>()); element_coll->register_for_delete(); MStatement *set_element_coll = new MStatement(element_coll, nullptr); set_element_coll->register_for_delete(); set_element_coll->add_parameter(num_outputs_created); set_element_coll->add_parameter(sub->get_result()); set_element_coll->add_parameter(created_elements); pipeline->add_expr(set_element_coll); calling_args.push_back(element_coll); calling_args.push_back(num_outputs_created); } return calling_args; }
// TODO once I get the indexing right, I can fix preallocation so that only the correct number of outputs are preallocated, not just N^2 // TODO Can also fix the number output (does that need to be fixed?) std::vector<MVar *> ComparisonStageIR::create_user_function_inputs(MBlock **mblock, MFor *outer_loop, MFor *outer_tiled_inner, MFor *inner_loop, MFor *inner_tiled_inner, MVar *, bool, MVar *, MVar *, MVar *original_num_inputs_left, MVar *original_num_inputs_right) { // body of the outer MFor passed in is the inner MFor loop std::vector<MVar *> stage_args = get_stage_function()->get_loaded_args();//get_args(); std::vector<MVar *> args; // Think of the indices into the two input arrays as coordinates into a matrix. The outer coordinate is for N, i.e. the row number. // The inner coordinate is for M, i.e. the column number. MVar *final_outer_coordinate; MVar *final_inner_coordinate; // get the outer and inner input elements // if tiled, the computation for the indices is different if (is_tiled() && is_tileable()) { if ((left_input || right_input) && !_force_commutative) { // N x M assert(original_num_inputs_left && original_num_inputs_right); // sanity check MVar *n = outer_loop->get_loop_index(); MVar *m = inner_loop->get_loop_index(); MVar *nn = outer_tiled_inner->get_loop_index(); MVar *mm = inner_tiled_inner->get_loop_index(); // outer = n * tile_size_N + nn final_outer_coordinate = get_element(stage_args[0], n, tile_size_N, nn, outer_tiled_inner->get_body_block(), inner_loop, &args, original_num_inputs_left, nullptr); // inner = m * M + mm final_inner_coordinate = get_element(stage_args[2], m, tile_size_M, mm, inner_tiled_inner->get_body_block(), outer_tiled_inner, &args, original_num_inputs_right, mblock); } else { // (N^2-N)/2 assert(original_num_inputs_left && original_num_inputs_right); // sanity check MVar *n = outer_loop->get_loop_index(); MVar *m = inner_loop->get_loop_index(); MVar *nn = outer_tiled_inner->get_loop_index(); MVar *mm = 
inner_tiled_inner->get_loop_index(); // outer = n * tile_size_N + nn final_outer_coordinate = get_element(stage_args[0], n, tile_size_N, nn, outer_tiled_inner->get_body_block(), inner_loop, &args, original_num_inputs_left, nullptr); // the outer doesn't change with commutativity // this code could almost be handled by get_element, but the conditional is more complex, so I just leave it here for now rather than // trying to refactor it. // inner = m * M + mm int inner_insert_idx = 0; MBlock *linear_inner = new MBlock(); linear_inner->register_for_delete(); MVar *inner_idx = compute_linear_index(m, tile_size_M, mm, linear_inner); inner_tiled_inner->get_body_block()->insert_at(linear_inner, inner_insert_idx++); final_inner_coordinate = inner_idx; // check that the inner index is still in range (< M) and that it is less than the outer idx // TODO this assumes that the integral value of true is 1. In the future, create an MTrue and MFalse type // that allows arithmetic to be done on it. Then I can plug in the actual values when generating the back end code, such as LLVM. MSLT *is_inner_in_range = new MSLT(inner_idx, original_num_inputs_right); is_inner_in_range->register_for_delete(); inner_tiled_inner->get_body_block()->insert_at(is_inner_in_range, inner_insert_idx++); MSLT *is_less_than_outer = new MSLT(inner_idx, final_outer_coordinate); is_less_than_outer->register_for_delete(); is_less_than_outer->override_name("inner_less_than_outer"); inner_tiled_inner->get_body_block()->insert_at(is_less_than_outer, inner_insert_idx++); // since we don't have a compound conditional type (YET), we get the results of the two SLT calls here. // If they sum to 2, then both are true since we assume true == 1. This way, we only need a single if // statement checking the value of the addition. 
MCast *is_inner_in_range_long = new MCast(is_inner_in_range->get_result(), MScalarType::get_long_type()); is_inner_in_range_long->register_for_delete(); inner_tiled_inner->get_body_block()->insert_at(is_inner_in_range_long, inner_insert_idx++); MCast *is_less_than_outer_long = new MCast(is_less_than_outer->get_result(), MScalarType::get_long_type()); is_less_than_outer_long->register_for_delete(); inner_tiled_inner->get_body_block()->insert_at(is_less_than_outer_long, inner_insert_idx++); MAdd *sum_of_conditionals = new MAdd(is_inner_in_range_long->get_casted(), is_less_than_outer_long->get_casted()); sum_of_conditionals->register_for_delete(); inner_tiled_inner->get_body_block()->insert_at(sum_of_conditionals, inner_insert_idx++); MEq *is_in_range_and_less_than = new MEq(sum_of_conditionals->get_result(), MVar::create_constant<long>(2)); is_in_range_and_less_than->register_for_delete(); inner_tiled_inner->get_body_block()->insert_at(is_in_range_and_less_than, inner_insert_idx++); MBlock *inner_is_in_range_and_less_than = new MBlock(); inner_is_in_range_and_less_than->register_for_delete(); MBlock *inner_not_in_range_nor_less_than = new MBlock(); inner_not_in_range_nor_less_than->register_for_delete(); MBlock *dummy_inner = new MBlock(); dummy_inner->register_for_delete(); MIfThenElse *inner_ite = new MIfThenElse(is_in_range_and_less_than->get_result(), inner_is_in_range_and_less_than, inner_not_in_range_nor_less_than, dummy_inner, nullptr); inner_ite->register_for_delete(); inner_tiled_inner->get_body_block()->insert_at(inner_ite, inner_insert_idx++); inner_ite->override_name("inner_ite"); // If in range, get the inner element and then go to the innermost tiled loop. // Since the innermost loop is already in outer_tiled_inner's body, remove it from there (and any other stuff that should only // execute if the we are in range) and then add it to the outer_is_in_range block. 
MIndex *get_inner_input = new MIndex(stage_args[2], inner_idx, create_type<MElementType *>(), "inner_input_element"); get_inner_input->register_for_delete(); inner_is_in_range_and_less_than->add_expr(get_inner_input); args.push_back(get_inner_input->get_result()); inner_is_in_range_and_less_than->add_exprs(inner_tiled_inner->get_body_block()->remove_range(inner_insert_idx++, -1)); // If out of range, continue to the next iteration of the outer_tiled_inner_loop MContinue *to_nn_loop = new MContinue(outer_tiled_inner); to_nn_loop->register_for_delete(); inner_not_in_range_nor_less_than->add_expr(to_nn_loop); *mblock = inner_is_in_range_and_less_than; } } else { // the loop indices are already setup by this point depending on whether we are NxM or N^2 MVar *current_outer_idx = outer_loop->get_loop_index(); MVar *current_inner_idx = inner_loop->get_loop_index(); final_outer_coordinate = current_outer_idx; final_inner_coordinate = current_inner_idx; MIndex *outer_element = new MIndex(stage_args[0], current_outer_idx, create_type<MElementType *>(), "outer_input_element"); outer_element->register_for_delete(); MIndex *inner_element = new MIndex(stage_args[2], current_inner_idx, create_type<MElementType *>(), "inner_input_element"); inner_element->register_for_delete(); outer_loop->get_body_block()->add_expr(outer_element); inner_loop->get_body_block()->add_expr(inner_element); args.push_back(outer_element->get_result()); args.push_back(inner_element->get_result()); *mblock = new MBlock(); (*mblock)->register_for_delete(); } // if this has an output, make the output element // this doesn't care if we are tiled or not. The equations are the same since we appropriately set the coordinates // above based on tiling or not. 
MVar *final_index; if (compareVIO) { // First create "shell" for a new Element* to be passed to the user MVar *new_element = new MVar(create_type<MElementType*>(), "output_element"); new_element->register_for_delete(); // create the statement that will actually initialize the value // compute the current output index if ((left_input || right_input) && !_force_commutative) { // N x M // equation for linearizing the coordinates is: // final_outer_coordinate X original_num_inputs_right + final_inner_coordinate MMul *mul = new MMul(final_outer_coordinate, original_num_inputs_right); mul->register_for_delete(); (*mblock)->add_expr(mul); MAdd *add = new MAdd(mul->get_result(), final_inner_coordinate); add->register_for_delete(); (*mblock)->add_expr(add); final_index = add->get_result(); } else { // N^2 and/or commutative // equation for linearizing the coordinates is: // [final_outer_coordinate^2 - final_outer_coordinate]/2 + final_inner_coordinate // the division term in this equation tells you how many elements have come before you. Then the addition // adds on your position in the current row. // It's not straightforward like the NxM version because we are only doing comparisons between elements // in the lower triangular part of the matrix (excluding the diagonal), so the linear indices from // the NxM version would give non-consecutive indices. This basically takes those indices and compresses // them down from 0 to however many comparisons we do. 
MMul *squared = new MMul(final_outer_coordinate, final_outer_coordinate); squared->register_for_delete(); (*mblock)->add_expr(squared); MSub *sub = new MSub(squared->get_result(), final_outer_coordinate); sub->register_for_delete(); (*mblock)->add_expr(sub); MDiv *div = new MDiv(sub->get_result(), MVar::create_constant<long>(2)); div->register_for_delete(); (*mblock)->add_expr(div); MAdd *add = new MAdd(div->get_result(), final_inner_coordinate); add->register_for_delete(); (*mblock)->add_expr(add); final_index = add->get_result(); } MStatement *set_new_element = new MStatement(new_element, nullptr); // nullptr tells it to create a new value set_new_element->register_for_delete(); set_new_element->add_parameter(final_index); // this is the id of the Element to be created (*mblock)->add_expr(set_new_element); args.push_back(new_element); // now set the Element in the output array MStatementIdx *set = new MStatementIdx(stage_args[4], new_element, final_index); set->register_for_delete(); (*mblock)->add_expr(set); } return args; }