/* Generate code for transformation 1 (with MODE and OPERATION, operands OP1
   and OP2, whose value is expected to be VALUE, result TARGET and
   probability of taking the optimal path PROB).  */
static rtx
gen_divmod_fixed_value (enum machine_mode mode, enum rtx_code operation,
                        rtx target, rtx op1, rtx op2, gcov_type value,
                        int prob)
{
  rtx tmp, tmp1, jump;
  rtx neq_label = gen_label_rtx ();
  rtx end_label = gen_label_rtx ();
  rtx sequence;

  start_sequence ();

  if (!REG_P (op2))
    {
      tmp = gen_reg_rtx (mode);
      emit_move_insn (tmp, copy_rtx (op2));
    }
  else
    tmp = op2;

  do_compare_rtx_and_jump (tmp, GEN_INT (value), NE, 0, mode, NULL_RTX,
                           NULL_RTX, neq_label);

  /* Add branch probability to jump we just created.  */
  jump = get_last_insn ();
  REG_NOTES (jump)
    = gen_rtx_EXPR_LIST (REG_BR_PROB, GEN_INT (REG_BR_PROB_BASE - prob),
                         REG_NOTES (jump));

  tmp1 = simplify_gen_binary (operation, mode, copy_rtx (op1),
                              GEN_INT (value));
  tmp1 = force_operand (tmp1, target);
  if (tmp1 != target)
    emit_move_insn (copy_rtx (target), copy_rtx (tmp1));

  emit_jump_insn (gen_jump (end_label));
  emit_barrier ();

  emit_label (neq_label);
  tmp1 = simplify_gen_binary (operation, mode, copy_rtx (op1),
                              copy_rtx (tmp));
  tmp1 = force_operand (tmp1, target);
  if (tmp1 != target)
    emit_move_insn (copy_rtx (target), copy_rtx (tmp1));

  emit_label (end_label);

  sequence = get_insns ();
  end_sequence ();
  rebuild_jump_labels (sequence);
  return sequence;
}
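/* Illustrative sketch, not part of the compiled code: for OPERATION == DIV
   the sequence generated above behaves roughly like

     if (op2 == VALUE)
       target = op1 / VALUE;    -- divisor known at compile time
     else
       target = op1 / op2;      -- general case

   so the common, profiled divisor is handled by the cheaper
   constant-divisor expansion.  */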
/* Generate code for a speculative prefetch of the memory at ADDRESS + DELTA.
   WRITE is nonzero if the access is a store.  */
static rtx
gen_speculative_prefetch (rtx address, gcov_type delta, int write)
{
  rtx tmp;
  rtx sequence;

  /* TODO: we do the prefetching for just one iteration ahead, which
     often is not enough.  */
  start_sequence ();
  if (offsettable_address_p (0, VOIDmode, address))
    tmp = plus_constant (copy_rtx (address), delta);
  else
    {
      tmp = simplify_gen_binary (PLUS, Pmode,
                                 copy_rtx (address), GEN_INT (delta));
      tmp = force_operand (tmp, NULL);
    }
  if (! (*insn_data[(int) CODE_FOR_prefetch].operand[0].predicate)
        (tmp, insn_data[(int) CODE_FOR_prefetch].operand[0].mode))
    tmp = force_reg (Pmode, tmp);
  emit_insn (gen_prefetch (tmp, GEN_INT (write), GEN_INT (3)));
  sequence = get_insns ();
  end_sequence ();

  return sequence;
}
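/* Illustrative sketch, not part of the compiled code: the emitted sequence
   corresponds roughly to the builtin call

     __builtin_prefetch ((char *) address + delta, write, 3);

   where the constant 3 requests maximum temporal locality.  */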
/* Generate code for transformations 3 and 4 (with MODE and OPERATION,
   operands OP1 and OP2, result TARGET, at most SUB subtractions, and
   probability of taking the optimal path(s) PROB1 and PROB2).  */
static rtx
gen_mod_subtract (enum machine_mode mode, enum rtx_code operation,
                  rtx target, rtx op1, rtx op2, int sub, int prob1, int prob2)
{
  rtx tmp, tmp1, jump;
  rtx end_label = gen_label_rtx ();
  rtx sequence;
  int i;

  start_sequence ();

  if (!REG_P (op2))
    {
      tmp = gen_reg_rtx (mode);
      emit_move_insn (tmp, copy_rtx (op2));
    }
  else
    tmp = op2;

  emit_move_insn (target, copy_rtx (op1));
  do_compare_rtx_and_jump (target, tmp, LTU, 0, mode, NULL_RTX,
                           NULL_RTX, end_label);

  /* Add branch probability to jump we just created.  */
  jump = get_last_insn ();
  REG_NOTES (jump)
    = gen_rtx_EXPR_LIST (REG_BR_PROB, GEN_INT (prob1), REG_NOTES (jump));

  for (i = 0; i < sub; i++)
    {
      tmp1 = expand_simple_binop (mode, MINUS, target, tmp, target,
                                  0, OPTAB_WIDEN);
      if (tmp1 != target)
        emit_move_insn (target, tmp1);
      do_compare_rtx_and_jump (target, tmp, LTU, 0, mode, NULL_RTX,
                               NULL_RTX, end_label);

      /* Add branch probability to jump we just created.  */
      jump = get_last_insn ();
      REG_NOTES (jump)
        = gen_rtx_EXPR_LIST (REG_BR_PROB, GEN_INT (prob2), REG_NOTES (jump));
    }

  tmp1 = simplify_gen_binary (operation, mode, copy_rtx (target),
                              copy_rtx (tmp));
  tmp1 = force_operand (tmp1, target);
  if (tmp1 != target)
    emit_move_insn (target, tmp1);

  emit_label (end_label);

  sequence = get_insns ();
  end_sequence ();
  rebuild_jump_labels (sequence);
  return sequence;
}
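/* Illustrative sketch, not part of the compiled code: for SUB == 2 and
   OPERATION == UMOD the generated sequence behaves roughly like

     target = op1;
     if (target < op2) goto end;
     target -= op2;
     if (target < op2) goto end;
     target -= op2;
     if (target < op2) goto end;
     target = target % op2;      -- fall back to the full modulo
   end:

   i.e. the result is computed with at most SUB subtractions when the
   profile says the quotient is usually small.  */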
/* Generate code for transformation 2 (with MODE and OPERATION, operands OP1
   and OP2, result TARGET and probability of taking the optimal path PROB).  */
static rtx
gen_mod_pow2 (enum machine_mode mode, enum rtx_code operation, rtx target,
              rtx op1, rtx op2, int prob)
{
  rtx tmp, tmp1, tmp2, tmp3, jump;
  rtx neq_label = gen_label_rtx ();
  rtx end_label = gen_label_rtx ();
  rtx sequence;

  start_sequence ();

  if (!REG_P (op2))
    {
      tmp = gen_reg_rtx (mode);
      emit_move_insn (tmp, copy_rtx (op2));
    }
  else
    tmp = op2;

  tmp1 = expand_simple_binop (mode, PLUS, tmp, constm1_rtx, NULL_RTX,
                              0, OPTAB_WIDEN);
  tmp2 = expand_simple_binop (mode, AND, tmp, tmp1, NULL_RTX,
                              0, OPTAB_WIDEN);
  do_compare_rtx_and_jump (tmp2, const0_rtx, NE, 0, mode, NULL_RTX,
                           NULL_RTX, neq_label);

  /* Add branch probability to jump we just created.  */
  jump = get_last_insn ();
  REG_NOTES (jump)
    = gen_rtx_EXPR_LIST (REG_BR_PROB, GEN_INT (REG_BR_PROB_BASE - prob),
                         REG_NOTES (jump));

  tmp3 = expand_simple_binop (mode, AND, op1, tmp1, target,
                              0, OPTAB_WIDEN);
  if (tmp3 != target)
    emit_move_insn (copy_rtx (target), tmp3);

  emit_jump_insn (gen_jump (end_label));
  emit_barrier ();

  emit_label (neq_label);
  tmp1 = simplify_gen_binary (operation, mode, copy_rtx (op1),
                              copy_rtx (tmp));
  tmp1 = force_operand (tmp1, target);
  if (tmp1 != target)
    emit_move_insn (target, tmp1);

  emit_label (end_label);

  sequence = get_insns ();
  end_sequence ();
  rebuild_jump_labels (sequence);
  return sequence;
}
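/* Illustrative sketch, not part of the compiled code: for OPERATION == UMOD
   the generated sequence behaves roughly like

     if ((op2 & (op2 - 1)) == 0)     -- op2 is a power of two
       target = op1 & (op2 - 1);
     else
       target = op1 % op2;

   replacing the modulo by a mask in the profiled common case.  */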
/* Store in *ADDR the RTL in mode ADDRESS_MODE for an address of the shape
   SYMBOL + BASE + INDEX * STEP + OFFSET, omitting any parts that are NULL.
   If STEP_P (resp. OFFSET_P) is nonnull, set *STEP_P (resp. *OFFSET_P) to
   point to the location of STEP (resp. OFFSET) within the result, or to
   NULL if it does not appear there.  */
static void
gen_addr_rtx (enum machine_mode address_mode,
              rtx symbol, rtx base, rtx index, rtx step, rtx offset,
              rtx *addr, rtx **step_p, rtx **offset_p)
{
  rtx act_elem;

  *addr = NULL_RTX;
  if (step_p)
    *step_p = NULL;
  if (offset_p)
    *offset_p = NULL;

  if (index)
    {
      act_elem = index;
      if (step)
        {
          act_elem = gen_rtx_MULT (address_mode, act_elem, step);

          if (step_p)
            *step_p = &XEXP (act_elem, 1);
        }

      *addr = act_elem;
    }

  if (base && base != const0_rtx)
    {
      if (*addr)
        *addr = simplify_gen_binary (PLUS, address_mode, base, *addr);
      else
        *addr = base;
    }

  if (symbol)
    {
      act_elem = symbol;
      if (offset)
        {
          act_elem = gen_rtx_PLUS (address_mode, act_elem, offset);

          if (offset_p)
            *offset_p = &XEXP (act_elem, 1);

          if (GET_CODE (symbol) == SYMBOL_REF
              || GET_CODE (symbol) == LABEL_REF
              || GET_CODE (symbol) == CONST)
            act_elem = gen_rtx_CONST (address_mode, act_elem);
        }

      if (*addr)
        *addr = gen_rtx_PLUS (address_mode, *addr, act_elem);
      else
        *addr = act_elem;
    }
  else if (offset)
    {
      if (*addr)
        {
          *addr = gen_rtx_PLUS (address_mode, *addr, offset);
          if (offset_p)
            *offset_p = &XEXP (*addr, 1);
        }
      else
        {
          *addr = offset;
          if (offset_p)
            *offset_p = addr;
        }
    }

  if (!*addr)
    *addr = const0_rtx;
}
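/* Illustrative note, not part of the compiled code: the routine assembles an
   address of the general shape

     symbol + base + index * step + offset

   dropping the missing parts, and records through *STEP_P and *OFFSET_P
   where STEP and OFFSET ended up, presumably so that callers can adjust
   them in place without rebuilding the whole expression.  */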
/* Replace all occurrences of OLD_RTX in *PX with NEW_RTX and try to simplify
   the resulting expression.  FLAGS is a set of PR_* flags: PR_CAN_APPEAR
   says NEW_RTX may appear directly at this point, PR_HANDLE_MEM allows MEMs
   to be rewritten, and PR_OPTIMIZE_FOR_SPEED selects the cost model for
   address replacements.  Return true if the resulting expression is valid
   in this context.  */
static bool
propagate_rtx_1 (rtx *px, rtx old_rtx, rtx new_rtx, int flags)
{
  rtx x = *px, tem = NULL_RTX, op0, op1, op2;
  enum rtx_code code = GET_CODE (x);
  machine_mode mode = GET_MODE (x);
  machine_mode op_mode;
  bool can_appear = (flags & PR_CAN_APPEAR) != 0;
  bool valid_ops = true;

  if (!(flags & PR_HANDLE_MEM) && MEM_P (x) && !MEM_READONLY_P (x))
    {
      /* If unsafe, change MEMs to CLOBBERs or SCRATCHes (to preserve whether
         they have side effects or not).  */
      *px = (side_effects_p (x)
             ? gen_rtx_CLOBBER (GET_MODE (x), const0_rtx)
             : gen_rtx_SCRATCH (GET_MODE (x)));
      return false;
    }

  /* If X is OLD_RTX, return NEW_RTX.  But not if replacing only within an
     address, and we are *not* inside one.  */
  if (x == old_rtx)
    {
      *px = new_rtx;
      return can_appear;
    }

  /* If this is an expression, try recursive substitution.  */
  switch (GET_RTX_CLASS (code))
    {
    case RTX_UNARY:
      op0 = XEXP (x, 0);
      op_mode = GET_MODE (op0);
      valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
      if (op0 == XEXP (x, 0))
        return true;
      tem = simplify_gen_unary (code, mode, op0, op_mode);
      break;

    case RTX_BIN_ARITH:
    case RTX_COMM_ARITH:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);
      valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
      valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags);
      if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1))
        return true;
      tem = simplify_gen_binary (code, mode, op0, op1);
      break;

    case RTX_COMPARE:
    case RTX_COMM_COMPARE:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);
      op_mode = GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
      valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
      valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags);
      if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1))
        return true;
      tem = simplify_gen_relational (code, mode, op_mode, op0, op1);
      break;

    case RTX_TERNARY:
    case RTX_BITFIELD_OPS:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);
      op2 = XEXP (x, 2);
      op_mode = GET_MODE (op0);
      valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
      valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags);
      valid_ops &= propagate_rtx_1 (&op2, old_rtx, new_rtx, flags);
      if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1) && op2 == XEXP (x, 2))
        return true;
      if (op_mode == VOIDmode)
        op_mode = GET_MODE (op0);
      tem = simplify_gen_ternary (code, mode, op_mode, op0, op1, op2);
      break;

    case RTX_EXTRA:
      /* The only case we try to handle is a SUBREG.  */
      if (code == SUBREG)
        {
          op0 = XEXP (x, 0);
          valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
          if (op0 == XEXP (x, 0))
            return true;
          tem = simplify_gen_subreg (mode, op0, GET_MODE (SUBREG_REG (x)),
                                     SUBREG_BYTE (x));
        }
      break;

    case RTX_OBJ:
      if (code == MEM && x != new_rtx)
        {
          rtx new_op0;
          op0 = XEXP (x, 0);

          /* There are some addresses that we cannot work on.  */
          if (!can_simplify_addr (op0))
            return true;

          op0 = new_op0 = targetm.delegitimize_address (op0);
          valid_ops &= propagate_rtx_1 (&new_op0, old_rtx, new_rtx,
                                        flags | PR_CAN_APPEAR);

          /* Dismiss transformation that we do not want to carry on.  */
          if (!valid_ops
              || new_op0 == op0
              || !(GET_MODE (new_op0) == GET_MODE (op0)
                   || GET_MODE (new_op0) == VOIDmode))
            return true;

          canonicalize_address (new_op0);

          /* Copy propagations are always ok.  Otherwise check the costs.  */
          if (!(REG_P (old_rtx) && REG_P (new_rtx))
              && !should_replace_address (op0, new_op0, GET_MODE (x),
                                          MEM_ADDR_SPACE (x),
                                          flags & PR_OPTIMIZE_FOR_SPEED))
            return true;

          tem = replace_equiv_address_nv (x, new_op0);
        }

      else if (code == LO_SUM)
        {
          op0 = XEXP (x, 0);
          op1 = XEXP (x, 1);

          /* The only simplification we do attempts to remove references to
             op0 or make it constant -- in both cases, op0's invalidity will
             not make the result invalid.  */
          propagate_rtx_1 (&op0, old_rtx, new_rtx, flags | PR_CAN_APPEAR);
          valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags);
          if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1))
            return true;

          /* (lo_sum (high x) x) -> x  */
          if (GET_CODE (op0) == HIGH && rtx_equal_p (XEXP (op0, 0), op1))
            tem = op1;
          else
            tem = gen_rtx_LO_SUM (mode, op0, op1);

          /* OP1 is likely not a legitimate address, otherwise there would
             have been no LO_SUM.  We want it to disappear if it is invalid,
             return false in that case.  */
          return memory_address_p (mode, tem);
        }

      else if (code == REG)
        {
          if (rtx_equal_p (x, old_rtx))
            {
              *px = new_rtx;
              return can_appear;
            }
        }
      break;

    default:
      break;
    }

  /* No change, no trouble.  */
  if (tem == NULL_RTX)
    return true;

  *px = tem;

  /* Allow replacements that simplify operations on a vector or complex
     value to a component.  The most prominent case is
     (subreg ([vec_]concat ...)).  */
  if (REG_P (tem) && !HARD_REGISTER_P (tem)
      && (VECTOR_MODE_P (GET_MODE (new_rtx))
          || COMPLEX_MODE_P (GET_MODE (new_rtx)))
      && GET_MODE (tem) == GET_MODE_INNER (GET_MODE (new_rtx)))
    return true;

  /* The replacement we made so far is valid, if all of the recursive
     replacements were valid, or we could simplify everything to
     a constant.  */
  return valid_ops || can_appear || CONSTANT_P (tem);
}
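/* Illustrative sketch, not part of the compiled code: if OLD_RTX is
   (reg 100) and NEW_RTX is (const_int 4), then propagating into

     (plus:SI (reg 100) (reg 101))

   recurses on both operands and re-simplifies the result, giving

     (plus:SI (reg 101) (const_int 4))

   with the constant moved to the canonical (second) position by
   simplify_gen_binary.  */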
/* Modify LOOP to use the low-overhead looping insn sequence DOLOOP_SEQ.
   DESC describes the number of iterations of the loop, CONDITION is the
   loop-termination test taken from DOLOOP_SEQ, and COUNT is the number of
   iterations the loop will execute.  */
static void
doloop_modify (struct loop *loop, struct niter_desc *desc, rtx doloop_seq,
               rtx condition, rtx count)
{
  rtx counter_reg;
  rtx tmp, noloop = NULL_RTX;
  rtx sequence;
  rtx jump_insn;
  rtx jump_label;
  int nonneg = 0, irr;
  bool increment_count;
  basic_block loop_end = desc->out_edge->src;
  enum machine_mode mode;

  jump_insn = BB_END (loop_end);

  if (dump_file)
    {
      fprintf (dump_file, "Doloop: Inserting doloop pattern (");
      if (desc->const_iter)
        fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, desc->niter);
      else
        fputs ("runtime", dump_file);
      fputs (" iterations).\n", dump_file);
    }

  /* Discard original jump to continue loop.  The original compare
     result may still be live, so it cannot be discarded explicitly.  */
  delete_insn (jump_insn);

  counter_reg = XEXP (condition, 0);
  if (GET_CODE (counter_reg) == PLUS)
    counter_reg = XEXP (counter_reg, 0);
  mode = GET_MODE (counter_reg);

  increment_count = false;
  switch (GET_CODE (condition))
    {
    case NE:
      /* Currently only NE tests against zero and one are supported.  */
      if (XEXP (condition, 1) == const1_rtx)
        {
          increment_count = true;
          noloop = const1_rtx;
        }
      else if (XEXP (condition, 1) == const0_rtx)
        noloop = const0_rtx;
      else
        abort ();
      break;

    case GE:
      /* Currently only GE tests against zero are supported.  */
      if (XEXP (condition, 1) != const0_rtx)
        abort ();

      noloop = constm1_rtx;

      /* The iteration count does not need incrementing for a GE test.  */
      increment_count = false;

      /* Determine if the iteration counter will be non-negative.
         Note that the maximum value loaded is iterations_max - 1.  */
      if (desc->niter_max
          <= ((unsigned HOST_WIDEST_INT) 1
              << (GET_MODE_BITSIZE (mode) - 1)))
        nonneg = 1;
      break;

      /* Abort if an invalid doloop pattern has been generated.  */
    default:
      abort ();
    }

  if (increment_count)
    count = simplify_gen_binary (PLUS, mode, count, const1_rtx);

  /* Insert initialization of the count register into the loop header.  */
  start_sequence ();
  tmp = force_operand (count, counter_reg);
  convert_move (counter_reg, tmp, 1);
  sequence = get_insns ();
  end_sequence ();
  emit_insn_after (sequence, BB_END (loop_preheader_edge (loop)->src));

  if (desc->noloop_assumptions)
    {
      rtx ass = copy_rtx (desc->noloop_assumptions);
      basic_block preheader = loop_preheader_edge (loop)->src;
      basic_block set_zero
        = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
      basic_block new_preheader
        = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
      basic_block bb;
      edge te;
      gcov_type cnt;

      /* Expand the condition testing the assumptions and if it does not
         pass, reset the count register to 0.  */
      add_test (XEXP (ass, 0), preheader, set_zero);
      EDGE_SUCC (preheader, 0)->flags &= ~EDGE_FALLTHRU;
      cnt = EDGE_SUCC (preheader, 0)->count;
      EDGE_SUCC (preheader, 0)->probability = 0;
      EDGE_SUCC (preheader, 0)->count = 0;
      irr = EDGE_SUCC (preheader, 0)->flags & EDGE_IRREDUCIBLE_LOOP;
      te = make_edge (preheader, new_preheader, EDGE_FALLTHRU | irr);
      te->probability = REG_BR_PROB_BASE;
      te->count = cnt;
      set_immediate_dominator (CDI_DOMINATORS, new_preheader, preheader);

      set_zero->count = 0;
      set_zero->frequency = 0;

      for (ass = XEXP (ass, 1); ass; ass = XEXP (ass, 1))
        {
          bb = loop_split_edge_with (te, NULL_RTX);
          te = EDGE_SUCC (bb, 0);
          add_test (XEXP (ass, 0), bb, set_zero);
          make_edge (bb, set_zero, irr);
        }

      start_sequence ();
      convert_move (counter_reg, noloop, 0);
      sequence = get_insns ();
      end_sequence ();
      emit_insn_after (sequence, BB_END (set_zero));
    }

  /* Some targets (eg, C4x) need to initialize special looping
     registers.  */
#ifdef HAVE_doloop_begin
  {
    rtx init;
    unsigned level = get_loop_level (loop) + 1;
    init = gen_doloop_begin (counter_reg,
                             desc->const_iter ? desc->niter_expr : const0_rtx,
                             desc->niter_max,
                             GEN_INT (level));
    if (init)
      {
        start_sequence ();
        emit_insn (init);
        sequence = get_insns ();
        end_sequence ();
        emit_insn_after (sequence,
                         BB_END (loop_preheader_edge (loop)->src));
      }
  }
#endif

  /* Insert the new low-overhead looping insn.  */
  emit_jump_insn_after (doloop_seq, BB_END (loop_end));
  jump_insn = BB_END (loop_end);
  jump_label = block_label (desc->in_edge->dest);
  JUMP_LABEL (jump_insn) = jump_label;
  LABEL_NUSES (jump_label)++;

  /* Ensure the right fallthru edge is marked, for case we have reversed
     the condition.  */
  desc->in_edge->flags &= ~EDGE_FALLTHRU;
  desc->out_edge->flags |= EDGE_FALLTHRU;

  /* Add a REG_NONNEG note if the actual or estimated maximum number
     of iterations is non-negative.  */
  if (nonneg)
    {
      REG_NOTES (jump_insn)
        = gen_rtx_EXPR_LIST (REG_NONNEG, NULL_RTX, REG_NOTES (jump_insn));
    }
}
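/* Illustrative note, not part of the compiled code: on a target with a
   low-overhead loop instruction, a counted loop such as

     for (i = 0; i < n; i++)
       body ();

   is rewritten so that a dedicated counter register is set to the iteration
   count in the preheader and the loop-closing branch becomes a single
   decrement-and-branch-if-nonzero pattern taken from DOLOOP_SEQ.  */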
/* Find the values inside INSN for which we want to measure histograms for
   division/modulo optimization, and store them in VALUES.  */
static void
insn_divmod_values_to_profile (rtx insn, histogram_values *values)
{
  rtx set, set_src, op1, op2;
  enum machine_mode mode;
  histogram_value hist;

  if (!INSN_P (insn))
    return;

  set = single_set (insn);
  if (!set)
    return;

  mode = GET_MODE (SET_DEST (set));
  if (!INTEGRAL_MODE_P (mode))
    return;

  set_src = SET_SRC (set);
  switch (GET_CODE (set_src))
    {
    case DIV:
    case MOD:
    case UDIV:
    case UMOD:
      op1 = XEXP (set_src, 0);
      op2 = XEXP (set_src, 1);
      if (side_effects_p (op2))
        return;

      /* Check for a special case where the divisor is a power of 2.  */
      if ((GET_CODE (set_src) == UMOD) && !CONSTANT_P (op2))
        {
          hist = ggc_alloc (sizeof (*hist));
          hist->value = op2;
          hist->seq = NULL_RTX;
          hist->mode = mode;
          hist->insn = insn;
          hist->type = HIST_TYPE_POW2;
          hist->hdata.pow2.may_be_other = 1;
          VEC_safe_push (histogram_value, *values, hist);
        }

      /* Check whether the divisor is not in fact a constant.  */
      if (!CONSTANT_P (op2))
        {
          hist = ggc_alloc (sizeof (*hist));
          hist->value = op2;
          hist->mode = mode;
          hist->seq = NULL_RTX;
          hist->insn = insn;
          hist->type = HIST_TYPE_SINGLE_VALUE;
          VEC_safe_push (histogram_value, *values, hist);
        }

      /* For mod, check whether it is not often a noop (or replaceable by
         a few subtractions).  */
      if (GET_CODE (set_src) == UMOD && !side_effects_p (op1))
        {
          rtx tmp;

          hist = ggc_alloc (sizeof (*hist));
          start_sequence ();
          tmp = simplify_gen_binary (DIV, mode, copy_rtx (op1),
                                     copy_rtx (op2));
          hist->value = force_operand (tmp, NULL_RTX);
          hist->seq = get_insns ();
          end_sequence ();
          hist->mode = mode;
          hist->insn = insn;
          hist->type = HIST_TYPE_INTERVAL;
          hist->hdata.intvl.int_start = 0;
          hist->hdata.intvl.steps = 2;
          hist->hdata.intvl.may_be_less = 1;
          hist->hdata.intvl.may_be_more = 1;
          VEC_safe_push (histogram_value, *values, hist);
        }
      return;

    default:
      return;
    }
}
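/* Illustrative note, not part of the compiled code: for a statement such as

     r = a % b;        -- b not a compile-time constant

   up to three histograms are requested: whether b is usually a power of two,
   whether b usually takes a single dominant value, and (via the interval
   histogram on a / b with two buckets starting at 0) whether the quotient is
   usually 0 or 1, i.e. whether the modulo is usually a no-op or a single
   subtraction, which is what gen_mod_subtract can exploit.  */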