static int rank (rtx insn1, rtx insn2) { basic_block bb1 = BLOCK_FOR_INSN (insn1); basic_block bb2 = BLOCK_FOR_INSN (insn2); if (bb1->count > bb2->count || bb1->frequency > bb2->frequency) return -1; if (bb1->count < bb2->count || bb1->frequency < bb2->frequency) return 1; return 0; }
static void split_pattern_seq (void) { rtx insn; basic_block bb; rtx retlabel, retjmp, saveinsn; int i; seq_block sb; insn = pattern_seqs->insn; bb = BLOCK_FOR_INSN (insn); /* Get the label after the sequence. This will be the return address. The label will be referenced using a symbol_ref so protect it from deleting. */ retlabel = block_label_after (insn); LABEL_PRESERVE_P (retlabel) = 1; /* Emit an indirect jump via the link register after the sequence acting as the return insn. Also emit a barrier and update the basic block. */ if (!find_reg_note (BB_END (bb), REG_NORETURN, NULL)) retjmp = emit_jump_insn_after (gen_indirect_jump (pattern_seqs->link_reg), BB_END (bb)); emit_barrier_after (BB_END (bb)); /* Replace all outgoing edges with a new one to the block of RETLABEL. */ while (EDGE_COUNT (bb->succs) != 0) remove_edge (EDGE_SUCC (bb, 0)); make_edge (bb, BLOCK_FOR_INSN (retlabel), EDGE_ABNORMAL); /* Split the sequence according to SEQ_BLOCKS and cache the label of the resulting basic blocks. */ i = 0; for (sb = seq_blocks; sb; sb = sb->next_seq_block) { for (; i < sb->length; i++) insn = prev_insn_in_block (insn); sb->label = block_label (split_block_and_df_analyze (bb, insn)); } /* Emit an insn saving the return address to the link register before the sequence. */ saveinsn = emit_insn_after (gen_move_insn (pattern_seqs->link_reg, gen_symbol_ref_rtx_for_label (retlabel)), BB_END (bb)); /* Update liveness info. */ SET_REGNO_REG_SET (df_get_live_out (bb), REGNO (pattern_seqs->link_reg)); }
/* The function is used to sort moves according to their execution frequencies. */ static int move_freq_compare_func (const void *v1p, const void *v2p) { rtx mv1 = *(const rtx *) v1p; rtx mv2 = *(const rtx *) v2p; int pri1, pri2; pri1 = BLOCK_FOR_INSN (mv1)->frequency; pri2 = BLOCK_FOR_INSN (mv2)->frequency; if (pri2 - pri1) return pri2 - pri1; /* If frequencies are equal, sort by moves, so that the results of qsort leave nothing to chance. */ return (int) INSN_UID (mv1) - (int) INSN_UID (mv2); }
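/* rank and move_freq_compare_func follow the same comparator pattern: order by
   profile data first and break ties with a unique key (here INSN_UID) so that
   qsort, which gives no guarantees for equal elements, still produces a
   deterministic order.  A minimal self-contained sketch of that pattern with
   invented move records instead of real insns:  */
#include <stdio.h>
#include <stdlib.h>

/* Hypothetical stand-in for an insn: a unique uid and a block frequency.  */
struct fake_move { int uid; int frequency; };

/* Sort by descending frequency; break ties by ascending uid so the result
   never depends on how qsort handles equal keys.  */
static int
fake_move_freq_compare (const void *v1p, const void *v2p)
{
  const struct fake_move *m1 = (const struct fake_move *) v1p;
  const struct fake_move *m2 = (const struct fake_move *) v2p;

  if (m2->frequency != m1->frequency)
    return m2->frequency - m1->frequency;
  return m1->uid - m2->uid;
}

int
main (void)
{
  struct fake_move moves[] = { {7, 100}, {3, 500}, {5, 100}, {1, 500} };
  qsort (moves, 4, sizeof (moves[0]), fake_move_freq_compare);
  for (int i = 0; i < 4; i++)
    printf ("uid %d (freq %d)\n", moves[i].uid, moves[i].frequency);
  return 0;  /* prints uids 1, 3, 5, 7 in that order */
}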
/* Dump insn INSN honoring FLAGS. */ void dump_insn_rtx_1 (rtx insn, int flags) { int all; /* flags == -1 also means dumping all. */ all = (flags & 1); if (all) flags |= DUMP_INSN_RTX_ALL; sel_print ("("); if (flags & DUMP_INSN_RTX_UID) sel_print ("%d;", INSN_UID (insn)); if (flags & DUMP_INSN_RTX_PATTERN) { char buf[2048]; print_insn (buf, insn, 0); sel_print ("%s;", buf); } if (flags & DUMP_INSN_RTX_BBN) { basic_block bb = BLOCK_FOR_INSN (insn); sel_print ("bb:%d;", bb != NULL ? bb->index : -1); } sel_print (")"); }
static void split_blocks_after_seqs (void) { seq_block sb; matching_seq mseq; block_label_after (pattern_seqs->insn); for (sb = seq_blocks; sb; sb = sb->next_seq_block) { for (mseq = sb->matching_seqs; mseq; mseq = mseq->next_matching_seq) { block_label_after (mseq->insn); IOR_REG_SET (df_get_live_out (BLOCK_FOR_INSN (pattern_seqs->insn)), df_get_live_out (BLOCK_FOR_INSN (mseq->insn))); } } }
static rtx block_label_after (rtx insn) { basic_block bb = BLOCK_FOR_INSN (insn); if ((insn == BB_END (bb)) && (bb->next_bb != EXIT_BLOCK_PTR)) return block_label (bb->next_bb); else return block_label (split_block_and_df_analyze (bb, insn)); }
static void erase_matching_seqs (void) { seq_block sb; matching_seq mseq; rtx insn; basic_block bb; rtx retlabel, saveinsn, callinsn; int i; for (sb = seq_blocks; sb; sb = sb->next_seq_block) { for (mseq = sb->matching_seqs; mseq; mseq = mseq->next_matching_seq) { insn = mseq->insn; bb = BLOCK_FOR_INSN (insn); /* Get the label after the sequence. This will be the return address. The label will be referenced using a symbol_ref so protect it from deleting. */ retlabel = block_label_after (insn); LABEL_PRESERVE_P (retlabel) = 1; /* Delete the insns of the sequence. */ for (i = 0; i < sb->length; i++) insn = prev_insn_in_block (insn); delete_basic_block (split_block_and_df_analyze (bb, insn)); /* Emit an insn saving the return address to the link register before the deleted sequence. */ saveinsn = emit_insn_after (gen_move_insn (pattern_seqs->link_reg, gen_symbol_ref_rtx_for_label (retlabel)), BB_END (bb)); BLOCK_FOR_INSN (saveinsn) = bb; /* Emit a jump to the appropriate part of the pattern sequence after the save insn. Also update the basic block. */ callinsn = emit_jump_insn_after (gen_jump (sb->label), saveinsn); JUMP_LABEL (callinsn) = sb->label; LABEL_NUSES (sb->label)++; BLOCK_FOR_INSN (callinsn) = bb; BB_END (bb) = callinsn; /* Maintain control flow and liveness information. */ SET_REGNO_REG_SET (df_get_live_out (bb), REGNO (pattern_seqs->link_reg)); emit_barrier_after (BB_END (bb)); make_single_succ_edge (bb, BLOCK_FOR_INSN (sb->label), 0); IOR_REG_SET (df_get_live_out (bb), df_get_live_in (BLOCK_FOR_INSN (sb->label))); make_edge (BLOCK_FOR_INSN (seq_blocks->label), BLOCK_FOR_INSN (retlabel), EDGE_ABNORMAL); } } }
void ebb_compute_jump_reg_dependencies (rtx insn, regset used) { basic_block b = BLOCK_FOR_INSN (insn); edge e; edge_iterator ei; FOR_EACH_EDGE (e, ei, b->succs) if ((e->flags & EDGE_FALLTHRU) == 0) bitmap_ior_into (used, df_get_live_in (e->dest)); }
static rtx get_next_ref (int regno, basic_block bb, rtx *next_array) { rtx insn = next_array[regno]; /* Lazy about cleaning out the next_arrays. */ if (insn && BLOCK_FOR_INSN (insn) != bb) { next_array[regno] = NULL; insn = NULL; } return insn; }
static bool rtl_value_profile_transformations (void) { rtx insn, next; int changed = false; for (insn = get_insns (); insn; insn = next) { next = NEXT_INSN (insn); if (!INSN_P (insn)) continue; /* Scan for insn carrying a histogram. */ if (!find_reg_note (insn, REG_VALUE_PROFILE, 0)) continue; /* Ignore cold areas -- we are growing a code. */ if (!maybe_hot_bb_p (BLOCK_FOR_INSN (insn))) continue; if (dump_file) { fprintf (dump_file, "Trying transformations on insn %d\n", INSN_UID (insn)); print_rtl_single (dump_file, insn); } /* Transformations: */ if (flag_value_profile_transformations && (mod_subtract_transform (insn) || divmod_fixed_value_transform (insn) || mod_pow2_value_transform (insn))) changed = true; #ifdef HAVE_prefetch if (flag_speculative_prefetching && speculative_prefetching_transform (insn)) changed = true; #endif } if (changed) { commit_edge_insertions (); allocate_reg_info (max_reg_num (), FALSE, FALSE); } return changed; }
static rtx prev_insn_in_block (rtx insn) { basic_block bb = BLOCK_FOR_INSN (insn); if (!bb) return NULL_RTX; while (insn != BB_HEAD (bb)) { insn = PREV_INSN (insn); if (INSN_P (insn)) return insn; } return NULL_RTX; }
void ebb_compute_jump_reg_dependencies (rtx insn, regset cond_set, regset used, regset set) { basic_block b = BLOCK_FOR_INSN (insn); edge e; edge_iterator ei; FOR_EACH_EDGE (e, ei, b->succs) if (e->flags & EDGE_FALLTHRU) /* The jump may be a by-product of a branch that has been merged in the main codepath after being conditionalized. Therefore it may guard the fallthrough block from using a value that has conditionally overwritten that of the main codepath. So we consider that it restores the value of the main codepath. */ bitmap_and (set, df_get_live_in (e->dest), cond_set); else bitmap_ior_into (used, df_get_live_in (e->dest)); }
static void clear_regs_live_in_seq (HARD_REG_SET * regs, rtx insn, int length) { basic_block bb; regset_head live; HARD_REG_SET hlive; rtx x; int i; /* Initialize liveness propagation. */ bb = BLOCK_FOR_INSN (insn); INIT_REG_SET (&live); bitmap_copy (&live, DF_LR_OUT (bb)); df_simulate_initialize_backwards (bb, &live); /* Propagate backwards until INSN is found. */ for (x = BB_END (bb); x != insn; x = PREV_INSN (x)) df_simulate_one_insn_backwards (bb, x, &live); /* Clear registers live after INSN. */ renumbered_reg_set_to_hard_reg_set (&hlive, &live); AND_COMPL_HARD_REG_SET (*regs, hlive); /* Clear registers live in and before the sequence. */ for (i = 0; i < length;) { rtx prev = PREV_INSN (x); df_simulate_one_insn_backwards (bb, x, &live); if (INSN_P (x)) { renumbered_reg_set_to_hard_reg_set (&hlive, &live); AND_COMPL_HARD_REG_SET (*regs, hlive); i++; } x = prev; } /* Free unused data. */ CLEAR_REG_SET (&live); }
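/* clear_regs_live_in_seq walks the block backwards with the df simulation
   hooks and drops from *REGS every hard register that is live around the
   candidate sequence.  A minimal, self-contained sketch of the underlying
   backward-liveness step, with bit masks standing in for regsets and an
   invented three-instruction program:  */
#include <stdio.h>
#include <stdint.h>

/* A toy instruction: one register defined, up to two registers used
   (-1 means "none").  The encoding is purely illustrative.  */
struct toy_insn { int def; int use1; int use2; };

/* One backward liveness step: kill the definition, then add the uses.  */
static uint32_t
simulate_backwards (uint32_t live, const struct toy_insn *i)
{
  if (i->def >= 0)
    live &= ~(UINT32_C (1) << i->def);
  if (i->use1 >= 0)
    live |= UINT32_C (1) << i->use1;
  if (i->use2 >= 0)
    live |= UINT32_C (1) << i->use2;
  return live;
}

int
main (void)
{
  /* r2 = r0 + r1;  r3 = r2 + r0;  use r3;  */
  struct toy_insn prog[] = { {2, 0, 1}, {3, 2, 0}, {-1, 3, -1} };
  uint32_t live = 0;  /* nothing live after the last instruction */

  for (int i = 2; i >= 0; i--)
    {
      live = simulate_backwards (live, &prog[i]);
      printf ("live before insn %d: 0x%x\n", i, (unsigned) live);
    }
  return 0;  /* prints 0x8, 0x5, 0x3 */
}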
static basic_block earliest_block_with_similiar_load (basic_block last_block, rtx load_insn) { sd_iterator_def back_sd_it; dep_t back_dep; basic_block bb, earliest_block = NULL; FOR_EACH_DEP (load_insn, SD_LIST_BACK, back_sd_it, back_dep) { rtx insn1 = DEP_PRO (back_dep); if (DEP_TYPE (back_dep) == REG_DEP_TRUE) /* Found a DEF-USE dependence (insn1, load_insn). */ { sd_iterator_def fore_sd_it; dep_t fore_dep; FOR_EACH_DEP (insn1, SD_LIST_FORW, fore_sd_it, fore_dep) { rtx insn2 = DEP_CON (fore_dep); basic_block insn2_block = BLOCK_FOR_INSN (insn2); if (DEP_TYPE (fore_dep) == REG_DEP_TRUE) { if (earliest_block != NULL && earliest_block->index < insn2_block->index) continue; /* Found a DEF-USE dependence (insn1, insn2). */ if (haifa_classify_insn (insn2) != PFREE_CANDIDATE) /* insn2 not guaranteed to be a 1 base reg load. */ continue; for (bb = last_block; bb; bb = (basic_block) bb->aux) if (insn2_block == bb) break; if (!bb) /* insn2 is the similar load. */ earliest_block = insn2_block; } }
} } return earliest_block; }
/* Do transform 1) on INSN if applicable. */ static bool divmod_fixed_value_transform (rtx insn) { rtx set, set_src, set_dest, op1, op2, value, histogram; enum rtx_code code; enum machine_mode mode; gcov_type val, count, all; edge e; int prob; set = single_set (insn); if (!set) return false; set_src = SET_SRC (set); set_dest = SET_DEST (set); code = GET_CODE (set_src); mode = GET_MODE (set_dest); if (code != DIV && code != MOD && code != UDIV && code != UMOD) return false; op1 = XEXP (set_src, 0); op2 = XEXP (set_src, 1); for (histogram = REG_NOTES (insn); histogram; histogram = XEXP (histogram, 1)) if (REG_NOTE_KIND (histogram) == REG_VALUE_PROFILE && XEXP (XEXP (histogram, 0), 0) == GEN_INT (HIST_TYPE_SINGLE_VALUE)) break; if (!histogram) return false; histogram = XEXP (XEXP (histogram, 0), 1); value = XEXP (histogram, 0); histogram = XEXP (histogram, 1); val = INTVAL (XEXP (histogram, 0)); histogram = XEXP (histogram, 1); count = INTVAL (XEXP (histogram, 0)); histogram = XEXP (histogram, 1); all = INTVAL (XEXP (histogram, 0)); /* We require that count be at least half of all; this means that for the transformation to fire the value must be constant at least 50% of time (and 75% gives the guarantee of usage). */ if (!rtx_equal_p (op2, value) || 2 * count < all) return false; if (dump_file) fprintf (dump_file, "Div/mod by constant transformation on insn %d\n", INSN_UID (insn)); /* Compute probability of taking the optimal path. */ prob = (count * REG_BR_PROB_BASE + all / 2) / all; e = split_block (BLOCK_FOR_INSN (insn), PREV_INSN (insn)); delete_insn (insn); insert_insn_on_edge ( gen_divmod_fixed_value (mode, code, set_dest, op1, op2, val, prob), e); return true; }
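/* gen_divmod_fixed_value (not shown here) emits a guarded sequence: compare
   the divisor against the profiled constant, take the cheap constant-divisor
   path when it matches, and fall back to the general operation otherwise.
   A source-level sketch of that shape with an invented constant 16, together
   with the branch-probability rounding used above:  */
#include <stdio.h>

#define REG_BR_PROB_BASE 10000

/* Source-level analogue of the guarded division the pass emits.  The
   constant 16 only stands in for the profiled divisor.  */
static unsigned int
umod_by_likely_constant (unsigned int op1, unsigned int op2)
{
  if (op2 == 16)
    return op1 % 16;   /* constant divisor: the compiler can use a mask */
  return op1 % op2;    /* general (rare) path */
}

int
main (void)
{
  /* prob = (count * REG_BR_PROB_BASE + all / 2) / all rounds to the
     nearest unit of the probability scale; 900 of 1000 -> 9000.  */
  long long count = 900, all = 1000;
  long long prob = (count * REG_BR_PROB_BASE + all / 2) / all;

  printf ("%u %lld\n", umod_by_likely_constant (100, 16), prob);
  return 0;  /* prints "4 9000" */
}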
/* INSN is being scheduled after LAST. Update counters. */ static void begin_schedule_ready (rtx insn, rtx last) { sched_rgn_n_insns++; if (BLOCK_FOR_INSN (insn) == last_bb /* INSN is a jump in the last block, ... */ && control_flow_insn_p (insn) /* that is going to be moved over some instructions. */ && last != PREV_INSN (insn)) { edge e; edge_iterator ei; basic_block bb; /* An obscure special case, where we do have partially dead instruction scheduled after last control flow instruction. In this case we can create new basic block. It is always exactly one basic block last in the sequence. */ FOR_EACH_EDGE (e, ei, last_bb->succs) if (e->flags & EDGE_FALLTHRU) break; #ifdef ENABLE_CHECKING gcc_assert (!e || !(e->flags & EDGE_COMPLEX)); gcc_assert (BLOCK_FOR_INSN (insn) == last_bb && !IS_SPECULATION_CHECK_P (insn) && BB_HEAD (last_bb) != insn && BB_END (last_bb) == insn); { rtx x; x = NEXT_INSN (insn); if (e) gcc_assert (NOTE_P (x) || LABEL_P (x)); else gcc_assert (BARRIER_P (x)); } #endif if (e) { bb = split_edge (e); gcc_assert (NOTE_INSN_BASIC_BLOCK_P (BB_END (bb))); } else /* Create an empty unreachable block after the INSN. */ bb = create_basic_block (NEXT_INSN (insn), NULL_RTX, last_bb); /* split_edge () creates BB before E->DEST. Keep in mind, that this operation extends scheduling region till the end of BB. Hence, we need to shift NEXT_TAIL, so haifa-sched.c won't go out of the scheduling region. */ current_sched_info->next_tail = NEXT_INSN (BB_END (bb)); gcc_assert (current_sched_info->next_tail); /* Append new basic block to the end of the ebb. */ sched_init_only_bb (bb, last_bb); gcc_assert (last_bb == bb); }
}
static bool find_inc (bool first_try) { rtx insn; basic_block bb = BLOCK_FOR_INSN (mem_insn.insn); rtx other_insn; df_ref *def_rec; /* Make sure this reg appears only once in this insn. */ if (count_occurrences (PATTERN (mem_insn.insn), mem_insn.reg0, 1) != 1) { if (dump_file) fprintf (dump_file, "mem count failure\n"); return false; } if (dump_file) dump_mem_insn (dump_file); /* Find the next use that is an inc. */ insn = get_next_ref (REGNO (mem_insn.reg0), BLOCK_FOR_INSN (mem_insn.insn), reg_next_inc_use); if (!insn) return false; /* Even though we know the next use is an add or inc because it came from the reg_next_inc_use, we must still reparse. */ if (!parse_add_or_inc (insn, false)) { /* Next use was not an add. Look for one extra case. It could be that we have: *(a + b) ...= a; ...= b + a if we reverse the operands in the mem ref we would find this. Only try it once though. */ if (first_try && !mem_insn.reg1_is_const) { reverse_mem (); return find_inc (false); } else return false; } /* Need to assure that none of the operands of the inc instruction are assigned to by the mem insn. */ for (def_rec = DF_INSN_DEFS (mem_insn.insn); *def_rec; def_rec++) { df_ref def = *def_rec; unsigned int regno = DF_REF_REGNO (def); if ((regno == REGNO (inc_insn.reg0)) || (regno == REGNO (inc_insn.reg_res))) { if (dump_file) fprintf (dump_file, "inc conflicts with store failure.\n"); return false; } if (!inc_insn.reg1_is_const && (regno == REGNO (inc_insn.reg1))) { if (dump_file) fprintf (dump_file, "inc conflicts with store failure.\n"); return false; } } if (dump_file) dump_inc_insn (dump_file); if (inc_insn.form == FORM_POST_ADD) { /* Make sure that there is no insn that assigns to inc_insn.res between the mem_insn and the inc_insn. */ rtx other_insn = get_next_ref (REGNO (inc_insn.reg_res), BLOCK_FOR_INSN (mem_insn.insn), reg_next_def); if (other_insn != inc_insn.insn) { if (dump_file) fprintf (dump_file, "result of add is assigned to between mem and inc insns.\n"); return false; } other_insn = get_next_ref (REGNO (inc_insn.reg_res), BLOCK_FOR_INSN (mem_insn.insn), reg_next_use); if (other_insn && (other_insn != inc_insn.insn) && (DF_INSN_LUID (inc_insn.insn) > DF_INSN_LUID (other_insn))) { if (dump_file) fprintf (dump_file, "result of add is used between mem and inc insns.\n"); return false; } /* For the post_add to work, the result_reg of the inc must not be used in the mem insn since this will become the new index register. */ if (reg_overlap_mentioned_p (inc_insn.reg_res, PATTERN (mem_insn.insn))) { if (dump_file) fprintf (dump_file, "base reg replacement failure.\n"); return false; } } if (mem_insn.reg1_is_const) { if (mem_insn.reg1_val == 0) { if (!inc_insn.reg1_is_const) { /* The mem looks like *r0 and the rhs of the add has two registers. */ int luid = DF_INSN_LUID (inc_insn.insn); if (inc_insn.form == FORM_POST_ADD) { /* The trick is that we are not going to increment r0, we are going to increment the result of the add insn. For this trick to be correct, the result reg of the inc must be a valid addressing reg. */ addr_space_t as = MEM_ADDR_SPACE (*mem_insn.mem_loc); if (GET_MODE (inc_insn.reg_res) != targetm.addr_space.address_mode (as)) { if (dump_file) fprintf (dump_file, "base reg mode failure.\n"); return false; } /* We also need to make sure that the next use of inc result is after the inc. 
*/ other_insn = get_next_ref (REGNO (inc_insn.reg1), bb, reg_next_use); if (other_insn && luid > DF_INSN_LUID (other_insn)) return false; if (!rtx_equal_p (mem_insn.reg0, inc_insn.reg0)) reverse_inc (); } other_insn = get_next_ref (REGNO (inc_insn.reg1), bb, reg_next_def); if (other_insn && luid > DF_INSN_LUID (other_insn)) return false; } } /* Both the inc/add and the mem have a constant. Need to check that the constants are ok. */ else if ((mem_insn.reg1_val != inc_insn.reg1_val) && (mem_insn.reg1_val != -inc_insn.reg1_val)) return false; } else { /* The mem insn is of the form *(a + b) where a and b are both regs. It may be that in order to match the add or inc we need to treat it as if it was *(b + a). It may also be that the add is of the form a + c where c does not match b and then we just abandon this. */ int luid = DF_INSN_LUID (inc_insn.insn); rtx other_insn; /* Make sure this reg appears only once in this insn. */ if (count_occurrences (PATTERN (mem_insn.insn), mem_insn.reg1, 1) != 1) return false; if (inc_insn.form == FORM_POST_ADD) { /* For this trick to be correct, the result reg of the inc must be a valid addressing reg. */ addr_space_t as = MEM_ADDR_SPACE (*mem_insn.mem_loc); if (GET_MODE (inc_insn.reg_res) != targetm.addr_space.address_mode (as)) { if (dump_file) fprintf (dump_file, "base reg mode failure.\n"); return false; } if (rtx_equal_p (mem_insn.reg0, inc_insn.reg0)) { if (!rtx_equal_p (mem_insn.reg1, inc_insn.reg1)) { /* See comment above on find_inc (false) call. */ if (first_try) { reverse_mem (); return find_inc (false); } else return false; } /* Need to check that there are no assignments to b before the add insn. */ other_insn = get_next_ref (REGNO (inc_insn.reg1), bb, reg_next_def); if (other_insn && luid > DF_INSN_LUID (other_insn)) return false; /* All ok for the next step. */ } else { /* We know that mem_insn.reg0 must equal inc_insn.reg1 or else we would not have found the inc insn. */ reverse_mem (); if (!rtx_equal_p (mem_insn.reg0, inc_insn.reg0)) { /* See comment above on find_inc (false) call. */ if (first_try) return find_inc (false); else return false; } /* To have gotten here know that. *(b + a) ... = (b + a) We also know that the lhs of the inc is not b or a. We need to make sure that there are no assignments to b between the mem ref and the inc. */ other_insn = get_next_ref (REGNO (inc_insn.reg0), bb, reg_next_def); if (other_insn && luid > DF_INSN_LUID (other_insn)) return false; } /* Need to check that the next use of the add result is later than add insn since this will be the reg incremented. */ other_insn = get_next_ref (REGNO (inc_insn.reg_res), bb, reg_next_use); if (other_insn && luid > DF_INSN_LUID (other_insn)) return false; } else /* FORM_POST_INC. There is less to check here because we know that operands must line up. */ { if (!rtx_equal_p (mem_insn.reg1, inc_insn.reg1)) /* See comment above on find_inc (false) call. */ { if (first_try) { reverse_mem (); return find_inc (false); } else return false; } /* To have gotten here know that. *(a + b) ... = (a + b) We also know that the lhs of the inc is not b. We need to make sure that there are no assignments to b between the mem ref and the inc. 
*/ other_insn = get_next_ref (REGNO (inc_insn.reg1), bb, reg_next_def); if (other_insn && luid > DF_INSN_LUID (other_insn)) return false; } } if (inc_insn.form == FORM_POST_INC) { other_insn = get_next_ref (REGNO (inc_insn.reg0), bb, reg_next_use); /* When we found inc_insn, we were looking for the next add or inc, not the next insn that used the reg. Because we are going to increment the reg in this form, we need to make sure that there were no intervening uses of reg. */ if (inc_insn.insn != other_insn) return false; } return try_merge (); }
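/* find_inc and attempt_change pair a memory reference with an add/inc of its
   address register so the two can be replaced by a single auto-modify
   address.  The pass distinguishes four shapes (FORM_PRE_ADD, FORM_PRE_INC,
   FORM_POST_ADD, FORM_POST_INC); below is a self-contained C-level analogue
   of each candidate shape.  The pointer arithmetic is purely illustrative;
   the real pass matches RTL addresses and registers.  */
#include <stdio.h>

/* FORM_PRE_INC:   a = a + c;  ... = *a;                          */
static int pre_inc (const int *a, int c) { a += c; return *a; }

/* FORM_POST_INC:  ... = *a;   a = a + c;                         */
static int post_inc (const int *a, int c) { int v = *a; a += c; return v; }

/* FORM_PRE_ADD:   b = a + c;  ... = *b;                          */
static int pre_add (const int *a, int c) { const int *b = a + c; return *b; }

/* FORM_POST_ADD:  ... = *a;   b = a + c;                         */
static int post_add (const int *a, int c, const int **b)
{ int v = *a; *b = a + c; return v; }

int
main (void)
{
  int arr[4] = { 10, 20, 30, 40 };
  const int *p;
  printf ("%d %d %d %d\n",
          pre_inc (arr, 1), post_inc (arr + 1, 1),
          pre_add (arr, 2), post_add (arr + 3, -3, &p));
  return 0;  /* prints "20 20 30 40" */
}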
/* The major function for aggressive pseudo coalescing of moves only if the both pseudos were spilled and not special reload pseudos. */ bool lra_coalesce (void) { basic_block bb; rtx mv, set, insn, next, *sorted_moves; int i, mv_num, sregno, dregno; int coalesced_moves; int max_regno = max_reg_num (); bitmap_head involved_insns_bitmap; timevar_push (TV_LRA_COALESCE); if (lra_dump_file != NULL) fprintf (lra_dump_file, "\n********** Pseudos coalescing #%d: **********\n\n", ++lra_coalesce_iter); first_coalesced_pseudo = XNEWVEC (int, max_regno); next_coalesced_pseudo = XNEWVEC (int, max_regno); for (i = 0; i < max_regno; i++) first_coalesced_pseudo[i] = next_coalesced_pseudo[i] = i; sorted_moves = XNEWVEC (rtx, get_max_uid ()); mv_num = 0; /* Collect moves. */ coalesced_moves = 0; FOR_EACH_BB (bb) { FOR_BB_INSNS_SAFE (bb, insn, next) if (INSN_P (insn) && (set = single_set (insn)) != NULL_RTX && REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)) && (sregno = REGNO (SET_SRC (set))) >= FIRST_PSEUDO_REGISTER && (dregno = REGNO (SET_DEST (set))) >= FIRST_PSEUDO_REGISTER && mem_move_p (sregno, dregno) && coalescable_pseudo_p (sregno) && coalescable_pseudo_p (dregno) && ! side_effects_p (set) && !(lra_intersected_live_ranges_p (lra_reg_info[sregno].live_ranges, lra_reg_info[dregno].live_ranges))) sorted_moves[mv_num++] = insn; } qsort (sorted_moves, mv_num, sizeof (rtx), move_freq_compare_func); /* Coalesced copies, most frequently executed first. */ bitmap_initialize (&coalesced_pseudos_bitmap, ®_obstack); bitmap_initialize (&involved_insns_bitmap, ®_obstack); for (i = 0; i < mv_num; i++) { mv = sorted_moves[i]; set = single_set (mv); lra_assert (set != NULL && REG_P (SET_SRC (set)) && REG_P (SET_DEST (set))); sregno = REGNO (SET_SRC (set)); dregno = REGNO (SET_DEST (set)); if (first_coalesced_pseudo[sregno] == first_coalesced_pseudo[dregno]) { coalesced_moves++; if (lra_dump_file != NULL) fprintf (lra_dump_file, " Coalescing move %i:r%d-r%d (freq=%d)\n", INSN_UID (mv), sregno, dregno, BLOCK_FOR_INSN (mv)->frequency); /* We updated involved_insns_bitmap when doing the merge. */ } else if (!(lra_intersected_live_ranges_p (lra_reg_info[first_coalesced_pseudo[sregno]].live_ranges, lra_reg_info[first_coalesced_pseudo[dregno]].live_ranges))) { coalesced_moves++; if (lra_dump_file != NULL) fprintf (lra_dump_file, " Coalescing move %i:r%d(%d)-r%d(%d) (freq=%d)\n", INSN_UID (mv), sregno, ORIGINAL_REGNO (SET_SRC (set)), dregno, ORIGINAL_REGNO (SET_DEST (set)), BLOCK_FOR_INSN (mv)->frequency); bitmap_ior_into (&involved_insns_bitmap, &lra_reg_info[sregno].insn_bitmap); bitmap_ior_into (&involved_insns_bitmap, &lra_reg_info[dregno].insn_bitmap); merge_pseudos (sregno, dregno); } } bitmap_initialize (&used_pseudos_bitmap, ®_obstack); FOR_EACH_BB (bb) { update_live_info (df_get_live_in (bb)); update_live_info (df_get_live_out (bb)); FOR_BB_INSNS_SAFE (bb, insn, next) if (INSN_P (insn) && bitmap_bit_p (&involved_insns_bitmap, INSN_UID (insn))) { if (! substitute (&insn)) continue; lra_update_insn_regno_info (insn); if ((set = single_set (insn)) != NULL_RTX && set_noop_p (set)) { /* Coalesced move. 
*/ if (lra_dump_file != NULL) fprintf (lra_dump_file, " Removing move %i (freq=%d)\n", INSN_UID (insn), BLOCK_FOR_INSN (insn)->frequency); lra_set_insn_deleted (insn); } } } bitmap_clear (&used_pseudos_bitmap); bitmap_clear (&involved_insns_bitmap); bitmap_clear (&coalesced_pseudos_bitmap); if (lra_dump_file != NULL && coalesced_moves != 0) fprintf (lra_dump_file, "Coalesced Moves = %d\n", coalesced_moves); free (sorted_moves); free (next_coalesced_pseudo); free (first_coalesced_pseudo); timevar_pop (TV_LRA_COALESCE); return coalesced_moves != 0; }
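/* lra_coalesce records coalesced pseudos through two arrays:
   first_coalesced_pseudo[r] is the representative of r's class and
   next_coalesced_pseudo links the members of a class in a circular list.
   merge_pseudos itself is not part of the excerpt above; the following is
   only a rough, self-contained sketch of how such a representative array
   plus circular next-links can be merged, with plain int arrays standing in
   for the real data structures:  */
#include <stdio.h>

#define N 8
static int first[N], next_link[N];

static void
init_classes (void)
{
  for (int i = 0; i < N; i++)
    first[i] = next_link[i] = i;   /* every pseudo starts in its own class */
}

/* Merge the classes of R1 and R2: repoint every member of R2's class at
   R1's representative, then splice the two circular lists together.  */
static void
merge_classes (int r1, int r2)
{
  int rep1 = first[r1], rep2 = first[r2];
  if (rep1 == rep2)
    return;
  int last = rep2;
  for (;;)
    {
      first[last] = rep1;
      if (next_link[last] == rep2)
	break;
      last = next_link[last];
    }
  int tmp = next_link[rep1];
  next_link[rep1] = rep2;
  next_link[last] = tmp;
}

int
main (void)
{
  init_classes ();
  merge_classes (1, 3);
  merge_classes (3, 5);
  for (int i = 0; i < N; i++)
    printf ("%d -> rep %d\n", i, first[i]);
  return 0;  /* pseudos 1, 3 and 5 all report representative 1 */
}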
static int reload_cse_simplify_operands (rtx insn, rtx testreg) { int i, j; /* For each operand, all registers that are equivalent to it. */ HARD_REG_SET equiv_regs[MAX_RECOG_OPERANDS]; const char *constraints[MAX_RECOG_OPERANDS]; /* Vector recording how bad an alternative is. */ int *alternative_reject; /* Vector recording how many registers can be introduced by choosing this alternative. */ int *alternative_nregs; /* Array of vectors recording, for each operand and each alternative, which hard register to substitute, or -1 if the operand should be left as it is. */ int *op_alt_regno[MAX_RECOG_OPERANDS]; /* Array of alternatives, sorted in order of decreasing desirability. */ int *alternative_order; extract_insn (insn); if (recog_data.n_alternatives == 0 || recog_data.n_operands == 0) return 0; /* Figure out which alternative currently matches. */ if (! constrain_operands (1)) fatal_insn_not_found (insn); alternative_reject = XALLOCAVEC (int, recog_data.n_alternatives); alternative_nregs = XALLOCAVEC (int, recog_data.n_alternatives); alternative_order = XALLOCAVEC (int, recog_data.n_alternatives); memset (alternative_reject, 0, recog_data.n_alternatives * sizeof (int)); memset (alternative_nregs, 0, recog_data.n_alternatives * sizeof (int)); /* For each operand, find out which regs are equivalent. */ for (i = 0; i < recog_data.n_operands; i++) { cselib_val *v; struct elt_loc_list *l; rtx op; enum machine_mode mode; CLEAR_HARD_REG_SET (equiv_regs[i]); /* cselib blows up on CODE_LABELs. Trying to fix that doesn't seem right, so avoid the problem here. Likewise if we have a constant and the insn pattern doesn't tell us the mode we need. */ if (LABEL_P (recog_data.operand[i]) || (CONSTANT_P (recog_data.operand[i]) && recog_data.operand_mode[i] == VOIDmode)) continue; op = recog_data.operand[i]; mode = GET_MODE (op); #ifdef LOAD_EXTEND_OP if (MEM_P (op) && GET_MODE_BITSIZE (mode) < BITS_PER_WORD && LOAD_EXTEND_OP (mode) != UNKNOWN) { rtx set = single_set (insn); /* We might have multiple sets, some of which do implicit extension. Punt on this for now. */ if (! set) continue; /* If the destination is also a MEM or a STRICT_LOW_PART, no extension applies. Also, if there is an explicit extension, we don't have to worry about an implicit one. */ else if (MEM_P (SET_DEST (set)) || GET_CODE (SET_DEST (set)) == STRICT_LOW_PART || GET_CODE (SET_SRC (set)) == ZERO_EXTEND || GET_CODE (SET_SRC (set)) == SIGN_EXTEND) ; /* Continue ordinary processing. */ #ifdef CANNOT_CHANGE_MODE_CLASS /* If the register cannot change mode to word_mode, it follows that it cannot have been used in word_mode. */ else if (REG_P (SET_DEST (set)) && CANNOT_CHANGE_MODE_CLASS (GET_MODE (SET_DEST (set)), word_mode, REGNO_REG_CLASS (REGNO (SET_DEST (set))))) ; /* Continue ordinary processing. */ #endif /* If this is a straight load, make the extension explicit. */ else if (REG_P (SET_DEST (set)) && recog_data.n_operands == 2 && SET_SRC (set) == op && SET_DEST (set) == recog_data.operand[1-i]) { validate_change (insn, recog_data.operand_loc[i], gen_rtx_fmt_e (LOAD_EXTEND_OP (mode), word_mode, op), 1); validate_change (insn, recog_data.operand_loc[1-i], gen_rtx_REG (word_mode, REGNO (SET_DEST (set))), 1); if (! apply_change_group ()) return 0; return reload_cse_simplify_operands (insn, testreg); } else /* ??? There might be arithmetic operations with memory that are safe to optimize, but is it worth the trouble? */ continue; } #endif /* LOAD_EXTEND_OP */ v = cselib_lookup (op, recog_data.operand_mode[i], 0); if (! 
v) continue; for (l = v->locs; l; l = l->next) if (REG_P (l->loc)) SET_HARD_REG_BIT (equiv_regs[i], REGNO (l->loc)); } for (i = 0; i < recog_data.n_operands; i++) { enum machine_mode mode; int regno; const char *p; op_alt_regno[i] = XALLOCAVEC (int, recog_data.n_alternatives); for (j = 0; j < recog_data.n_alternatives; j++) op_alt_regno[i][j] = -1; p = constraints[i] = recog_data.constraints[i]; mode = recog_data.operand_mode[i]; /* Add the reject values for each alternative given by the constraints for this operand. */ j = 0; while (*p != '\0') { char c = *p++; if (c == ',') j++; else if (c == '?') alternative_reject[j] += 3; else if (c == '!') alternative_reject[j] += 300; } /* We won't change operands which are already registers. We also don't want to modify output operands. */ regno = true_regnum (recog_data.operand[i]); if (regno >= 0 || constraints[i][0] == '=' || constraints[i][0] == '+') continue; for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) { int rclass = (int) NO_REGS; if (! TEST_HARD_REG_BIT (equiv_regs[i], regno)) continue; SET_REGNO (testreg, regno); PUT_MODE (testreg, mode); /* We found a register equal to this operand. Now look for all alternatives that can accept this register and have not been assigned a register they can use yet. */ j = 0; p = constraints[i]; for (;;) { char c = *p; switch (c) { case '=': case '+': case '?': case '#': case '&': case '!': case '*': case '%': case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': case '<': case '>': case 'V': case 'o': case 'E': case 'F': case 'G': case 'H': case 's': case 'i': case 'n': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N': case 'O': case 'P': case 'p': case 'X': case TARGET_MEM_CONSTRAINT: /* These don't say anything we care about. */ break; case 'g': case 'r': rclass = reg_class_subunion[(int) rclass][(int) GENERAL_REGS]; break; default: rclass = (reg_class_subunion [(int) rclass] [(int) REG_CLASS_FROM_CONSTRAINT ((unsigned char) c, p)]); break; case ',': case '\0': /* See if REGNO fits this alternative, and set it up as the replacement register if we don't have one for this alternative yet and the operand being replaced is not a cheap CONST_INT. */ if (op_alt_regno[i][j] == -1 && reg_fits_class_p (testreg, rclass, 0, mode) && (GET_CODE (recog_data.operand[i]) != CONST_INT || (rtx_cost (recog_data.operand[i], SET, optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn))) > rtx_cost (testreg, SET, optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn)))))) { alternative_nregs[j]++; op_alt_regno[i][j] = regno; } j++; rclass = (int) NO_REGS; break; } p += CONSTRAINT_LEN (c, p); if (c == '\0') break; } } } /* Record all alternatives which are better or equal to the currently matching one in the alternative_order array. */ for (i = j = 0; i < recog_data.n_alternatives; i++) if (alternative_reject[i] <= alternative_reject[which_alternative]) alternative_order[j++] = i; recog_data.n_alternatives = j; /* Sort it. Given a small number of alternatives, a dumb algorithm won't hurt too much. 
*/ for (i = 0; i < recog_data.n_alternatives - 1; i++) { int best = i; int best_reject = alternative_reject[alternative_order[i]]; int best_nregs = alternative_nregs[alternative_order[i]]; int tmp; for (j = i + 1; j < recog_data.n_alternatives; j++) { int this_reject = alternative_reject[alternative_order[j]]; int this_nregs = alternative_nregs[alternative_order[j]]; if (this_reject < best_reject || (this_reject == best_reject && this_nregs > best_nregs)) { best = j; best_reject = this_reject; best_nregs = this_nregs; } } tmp = alternative_order[best]; alternative_order[best] = alternative_order[i]; alternative_order[i] = tmp; } /* Substitute the operands as determined by op_alt_regno for the best alternative. */ j = alternative_order[0]; for (i = 0; i < recog_data.n_operands; i++) { enum machine_mode mode = recog_data.operand_mode[i]; if (op_alt_regno[i][j] == -1) continue; validate_change (insn, recog_data.operand_loc[i], gen_rtx_REG (mode, op_alt_regno[i][j]), 1); } for (i = recog_data.n_dups - 1; i >= 0; i--) { int op = recog_data.dup_num[i]; enum machine_mode mode = recog_data.operand_mode[op]; if (op_alt_regno[op][j] == -1) continue; validate_change (insn, recog_data.dup_loc[i], gen_rtx_REG (mode, op_alt_regno[op][j]), 1); } return apply_change_group (); }
/* Do transforms 3) and 4) on INSN if applicable. */ static bool mod_subtract_transform (rtx insn) { rtx set, set_src, set_dest, op1, op2, value, histogram; enum rtx_code code; enum machine_mode mode; gcov_type wrong_values, counts[2], count, all; edge e; int i, prob1, prob2; set = single_set (insn); if (!set) return false; set_src = SET_SRC (set); set_dest = SET_DEST (set); code = GET_CODE (set_src); mode = GET_MODE (set_dest); if (code != UMOD) return false; op1 = XEXP (set_src, 0); op2 = XEXP (set_src, 1); for (histogram = REG_NOTES (insn); histogram; histogram = XEXP (histogram, 1)) if (REG_NOTE_KIND (histogram) == REG_VALUE_PROFILE && XEXP (XEXP (histogram, 0), 0) == GEN_INT (HIST_TYPE_INTERVAL)) break; if (!histogram) return false; histogram = XEXP (XEXP (histogram, 0), 1); value = XEXP (histogram, 0); histogram = XEXP (histogram, 1); all = 0; for (i = 0; i < 2; i++) { counts[i] = INTVAL (XEXP (histogram, 0)); all += counts[i]; histogram = XEXP (histogram, 1); } wrong_values = INTVAL (XEXP (histogram, 0)); histogram = XEXP (histogram, 1); wrong_values += INTVAL (XEXP (histogram, 0)); all += wrong_values; /* We require that we use just subtractions in at least 50% of all evaluations. */ count = 0; for (i = 0; i < 2; i++) { count += counts[i]; if (count * 2 >= all) break; } if (i == 2) return false; if (dump_file) fprintf (dump_file, "Mod subtract transformation on insn %d\n", INSN_UID (insn)); /* Compute probability of taking the optimal path(s). */ prob1 = (counts[0] * REG_BR_PROB_BASE + all / 2) / all; prob2 = (counts[1] * REG_BR_PROB_BASE + all / 2) / all; e = split_block (BLOCK_FOR_INSN (insn), PREV_INSN (insn)); delete_insn (insn); insert_insn_on_edge ( gen_mod_subtract (mode, code, set_dest, op1, op2, i, prob1, prob2), e); return true; }
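/* The interval histogram above counts how often the quotient op1 / op2 is 0
   or 1; gen_mod_subtract (not shown) exploits that by replacing the modulus
   with one or two compare-and-subtract steps and a rarely taken division
   fallback.  A source-level sketch of the idea; the helper below is
   illustrative, not the pass's output:  */
#include <stdio.h>

/* op1 % op2 when the quotient is almost always 0 or 1: at most one
   compare-and-subtract, with a real division only on the rare path.
   Requires op2 != 0, just like the original modulus.  */
static unsigned int
umod_by_subtract (unsigned int op1, unsigned int op2)
{
  if (op1 < op2)
    return op1;              /* quotient 0 */
  if (op1 - op2 < op2)
    return op1 - op2;        /* quotient 1 */
  return op1 % op2;          /* rare slow path */
}

int
main (void)
{
  printf ("%u %u %u\n",
          umod_by_subtract (3, 10),
          umod_by_subtract (13, 10),
          umod_by_subtract (137, 10));
  return 0;  /* prints "3 3 7" */
}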
void optimize_sibling_and_tail_recursive_calls (void) { rtx insn, insns; basic_block alternate_exit = EXIT_BLOCK_PTR; bool no_sibcalls_this_function = false; bool successful_replacement = false; bool replaced_call_placeholder = false; edge e; insns = get_insns (); cleanup_cfg (CLEANUP_PRE_SIBCALL | CLEANUP_PRE_LOOP); /* If there are no basic blocks, then there is nothing to do. */ if (n_basic_blocks == 0) return; /* If we are using sjlj exceptions, we may need to add a call to _Unwind_SjLj_Unregister at exit of the function. Which means that we cannot do any sibcall transformations. */ if (USING_SJLJ_EXCEPTIONS && current_function_has_exception_handlers ()) no_sibcalls_this_function = true; return_value_pseudo = NULL_RTX; /* Find the exit block. It is possible that we have blocks which can reach the exit block directly. However, most of the time a block will jump (or fall into) N_BASIC_BLOCKS - 1, which in turn falls into the exit block. */ for (e = EXIT_BLOCK_PTR->pred; e && alternate_exit == EXIT_BLOCK_PTR; e = e->pred_next) { rtx insn; if (e->dest != EXIT_BLOCK_PTR || e->succ_next != NULL) continue; /* Walk forwards through the last normal block and see if it does nothing except fall into the exit block. */ for (insn = BB_HEAD (EXIT_BLOCK_PTR->prev_bb); insn; insn = NEXT_INSN (insn)) { rtx set; /* This should only happen once, at the start of this block. */ if (GET_CODE (insn) == CODE_LABEL) continue; if (GET_CODE (insn) == NOTE) continue; if (GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == USE) continue; /* Exit block also may contain copy from pseudo containing return value to hard register. */ if (GET_CODE (insn) == INSN && (set = single_set (insn)) && SET_DEST (set) == current_function_return_rtx && REG_P (SET_SRC (set)) && !return_value_pseudo) { return_value_pseudo = SET_SRC (set); continue; } break; } /* If INSN is zero, then the search walked all the way through the block without hitting anything interesting. This block is a valid alternate exit block. */ if (insn == NULL) alternate_exit = e->src; else return_value_pseudo = NULL; } /* If the function uses ADDRESSOF, we can't (easily) determine at this point if the value will end up on the stack. */ no_sibcalls_this_function |= sequence_uses_addressof (insns); /* Walk the insn chain and find any CALL_PLACEHOLDER insns. We need to select one of the insn sequences attached to each CALL_PLACEHOLDER. The different sequences represent different ways to implement the call, ie, tail recursion, sibling call or normal call. Since we do not create nested CALL_PLACEHOLDERs, the scan continues with the insn that was after a replaced CALL_PLACEHOLDER; we don't rescan the replacement insns. */ for (insn = insns; insn; insn = NEXT_INSN (insn)) { if (GET_CODE (insn) == CALL_INSN && GET_CODE (PATTERN (insn)) == CALL_PLACEHOLDER) { int sibcall = (XEXP (PATTERN (insn), 1) != NULL_RTX); int tailrecursion = (XEXP (PATTERN (insn), 2) != NULL_RTX); basic_block call_block = BLOCK_FOR_INSN (insn); /* alloca (until we have stack slot life analysis) inhibits sibling call optimizations, but not tail recursion. Similarly if we use varargs or stdarg since they implicitly may take the address of an argument. */ if (current_function_calls_alloca || current_function_stdarg) sibcall = 0; /* See if there are any reasons we can't perform either sibling or tail call optimizations. We must be careful with stack slots which are live at potential optimization sites. */ if (no_sibcalls_this_function /* ??? Overly conservative. 
*/ || frame_offset /* Any function that calls setjmp might have longjmp called from any called function. ??? We really should represent this properly in the CFG so that this needn't be special cased. */ || current_function_calls_setjmp /* Can't if more than one successor or single successor is not exit block. These two tests prevent tail call optimization in the presence of active exception handlers. */ || call_block->succ == NULL || call_block->succ->succ_next != NULL || (call_block->succ->dest != EXIT_BLOCK_PTR && call_block->succ->dest != alternate_exit) /* If this call doesn't end the block, there are operations at the end of the block which we must execute after returning. */ || ! call_ends_block_p (insn, BB_END (call_block))) sibcall = 0, tailrecursion = 0; /* Select a set of insns to implement the call and emit them. Tail recursion is the most efficient, so select it over a tail/sibling call. */ if (sibcall || tailrecursion) successful_replacement = true; replaced_call_placeholder = true; replace_call_placeholder (insn, tailrecursion != 0 ? sibcall_use_tail_recursion : sibcall != 0 ? sibcall_use_sibcall : sibcall_use_normal); } } if (successful_replacement) { rtx insn; tree arg; /* A sibling call sequence invalidates any REG_EQUIV notes made for this function's incoming arguments. At the start of RTL generation we know the only REG_EQUIV notes in the rtl chain are those for incoming arguments, so we can safely flush any REG_EQUIV note. This is (slight) overkill. We could keep track of the highest argument we clobber and be more selective in removing notes, but it does not seem to be worth the effort. */ purge_reg_equiv_notes (); /* A sibling call sequence also may invalidate RTX_UNCHANGING_P flag of some incoming arguments MEM RTLs, because it can write into those slots. We clear all those bits now. This is (slight) overkill, we could keep track of which arguments we actually write into. */ for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) { if (INSN_P (insn)) purge_mem_unchanging_flag (PATTERN (insn)); } /* Similarly, invalidate RTX_UNCHANGING_P for any incoming arguments passed in registers. */ for (arg = DECL_ARGUMENTS (current_function_decl); arg; arg = TREE_CHAIN (arg)) { if (REG_P (DECL_RTL (arg))) RTX_UNCHANGING_P (DECL_RTL (arg)) = false; } } /* There may have been NOTE_INSN_BLOCK_{BEGIN,END} notes in the CALL_PLACEHOLDER alternatives that we didn't emit. Rebuild the lexical block tree to correspond to the notes that still exist. */ if (replaced_call_placeholder) reorder_blocks (); /* This information will be invalid after inline expansion. Kill it now. */ free_basic_block_vars (0); free_EXPR_LIST_list (&tail_recursion_label_list); }
/* Do transform 2) on INSN if applicable. */ static bool mod_pow2_value_transform (rtx insn) { rtx set, set_src, set_dest, op1, op2, value, histogram; enum rtx_code code; enum machine_mode mode; gcov_type wrong_values, count; edge e; int i, all, prob; set = single_set (insn); if (!set) return false; set_src = SET_SRC (set); set_dest = SET_DEST (set); code = GET_CODE (set_src); mode = GET_MODE (set_dest); if (code != UMOD) return false; op1 = XEXP (set_src, 0); op2 = XEXP (set_src, 1); for (histogram = REG_NOTES (insn); histogram; histogram = XEXP (histogram, 1)) if (REG_NOTE_KIND (histogram) == REG_VALUE_PROFILE && XEXP (XEXP (histogram, 0), 0) == GEN_INT (HIST_TYPE_POW2)) break; if (!histogram) return false; histogram = XEXP (XEXP (histogram, 0), 1); value = XEXP (histogram, 0); histogram = XEXP (histogram, 1); wrong_values =INTVAL (XEXP (histogram, 0)); histogram = XEXP (histogram, 1); count = 0; for (i = 0; i < GET_MODE_BITSIZE (mode); i++) { count += INTVAL (XEXP (histogram, 0)); histogram = XEXP (histogram, 1); } if (!rtx_equal_p (op2, value)) return false; /* We require that we hit a power of two at least half of all evaluations. */ if (count < wrong_values) return false; if (dump_file) fprintf (dump_file, "Mod power of 2 transformation on insn %d\n", INSN_UID (insn)); /* Compute probability of taking the optimal path. */ all = count + wrong_values; prob = (count * REG_BR_PROB_BASE + all / 2) / all; e = split_block (BLOCK_FOR_INSN (insn), PREV_INSN (insn)); delete_insn (insn); insert_insn_on_edge ( gen_mod_pow2 (mode, code, set_dest, op1, op2, prob), e); return true; }
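/* gen_mod_pow2 (not shown) guards the modulus with a power-of-two test and
   uses a mask on the fast path.  The same idea at source level:  */
#include <stdio.h>

/* op1 % op2 with a fast path for power-of-two divisors:
   x is a nonzero power of two exactly when (x & (x - 1)) == 0.  */
static unsigned int
umod_pow2 (unsigned int op1, unsigned int op2)
{
  if (op2 != 0 && (op2 & (op2 - 1)) == 0)
    return op1 & (op2 - 1);   /* op2 is a power of two */
  return op1 % op2;           /* fallback: real division */
}

int
main (void)
{
  printf ("%u %u\n", umod_pow2 (77, 8), umod_pow2 (77, 10));
  return 0;  /* prints "5 7" */
}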
static bool attempt_change (rtx new_addr, rtx inc_reg) { /* There are four cases: For the two cases that involve an add instruction, we are going to have to delete the add and insert a mov. We are going to assume that the mov is free. This is fairly early in the backend and there are a lot of opportunities for removing that move later. In particular, there is the case where the move may be dead, this is what dead code elimination passes are for. The two cases where we have an inc insn will be handled mov free. */ basic_block bb = BLOCK_FOR_INSN (mem_insn.insn); rtx mov_insn = NULL; int regno; rtx mem = *mem_insn.mem_loc; enum machine_mode mode = GET_MODE (mem); rtx new_mem; int old_cost = 0; int new_cost = 0; bool speed = optimize_bb_for_speed_p (bb); PUT_MODE (mem_tmp, mode); XEXP (mem_tmp, 0) = new_addr; old_cost = (set_src_cost (mem, speed) + set_rtx_cost (PATTERN (inc_insn.insn), speed)); new_cost = set_src_cost (mem_tmp, speed); /* The first item of business is to see if this is profitable. */ if (old_cost < new_cost) { if (dump_file) fprintf (dump_file, "cost failure old=%d new=%d\n", old_cost, new_cost); return false; } /* Jump through a lot of hoops to keep the attributes up to date. We do not want to call one of the change address variants that take an offset even though we know the offset in many cases. These assume you are changing where the address is pointing by the offset. */ new_mem = replace_equiv_address_nv (mem, new_addr); if (! validate_change (mem_insn.insn, mem_insn.mem_loc, new_mem, 0)) { if (dump_file) fprintf (dump_file, "validation failure\n"); return false; } /* From here to the end of the function we are committed to the change, i.e. nothing fails. Generate any necessary movs, move any regnotes, and fix up the reg_next_{use,inc_use,def}. */ switch (inc_insn.form) { case FORM_PRE_ADD: /* Replace the addition with a move. Do it at the location of the addition since the operand of the addition may change before the memory reference. */ mov_insn = insert_move_insn_before (inc_insn.insn, inc_insn.reg_res, inc_insn.reg0); move_dead_notes (mov_insn, inc_insn.insn, inc_insn.reg0); regno = REGNO (inc_insn.reg_res); reg_next_def[regno] = mov_insn; reg_next_use[regno] = NULL; regno = REGNO (inc_insn.reg0); reg_next_use[regno] = mov_insn; df_recompute_luids (bb); break; case FORM_POST_INC: regno = REGNO (inc_insn.reg_res); if (reg_next_use[regno] == reg_next_inc_use[regno]) reg_next_inc_use[regno] = NULL; /* Fallthru. */ case FORM_PRE_INC: regno = REGNO (inc_insn.reg_res); reg_next_def[regno] = mem_insn.insn; reg_next_use[regno] = NULL; break; case FORM_POST_ADD: mov_insn = insert_move_insn_before (mem_insn.insn, inc_insn.reg_res, inc_insn.reg0); move_dead_notes (mov_insn, inc_insn.insn, inc_insn.reg0); /* Do not move anything to the mov insn because the instruction pointer for the main iteration has not yet hit that. It is still pointing to the mem insn. 
*/ regno = REGNO (inc_insn.reg_res); reg_next_def[regno] = mem_insn.insn; reg_next_use[regno] = NULL; regno = REGNO (inc_insn.reg0); reg_next_use[regno] = mem_insn.insn; if ((reg_next_use[regno] == reg_next_inc_use[regno]) || (reg_next_inc_use[regno] == inc_insn.insn)) reg_next_inc_use[regno] = NULL; df_recompute_luids (bb); break; case FORM_last: default: gcc_unreachable (); } if (!inc_insn.reg1_is_const) { regno = REGNO (inc_insn.reg1); reg_next_use[regno] = mem_insn.insn; if ((reg_next_use[regno] == reg_next_inc_use[regno]) || (reg_next_inc_use[regno] == inc_insn.insn)) reg_next_inc_use[regno] = NULL; } delete_insn (inc_insn.insn); if (dump_file && mov_insn) { fprintf (dump_file, "inserting mov "); dump_insn_slim (dump_file, mov_insn); } /* Record that this insn has an implicit side effect. */ add_reg_note (mem_insn.insn, REG_INC, inc_reg); if (dump_file) { fprintf (dump_file, "****success "); dump_insn_slim (dump_file, mem_insn.insn); } return true; }
static int reload_cse_simplify_set (rtx set, rtx insn) { int did_change = 0; int dreg; rtx src; enum reg_class dclass; int old_cost; cselib_val *val; struct elt_loc_list *l; #ifdef LOAD_EXTEND_OP enum rtx_code extend_op = UNKNOWN; #endif bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn)); dreg = true_regnum (SET_DEST (set)); if (dreg < 0) return 0; src = SET_SRC (set); if (side_effects_p (src) || true_regnum (src) >= 0) return 0; dclass = REGNO_REG_CLASS (dreg); #ifdef LOAD_EXTEND_OP /* When replacing a memory with a register, we need to honor assumptions that combine made wrt the contents of sign bits. We'll do this by generating an extend instruction instead of a reg->reg copy. Thus the destination must be a register that we can widen. */ if (MEM_P (src) && GET_MODE_BITSIZE (GET_MODE (src)) < BITS_PER_WORD && (extend_op = LOAD_EXTEND_OP (GET_MODE (src))) != UNKNOWN && !REG_P (SET_DEST (set))) return 0; #endif val = cselib_lookup (src, GET_MODE (SET_DEST (set)), 0); if (! val) return 0; /* If memory loads are cheaper than register copies, don't change them. */ if (MEM_P (src)) old_cost = MEMORY_MOVE_COST (GET_MODE (src), dclass, 1); else if (REG_P (src)) old_cost = REGISTER_MOVE_COST (GET_MODE (src), REGNO_REG_CLASS (REGNO (src)), dclass); else old_cost = rtx_cost (src, SET, speed); for (l = val->locs; l; l = l->next) { rtx this_rtx = l->loc; int this_cost; if (CONSTANT_P (this_rtx) && ! references_value_p (this_rtx, 0)) { #ifdef LOAD_EXTEND_OP if (extend_op != UNKNOWN) { HOST_WIDE_INT this_val; /* ??? I'm lazy and don't wish to handle CONST_DOUBLE. Other constants, such as SYMBOL_REF, cannot be extended. */ if (GET_CODE (this_rtx) != CONST_INT) continue; this_val = INTVAL (this_rtx); switch (extend_op) { case ZERO_EXTEND: this_val &= GET_MODE_MASK (GET_MODE (src)); break; case SIGN_EXTEND: /* ??? In theory we're already extended. */ if (this_val == trunc_int_for_mode (this_val, GET_MODE (src))) break; default: gcc_unreachable (); } this_rtx = GEN_INT (this_val); } #endif this_cost = rtx_cost (this_rtx, SET, speed); } else if (REG_P (this_rtx)) { #ifdef LOAD_EXTEND_OP if (extend_op != UNKNOWN) { this_rtx = gen_rtx_fmt_e (extend_op, word_mode, this_rtx); this_cost = rtx_cost (this_rtx, SET, speed); } else #endif this_cost = REGISTER_MOVE_COST (GET_MODE (this_rtx), REGNO_REG_CLASS (REGNO (this_rtx)), dclass); } else continue; /* If equal costs, prefer registers over anything else. That tends to lead to smaller instructions on some machines. */ if (this_cost < old_cost || (this_cost == old_cost && REG_P (this_rtx) && !REG_P (SET_SRC (set)))) { #ifdef LOAD_EXTEND_OP if (GET_MODE_BITSIZE (GET_MODE (SET_DEST (set))) < BITS_PER_WORD && extend_op != UNKNOWN #ifdef CANNOT_CHANGE_MODE_CLASS && !CANNOT_CHANGE_MODE_CLASS (GET_MODE (SET_DEST (set)), word_mode, REGNO_REG_CLASS (REGNO (SET_DEST (set)))) #endif ) { rtx wide_dest = gen_rtx_REG (word_mode, REGNO (SET_DEST (set))); ORIGINAL_REGNO (wide_dest) = ORIGINAL_REGNO (SET_DEST (set)); validate_change (insn, &SET_DEST (set), wide_dest, 1); } #endif validate_unshare_change (insn, &SET_SRC (set), this_rtx, 1); old_cost = this_cost, did_change = 1; } } return did_change; }
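/* The LOAD_EXTEND_OP code above only substitutes a constant if it still
   matches what the implicit subword-load extension would produce: a
   ZERO_EXTEND target needs the value masked to the narrow mode, a
   SIGN_EXTEND target can keep it only if it is already sign-extended.
   A small stand-alone illustration for an 8-bit width (the width is an
   example, not taken from the code above):  */
#include <stdio.h>
#include <stdint.h>

/* What a zero-extending byte load would leave in a full register.  */
static int64_t
zero_extend_byte (int64_t val)
{
  return val & 0xff;            /* the mode mask of an 8-bit mode */
}

/* A sign-extending byte load changes nothing only if VAL is already the
   sign-extension of its low byte (the truncate-and-compare test).  */
static int
already_sign_extended_byte (int64_t val)
{
  return val == (int64_t) (int8_t) val;
}

int
main (void)
{
  printf ("%lld %d %d\n",
          (long long) zero_extend_byte (-1),   /* 255 */
          already_sign_extended_byte (-1),     /* 1 */
          already_sign_extended_byte (255));   /* 0 */
  return 0;
}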
static bool speculative_prefetching_transform (rtx insn) { rtx histogram, value; gcov_type val, count, all; edge e; rtx mem, address; int write; if (!maybe_hot_bb_p (BLOCK_FOR_INSN (insn))) return false; if (!find_mem_reference (insn, &mem, &write)) return false; address = XEXP (mem, 0); if (side_effects_p (address)) return false; if (CONSTANT_P (address)) return false; for (histogram = REG_NOTES (insn); histogram; histogram = XEXP (histogram, 1)) if (REG_NOTE_KIND (histogram) == REG_VALUE_PROFILE && XEXP (XEXP (histogram, 0), 0) == GEN_INT (HIST_TYPE_CONST_DELTA)) break; if (!histogram) return false; histogram = XEXP (XEXP (histogram, 0), 1); value = XEXP (histogram, 0); histogram = XEXP (histogram, 1); /* Skip last value referenced. */ histogram = XEXP (histogram, 1); val = INTVAL (XEXP (histogram, 0)); histogram = XEXP (histogram, 1); count = INTVAL (XEXP (histogram, 0)); histogram = XEXP (histogram, 1); all = INTVAL (XEXP (histogram, 0)); /* With that few executions we do not really have a reason to optimize the statement, and more importantly, the data about differences of addresses are spoiled by the first item that had no previous value to compare with. */ if (all < 4) return false; /* We require that count be at least half of all; this means that for the transformation to fire the value must be constant at least 50% of time (and 75% gives the guarantee of usage). */ if (!rtx_equal_p (address, value) || 2 * count < all) return false; /* If the difference is too small, it does not make too much sense to prefetch, as the memory is probably already in cache. */ if (val >= NOPREFETCH_RANGE_MIN && val <= NOPREFETCH_RANGE_MAX) return false; if (dump_file) fprintf (dump_file, "Speculative prefetching for insn %d\n", INSN_UID (insn)); e = split_block (BLOCK_FOR_INSN (insn), PREV_INSN (insn)); insert_insn_on_edge (gen_speculative_prefetch (address, val, write), e); return true; }
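/* gen_speculative_prefetch (not shown) emits a prefetch of the profiled
   address plus the constant stride VAL ahead of the memory access.  A
   stand-alone sketch of the resulting behaviour using GCC's
   __builtin_prefetch; the 64-element stride is invented for the example:  */
#include <stddef.h>

/* Walk an array and prefetch STRIDE elements ahead of the current access,
   the source-level shape of the prefetch the pass inserts.  */
double
sum_with_prefetch (const double *a, size_t n)
{
  const size_t stride = 64;
  double sum = 0.0;

  for (size_t i = 0; i < n; i++)
    {
      if (i + stride < n)
	__builtin_prefetch (&a[i + stride], /* rw = */ 0, /* locality = */ 3);
      sum += a[i];
    }
  return sum;
}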