void
compute_available (sbitmap *avloc, sbitmap *kill, sbitmap *avout,
                   sbitmap *avin)
{
  edge e;
  basic_block *worklist, *qin, *qout, *qend, bb;
  unsigned int qlen;
  edge_iterator ei;

  /* Allocate a worklist array/queue.  Entries are only added to the
     list if they were not already on the list.  So the size is
     bounded by the number of basic blocks.  */
  qin = qout = worklist
    = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);

  /* We want a maximal solution.  */
  bitmap_vector_ones (avout, last_basic_block_for_fn (cfun));

  /* Put every block on the worklist; this is necessary because of the
     optimistic initialization of AVOUT above.  Use inverted postorder
     to make the dataflow problem require fewer iterations.  */
  int *postorder = XNEWVEC (int, n_basic_blocks_for_fn (cfun));
  int postorder_num = inverted_post_order_compute (postorder);
  for (int i = 0; i < postorder_num; ++i)
    {
      bb = BASIC_BLOCK_FOR_FN (cfun, postorder[i]);
      if (bb == EXIT_BLOCK_PTR_FOR_FN (cfun)
          || bb == ENTRY_BLOCK_PTR_FOR_FN (cfun))
        continue;
      *qin++ = bb;
      bb->aux = bb;
    }
  free (postorder);

  qin = worklist;
  qend = &worklist[n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS];
  qlen = n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS;

  /* Mark blocks which are successors of the entry block so that we
     can easily identify them below.  */
  FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR_FOR_FN (cfun)->succs)
    e->dest->aux = ENTRY_BLOCK_PTR_FOR_FN (cfun);

  /* Iterate until the worklist is empty.  */
  while (qlen)
    {
      /* Take the first entry off the worklist.  */
      bb = *qout++;
      qlen--;

      if (qout >= qend)
        qout = worklist;

      /* If one of the predecessor blocks is the ENTRY block, then the
         intersection of avouts is the null set.  We can identify such blocks
         by the special value in the AUX field in the block structure.  */
      if (bb->aux == ENTRY_BLOCK_PTR_FOR_FN (cfun))
        /* Do not clear the aux field for blocks which are successors of the
           ENTRY block.  That way we never add them to the worklist
           again.  */
        bitmap_clear (avin[bb->index]);
      else
        {
          /* Clear the aux field of this block so that it can be added to
             the worklist again if necessary.  */
          bb->aux = NULL;
          bitmap_intersection_of_preds (avin[bb->index], avout, bb);
        }

      if (bitmap_ior_and_compl (avout[bb->index], avloc[bb->index],
                                avin[bb->index], kill[bb->index]))
        /* If the out state of this block changed, then we need to add the
           successors of this block to the worklist if they are not already
           on the worklist.  */
        FOR_EACH_EDGE (e, ei, bb->succs)
          if (!e->dest->aux && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
            {
              *qin++ = e->dest;
              e->dest->aux = e;
              qlen++;

              if (qin >= qend)
                qin = worklist;
            }
    }

  clear_aux_for_edges ();
  clear_aux_for_blocks ();
  free (worklist);
}
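/* For reference, a sketch of the forward dataflow system that
   compute_available solves (this summary is editorial, not from the
   original source):

     AVIN (bb)  = intersection over preds p of AVOUT (p)
                  (the empty set when ENTRY is a predecessor)
     AVOUT (bb) = AVLOC (bb) | (AVIN (bb) & ~KILL (bb))

   The AVOUT update is exactly the bitmap_ior_and_compl call in the
   loop above; iteration continues until no AVOUT changes.  */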
/* The core routine of conditional store replacement and normal
   phi optimizations.  Both share much of the infrastructure in how
   to match applicable basic block patterns.  DO_STORE_ELIM is true
   when we want to do conditional store replacement, false otherwise.
   DO_HOIST_LOADS is true when we want to hoist adjacent loads out
   of diamond control flow patterns, false otherwise.  */

static unsigned int
tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads)
{
  basic_block bb;
  basic_block *bb_order;
  unsigned n, i;
  bool cfgchanged = false;
  hash_set<tree> *nontrap = 0;

  if (do_store_elim)
    /* Calculate the set of non-trapping memory accesses.  */
    nontrap = get_non_trapping ();

  /* Search every basic block for COND_EXPR we may be able to optimize.

     We walk the blocks in order that guarantees that a block with
     a single predecessor is processed before the predecessor.
     This ensures that we collapse inner ifs before visiting the
     outer ones, and also that we do not try to visit a removed
     block.  */
  bb_order = single_pred_before_succ_order ();
  n = n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS;

  for (i = 0; i < n; i++)
    {
      gimple cond_stmt;
      gphi *phi;
      basic_block bb1, bb2;
      edge e1, e2;
      tree arg0, arg1;

      bb = bb_order[i];

      cond_stmt = last_stmt (bb);
      /* Check to see if the last statement is a GIMPLE_COND.  */
      if (!cond_stmt
          || gimple_code (cond_stmt) != GIMPLE_COND)
        continue;

      e1 = EDGE_SUCC (bb, 0);
      bb1 = e1->dest;
      e2 = EDGE_SUCC (bb, 1);
      bb2 = e2->dest;

      /* We cannot do the optimization on abnormal edges.  */
      if ((e1->flags & EDGE_ABNORMAL) != 0
          || (e2->flags & EDGE_ABNORMAL) != 0)
        continue;

      /* Skip if bb2 is missing, or if either bb1 or bb2 has no
         successors.  */
      if (EDGE_COUNT (bb1->succs) == 0
          || bb2 == NULL
          || EDGE_COUNT (bb2->succs) == 0)
        continue;

      /* Find the bb which is the fall through to the other.  */
      if (EDGE_SUCC (bb1, 0)->dest == bb2)
        ;
      else if (EDGE_SUCC (bb2, 0)->dest == bb1)
        {
          std::swap (bb1, bb2);
          std::swap (e1, e2);
        }
      else if (do_store_elim
               && EDGE_SUCC (bb1, 0)->dest == EDGE_SUCC (bb2, 0)->dest)
        {
          basic_block bb3 = EDGE_SUCC (bb1, 0)->dest;

          if (!single_succ_p (bb1)
              || (EDGE_SUCC (bb1, 0)->flags & EDGE_FALLTHRU) == 0
              || !single_succ_p (bb2)
              || (EDGE_SUCC (bb2, 0)->flags & EDGE_FALLTHRU) == 0
              || EDGE_COUNT (bb3->preds) != 2)
            continue;
          if (cond_if_else_store_replacement (bb1, bb2, bb3))
            cfgchanged = true;
          continue;
        }
      else if (do_hoist_loads
               && EDGE_SUCC (bb1, 0)->dest == EDGE_SUCC (bb2, 0)->dest)
        {
          basic_block bb3 = EDGE_SUCC (bb1, 0)->dest;

          if (!FLOAT_TYPE_P (TREE_TYPE (gimple_cond_lhs (cond_stmt)))
              && single_succ_p (bb1)
              && single_succ_p (bb2)
              && single_pred_p (bb1)
              && single_pred_p (bb2)
              && EDGE_COUNT (bb->succs) == 2
              && EDGE_COUNT (bb3->preds) == 2
              /* If one edge or the other is dominant, a conditional move
                 is likely to perform worse than the well-predicted
                 branch.  */
              && !predictable_edge_p (EDGE_SUCC (bb, 0))
              && !predictable_edge_p (EDGE_SUCC (bb, 1)))
            hoist_adjacent_loads (bb, bb1, bb2, bb3);
          continue;
        }
      else
        continue;

      e1 = EDGE_SUCC (bb1, 0);

      /* Make sure that bb1 is just a fall through.  */
      if (!single_succ_p (bb1)
          || (e1->flags & EDGE_FALLTHRU) == 0)
        continue;

      /* Also make sure that bb1 only has one predecessor and that it
         is bb.  */
      if (!single_pred_p (bb1)
          || single_pred (bb1) != bb)
        continue;

      if (do_store_elim)
        {
          /* bb1 is the middle block, bb2 the join block, bb the split block,
             e1 the fallthrough edge from bb1 to bb2.  We can't do the
             optimization if the join block has more than two
             predecessors.  */
          if (EDGE_COUNT (bb2->preds) > 2)
            continue;
          if (cond_store_replacement (bb1, bb2, e1, e2, nontrap))
            cfgchanged = true;
        }
      else
        {
          gimple_seq phis = phi_nodes (bb2);
          gimple_stmt_iterator gsi;
          bool candorest = true;

          /* Value replacement can work with more than one PHI
             so try that first.  */
          for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
            {
              phi = as_a <gphi *> (gsi_stmt (gsi));
              arg0 = gimple_phi_arg_def (phi, e1->dest_idx);
              arg1 = gimple_phi_arg_def (phi, e2->dest_idx);
              if (value_replacement (bb, bb1, e1, e2, phi, arg0, arg1) == 2)
                {
                  candorest = false;
                  cfgchanged = true;
                  break;
                }
            }

          if (!candorest)
            continue;

          phi = single_non_singleton_phi_for_edges (phis, e1, e2);
          if (!phi)
            continue;

          arg0 = gimple_phi_arg_def (phi, e1->dest_idx);
          arg1 = gimple_phi_arg_def (phi, e2->dest_idx);

          /* Something is wrong if we cannot find the arguments in the PHI
             node.  */
          gcc_assert (arg0 != NULL && arg1 != NULL);

          if (factor_out_conditional_conversion (e1, e2, phi, arg0, arg1))
            {
              /* factor_out_conditional_conversion may create a new PHI in
                 BB2 and eliminate an existing PHI in BB2.  Recompute values
                 that may be affected by that change.  */
              phis = phi_nodes (bb2);
              phi = single_non_singleton_phi_for_edges (phis, e1, e2);
              gcc_assert (phi);
              arg0 = gimple_phi_arg_def (phi, e1->dest_idx);
              arg1 = gimple_phi_arg_def (phi, e2->dest_idx);
              gcc_assert (arg0 != NULL && arg1 != NULL);
            }

          /* Do the replacement of conditional if it can be done.  */
          if (conditional_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
            cfgchanged = true;
          else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
            cfgchanged = true;
          else if (minmax_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
            cfgchanged = true;
        }
    }

  free (bb_order);

  if (do_store_elim)
    delete nontrap;
  /* If the CFG has changed, we should cleanup the CFG.  */
  if (cfgchanged && do_store_elim)
    {
      /* In cond-store replacement we have added some loads on edges
         and new VOPS (as we moved the store, and created a load).  */
      gsi_commit_edge_inserts ();
      return TODO_cleanup_cfg | TODO_update_ssa_only_virtuals;
    }
  else if (cfgchanged)
    return TODO_cleanup_cfg;
  return 0;
}
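/* A minimal source-level sketch of the half-diamond shape the worker
   above matches (this example is illustrative and not part of the
   original file; the function name is hypothetical).  bb ends in the
   GIMPLE_COND, bb1 is the single-predecessor middle block that falls
   through to the join block bb2, and the PHI <a, b> in bb2 is what
   minmax_replacement can rewrite into a MIN_EXPR.  */

static int
example_min (int a, int b)
{
  int x = b;    /* bb: ends in the GIMPLE_COND for a < b.  */
  if (a < b)
    x = a;      /* bb1: middle block, falls through to bb2.  */
  return x;     /* bb2: join block with x = PHI <a, b>.  */
}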
static void
compute_laterin (struct edge_list *edge_list, sbitmap *earliest,
                 sbitmap *antloc, sbitmap *later, sbitmap *laterin)
{
  int num_edges, i;
  edge e;
  basic_block *worklist, *qin, *qout, *qend, bb;
  unsigned int qlen;
  edge_iterator ei;

  num_edges = NUM_EDGES (edge_list);

  /* Allocate a worklist array/queue.  Entries are only added to the
     list if they were not already on the list.  So the size is
     bounded by the number of basic blocks.  */
  qin = qout = worklist
    = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun));

  /* Initialize a mapping from each edge to its index.  */
  for (i = 0; i < num_edges; i++)
    INDEX_EDGE (edge_list, i)->aux = (void *) (size_t) i;

  /* We want a maximal solution, so initially consider LATER true for
     all edges.  This allows propagation through a loop since the incoming
     loop edge will have LATER set, so if all the other incoming edges
     to the loop are set, then LATERIN will be set for the head of the
     loop.

     If the optimistic setting of LATER on that edge was incorrect (for
     example the expression is ANTLOC in a block within the loop) then
     this algorithm will detect it when we process the block at the head
     of the optimistic edge.  That will requeue the affected blocks.  */
  bitmap_vector_ones (later, num_edges);

  /* Note that even though we want an optimistic setting of LATER, we
     do not want to be overly optimistic.  Consider an outgoing edge from
     the entry block.  That edge should always have a LATER value the
     same as EARLIEST for that edge.  */
  FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR_FOR_FN (cfun)->succs)
    bitmap_copy (later[(size_t) e->aux], earliest[(size_t) e->aux]);

  /* Add all the blocks to the worklist.  This prevents an early exit
     from the loop given our optimistic initialization of LATER above.  */
  int *postorder = XNEWVEC (int, n_basic_blocks_for_fn (cfun));
  int postorder_num = inverted_post_order_compute (postorder);
  for (int i = 0; i < postorder_num; ++i)
    {
      bb = BASIC_BLOCK_FOR_FN (cfun, postorder[i]);
      if (bb == EXIT_BLOCK_PTR_FOR_FN (cfun)
          || bb == ENTRY_BLOCK_PTR_FOR_FN (cfun))
        continue;
      *qin++ = bb;
      bb->aux = bb;
    }
  free (postorder);

  /* Note that we do not use the last allocated element for our queue,
     as EXIT_BLOCK is never inserted into it.  */
  qin = worklist;
  qend = &worklist[n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS];
  qlen = n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS;

  /* Iterate until the worklist is empty.  */
  while (qlen)
    {
      /* Take the first entry off the worklist.  */
      bb = *qout++;
      bb->aux = NULL;
      qlen--;
      if (qout >= qend)
        qout = worklist;

      /* Compute the intersection of LATERIN for each incoming edge to B.  */
      bitmap_ones (laterin[bb->index]);
      FOR_EACH_EDGE (e, ei, bb->preds)
        bitmap_and (laterin[bb->index], laterin[bb->index],
                    later[(size_t) e->aux]);

      /* Calculate LATER for all outgoing edges.  */
      FOR_EACH_EDGE (e, ei, bb->succs)
        if (bitmap_ior_and_compl (later[(size_t) e->aux],
                                  earliest[(size_t) e->aux],
                                  laterin[bb->index],
                                  antloc[bb->index])
            /* If LATER for an outgoing edge was changed, then we need
               to add the target of the outgoing edge to the worklist.  */
            && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
            && e->dest->aux == 0)
          {
            *qin++ = e->dest;
            e->dest->aux = e;
            qlen++;
            if (qin >= qend)
              qin = worklist;
          }
    }

  /* Computation of insertion and deletion points requires computing LATERIN
     for the EXIT block.  We allocated an extra entry in the LATERIN array
     for just this purpose.  */
  bitmap_ones (laterin[last_basic_block_for_fn (cfun)]);
  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    bitmap_and (laterin[last_basic_block_for_fn (cfun)],
                laterin[last_basic_block_for_fn (cfun)],
                later[(size_t) e->aux]);

  clear_aux_for_edges ();
  free (worklist);
}
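/* For reference, a sketch of the LCM "later" system that
   compute_laterin solves (this summary is editorial, not from the
   original source).  For an edge e leaving block src (e):

     LATERIN (bb) = intersection over incoming edges e of LATER (e)
     LATER (e)    = EARLIEST (e)
                    | (LATERIN (src (e)) & ~ANTLOC (src (e)))

   These correspond to the bitmap_and and bitmap_ior_and_compl calls
   in the main loop; edges out of ENTRY are pinned to their EARLIEST
   value rather than the optimistic all-ones initialization.  */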
static void
compute_antinout_edge (sbitmap *antloc, sbitmap *transp, sbitmap *antin,
                       sbitmap *antout)
{
  basic_block bb;
  edge e;
  basic_block *worklist, *qin, *qout, *qend;
  unsigned int qlen;
  edge_iterator ei;

  /* Allocate a worklist array/queue.  Entries are only added to the
     list if they were not already on the list.  So the size is
     bounded by the number of basic blocks.  */
  qin = qout = worklist = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun));

  /* We want a maximal solution, so make an optimistic initialization of
     ANTIN.  */
  bitmap_vector_ones (antin, last_basic_block_for_fn (cfun));

  /* Put every block on the worklist; this is necessary because of the
     optimistic initialization of ANTIN above.  */
  int *postorder = XNEWVEC (int, n_basic_blocks_for_fn (cfun));
  int postorder_num = post_order_compute (postorder, false, false);
  for (int i = 0; i < postorder_num; ++i)
    {
      bb = BASIC_BLOCK_FOR_FN (cfun, postorder[i]);
      *qin++ = bb;
      bb->aux = bb;
    }
  free (postorder);

  qin = worklist;
  qend = &worklist[n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS];
  qlen = n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS;

  /* Mark blocks which are predecessors of the exit block so that we
     can easily identify them below.  */
  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    e->src->aux = EXIT_BLOCK_PTR_FOR_FN (cfun);

  /* Iterate until the worklist is empty.  */
  while (qlen)
    {
      /* Take the first entry off the worklist.  */
      bb = *qout++;
      qlen--;

      if (qout >= qend)
        qout = worklist;

      if (bb->aux == EXIT_BLOCK_PTR_FOR_FN (cfun))
        /* Do not clear the aux field for blocks which are predecessors of
           the EXIT block.  That way we never add them to the worklist
           again.  */
        bitmap_clear (antout[bb->index]);
      else
        {
          /* Clear the aux field of this block so that it can be added to
             the worklist again if necessary.  */
          bb->aux = NULL;
          bitmap_intersection_of_succs (antout[bb->index], antin, bb);
        }

      if (bitmap_or_and (antin[bb->index], antloc[bb->index],
                         transp[bb->index], antout[bb->index]))
        /* If the in state of this block changed, then we need to add the
           predecessors of this block to the worklist if they are not
           already on the worklist.  */
        FOR_EACH_EDGE (e, ei, bb->preds)
          if (!e->src->aux && e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun))
            {
              *qin++ = e->src;
              e->src->aux = e;
              qlen++;
              if (qin >= qend)
                qin = worklist;
            }
    }

  clear_aux_for_edges ();
  clear_aux_for_blocks ();
  free (worklist);
}
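/* For reference, a sketch of the backward dataflow system that
   compute_antinout_edge solves (this summary is editorial, not from
   the original source):

     ANTOUT (bb) = intersection over succs s of ANTIN (s)
                   (the empty set when EXIT is a successor)
     ANTIN (bb)  = ANTLOC (bb) | (TRANSP (bb) & ANTOUT (bb))

   The ANTIN update is exactly the bitmap_or_and call in the loop
   above; a changed ANTIN requeues the block's predecessors.  */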