void input_bb (struct lto_input_block *ib, enum LTO_tags tag, struct data_in *data_in, struct function *fn, int count_materialization_scale) { unsigned int index; basic_block bb; gimple_stmt_iterator bsi; /* This routine assumes that CFUN is set to FN, as it needs to call basic GIMPLE routines that use CFUN. */ gcc_assert (cfun == fn); index = streamer_read_uhwi (ib); bb = BASIC_BLOCK_FOR_FN (fn, index); bb->count = apply_scale (streamer_read_gcov_count (ib), count_materialization_scale); bb->frequency = streamer_read_hwi (ib); bb->flags = streamer_read_hwi (ib); /* LTO_bb1 has statements. LTO_bb0 does not. */ if (tag == LTO_bb0) return; bsi = gsi_start_bb (bb); tag = streamer_read_record_start (ib); while (tag) { gimple *stmt = input_gimple_stmt (ib, data_in, tag); gsi_insert_after (&bsi, stmt, GSI_NEW_STMT); /* After the statement, expect a 0 delimiter or the EH region that the previous statement belongs to. */ tag = streamer_read_record_start (ib); lto_tag_check_set (tag, 2, LTO_eh_region, LTO_null); if (tag == LTO_eh_region) { HOST_WIDE_INT region = streamer_read_hwi (ib); gcc_assert (region == (int) region); add_stmt_to_eh_lp (stmt, region); } tag = streamer_read_record_start (ib); } tag = streamer_read_record_start (ib); while (tag) { input_phi (ib, bb, data_in, fn); tag = streamer_read_record_start (ib); } }
/* Read a PHI node for block BB of function FN from IB and return it.
   DATA_IN is used when reading the argument trees and locations.  One
   argument is read for each predecessor edge of BB.  */

static gphi *
input_phi (struct lto_input_block *ib, basic_block bb,
           struct data_in *data_in, struct function *fn)
{
  /* The PHI result is streamed as an index into FN's SSA name table.  */
  unsigned HOST_WIDE_INT ssa_ix = streamer_read_uhwi (ib);
  tree lhs = (*SSANAMES (fn))[ssa_ix];
  int n_preds = EDGE_COUNT (bb->preds);
  gphi *phi = create_phi_node (lhs, bb);

  /* The predecessors of the rebuilt CFG are generally not in the order
     they had in the original program, so each argument names its source
     block and the matching incoming edge is looked up.  */
  for (int arg_no = 0; arg_no < n_preds; arg_no++)
    {
      tree def = stream_read_tree (ib, data_in);
      int src_index = streamer_read_uhwi (ib);
      bitpack_d bp = streamer_read_bitpack (ib);
      /* Read the location right away instead of caching it: there is no
         API to get a pointer to the location slot inside the PHI, and
         the PHI may be reallocated.  */
      location_t arg_loc = stream_input_location_now (&bp, data_in);
      basic_block src_bb = BASIC_BLOCK_FOR_FN (fn, src_index);

      /* First predecessor edge whose source is SRC_BB; stays NULL when
         no predecessor matches.  */
      edge incoming = NULL;
      for (int k = 0; k < n_preds && !incoming; k++)
        if (EDGE_PRED (bb, k)->src == src_bb)
          incoming = EDGE_PRED (bb, k);

      add_phi_arg (phi, def, incoming, arg_loc);
    }

  return phi;
}
continue; } else { useblock = gimple_bb (usestmt); } /* Short circuit. Nothing dominates the entry block. */ if (useblock == ENTRY_BLOCK_PTR_FOR_FN (cfun)) { BITMAP_FREE (blocks); return NULL; } bitmap_set_bit (blocks, useblock->index); } commondom = BASIC_BLOCK_FOR_FN (cfun, bitmap_first_set_bit (blocks)); EXECUTE_IF_SET_IN_BITMAP (blocks, 0, j, bi) commondom = nearest_common_dominator (CDI_DOMINATORS, commondom, BASIC_BLOCK_FOR_FN (cfun, j)); BITMAP_FREE (blocks); return commondom; } /* Given EARLY_BB and LATE_BB, two blocks in a path through the dominator tree, return the best basic block between them (inclusive) to place statements. We want the most control dependent block in the shallowest loop nest. If the resulting block is in a shallower loop nest, then use it. Else only use the resulting block if it has significantly lower execution
void func_fma_steering::analyze () { int i, n_blocks, *bb_dfs_preorder; basic_block bb; rtx_insn *insn; bb_dfs_preorder = XNEWVEC (int, last_basic_block_for_fn (cfun)); n_blocks = pre_and_rev_post_order_compute (bb_dfs_preorder, NULL, false); /* Browse the graph of basic blocks looking for FMUL or FMADD/FMSUB instructions. */ for (i = 0; i < n_blocks; i++) { bb = BASIC_BLOCK_FOR_FN (cfun, bb_dfs_preorder[i]); FOR_BB_INSNS (bb, insn) { operand_rr_info *dest_op_info; struct du_chain *chain; unsigned dest_regno; fma_forest *forest; du_head_p head; int i; if (!is_fmul_fmac_insn (insn, true)) continue; /* Search the chain where this instruction is (one of) the root. */ dest_op_info = insn_rr[INSN_UID (insn)].op_info; dest_regno = REGNO (SET_DEST (PATTERN (insn))); for (i = 0; i < dest_op_info->n_chains; i++) { /* The register tracked by this chain does not match the destination register of insn. */ if (dest_op_info->heads[i]->regno != dest_regno) continue; head = dest_op_info->heads[i]; /* The chain was merged in another, find the new head. */ if (!head->first) head = regrename_chain_from_id (head->id); /* Search the chain element for this instruction and, if another FMUL or FMADD/FMSUB instruction was already processed, note the forest of its tree. */ forest = NULL; for (chain = head->first; chain; chain = chain->next_use) { fma_node **fma_slot; if (!is_fmul_fmac_insn (chain->insn, true)) continue; /* This is a use, continue. */ if (chain->loc != &SET_DEST (PATTERN (chain->insn))) continue; if (chain->insn == insn) break; fma_slot = this->m_insn_fma_head_map->get (chain->insn); if (fma_slot && (*fma_slot)->get_children ()) forest = (*fma_slot)->get_forest (); } if (chain) break; } /* We didn't find a chain with a def for this instruction. */ gcc_assert (i < dest_op_info->n_chains); this->analyze_fma_fmul_insn (forest, chain, head); } }
/* Look up the basic block with index N in the current function and pass
   it to debug_bb_slim.  */

DEBUG_FUNCTION void
debug_bb_n_slim (int n)
{
  debug_bb_slim (BASIC_BLOCK_FOR_FN (cfun, n));
}
/* Repair the loop tree of the current function in place.  Loops whose
   header is missing, or is no longer accepted by bb_loop_header_p, are
   unlinked from the tree; flow_loops_find is then re-run on the
   existing structure and the unlinked loops are freed.  When
   CHANGED_BBS is non-NULL, the index of every block whose loop depth
   differs before and after the fix-up is set in it.  Returns the
   difference between the loop count at the end and the loop count
   taken just before flow_loops_find ran.  */

unsigned
fix_loop_structure (bitmap changed_bbs)
{
  basic_block bb;
  struct loop *loop;
  unsigned old_nloops, i;
  int exits_flag = 0;

  timevar_push (TV_LOOP_INIT);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "fix_loop_structure: fixing up loops for function\n");

  /* Exact and fast dominance information is required from here on.  */
  gcc_assert (dom_info_state (CDI_DOMINATORS) == DOM_OK);

  /* Drop recorded exits for now; remember the flag so that it is
     applied again at the end.  */
  if (loops_state_satisfies_p (LOOPS_HAVE_RECORDED_EXITS))
    {
      release_recorded_exits (cfun);
      exits_flag = LOOPS_HAVE_RECORDED_EXITS;
    }

  /* Stash each block's current loop depth in its aux field so that
     blocks whose nesting changed can be recognized afterwards.  */
  if (changed_bbs)
    FOR_EACH_BB_FN (bb, cfun)
      bb->aux = (void *) (size_t) loop_depth (bb->loop_father);

  /* Unlink dead loops, innermost first.  Working from the inside out
     means the loops nested in a loop being removed are ones we keep, so
     no time is spent relinking loops that get removed later anyway.  */
  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
    {
      /* A loop that still has a valid header stays.  This also catches
         loops that are gone without having been marked for removal.
         ??? With this check, marking loops for removal might not be
         needed at all, and some spurious removals could be avoided.  */
      if (loop->header && bb_loop_header_p (loop->header))
        continue;

      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "fix_loop_structure: removing loop %d\n",
                 loop->num);

      /* Hand every child loop over to the parent of LOOP.  */
      for (struct loop *child = loop->inner; child; child = loop->inner)
        {
          flow_loop_tree_node_remove (child);
          flow_loop_tree_node_add (loop_outer (loop), child);
        }

      /* Remember the old header, then detach the loop.  */
      if (!loop->header)
        {
          gcc_assert (loop->former_header != NULL);
        }
      else
        {
          loop->former_header = loop->header;
        }
      loop->header = NULL;
      flow_loop_tree_node_remove (loop);
    }

  /* Loop count before rediscovery; the return value is relative to
     this.  */
  old_nloops = number_of_loops (cfun);

  /* Re-compute the loop structure in place.  */
  flow_loops_find (current_loops);

  /* Flag the blocks whose loop depth changed and release aux.  */
  if (changed_bbs)
    FOR_EACH_BB_FN (bb, cfun)
      {
        void *depth_now = (void *) (size_t) loop_depth (bb->loop_father);
        if (depth_now != bb->aux)
          bitmap_set_bit (changed_bbs, bb->index);
        bb->aux = NULL;
      }

  /* Finally free the loops that were unlinked above and not revived.  */
  bool any_deleted = false;
  FOR_EACH_VEC_ELT (*get_loops (cfun), i, loop)
    {
      if (!loop || loop->header != NULL)
        continue;

      if (dump_file
          && ((unsigned) loop->former_header->index
              < basic_block_info_for_fn (cfun)->length ()))
        {
          basic_block former_header
            = BASIC_BLOCK_FOR_FN (cfun, loop->former_header->index);
          /* If the old header block still exists, report when the
             original loop was rediscovered or when the old header is
             now part of a newly discovered loop.  In both cases the
             removal should have been avoided.  */
          if (former_header == loop->former_header)
            {
              struct loop *now_in = former_header->loop_father;
              if (now_in->header == former_header)
                fprintf (dump_file, "fix_loop_structure: rediscovered "
                         "removed loop %d as loop %d with old header %d\n",
                         loop->num, now_in->num, former_header->index);
              else if ((unsigned) now_in->num >= old_nloops)
                fprintf (dump_file, "fix_loop_structure: header %d of "
                         "removed loop %d is part of the newly "
                         "discovered loop %d with header %d\n",
                         former_header->index, loop->num,
                         now_in->num, now_in->header->index);
            }
        }

      (*get_loops (cfun))[i] = NULL;
      flow_loop_free (loop);
      any_deleted = true;
    }

  /* Cached scalar evolutions referring to deleted loops are no longer
     valid.  */
  if (any_deleted && scev_initialized_p ())
    scev_reset_htab ();

  loops_state_clear (LOOPS_NEED_FIXUP);

  /* Apply the loop state flags, including recorded exits if they were
     released above.  */
  apply_loop_flags (current_loops->state | exits_flag);

  checking_verify_loop_structure ();

  timevar_pop (TV_LOOP_INIT);

  return number_of_loops (cfun) - old_nloops;
}
static void compute_antinout_edge (sbitmap *antloc, sbitmap *transp, sbitmap *antin, sbitmap *antout) { basic_block bb; edge e; basic_block *worklist, *qin, *qout, *qend; unsigned int qlen; edge_iterator ei; /* Allocate a worklist array/queue. Entries are only added to the list if they were not already on the list. So the size is bounded by the number of basic blocks. */ qin = qout = worklist = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun)); /* We want a maximal solution, so make an optimistic initialization of ANTIN. */ bitmap_vector_ones (antin, last_basic_block_for_fn (cfun)); /* Put every block on the worklist; this is necessary because of the optimistic initialization of ANTIN above. */ int *postorder = XNEWVEC (int, n_basic_blocks_for_fn (cfun)); int postorder_num = post_order_compute (postorder, false, false); for (int i = 0; i < postorder_num; ++i) { bb = BASIC_BLOCK_FOR_FN (cfun, postorder[i]); *qin++ = bb; bb->aux = bb; } free (postorder); qin = worklist; qend = &worklist[n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS]; qlen = n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; /* Mark blocks which are predecessors of the exit block so that we can easily identify them below. */ FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) e->src->aux = EXIT_BLOCK_PTR_FOR_FN (cfun); /* Iterate until the worklist is empty. */ while (qlen) { /* Take the first entry off the worklist. */ bb = *qout++; qlen--; if (qout >= qend) qout = worklist; if (bb->aux == EXIT_BLOCK_PTR_FOR_FN (cfun)) /* Do not clear the aux field for blocks which are predecessors of the EXIT block. That way we never add then to the worklist again. */ bitmap_clear (antout[bb->index]); else { /* Clear the aux field of this block so that it can be added to the worklist again if necessary. 
*/ bb->aux = NULL; bitmap_intersection_of_succs (antout[bb->index], antin, bb); } if (bitmap_or_and (antin[bb->index], antloc[bb->index], transp[bb->index], antout[bb->index])) /* If the in state of this block changed, then we need to add the predecessors of this block to the worklist if they are not already on the worklist. */ FOR_EACH_EDGE (e, ei, bb->preds) if (!e->src->aux && e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun)) { *qin++ = e->src; e->src->aux = e; qlen++; if (qin >= qend) qin = worklist; } } clear_aux_for_edges (); clear_aux_for_blocks (); free (worklist); }
void compute_available (sbitmap *avloc, sbitmap *kill, sbitmap *avout, sbitmap *avin) { edge e; basic_block *worklist, *qin, *qout, *qend, bb; unsigned int qlen; edge_iterator ei; /* Allocate a worklist array/queue. Entries are only added to the list if they were not already on the list. So the size is bounded by the number of basic blocks. */ qin = qout = worklist = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS); /* We want a maximal solution. */ bitmap_vector_ones (avout, last_basic_block_for_fn (cfun)); /* Put every block on the worklist; this is necessary because of the optimistic initialization of AVOUT above. Use inverted postorder to make the dataflow problem require less iterations. */ int *postorder = XNEWVEC (int, n_basic_blocks_for_fn (cfun)); int postorder_num = inverted_post_order_compute (postorder); for (int i = 0; i < postorder_num; ++i) { bb = BASIC_BLOCK_FOR_FN (cfun, postorder[i]); if (bb == EXIT_BLOCK_PTR_FOR_FN (cfun) || bb == ENTRY_BLOCK_PTR_FOR_FN (cfun)) continue; *qin++ = bb; bb->aux = bb; } free (postorder); qin = worklist; qend = &worklist[n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS]; qlen = n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; /* Mark blocks which are successors of the entry block so that we can easily identify them below. */ FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR_FOR_FN (cfun)->succs) e->dest->aux = ENTRY_BLOCK_PTR_FOR_FN (cfun); /* Iterate until the worklist is empty. */ while (qlen) { /* Take the first entry off the worklist. */ bb = *qout++; qlen--; if (qout >= qend) qout = worklist; /* If one of the predecessor blocks is the ENTRY block, then the intersection of avouts is the null set. We can identify such blocks by the special value in the AUX field in the block structure. */ if (bb->aux == ENTRY_BLOCK_PTR_FOR_FN (cfun)) /* Do not clear the aux field for blocks which are successors of the ENTRY block. That way we never add then to the worklist again. 
*/ bitmap_clear (avin[bb->index]); else { /* Clear the aux field of this block so that it can be added to the worklist again if necessary. */ bb->aux = NULL; bitmap_intersection_of_preds (avin[bb->index], avout, bb); } if (bitmap_ior_and_compl (avout[bb->index], avloc[bb->index], avin[bb->index], kill[bb->index])) /* If the out state of this block changed, then we need to add the successors of this block to the worklist if they are not already on the worklist. */ FOR_EACH_EDGE (e, ei, bb->succs) if (!e->dest->aux && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)) { *qin++ = e->dest; e->dest->aux = e; qlen++; if (qin >= qend) qin = worklist; } } clear_aux_for_edges (); clear_aux_for_blocks (); free (worklist); }
static void compute_laterin (struct edge_list *edge_list, sbitmap *earliest, sbitmap *antloc, sbitmap *later, sbitmap *laterin) { int num_edges, i; edge e; basic_block *worklist, *qin, *qout, *qend, bb; unsigned int qlen; edge_iterator ei; num_edges = NUM_EDGES (edge_list); /* Allocate a worklist array/queue. Entries are only added to the list if they were not already on the list. So the size is bounded by the number of basic blocks. */ qin = qout = worklist = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun)); /* Initialize a mapping from each edge to its index. */ for (i = 0; i < num_edges; i++) INDEX_EDGE (edge_list, i)->aux = (void *) (size_t) i; /* We want a maximal solution, so initially consider LATER true for all edges. This allows propagation through a loop since the incoming loop edge will have LATER set, so if all the other incoming edges to the loop are set, then LATERIN will be set for the head of the loop. If the optimistic setting of LATER on that edge was incorrect (for example the expression is ANTLOC in a block within the loop) then this algorithm will detect it when we process the block at the head of the optimistic edge. That will requeue the affected blocks. */ bitmap_vector_ones (later, num_edges); /* Note that even though we want an optimistic setting of LATER, we do not want to be overly optimistic. Consider an outgoing edge from the entry block. That edge should always have a LATER value the same as EARLIEST for that edge. */ FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR_FOR_FN (cfun)->succs) bitmap_copy (later[(size_t) e->aux], earliest[(size_t) e->aux]); /* Add all the blocks to the worklist. This prevents an early exit from the loop given our optimistic initialization of LATER above. 
*/ int *postorder = XNEWVEC (int, n_basic_blocks_for_fn (cfun)); int postorder_num = inverted_post_order_compute (postorder); for (int i = 0; i < postorder_num; ++i) { bb = BASIC_BLOCK_FOR_FN (cfun, postorder[i]); if (bb == EXIT_BLOCK_PTR_FOR_FN (cfun) || bb == ENTRY_BLOCK_PTR_FOR_FN (cfun)) continue; *qin++ = bb; bb->aux = bb; } free (postorder); /* Note that we do not use the last allocated element for our queue, as EXIT_BLOCK is never inserted into it. */ qin = worklist; qend = &worklist[n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS]; qlen = n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; /* Iterate until the worklist is empty. */ while (qlen) { /* Take the first entry off the worklist. */ bb = *qout++; bb->aux = NULL; qlen--; if (qout >= qend) qout = worklist; /* Compute the intersection of LATERIN for each incoming edge to B. */ bitmap_ones (laterin[bb->index]); FOR_EACH_EDGE (e, ei, bb->preds) bitmap_and (laterin[bb->index], laterin[bb->index], later[(size_t)e->aux]); /* Calculate LATER for all outgoing edges. */ FOR_EACH_EDGE (e, ei, bb->succs) if (bitmap_ior_and_compl (later[(size_t) e->aux], earliest[(size_t) e->aux], laterin[bb->index], antloc[bb->index]) /* If LATER for an outgoing edge was changed, then we need to add the target of the outgoing edge to the worklist. */ && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun) && e->dest->aux == 0) { *qin++ = e->dest; e->dest->aux = e; qlen++; if (qin >= qend) qin = worklist; } } /* Computation of insertion and deletion points requires computing LATERIN for the EXIT block. We allocated an extra entry in the LATERIN array for just this purpose. */ bitmap_ones (laterin[last_basic_block_for_fn (cfun)]); FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) bitmap_and (laterin[last_basic_block_for_fn (cfun)], laterin[last_basic_block_for_fn (cfun)], later[(size_t) e->aux]); clear_aux_for_edges (); free (worklist); }
void func_fma_steering::analyze () { int i, n_blocks, *bb_dfs_preorder; basic_block bb; rtx_insn *insn; bb_dfs_preorder = XNEWVEC (int, last_basic_block_for_fn (cfun)); n_blocks = pre_and_rev_post_order_compute (bb_dfs_preorder, NULL, false); /* Browse the graph of basic blocks looking for FMUL or FMADD/FMSUB instructions. */ for (i = 0; i < n_blocks; i++) { bb = BASIC_BLOCK_FOR_FN (cfun, bb_dfs_preorder[i]); FOR_BB_INSNS (bb, insn) { operand_rr_info *dest_op_info; struct du_chain *chain = NULL; unsigned dest_regno; fma_forest *forest = NULL; du_head_p head = NULL; int i; if (!is_fmul_fmac_insn (insn, true)) continue; /* Search the chain where this instruction is (one of) the root. */ dest_op_info = insn_rr[INSN_UID (insn)].op_info; dest_regno = REGNO (SET_DEST (PATTERN (insn))); for (i = 0; i < dest_op_info->n_chains; i++) { /* The register tracked by this chain does not match the destination register of insn. */ if (dest_op_info->heads[i]->regno != dest_regno) continue; head = dest_op_info->heads[i]; /* The chain was merged in another, find the new head. */ if (!head->first) head = regrename_chain_from_id (head->id); /* Search the chain element for this instruction and, if another FMUL or FMADD/FMSUB instruction was already processed, note the forest of its tree. */ forest = NULL; for (chain = head->first; chain; chain = chain->next_use) { fma_node **fma_slot; if (!is_fmul_fmac_insn (chain->insn, true)) continue; /* This is a use, continue. */ if (chain->loc != &SET_DEST (PATTERN (chain->insn))) continue; if (chain->insn == insn) break; fma_slot = this->m_insn_fma_head_map->get (chain->insn); if (fma_slot && (*fma_slot)->get_children ()) forest = (*fma_slot)->get_forest (); } if (chain) break; } /* Due to implementation of regrename, dest register can slip away from regrename's analysis. As a result, there is no chain for the destination register of insn. We simply skip the insn even it is a fmul/fmac instruction. 
This can happen when the dest register is also a source register of insn and one of the below conditions is satisfied: 1) the source reg is setup in larger mode than this insn; 2) the source reg is uninitialized; 3) the source reg is passed in as parameter. */ if (i < dest_op_info->n_chains) this->analyze_fma_fmul_insn (forest, chain, head); } }