/** Write just the specified regions back to the file */ void SgAsmGenericSection::unparse(std::ostream &f, const ExtentMap &map) const { for (ExtentMap::const_iterator i=map.begin(); i!=map.end(); ++i) { Extent e = i->first; assert(e.first()+e.size() <= get_size()); const unsigned char *extent_data; if (e.first() >= p_data.size()) { extent_data = NULL; } else if (e.first() + e.size() > p_data.size()) { extent_data = &p_data[e.first()]; } else { extent_data = &p_data[e.first()]; } if (extent_data) write(f, e.first(), e.size(), extent_data); } }
void SgAsmGenericFile::shift_extend(SgAsmGenericSection *s, rose_addr_t sa, rose_addr_t sn, AddressSpace space, Elasticity elasticity) { ROSE_ASSERT(s!=NULL); ROSE_ASSERT(s->get_file()==this); ROSE_ASSERT((space & (ADDRSP_FILE|ADDRSP_MEMORY)) != 0); const bool debug = false; static size_t ncalls=0; char p[256]; if (debug) { const char *space_s="unknown"; if (space & ADDRSP_FILE) { space_s = "file"; } else if (space & ADDRSP_MEMORY) { space_s = "memory"; } sprintf(p, "SgAsmGenericFile::shift_extend[%" PRIuPTR "]: ", ncalls++); fprintf(stderr, "%s -- START --\n", p); fprintf(stderr, "%s S = [%d] \"%s\"\n", p, s->get_id(), s->get_name()->get_string(true).c_str()); fprintf(stderr, "%s %s Sa=0x%08" PRIx64 " (%" PRIu64 "), Sn=0x%08" PRIx64 " (%" PRIu64 ")\n", p, space_s, sa, sa, sn, sn); fprintf(stderr, "%s elasticity = %s\n", p, (ELASTIC_NONE==elasticity ? "none" : ELASTIC_UNREF==elasticity ? "unref" : ELASTIC_HOLE==elasticity ? "unref+holes" : "unknown")); } /* No-op case */ if (0==sa && 0==sn) { if (debug) { fprintf(stderr, "%s No change necessary.\n", p); fprintf(stderr, "%s -- END --\n", p); } return; } bool filespace = (space & ADDRSP_FILE)!=0; bool memspace = (space & ADDRSP_MEMORY)!=0; rose_addr_t align=1, aligned_sa, aligned_sasn; SgAsmGenericSectionPtrList neighbors, villagers; ExtentMap amap; /* address mappings for all extents */ Extent sp; /* Get a list of all sections that may need to be adjusted. */ SgAsmGenericSectionPtrList all; switch (elasticity) { case ELASTIC_NONE: case ELASTIC_UNREF: all = filespace ? get_sections() : get_mapped_sections(); break; case ELASTIC_HOLE: all = filespace ? 
get_sections(false) : get_mapped_sections(); break; } if (debug) { fprintf(stderr, "%s Following sections are in 'all' set:\n", p); for (size_t i=0; i<all.size(); i++) { Extent ep; if (filespace) { ep = all[i]->get_file_extent(); } else { ROSE_ASSERT(all[i]->is_mapped()); ep = all[i]->get_mapped_preferred_extent(); } fprintf(stderr, "%s 0x%08" PRIx64 " 0x%08" PRIx64 " 0x%08" PRIx64 " [%d] \"%s\"\n", p, ep.relaxed_first(), ep.size(), ep.relaxed_first()+ep.size(), all[i]->get_id(), all[i]->get_name()->get_string(true).c_str()); } } for (size_t pass=0; pass<2; pass++) { if (debug) { fprintf(stderr, "%s -- %s --\n", p, 0==pass?"FIRST PASS":"******"); } /* S offset and size in file or memory address space */ if (filespace) { sp = s->get_file_extent(); } else if (!memspace || !s->is_mapped()) { return; /*nothing to do*/ } else { sp = s->get_mapped_preferred_extent(); } /* Build address map */ for (size_t i=0; i<all.size(); i++) { if (filespace) { amap.insert(all[i]->get_file_extent()); } else { ROSE_ASSERT(all[i]->is_mapped()); amap.insert(all[i]->get_mapped_preferred_extent()); } } if (debug) { fprintf(stderr, "%s Address map:\n", p); amap.dump_extents(stderr, (std::string(p)+" ").c_str(), "amap"); fprintf(stderr, "%s Extent of S:\n", p); fprintf(stderr, "%s start=0x%08" PRIx64 " size=0x%08" PRIx64 " end=0x%08" PRIx64 "\n", p, sp.relaxed_first(), sp.size(), sp.relaxed_first()+sp.size()); } /* Neighborhood (nhs) of S is a single extent. However, if S is zero size then nhs might be empty. The neighborhood of * S is S plus all sections that overlap with S and all sections that are right-contiguous with S. 
*/ ExtentMap nhs_map; for (ExtentMap::iterator amapi=amap.begin(); amapi!=amap.end(); ++amapi) { if (amapi->first.relaxed_first() <= sp.relaxed_first()+sp.size() && amapi->first.relaxed_first()+amapi->first.size() > sp.relaxed_first()) nhs_map.insert(amapi->first, amapi->second); } if (debug) { fprintf(stderr, "%s Neighborhood of S:\n", p); nhs_map.dump_extents(stderr, (std::string(p)+" ").c_str(), "nhs_map"); } Extent nhs; if (nhs_map.size()>0) { assert(nhs_map.nranges()==1); nhs = nhs_map.begin()->first; } else { nhs = sp; } /* What sections are in the neighborhood (including S), and right of the neighborhood? */ neighbors.clear(); /*sections in neighborhood*/ neighbors.push_back(s); villagers.clear(); /*sections right of neighborhood*/ if (debug) fprintf(stderr, "%s Ignoring left (L) sections:\n", p); for (size_t i=0; i<all.size(); i++) { SgAsmGenericSection *a = all[i]; if (a==s) continue; /*already pushed onto neighbors*/ Extent ap; if (filespace) { ap = a->get_file_extent(); } else if (!a->is_mapped()) { continue; } else { ap = a->get_mapped_preferred_extent(); } switch (ExtentMap::category(ap, nhs)) { case 'L': if (debug) fprintf(stderr, "%s L 0x%08" PRIx64 " 0x%08" PRIx64 " 0x%08" PRIx64 " [%d] \"%s\"\n", p, ap.relaxed_first(), ap.size(), ap.relaxed_first()+ap.size(), a->get_id(), a->get_name()->get_string(true).c_str()); break; case 'R': if (ap.relaxed_first()==nhs.relaxed_first()+nhs.size() && 0==ap.size()) { /* Empty sections immediately right of the neighborhood of S should actually be considered part of the * neighborhood rather than right of it. */ neighbors.push_back(a); } else if (elasticity!=ELASTIC_NONE) { /* If holes are elastic then treat things right of the hole as being part of the right village; otherwise * add those sections to the neighborhood of S even though they fall outside 'nhs' (it's OK because this * partitioning of sections is the only thing we use 'nhs' for anyway. 
*/ villagers.push_back(a); } else if ('L'==ExtentMap::category(ap, sp)) { /*ignore sections left of S*/ } else { neighbors.push_back(a); } break; default: if ('L'!=ExtentMap::category(ap, sp)) /*ignore sections left of S*/ neighbors.push_back(a); break; } } if (debug) { fprintf(stderr, "%s Neighbors:\n", p); for (size_t i=0; i<neighbors.size(); i++) { SgAsmGenericSection *a = neighbors[i]; Extent ap = filespace ? a->get_file_extent() : a->get_mapped_preferred_extent(); rose_addr_t align = filespace ? a->get_file_alignment() : a->get_mapped_alignment(); char cat = ExtentMap::category(ap, sp); fprintf(stderr, "%s %c %c0x%08" PRIx64 " 0x%08" PRIx64 " 0x%08" PRIx64, p, cat, 0==ap.relaxed_first() % (align?align:1) ? ' ' : '!', ap.relaxed_first(), ap.size(), ap.relaxed_first()+ap.size()); if (strchr("RICE", cat)) { fprintf(stderr, " align=0x%08" PRIx64, align); } else { fputs(" ", stderr); } fprintf(stderr, " [%2d] \"%s\"\n", a->get_id(), a->get_name()->get_string(true).c_str()); } if (villagers.size()>0) fprintf(stderr, "%s Villagers:\n", p); for (size_t i=0; i<villagers.size(); i++) { SgAsmGenericSection *a = villagers[i]; Extent ap = filespace ? a->get_file_extent() : a->get_mapped_preferred_extent(); rose_addr_t align = filespace ? a->get_file_alignment() : a->get_mapped_alignment(); fprintf(stderr, "%s %c %c0x%08" PRIx64 " 0x%08" PRIx64 " 0x%08" PRIx64, p, ExtentMap::category(ap, sp), /*cat should always be R*/ 0==ap.relaxed_first() % (align?align:1) ? ' ' : '!', ap.relaxed_first(), ap.size(), ap.relaxed_first()+ap.size()); fputs(" ", stderr); fprintf(stderr, " [%2d] \"%s\"\n", a->get_id(), a->get_name()->get_string(true).c_str()); } } /* Adjust Sa to satisfy all alignment constraints in neighborhood(S) for sections that will move (cats R, I, C, and E). */ align = 1; for (size_t i=0; i<neighbors.size(); i++) { SgAsmGenericSection *a = neighbors[i]; Extent ap = filespace ? 
a->get_file_extent() : a->get_mapped_preferred_extent(); if (strchr("RICE", ExtentMap::category(ap, sp))) { rose_addr_t x = filespace ? a->get_file_alignment() : a->get_mapped_alignment(); #if BOOST_VERSION < 106900 align = boost::math::lcm(align, x?x:1); // deprecated in boost-1.69.0 #else align = boost::integer::lcm(align, x?x:1); // not present before boost-1.60.0 #endif } } aligned_sa = (sa/align + (sa%align?1:0))*align; aligned_sasn = ((sa+sn)/align + ((sa+sn)%align?1:0))*align; if (debug) { fprintf(stderr, "%s Alignment LCM = 0x%08" PRIx64 " (%" PRIu64 ")\n", p, align, align); fprintf(stderr, "%s Aligned Sa = 0x%08" PRIx64 " (%" PRIu64 ")\n", p, aligned_sa, aligned_sa); fprintf(stderr, "%s Aligned Sa+Sn = 0x%08" PRIx64 " (%" PRIu64 ")\n", p, aligned_sasn, aligned_sasn); } /* Are there any sections to the right of neighborhood(S)? If so, find the one with the lowest start address and use * that to define the size of the hole right of neighborhood(S). */ if (0==villagers.size()) break; SgAsmGenericSection *after_hole = NULL; Extent hp(0, 0); for (size_t i=0; i<villagers.size(); i++) { SgAsmGenericSection *a = villagers[i]; Extent ap = filespace ? a->get_file_extent() : a->get_mapped_preferred_extent(); if (!after_hole || ap.relaxed_first()<hp.relaxed_first()) { after_hole = a; hp = ap; } } ROSE_ASSERT(after_hole); ROSE_ASSERT(hp.relaxed_first() > nhs.relaxed_first()+nhs.size()); rose_addr_t hole_size = hp.relaxed_first() - (nhs.relaxed_first()+nhs.size()); if (debug) { fprintf(stderr, "%s hole size = 0x%08" PRIx64 " (%" PRIu64 "); need 0x%08" PRIx64 " (%" PRIu64 "); %s\n", p, hole_size, hole_size, aligned_sasn, aligned_sasn, hole_size>=aligned_sasn ? "large enough" : "not large enough"); } if (hole_size >= aligned_sasn) break; rose_addr_t need_more = aligned_sasn - hole_size; /* Hole is not large enough. We need to recursively move things that are right of our neighborhood, then recompute the * all-sections address map and neighborhood(S). 
*/ ROSE_ASSERT(0==pass); /*logic problem since the recursive call should have enlarged the hole enough*/ if (debug) { fprintf(stderr, "%s Calling recursively to increase hole size by 0x%08" PRIx64 " (%" PRIu64 ") bytes\n", p, need_more, need_more); } shift_extend(after_hole, need_more, 0, space, elasticity); if (debug) fprintf(stderr, "%s Returned from recursive call\n", p); } /* Consider sections that are in the same neighborhood as S */ if (debug) fprintf(stderr, "%s -- ADJUSTING --\n", p); bool resized_mem = false; for (size_t i=0; i<neighbors.size(); i++) { SgAsmGenericSection *a = neighbors[i]; Extent ap = filespace ? a->get_file_extent() : a->get_mapped_preferred_extent(); switch (ExtentMap::category(ap, sp)) { case 'L': break; case 'R': if (filespace) { a->set_offset(a->get_offset()+aligned_sasn); } else { a->set_mapped_preferred_rva(a->get_mapped_preferred_rva()+aligned_sasn); } break; case 'C': /*including S itself*/ case 'E': if (filespace) { a->set_offset(a->get_offset()+aligned_sa); a->set_size(a->get_size()+sn); if (memspace && !resized_mem && a->is_mapped()) { shift_extend(a, 0, sn, ADDRSP_MEMORY, elasticity); resized_mem = true; } } else { a->set_mapped_preferred_rva(a->get_mapped_preferred_rva()+aligned_sa); a->set_mapped_size(a->get_mapped_size()+sn); } break; case 'O': if (ap.relaxed_first()==sp.relaxed_first()) { if (filespace) { a->set_offset(a->get_offset()+aligned_sa); a->set_size(a->get_size()+sn); } else { a->set_mapped_preferred_rva(a->get_mapped_preferred_rva()+aligned_sa); a->set_mapped_size(a->get_mapped_size()+sn); } } else { if (filespace) { a->set_size(a->get_size()+aligned_sasn); if (memspace && !resized_mem && a->is_mapped()) { shift_extend(a, 0, aligned_sasn, ADDRSP_MEMORY, elasticity); resized_mem = true; } } else { a->set_mapped_size(a->get_mapped_size()+aligned_sasn); } } break; case 'I': if (filespace) { a->set_offset(a->get_offset()+aligned_sa); } else { a->set_mapped_preferred_rva(a->get_mapped_preferred_rva()+aligned_sa); } 
break; case 'B': if (filespace) { a->set_size(a->get_size()+sn); if (memspace && !resized_mem && a->is_mapped()) { shift_extend(a, 0, sn, ADDRSP_MEMORY, elasticity); resized_mem = true; } } else { a->set_mapped_size(a->get_size()+sn); } break; default: ROSE_ASSERT(!"invalid extent category"); break; } if (debug) { const char *space_name = filespace ? "file" : "mem"; rose_addr_t x = filespace ? a->get_file_alignment() : a->get_mapped_alignment(); fprintf(stderr, "%s %4s-%c %c0x%08" PRIx64 " 0x%08" PRIx64 " 0x%08" PRIx64, p, space_name, ExtentMap::category(ap, sp), 0==ap.relaxed_first()%(x?x:1)?' ':'!', ap.relaxed_first(), ap.size(), ap.relaxed_first()+ap.size()); Extent newap = filespace ? a->get_file_extent() : a->get_mapped_preferred_extent(); fprintf(stderr, " -> %c0x%08" PRIx64 " 0x%08" PRIx64 " 0x%08" PRIx64, 0==newap.relaxed_first()%(x?x:1)?' ':'!', newap.relaxed_first(), newap.size(), newap.relaxed_first()+newap.size()); fprintf(stderr, " [%2d] \"%s\"\n", a->get_id(), a->get_name()->get_string(true).c_str()); } } if (debug) fprintf(stderr, "%s -- END --\n", p); }
/** Converts an ExtentMap into an AddressIntervalSet.
 *
 *  Every extent key stored in @p x is converted with toAddressInterval() and inserted into the result; the values
 *  associated with the extents are not consulted.
 *
 *  @param x  Extent map whose keys are to be converted.
 *  @return   Set containing the converted interval of each extent in @p x.
 */
AddressIntervalSet
toAddressIntervalSet(const ExtentMap &x)
{
    AddressIntervalSet intervals;
    ExtentMap::const_iterator cur = x.begin();
    const ExtentMap::const_iterator stop = x.end();
    while (cur != stop) {
        intervals.insert(toAddressInterval(cur->first));
        ++cur;
    }
    return intervals;
}
/** Computes statistics describing how much a region of addresses looks like code.
 *
 *  Starting at the lowest address not yet examined, instructions are obtained with find_instruction() and followed
 *  recursively through their successors (computed per instruction, not per basic block) for as long as they stay
 *  within @p addresses.  While doing so the function counts disassembly failures, overlapping instructions,
 *  incomplete successor sets, and the various kinds of branches, and builds an undirected local control flow graph
 *  whose connected components are counted at the end.  All counts are recorded as samples in the returned object
 *  and compute_ratios() is invoked on it before returning.
 *
 *  @param addresses  The set of addresses to analyze.
 *  @return           The object obtained from new_region_stats(), never null (asserted).  If @p addresses is empty
 *                    the object is returned without any samples added.
 *                    NOTE(review): ownership of the returned pointer is not visible here -- confirm with callers.
 */
Partitioner::RegionStats *
Partitioner::region_statistics(const ExtentMap &addresses)
{
    RegionStats *stats = new_region_stats();
    assert(stats!=NULL);
    size_t nbytes = addresses.size();
    if (0==nbytes)
        return stats;
    stats->add_sample(RegionStats::RA_NBYTES, nbytes);
    ExtentMap not_addresses = addresses.invert<ExtentMap>();

    Disassembler::AddressSet worklist;          // addresses waiting to be disassembled recursively
    InstructionMap insns_found;                 // all the instructions we found herein
    ExtentMap insns_extent;                     // memory used by the instructions we've found
    ExtentMap pending = addresses;              // addresses we haven't looked at yet

    /* Undirected local control flow graph used to count connected components */
    typedef boost::adjacency_list<boost::vecS, boost::vecS, boost::undirectedS> CFG;
    typedef boost::graph_traits<CFG>::vertex_descriptor CFGVertex;
    typedef std::map<rose_addr_t, CFGVertex> Addr2Vertex;
    CFG cfg;
    Addr2Vertex va2id;

    /* Statistics */
    size_t nstarts=0;                           // number of times the recursive disassembler was started
    size_t nfails=0;                            // number of disassembler failures
    size_t noverlaps=0;                         // instructions overlapping with a previously found instruction
    size_t nincomplete=0;                       // number of instructions with unknown successors
    size_t nfallthrough=0;                      // number of branches to fall-through address within our "addresses"
    size_t ncalls=0;                            // number of function calls outside our "addresses"
    size_t nnoncalls=0;                         // number of branches to non-functions outside our "addresses"
    size_t ninternal=0;                         // number of non-fallthrough internal branches

    while (!pending.empty()) {
        rose_addr_t start_va = pending.min();
        worklist.insert(start_va);
        ++nstarts;

        while (!worklist.empty()) {
            rose_addr_t va = *worklist.begin();
            worklist.erase(worklist.begin());

            /* Obtain (disassemble) the instruction and make sure it falls entirely within the "addresses" */
            Instruction *insn = find_instruction(va);
            if (!insn) {
                ++nfails;
                pending.erase(Extent(va));
                continue;
            }
            Extent ie(va, insn->get_size());
            if (not_addresses.overlaps(ie)) {
                ++nfails;
                pending.erase(Extent(va));
                continue;
            }

            /* The disassembler can also return an "unknown" instruction when failing, depending on how it is
             * invoked. */
            if (insn->node->is_unknown()) {
                ++nfails;
                pending.erase(Extent(va, insn->get_size()));
                continue;
            }

            insns_found.insert(std::make_pair(va, insn));
            rose_addr_t fall_through_va = va + insn->get_size();

            /* Does this instruction overlap with any we've already found? */
            if (insns_extent.overlaps(ie))
                ++noverlaps;
            pending.erase(Extent(va, insn->get_size()));
            insns_extent.insert(ie);

            /* Find instruction successors by looking only at the instruction itself.  This is simpler, but less
             * rigorous method than finding successors a basic block at a time.  For instance, we'll find both sides
             * of a branch instruction even if the more rigorous method determined that one side or the other is
             * always taken.  But this is probably what we want here anyway for determining whether something looks
             * like code. */
            bool complete;
            Disassembler::AddressSet succs = insn->get_successors(&complete);
            if (!complete)
                ++nincomplete;

            /* Add instruction as vertex to CFG.  Vertex IDs are handed out sequentially (the current map size), so
             * a freshly added graph vertex is expected to carry exactly the ID just stored in the map. */
            std::pair<Addr2Vertex::iterator, bool> inserted = va2id.insert(std::make_pair(va, va2id.size()));
            if (inserted.second) {
                CFGVertex vertex __attribute__((unused)) = add_vertex(cfg);
                assert(vertex==inserted.first->second);
            }

            /* Classify the various successors. */
            for (Disassembler::AddressSet::const_iterator si=succs.begin(); si!=succs.end(); ++si) {
                rose_addr_t succ_va = *si;

                if (succ_va==fall_through_va) {
                    ++nfallthrough;
                    if (pending.find(succ_va)!=pending.end())
                        worklist.insert(succ_va);
                    /* Add edge to CFG graph */
                    va2id.insert(std::make_pair(succ_va, va2id.size()));
                    add_edge(va2id[va], va2id[succ_va], cfg);

                } else if (addresses.find(succ_va)==addresses.end()) {
                    /* A non-fallthrough branch to something outside this memory region */
                    if (functions.find(succ_va)!=functions.end()) {
                        /* A branch to a function entry point we've previously discovered. */
                        ++ncalls;
                    } else {
                        ++nnoncalls;
                    }

                } else {
                    /* A non-fallthrough branch to something in our address range. */
                    ++ninternal;
                    if (pending.find(succ_va)!=pending.end())
                        worklist.insert(succ_va);

                    /* Add edge to CFG graph */
                    va2id.insert(std::make_pair(succ_va, va2id.size()));
                    add_edge(va2id[va], va2id[succ_va], cfg);
                }
            }
        }
    }

    /* Statistics */
    stats->add_sample(RegionStats::RA_NFAILS, nfails);
    stats->add_sample(RegionStats::RA_NINSNS, insns_found.size());
    stats->add_sample(RegionStats::RA_NOVERLAPS, noverlaps);
    stats->add_sample(RegionStats::RA_NSTARTS, nstarts);
    stats->add_sample(RegionStats::RA_NCOVERAGE, insns_extent.size());
    stats->add_sample(RegionStats::RA_NINCOMPLETE, nincomplete);
    stats->add_sample(RegionStats::RA_NBRANCHES, ncalls+nnoncalls+ninternal);
    stats->add_sample(RegionStats::RA_NCALLS, ncalls);
    stats->add_sample(RegionStats::RA_NNONCALLS, nnoncalls);
    stats->add_sample(RegionStats::RA_NINTERNAL, ninternal);
    stats->add_sample(RegionStats::RA_NICFGEDGES, ninternal + nfallthrough);
    stats->add_sample(RegionStats::RA_NIUNIQUE, count_kinds(insns_found));
    stats->add_sample(RegionStats::RA_NPRIV, count_privileged(insns_found));
    stats->add_sample(RegionStats::RA_NFLOAT, count_floating_point(insns_found));

    /* count_registers() fills in regsz and regvar through the pointers; they are sampled right afterwards. */
    double regsz, regvar;
    stats->add_sample(RegionStats::RA_NREGREFS, count_registers(insns_found, &regsz, &regvar));
    stats->add_sample(RegionStats::RA_REGSZ, regsz);
    stats->add_sample(RegionStats::RA_REGVAR, regvar);

    /* Count the number of connected components in the undirected CFG */
    if (!va2id.empty()) {
        std::vector<int> component(num_vertices(cfg));
        stats->add_sample(RegionStats::RA_NCOMPS, connected_components(cfg, &component[0]));
    }

    stats->compute_ratios();
    return stats;
}