/** Reads content of a section and returns it as a container.
 *
 *  The returned container always has exactly @p size bytes. If @p size bytes are not available in this section at the
 *  specified offset then the container is zero padded (the underlying read is performed in non-strict mode).
 *
 *  @param rel_offset  Offset of the first byte to read, relative to the start of this section.
 *  @param size        Number of bytes to place in the returned container.
 *  @return            Container holding exactly @p size bytes. */
SgUnsignedCharList
SgAsmGenericSection::read_content_local_ucl(rose_addr_t rel_offset, rose_addr_t size)
{
    /* The scratch buffer is owned by a std::vector so that it is released on every exit path, including an exception
     * thrown while the result container is being filled. */
    std::vector<unsigned char> buf(size);

    /* Always hand read_content_local() a valid pointer, even when zero bytes are requested. */
    unsigned char dummy = 0;
    read_content_local(rel_offset, buf.empty() ? &dummy : &buf[0], size, false); /*zero pads; never throws*/

    /* Copy the bytes in one bulk operation rather than one push_back() per byte. */
    SgUnsignedCharList retval;
    retval.assign(buf.begin(), buf.end());
    return retval;
}
/** Initializes this ELF Symbol Section by parsing a file.
 *
 *  After the base-class parse, one SgAsmElfSymbol is created per table entry.  The on-disk layout (32- or 64-bit) is
 *  selected from the file header's word size, and any bytes that follow the fixed-size structure within an entry are
 *  stored as the symbol's "extra" data.
 *
 *  @return this section
 *  @throws FormatError if the header's word size is neither 4 nor 8 (only detected when there is at least one entry) */
SgAsmElfSymbolSection *
SgAsmElfSymbolSection::parse()
{
    SgAsmElfSection::parse();

    SgAsmElfFileHeader *elf_header = get_elf_header();
    ROSE_ASSERT(elf_header!=NULL);
    SgAsmElfSectionTableEntry *table_entry = get_section_entry();
    ROSE_ASSERT(table_entry!=NULL);
    SgAsmElfStringSection *names = dynamic_cast<SgAsmElfStringSection*>(get_linked_section());
    ROSE_ASSERT(names!=NULL);

    size_t stride, fixed_size, trailer_size, nsymbols;
    calculate_sizes(&stride, &fixed_size, &trailer_size, &nsymbols);
    ROSE_ASSERT(stride==table_entry->get_sh_entsize());

    /* Walk the table one entry at a time */
    for (size_t idx=0; idx<nsymbols; ++idx) {
        const size_t entry_offset = idx * stride;
        SgAsmElfSymbol *symbol = NULL;

        switch (elf_header->get_word_size()) {
            case 4: {
                symbol = new SgAsmElfSymbol(this); /*adds symbol to this symbol table*/
                SgAsmElfSymbol::Elf32SymbolEntry_disk raw;
                read_content_local(entry_offset, &raw, fixed_size);
                symbol->parse(elf_header->get_sex(), &raw);
                break;
            }
            case 8: {
                symbol = new SgAsmElfSymbol(this); /*adds symbol to this symbol table*/
                SgAsmElfSymbol::Elf64SymbolEntry_disk raw;
                read_content_local(entry_offset, &raw, fixed_size);
                symbol->parse(elf_header->get_sex(), &raw);
                break;
            }
            default:
                throw FormatError("unsupported ELF word size");
        }

        /* Bytes between the fixed-size structure and the next entry */
        if (trailer_size>0)
            symbol->get_extra() = read_content_local_ucl(entry_offset+fixed_size, trailer_size);
    }
    return this;
}
/** Extract an unsigned LEB128 value and adjust @p rel_offset according to how many bytes it occupied.
 *
 *  If @p strict is set (the default) and the end of the section is reached then an SgAsmExecutableFileFormat::ShortRead
 *  exception is thrown.  Upon return, @p rel_offset points to the first byte after the LEB128 value.
 *
 *  @param rel_offset  In/out: section-relative offset of the first byte of the value; advanced past the value.
 *  @param strict      Passed through to read_content_local() for every byte read.
 *  @return            The decoded value. */
uint64_t
SgAsmGenericSection::read_content_local_uleb128(rose_addr_t *rel_offset, bool strict)
{
    int shift=0;
    uint64_t retval=0;
    while (1) {
        unsigned char byte = 0;
        read_content_local(*rel_offset, &byte, 1, strict);
        *rel_offset += 1;
        ROSE_ASSERT(shift<64);

        /* Widen the 7-bit payload to 64 bits BEFORE shifting.  Without the cast the shift is performed in type int, which
         * is undefined for shift counts >= the width of int and loses the upper bits of any value wider than 31 bits. */
        retval |= static_cast<uint64_t>(byte & 0x7f) << shift;
        shift += 7;

        /* The high bit of each byte is the continuation flag; a clear bit marks the final byte. */
        if (0==(byte & 0x80))
            break;
    }
    return retval;
}
/* Parser.
 *
 * Works in three phases: (1) read every section table entry and construct (but do not parse) the corresponding section
 * object, (2) build the loader memory map for the file header, and (3) parse each of the sections constructed in phase 1.
 * Returns this section table. */
SgAsmPESectionTable*
SgAsmPESectionTable::parse()
{
    SgAsmGenericSection::parse();

    SgAsmPEFileHeader *pe_header = dynamic_cast<SgAsmPEFileHeader*>(get_header());
    ROSE_ASSERT(pe_header!=NULL);

    /* Phase 1: decode the table entries and create section objects whose parsing is deferred. */
    typedef SgAsmPESectionTableEntry::PESectionTableEntry_disk DiskEntry;
    const size_t disk_size = sizeof(DiskEntry);
    SgAsmGenericSectionPtrList deferred;
    for (size_t idx=0; idx<pe_header->get_e_nsections(); ++idx) {
        DiskEntry raw;
        if (read_content_local(idx * disk_size, &raw, disk_size, false) != disk_size)
            fprintf(stderr, "SgAsmPESectionTable::parse: warning: section table entry %" PRIuPTR
                    " at file offset 0x%08" PRIx64 " extends beyond end of defined section table.\n",
                    idx, get_offset()+idx*disk_size);

        SgAsmPESectionTableEntry *table_entry = new SgAsmPESectionTableEntry(&raw);

        /* The import section gets its own class; everything else is a plain PE section. */
        const bool is_import = table_entry->get_name() == ".idata";
        SgAsmPESection *created = is_import
                                  ? static_cast<SgAsmPESection*>(new SgAsmPEImportSection(pe_header))
                                  : new SgAsmPESection(pe_header);
        created->init_from_section_table(table_entry, idx+1);
        deferred.push_back(created);
    }

    /* Phase 2: Build the memory mapping like the real loader would do. This is the same code used by
     * SgAsmExecutableFileFormat::parseBinaryFormat() except we're doing it here early because we need it in the rest of
     * the PE parser. */
    ROSE_ASSERT(NULL==pe_header->get_loader_map());
    BinaryLoader *loader = BinaryLoader::lookup(pe_header); /*no need to clone; we're not changing any settings*/
    ROSE_ASSERT(loader!=NULL);
    MemoryMap *mapping = new MemoryMap;
    loader->remap(mapping, pe_header);
    pe_header->set_loader_map(mapping);

    /* Phase 3: now that the loader map exists, parse the sections in table order. */
    for (size_t k=0; k<deferred.size(); ++k)
        deferred[k]->parse();

    return this;
}
/* Adds the RVA/Size pairs to the end of the PE file header */ void SgAsmPEFileHeader::add_rvasize_pairs() { rose_addr_t pairs_offset = get_size(); rose_addr_t pairs_size = p_e_num_rvasize_pairs * sizeof(SgAsmPERVASizePair::RVASizePair_disk); SgAsmPERVASizePair::RVASizePair_disk pairs_disk; ROSE_ASSERT(p_rvasize_pairs != NULL); ROSE_ASSERT(p_rvasize_pairs->get_pairs().size()==0); p_rvasize_pairs->set_isModified(true); extend(pairs_size); for (size_t i = 0; i < p_e_num_rvasize_pairs; i++, pairs_offset += sizeof pairs_disk) { if (sizeof(pairs_disk)!=read_content_local(pairs_offset, &pairs_disk, sizeof pairs_disk, false)) fprintf(stderr, "SgAsmPEFileHeader::add_rvasize_pairs: warning: RVA/Size pair %zu at file offset 0x%08"PRIx64 " extends beyond the end of file (assuming 0/0)\n", i, get_offset()+pairs_offset); p_rvasize_pairs->get_pairs().push_back(new SgAsmPERVASizePair(p_rvasize_pairs, &pairs_disk)); } }
/** Reads a string from the file.
 *
 *  The string begins at the specified offset relative to the start of this section and continues until the first NUL byte
 *  or the end of section is reached.  However, if @p strict is set (the default) and we reach the end-of-section then an
 *  SgAsmExecutableFileFormat::ShortRead exception is thrown.
 *
 *  @param rel_offset  Section-relative offset of the first character.
 *  @param strict      Passed through to read_content_local() for every byte read.
 *  @return            The characters preceding the terminating NUL (the NUL itself is not included). */
std::string
SgAsmGenericSection::read_content_local_str(rose_addr_t rel_offset, bool strict)
{
    /* Accumulate into a local std::string rather than a function-static realloc() buffer: no state is shared between
     * calls and the storage is released automatically on every exit path, including exceptions. */
    std::string retval;
    while (1) {
        unsigned char byte = 0;
        read_content_local(rel_offset+retval.size(), &byte, 1, strict);
        if (!byte)
            return retval;
        retval += static_cast<char>(byte);
    }
}
/** Initialize the header with information parsed from the file and construct and parse everything that's reachable from the
 *  header.  The PE File Header should have been constructed such that SgAsmPEFileHeader::ctor() was called.
 *
 *  Steps: read and decode the fixed COFF header, read and decode the 32- or 64-bit optional header (zero padding whatever
 *  lies beyond the available data), set file-format properties and the target architecture, append the RVA/Size pairs,
 *  build and parse the section table, parse the COFF symbol table if present, bind RVAs to sections, and finally create
 *  the table sections described by the RVA/Size pairs.
 *
 *  @return this header
 *  @throws FormatError if the PE magic number is wrong or the word size is neither 4 nor 8 */
SgAsmPEFileHeader*
SgAsmPEFileHeader::parse()
{
    SgAsmGenericHeader::parse();

    /* Read header, zero padding if the file isn't large enough */
    PEFileHeader_disk fh;
    if (sizeof(fh)>get_size())
        extend(sizeof(fh)-get_size());
    if (sizeof(fh)!=read_content_local(0, &fh, sizeof fh, false))
        fprintf(stderr, "SgAsmPEFileHeader::parse: warning: short read of PE header at byte 0x%08" PRIx64 "\n", get_offset());

    /* Check magic number before getting too far */
    if (fh.e_magic[0]!='P' || fh.e_magic[1]!='E' || fh.e_magic[2]!='\0' || fh.e_magic[3]!='\0')
        throw FormatError("Bad PE magic number");

    /* Decode COFF file header */
    p_e_cpu_type           = le_to_host(fh.e_cpu_type);
    p_e_nsections          = le_to_host(fh.e_nsections);
    p_e_time               = le_to_host(fh.e_time);
    p_e_coff_symtab        = le_to_host(fh.e_coff_symtab);
    p_e_coff_nsyms         = le_to_host(fh.e_coff_nsyms);
    p_e_nt_hdr_size        = le_to_host(fh.e_nt_hdr_size);
    p_e_flags              = le_to_host(fh.e_flags);

    /* Read the "Optional Header" (optional in the sense that not all files have one, but required for an executable), the
     * size of which is stored in the e_nt_hdr_size of the main PE file header. According to
     * http://www.phreedom.org/solar/code/tinype the Windows loader honors the e_nt_hdr_size even when set to smaller than the
     * smallest possible documented size of the optional header. Also it's possible for the optional header to extend beyond
     * the end of the file, in which case that part should be read as zero. */
    PE32OptHeader_disk oh32;
    rose_addr_t need32 = sizeof(PEFileHeader_disk) + std::min(p_e_nt_hdr_size, (rose_addr_t)(sizeof oh32));
    if (need32>get_size())
        extend(need32-get_size());
    if (sizeof(oh32)!=read_content_local(sizeof fh, &oh32, sizeof oh32, false))
        fprintf(stderr, "SgAsmPEFileHeader::parse: warning: short read of PE Optional Header at byte 0x%08" PRIx64 "\n",
                get_offset() + sizeof(fh));
    p_e_opt_magic = le_to_host(oh32.e_opt_magic);

    /* File format changes from ctor().  Note that every optional-header magic other than 0x010b is treated as 64-bit. */
    p_exec_format->set_purpose(p_e_flags & HF_PROGRAM ? PURPOSE_EXECUTABLE : PURPOSE_LIBRARY);
    p_exec_format->set_word_size(0x010b==p_e_opt_magic? 4 : 8);

    /* Decode the optional header. */
    rose_addr_t entry_rva;
    if (4==p_exec_format->get_word_size()) {
        p_e_lmajor             = le_to_host(oh32.e_lmajor);
        p_e_lminor             = le_to_host(oh32.e_lminor);
        p_e_code_size          = le_to_host(oh32.e_code_size);
        p_e_data_size          = le_to_host(oh32.e_data_size);
        p_e_bss_size           = le_to_host(oh32.e_bss_size);
        entry_rva              = le_to_host(oh32.e_entrypoint_rva);
        p_e_code_rva           = le_to_host(oh32.e_code_rva);
        p_e_data_rva           = le_to_host(oh32.e_data_rva);
        p_base_va              = le_to_host(oh32.e_image_base);
        p_e_section_align      = le_to_host(oh32.e_section_align);
        p_e_file_align         = le_to_host(oh32.e_file_align);
        p_e_os_major           = le_to_host(oh32.e_os_major);
        p_e_os_minor           = le_to_host(oh32.e_os_minor);
        p_e_user_major         = le_to_host(oh32.e_user_major);
        p_e_user_minor         = le_to_host(oh32.e_user_minor);
        p_e_subsys_major       = le_to_host(oh32.e_subsys_major);
        p_e_subsys_minor       = le_to_host(oh32.e_subsys_minor);
        p_e_reserved9          = le_to_host(oh32.e_reserved9);
        p_e_image_size         = le_to_host(oh32.e_image_size);
        p_e_header_size        = le_to_host(oh32.e_header_size);
        p_e_file_checksum      = le_to_host(oh32.e_file_checksum);
        p_e_subsystem          = le_to_host(oh32.e_subsystem);
        p_e_dll_flags          = le_to_host(oh32.e_dll_flags);
        p_e_stack_reserve_size = le_to_host(oh32.e_stack_reserve_size);
        p_e_stack_commit_size  = le_to_host(oh32.e_stack_commit_size);
        p_e_heap_reserve_size  = le_to_host(oh32.e_heap_reserve_size);
        p_e_heap_commit_size   = le_to_host(oh32.e_heap_commit_size);
        p_e_loader_flags       = le_to_host(oh32.e_loader_flags);
        p_e_num_rvasize_pairs  = le_to_host(oh32.e_num_rvasize_pairs);
    } else if (8==p_exec_format->get_word_size()) {
        /* We guessed wrong. This is a 64-bit header, not 32-bit. */
        PE64OptHeader_disk oh64;
        rose_addr_t need64 = sizeof(PEFileHeader_disk) + std::min(p_e_nt_hdr_size, (rose_addr_t)(sizeof oh64));
        if (need64>get_size())
            extend(need64-get_size());

        /* Read in non-strict mode, exactly like the 32-bit header above: the part of the optional header that lies beyond
         * the available data must be read as zero and reported with a warning rather than raising a short-read
         * exception. */
        if (sizeof(oh64)!=read_content_local(sizeof fh, &oh64, sizeof oh64, false))
            fprintf(stderr, "SgAsmPEFileHeader::parse: warning: short read of PE Optional Header at byte 0x%08" PRIx64 "\n",
                    get_offset() + sizeof(fh));

        p_e_lmajor             = le_to_host(oh64.e_lmajor);
        p_e_lminor             = le_to_host(oh64.e_lminor);
        p_e_code_size          = le_to_host(oh64.e_code_size);
        p_e_data_size          = le_to_host(oh64.e_data_size);
        p_e_bss_size           = le_to_host(oh64.e_bss_size);
        entry_rva              = le_to_host(oh64.e_entrypoint_rva);
        p_e_code_rva           = le_to_host(oh64.e_code_rva);
        /* p_e_data_rva        = le_to_host(oh.e_data_rva);   -- not in PE32+ */
        p_base_va              = le_to_host(oh64.e_image_base);
        p_e_section_align      = le_to_host(oh64.e_section_align);
        p_e_file_align         = le_to_host(oh64.e_file_align);
        p_e_os_major           = le_to_host(oh64.e_os_major);
        p_e_os_minor           = le_to_host(oh64.e_os_minor);
        p_e_user_major         = le_to_host(oh64.e_user_major);
        p_e_user_minor         = le_to_host(oh64.e_user_minor);
        p_e_subsys_major       = le_to_host(oh64.e_subsys_major);
        p_e_subsys_minor       = le_to_host(oh64.e_subsys_minor);
        p_e_reserved9          = le_to_host(oh64.e_reserved9);
        p_e_image_size         = le_to_host(oh64.e_image_size);
        p_e_header_size        = le_to_host(oh64.e_header_size);
        p_e_file_checksum      = le_to_host(oh64.e_file_checksum);
        p_e_subsystem          = le_to_host(oh64.e_subsystem);
        p_e_dll_flags          = le_to_host(oh64.e_dll_flags);
        p_e_stack_reserve_size = le_to_host(oh64.e_stack_reserve_size);
        p_e_stack_commit_size  = le_to_host(oh64.e_stack_commit_size);
        p_e_heap_reserve_size  = le_to_host(oh64.e_heap_reserve_size);
        p_e_heap_commit_size   = le_to_host(oh64.e_heap_commit_size);
        p_e_loader_flags       = le_to_host(oh64.e_loader_flags);
        p_e_num_rvasize_pairs  = le_to_host(oh64.e_num_rvasize_pairs);
    } else {
        throw FormatError("unrecognized Windows PE optional header magic number");
    }

    /* Magic number */
    p_magic.clear();
    for (size_t i = 0; i < sizeof(fh.e_magic); ++i)
        p_magic.push_back(fh.e_magic[i]);

    /* File format */
    ROSE_ASSERT(p_e_lmajor <= 0xffff && p_e_lminor <= 0xffff);
    p_exec_format->set_version((p_e_lmajor << 16) | p_e_lminor);
    p_exec_format->set_is_current_version(true); /*FIXME*/

    /* Target architecture */
    switch (p_e_cpu_type) {
        case 0x0000:
            set_isa(ISA_UNSPECIFIED);
            break;
        case 0x014c:
            set_isa(ISA_IA32_386);
            break;
        case 0x014d:
            set_isa(ISA_IA32_486);
            break;
        case 0x014e:
            set_isa(ISA_IA32_Pentium);
            break;
        case 0x0162:
            set_isa(ISA_MIPS_MarkI); /* R2000, R3000 */
            break;
        case 0x0163:
            set_isa(ISA_MIPS_MarkII); /* R6000 */
            break;
        case 0x0166:
            set_isa(ISA_MIPS_MarkIII); /* R4000 */
            break;
        case 0x01a2: /*Hitachi SH3*/
        case 0x01a3: /*Hitachi SH3 with FPU*/
        case 0x01a6: /*Hitachi SH4*/
        case 0x01a8: /*Hitachi SH5*/
            set_isa(ISA_Hitachi_SH);
            break;
        case 0x01c0:
            set_isa(ISA_ARM_Family);
            break;
        case 0x01d3:
            set_isa(ISA_Matsushita_AM33);
            break;
        case 0x01f0: /*w/o FPU*/
        case 0x01f1: /*with FPU*/
            set_isa(ISA_PowerPC);
            break;
        case 0x0200:
            set_isa(ISA_IA64_Family);
            break;
        case 0x0266:
            set_isa(ISA_MIPS_16);
            break;
        case 0x0366:
            set_isa(ISA_MIPS_FPU);
            break;
        case 0x0466:
            set_isa(ISA_MIPS_16FPU);
            break;
        case 0x0ebc:
            set_isa(ISA_EFI_ByteCode);
            break;
        case 0x8664:
            set_isa(ISA_X8664_Family);
            break;
        case 0x9041:
            set_isa(ISA_Mitsubishi_M32R);
            break;
        default:
            fprintf(stderr, "SgAsmPEFileHeader::parse: warning: unrecognized e_cputype = 0x%x (%u)\n",
                    p_e_cpu_type, p_e_cpu_type);
            set_isa(ISA_OTHER);
            break;
    }

    /* The NT loader normally maps this file header at the header's base virtual address. */
    set_mapped_preferred_rva(0);
    set_mapped_actual_va(0);        /* will be assigned by BinaryLoader */
    set_mapped_size(p_e_header_size);
    set_mapped_alignment(0);
    set_mapped_rperm(true);
    set_mapped_wperm(false);
    set_mapped_xperm(false);

    /* Entry point. We will eventually bind the entry point to a particular section (in SgAsmPEFileHeader::parse) so that if
     * sections are rearranged, extended, etc. the entry point will be updated automatically. */
    add_entry_rva(entry_rva);

    /* The PE File Header has a fixed-size component followed by some number of RVA/Size pairs. The add_rvasize_pairs() will
     * extend the header and parse the RVA/Size pairs. */
    if (get_e_num_rvasize_pairs() > 1000) {
        fprintf(stderr, "warning: PE File Header contains an unreasonable number of Rva/Size pairs. Limiting to 1000.\n");
        set_e_num_rvasize_pairs(1000);
    }
    add_rvasize_pairs();

    /* Construct the section table and its sections (non-synthesized sections). The specification says that the section table
     * comes after the optional (NT) header, which in turn comes after the fixed part of the PE header. The size of the
     * optional header is indicated in the fixed header. */
    rose_addr_t secttab_offset = get_offset() + sizeof(PEFileHeader_disk) + get_e_nt_hdr_size();
    rose_addr_t secttab_size = get_e_nsections() * sizeof(SgAsmPESectionTableEntry::PESectionTableEntry_disk);
    SgAsmPESectionTable *secttab = new SgAsmPESectionTable(this);
    secttab->set_offset(secttab_offset);
    secttab->set_size(secttab_size);
    secttab->parse();
    set_section_table(secttab);

    /* Parse the COFF symbol table */
    if (get_e_coff_symtab() && get_e_coff_nsyms()) {
        SgAsmCoffSymbolTable *symtab = new SgAsmCoffSymbolTable(this);
        symtab->set_offset(get_e_coff_symtab());
        symtab->parse();
        set_coff_symtab(symtab);
    }

    /* Associate RVAs with particular sections so that if a section's mapping is changed the RVA gets adjusted automatically. */
    ROSE_ASSERT(get_entry_rvas().size()==1);
    get_entry_rvas()[0].bind(this);
    set_e_code_rva(get_e_code_rva().bind(this));
    set_e_data_rva(get_e_data_rva().bind(this));

    /* Turn header-specified tables (RVA/Size pairs) into generic sections */
    create_table_sections();
    return this;
}
/** Parses an ELF Segment (Program Header) Table and constructs and parses all segments reachable from the table.  The section
 *  is extended as necessary based on the number of entries and the size of each entry.
 *
 *  For every non-null table entry, an existing section with the same file extent (and, if mapped, the same mapping and
 *  permissions) that is not yet associated with a segment table entry is reused; otherwise a new section is created and
 *  parsed.
 *
 *  @return this segment table */
SgAsmElfSegmentTable *
SgAsmElfSegmentTable::parse()
{
    SgAsmGenericSection::parse();

    SgAsmElfFileHeader *fhdr = dynamic_cast<SgAsmElfFileHeader*>(get_header());
    ROSE_ASSERT(fhdr!=NULL);
    ByteOrder sex = fhdr->get_sex();

    size_t ent_size, struct_size, opt_size, nentries;
    calculate_sizes(&ent_size, &struct_size, &opt_size, &nentries);
    ROSE_ASSERT(opt_size==fhdr->get_phextrasz() && nentries==fhdr->get_e_phnum());

    /* If the current size is very small (0 or 1 byte) then we're coming straight from the constructor and the parsing should
     * also extend this section to hold all the entries. Otherwise the caller must have assigned a specific size for a good
     * reason and we should leave that alone, reading zeros if the entries extend beyond the defined size. */
    if (get_size()<=1 && get_size()<nentries*ent_size)
        extend(nentries*ent_size - get_size());

    rose_addr_t offset=0;                               /* w.r.t. the beginning of this section */
    for (size_t i=0; i<nentries; i++, offset+=ent_size) {
        /* Read/decode the segment header */
        SgAsmElfSegmentTableEntry *shdr = NULL;
        if (4==fhdr->get_word_size()) {
            SgAsmElfSegmentTableEntry::Elf32SegmentTableEntry_disk disk;
            read_content_local(offset, &disk, struct_size);
            shdr = new SgAsmElfSegmentTableEntry(sex, &disk);
        } else {
            SgAsmElfSegmentTableEntry::Elf64SegmentTableEntry_disk disk;
            read_content_local(offset, &disk, struct_size);
            shdr = new SgAsmElfSegmentTableEntry(sex, &disk);
        }
        shdr->set_index(i);
        if (opt_size>0)
            shdr->get_extra() = read_content_local_ucl(offset+struct_size, opt_size);

        /* Null segments are just unused slots in the table; no real section to create */
        if (SgAsmElfSegmentTableEntry::PT_NULL == shdr->get_type())
            continue;

        /* Create SgAsmElfSection objects for each ELF Segment. However, if the ELF Segment Table describes a segment
         * that's the same offset and size as a section from the Elf Section Table (and the memory mappings are
         * consistent) then use the preexisting section instead of creating a new one. */
        SgAsmElfSection *s = NULL;
        SgAsmGenericSectionPtrList possible = fhdr->get_file()->get_sections_by_offset(shdr->get_offset(), shdr->get_filesz());
        for (size_t j=0; !s && j<possible.size(); j++) {
            if (possible[j]->get_offset()!=shdr->get_offset() || possible[j]->get_size()!=shdr->get_filesz())
                continue; /*different file extent*/
            if (possible[j]->is_mapped()) {
                if (possible[j]->get_mapped_preferred_rva()!=shdr->get_vaddr() ||
                    possible[j]->get_mapped_size()!=shdr->get_memsz())
                    continue; /*different mapped address or size*/
                unsigned section_perms = (possible[j]->get_mapped_rperm() ? 0x01 : 0x00) |
                                         (possible[j]->get_mapped_wperm() ? 0x02 : 0x00) |
                                         (possible[j]->get_mapped_xperm() ? 0x04 : 0x00);
                unsigned segment_perms = (shdr->get_flags() & SgAsmElfSegmentTableEntry::PF_RPERM ? 0x01 : 0x00) |
                                         (shdr->get_flags() & SgAsmElfSegmentTableEntry::PF_WPERM ? 0x02 : 0x00) |
                                         (shdr->get_flags() & SgAsmElfSegmentTableEntry::PF_XPERM ? 0x04 : 0x00);
                if (section_perms != segment_perms)
                    continue; /*different mapped permissions*/
            }

            /* Vet the candidate through a separate variable.  The loop stops as soon as `s' is non-null, so `s' must only
             * be assigned once the candidate has been fully accepted; otherwise a rejected candidate would end the search
             * and also suppress creation of a new segment below. */
            SgAsmElfSection *candidate = dynamic_cast<SgAsmElfSection*>(possible[j]);
            if (!candidate)
                continue; /*potential match was not from the ELF Section or Segment table*/
            if (candidate->get_segment_entry())
                continue; /*potential match is assigned to some other segment table entry*/

            /* Found a match. Set memory mapping params only. */
            s = candidate;
            s->init_from_segment_table(shdr, true); /*true=>set memory mapping params only*/
        }

        /* Create a new segment if no matching section was found. */
        if (!s) {
            if (SgAsmElfSegmentTableEntry::PT_NOTE == shdr->get_type()) {
                s = new SgAsmElfNoteSection(fhdr);
            } else {
                s = new SgAsmElfSection(fhdr);
            }
            s->init_from_segment_table(shdr);
            s->parse();
        }
    }
    return this;
}
/** Initialize by parsing a file.
 *
 *  Walks the section one record at a time, starting at offset zero and stopping at the end of the section or at the first
 *  record whose length is zero.  Each record begins with a 4-byte length (followed by an 8-byte extended length when the
 *  first length is 0xffffffff) and a 4-byte backward offset.  A zero backward offset marks a CIE record; anything else marks
 *  an FDE record whose CIE is looked up among the CIE records parsed so far.
 *
 *  The local variable @c at is the section-relative read cursor within the current record; it is advanced after every
 *  field that is consumed, and the remaining bytes of the record are stored as the record's instructions.
 *
 *  @return this section */
SgAsmElfEHFrameSection *
SgAsmElfEHFrameSection::parse()
{
    SgAsmElfSection::parse();
    SgAsmElfFileHeader *fhdr = get_elf_header();
    ROSE_ASSERT(fhdr!=NULL);

    rose_addr_t record_offset=0;
    /* CIE records seen so far, keyed by the section-relative offset at which each record starts */
    std::map<rose_addr_t, SgAsmElfEHFrameEntryCI*> cies;

    while (record_offset<get_size()) {
        rose_addr_t at = record_offset;
        unsigned char u8_disk;
        uint32_t u32_disk;
        uint64_t u64_disk;

        /* Length or extended length */
        rose_addr_t length_field_size = 4; /*number of bytes not counted in length*/
        read_content_local(at, &u32_disk, 4);
        at += 4;
        rose_addr_t record_size = disk_to_host(fhdr->get_sex(), u32_disk);
        if (record_size==0xffffffff) {
            read_content_local(at, &u64_disk, 8);
            at += 8;
            record_size = disk_to_host(fhdr->get_sex(), u64_disk);
            length_field_size += 8; /*FIXME: it's not entirely clear whether ExtendedLength includes this field*/
        }
        /* A zero length terminates the walk */
        if (0==record_size)
            break;

        /* Backward offset to CIE record, or zero if this is a CIE record. */
        read_content_local(at, &u32_disk, 4);
        at += 4;
        rose_addr_t cie_back_offset = disk_to_host(fhdr->get_sex(), u32_disk);

        if (0==cie_back_offset) {
            /* This is a CIE record */
            SgAsmElfEHFrameEntryCI *cie = new SgAsmElfEHFrameEntryCI(this);
            cies[record_offset] = cie;

            /* Version */
            uint8_t cie_version;
            read_content_local(at++, &cie_version, 1);
            cie->set_version(cie_version);

            /* Augmentation String (the cursor also skips the terminating NUL) */
            std::string astr = read_content_local_str(at);
            at += astr.size() + 1;
            cie->set_augmentation_string(astr);

            /* Alignment factors */
            cie->set_code_alignment_factor(read_content_local_uleb128(&at));
            cie->set_data_alignment_factor(read_content_local_sleb128(&at));

            /* Augmentation data length. This is apparently the length of the data described by the Augmentation String plus
             * the Initial Instructions plus any padding. [RPM 2009-01-15] */
            cie->set_augmentation_data_length(read_content_local_uleb128(&at));

            /* Augmentation data. The format of the augmentation data in the CIE record is determined by reading the
             * characters of the augmentation string. */
            if (!astr.empty() && astr[0]=='z') {
                for (size_t i=1; i<astr.size(); i++) {
                    if ('L'==astr[i]) {
                        read_content_local(at++, &u8_disk, 1);
                        cie->set_lsda_encoding(u8_disk);
                    } else if ('P'==astr[i]) {
                        /* The first byte is an encoding method which describes the following bytes, which are the address of
                         * a Personality Routine Handler. There appears to be very little documentation about these fields. */
                        read_content_local(at++, &u8_disk, 1);
                        cie->set_prh_encoding(u8_disk);
                        switch (cie->get_prh_encoding()) {
                            case 0x05: /* See Ubuntu 32bit /usr/bin/aptitude */
                            case 0x06: /* See second CIE record for Gentoo-Amd64 /usr/bin/addftinfo */
                            case 0x07: /* See first CIE record for Gentoo-Amd64 /usr/bin/addftinfo */
                                read_content_local(at++, &u8_disk, 1); /* not sure what this is; arg for __gxx_personality_v0? */
                                cie->set_prh_arg(u8_disk);
                                read_content_local(at, &u32_disk, 4);
                                at+=4; /* address of <__gxx_personality_v0@plt> */
                                /* NOTE(review): decoded as little-endian rather than with the header's byte order that is
                                 * used for the length fields above -- confirm this is intended. */
                                cie->set_prh_addr(ByteOrder::le_to_host(u32_disk));
                                break;
                            case 0x09: /* *.o file generated by gcc-4.0.x */
                                /* FIXME: Cannot find any info about this entry. Fix SgAsmElfEHFrameSection::parse() if we
                                 * ever figure this out. [RPM 2009-09-29] */
                                /*fallthrough*/
                            default: {
                                /* Unknown encoding: nothing further is read for this 'P' entry.  The nwarnings counter
                                 * and WARNING_LIMIT are declared outside this function. */
                                if (++nwarnings<=WARNING_LIMIT) {
                                    fprintf(stderr, "%s:%u: warning: ELF CIE 0x%08"PRIx64" has unknown PRH encoding 0x%02x\n",
                                            __FILE__, __LINE__, get_offset()+record_offset, cie->get_prh_encoding());
                                    if (WARNING_LIMIT==nwarnings)
                                        fprintf(stderr, " (additional frame warnings will be suppressed)\n");
                                }
                                break;
                            }
                        }
                    } else if ('R'==astr[i]) {
                        read_content_local(at++, &u8_disk, 1);
                        cie->set_addr_encoding(u8_disk);
                    } else if ('S'==astr[i]) {
                        /* See http://lkml.indiana.edu/hypermail/linux/kernel/0602.3/1144.html and GCC PR #26208*/
                        cie->set_sig_frame(true);
                    } else {
                        /* Some stuff we don't handle yet. Warn about it and don't read anything. */
                        if (++nwarnings<=WARNING_LIMIT) {
                            fprintf(stderr, "%s:%u: warning: ELF CIE 0x%08"PRIx64" has invalid augmentation string \"%s\"\n",
                                    __FILE__, __LINE__, get_offset()+record_offset, escapeString(astr).c_str());
                            if (WARNING_LIMIT==nwarnings)
                                fprintf(stderr, " (additional frame warnings will be suppressed)\n");
                        }
                    }
                }
            }

            /* Initial instructions. These are apparently included in the augmentation_data_length. The final instructions can
             * be zero padding (no-op instructions) to bring the record up to a multiple of the word size.
             *
             * The size is whatever remains of the record after the bytes consumed so far.  NOTE(review): this is unsigned
             * arithmetic, so a record_size smaller than the bytes already consumed would wrap around -- confirm that
             * malformed input cannot reach this point. */
            rose_addr_t init_insn_size = (length_field_size + record_size) - (at - record_offset);
            cie->get_instructions() = read_content_local_ucl(at, init_insn_size);
            ROSE_ASSERT(cie->get_instructions().size()==init_insn_size);
        } else {
            /* This is a FDE record.  The back offset is measured from the position just after the length field(s); the
             * referenced CIE must already have been parsed. */
            rose_addr_t cie_offset = record_offset + length_field_size - cie_back_offset;
            assert(cies.find(cie_offset)!=cies.end());
            SgAsmElfEHFrameEntryCI *cie = cies[cie_offset];
            SgAsmElfEHFrameEntryFD *fde = new SgAsmElfEHFrameEntryFD(cie);

            /* PC Begin (begin_rva) and size */
            switch (cie->get_addr_encoding()) {
                case -1: /* No address encoding specified */
                case 0x01:
                case 0x03:
                case 0x1b: /* Address doesn't look valid (e.g., 0xfffd74e8) but still four bytes [RPM 2008-01-16]*/
                {
                    read_content_local(at, &u32_disk, 4);
                    at+=4;
                    fde->set_begin_rva(ByteOrder::le_to_host(u32_disk));
                    read_content_local(at, &u32_disk, 4);
                    at+=4;
                    fde->set_size(ByteOrder::le_to_host(u32_disk));
                    break;
                }
                default:
                    /* Any other encoding is reported and then terminates the process via abort() */
                    fprintf(stderr, "%s:%u: ELF CIE 0x%08"PRIx64", FDE 0x%08"PRIx64": unknown address encoding: 0x%02x\n",
                            __FILE__, __LINE__, get_offset()+cie_offset, get_offset()+record_offset,
                            cie->get_addr_encoding());
                    abort();
            }

            /* Augmentation Data (present only when the CIE's augmentation string starts with 'z') */
            std::string astring = cie->get_augmentation_string();
            if (astring.size()>0 && astring[0]=='z') {
                rose_addr_t aug_length = read_content_local_uleb128(&at);
                fde->get_augmentation_data() = read_content_local_ucl(at, aug_length);
                at += aug_length;
                ROSE_ASSERT(fde->get_augmentation_data().size()==aug_length);
            }

            /* Call frame instructions: whatever remains of the record after the bytes consumed so far */
            rose_addr_t cf_insn_size = (length_field_size + record_size) - (at - record_offset);
            fde->get_instructions() = read_content_local_ucl(at, cf_insn_size);
            ROSE_ASSERT(fde->get_instructions().size()==cf_insn_size);
        }

        /* Advance to the next record */
        record_offset += length_field_size + record_size;
    }
    return this;
}
/** Initialize this header with information parsed from the file and construct and parse everything that's reachable from the
 *  header. Since the size of the ELF File Header is determined by the contents of the ELF File Header as stored in the file,
 *  the size of the ELF File Header will be adjusted upward if necessary.  The ELF File Header should have been constructed
 *  such that SgAsmElfFileHeader::ctor() was called.
 *
 *  Steps: read the header using the 32-bit layout, verify the magic number, determine the byte order, decode the header
 *  using the 32- or 64-bit layout selected by the file class, set file-format properties and the target architecture,
 *  parse the section and segment tables when they are non-empty, and finally bind and record the entry point.
 *
 *  @return this header
 *  @throws FormatError if the magic number is wrong, the byte order cannot be determined, or the file class is neither
 *          1 (32-bit) nor 2 (64-bit) */
SgAsmElfFileHeader*
SgAsmElfFileHeader::parse()
{
    SgAsmGenericHeader::parse();

    /* Read 32-bit header for now. Might need to re-read as 64-bit later. */
    Elf32FileHeader_disk disk32;
    if (sizeof(disk32)>get_size())
        extend(sizeof(disk32)-get_size());
    read_content_local(0, &disk32, sizeof disk32, false); /*zero pad if we read EOF*/

    /* Check magic number early */
    if (disk32.e_ident_magic[0]!=0x7f || disk32.e_ident_magic[1]!='E' ||
        disk32.e_ident_magic[2]!='L' || disk32.e_ident_magic[3]!='F')
        throw FormatError("Bad ELF magic number");

    /* File byte order should be 1 or 2. However, we've seen at least one example that left the byte order at zero, implying
     * that it was the native order. We don't have the luxury of decoding the file on the native machine, so in that case we
     * try to infer the byte order by looking at one of the other multi-byte fields of the file.  The inference examines the
     * raw (not yet byte-swapped) e_type field. */
    ByteOrder::Endianness sex;
    if (1 == disk32.e_ident_data_encoding) {
        sex = ByteOrder::ORDER_LSB;
    } else if (2==disk32.e_ident_data_encoding) {
        sex = ByteOrder::ORDER_MSB;
    } else if ((disk32.e_type & 0xff00)==0xff00) {
        /* One of the 0xffxx processor-specific flags in native order */
        if ((disk32.e_type & 0x00ff)==0xff)
            throw FormatError("invalid ELF header byte order"); /*ambiguous*/
        sex = ByteOrder::host_order();
    } else if ((disk32.e_type & 0x00ff)==0x00ff) {
        /* One of the 0xffxx processor specific orders in reverse native order */
        sex = ByteOrder::host_order()==ByteOrder::ORDER_LSB ? ByteOrder::ORDER_MSB : ByteOrder::ORDER_LSB;
    } else if ((disk32.e_type & 0xff00)==0) {
        /* One of the low-valued file types in native order */
        if ((disk32.e_type & 0x00ff)==0)
            throw FormatError("invalid ELF header byte order"); /*ambiguous*/
        sex = ByteOrder::host_order();
    } else if ((disk32.e_type & 0x00ff)==0) {
        /* One of the low-valued file types in reverse native order */
        sex = ByteOrder::host_order() == ByteOrder::ORDER_LSB ? ByteOrder::ORDER_MSB : ByteOrder::ORDER_LSB;
    } else {
        /* Ambiguous order */
        throw FormatError("invalid ELF header byte order");
    }
    ROSE_ASSERT(p_exec_format != NULL);
    p_exec_format->set_sex(sex);
    p_e_ident_data_encoding = disk32.e_ident_data_encoding; /*save original value*/

    /* Decode header to native format */
    rose_rva_t entry_rva, sectab_rva, segtab_rva;
    if (1 == disk32.e_ident_file_class) {
        p_exec_format->set_word_size(4);

        p_e_ident_padding.clear();
        for (size_t i=0; i<sizeof(disk32.e_ident_padding); i++)
            p_e_ident_padding.push_back(disk32.e_ident_padding[i]);

        p_e_ident_file_class    = ByteOrder::disk_to_host(sex, disk32.e_ident_file_class);
        p_e_ident_file_version  = ByteOrder::disk_to_host(sex, disk32.e_ident_file_version);
        p_e_type                = ByteOrder::disk_to_host(sex, disk32.e_type);
        p_e_machine             = ByteOrder::disk_to_host(sex, disk32.e_machine);
        p_exec_format->set_version(ByteOrder::disk_to_host(sex, disk32.e_version));
        entry_rva               = ByteOrder::disk_to_host(sex, disk32.e_entry);
        segtab_rva              = ByteOrder::disk_to_host(sex, disk32.e_phoff);
        sectab_rva              = ByteOrder::disk_to_host(sex, disk32.e_shoff);
        p_e_flags               = ByteOrder::disk_to_host(sex, disk32.e_flags);
        p_e_ehsize              = ByteOrder::disk_to_host(sex, disk32.e_ehsize);

        /* The "extra" size is the part of each table entry beyond the fixed-size on-disk structure; it is only computed
         * when the corresponding table has at least one entry. */
        p_e_phnum               = ByteOrder::disk_to_host(sex, disk32.e_phnum);
        if (p_e_phnum>0) {
            p_phextrasz         = ByteOrder::disk_to_host(sex, disk32.e_phentsize);
            ROSE_ASSERT(p_phextrasz>=sizeof(SgAsmElfSegmentTableEntry::Elf32SegmentTableEntry_disk));
            p_phextrasz -= sizeof(SgAsmElfSegmentTableEntry::Elf32SegmentTableEntry_disk);
        } else {
            p_phextrasz = 0;
        }

        p_e_shnum               = ByteOrder::disk_to_host(sex, disk32.e_shnum);
        if (p_e_shnum>0) {
            p_shextrasz         = ByteOrder::disk_to_host(sex, disk32.e_shentsize);
            ROSE_ASSERT(p_shextrasz>=sizeof(SgAsmElfSectionTableEntry::Elf32SectionTableEntry_disk));
            p_shextrasz -= sizeof(SgAsmElfSectionTableEntry::Elf32SectionTableEntry_disk);
        } else {
            p_shextrasz = 0;
        }

        p_e_shstrndx            = ByteOrder::disk_to_host(sex, disk32.e_shstrndx);
    } else if (2 == disk32.e_ident_file_class) {
        /* We guessed wrong. This is a 64-bit header, not 32-bit. */
        p_exec_format->set_word_size(8);
        Elf64FileHeader_disk disk64;
        if (sizeof(disk64)>get_size())
            extend(sizeof(disk64)-get_size());
        read_content_local(0, &disk64, sizeof disk64, false); /*zero pad at EOF*/

        p_e_ident_padding.clear();
        for (size_t i=0; i<sizeof(disk64.e_ident_padding); i++)
            p_e_ident_padding.push_back(disk64.e_ident_padding[i]);

        p_e_ident_file_class    = ByteOrder::disk_to_host(sex, disk64.e_ident_file_class);
        p_e_ident_file_version  = ByteOrder::disk_to_host(sex, disk64.e_ident_file_version);
        p_e_type                = ByteOrder::disk_to_host(sex, disk64.e_type);
        p_e_machine             = ByteOrder::disk_to_host(sex, disk64.e_machine);
        p_exec_format->set_version(ByteOrder::disk_to_host(sex, disk64.e_version));
        entry_rva               = ByteOrder::disk_to_host(sex, disk64.e_entry);
        segtab_rva              = ByteOrder::disk_to_host(sex, disk64.e_phoff);
        sectab_rva              = ByteOrder::disk_to_host(sex, disk64.e_shoff);
        p_e_flags               = ByteOrder::disk_to_host(sex, disk64.e_flags);
        p_e_ehsize              = ByteOrder::disk_to_host(sex, disk64.e_ehsize);

        p_e_phnum               = ByteOrder::disk_to_host(sex, disk64.e_phnum);
        if (p_e_phnum>0) {
            p_phextrasz         = ByteOrder::disk_to_host(sex, disk64.e_phentsize);
            ROSE_ASSERT(p_phextrasz>=sizeof(SgAsmElfSegmentTableEntry::Elf64SegmentTableEntry_disk));
            p_phextrasz -= sizeof(SgAsmElfSegmentTableEntry::Elf64SegmentTableEntry_disk);
        } else {
            p_phextrasz = 0;
        }

        p_e_shnum               = ByteOrder::disk_to_host(sex, disk64.e_shnum);
        if (p_e_shnum>0) {
            p_shextrasz         = ByteOrder::disk_to_host(sex, disk64.e_shentsize);
            ROSE_ASSERT(p_shextrasz>=sizeof(SgAsmElfSectionTableEntry::Elf64SectionTableEntry_disk));
            p_shextrasz -= sizeof(SgAsmElfSectionTableEntry::Elf64SectionTableEntry_disk);
        } else {
            p_shextrasz = 0;
        }

        p_e_shstrndx            = ByteOrder::disk_to_host(sex, disk64.e_shstrndx);
    } else {
        throw FormatError("invalid ELF header file class");
    }

    /* Magic number. disk32 and disk64 have header bytes at same offset */
    p_magic.clear();
    for (size_t i=0; i<sizeof(disk32.e_ident_magic); i++)
        p_magic.push_back(disk32.e_ident_magic[i]);

    /* File format: map the decoded e_type onto a general purpose */
    p_exec_format->set_family(FAMILY_ELF);
    switch (p_e_type) {
        case 0:
            p_exec_format->set_purpose(PURPOSE_UNSPECIFIED);
            break;
        case 1:
        case 3:
            p_exec_format->set_purpose(PURPOSE_LIBRARY);
            break;
        case 2:
            p_exec_format->set_purpose(PURPOSE_EXECUTABLE);
            break;
        case 4:
            p_exec_format->set_purpose(PURPOSE_CORE_DUMP);
            break;
        default:
            if (p_e_type >= 0xff00 && p_e_type <= 0xffff) {
                p_exec_format->set_purpose(PURPOSE_PROC_SPECIFIC);
            } else {
                p_exec_format->set_purpose(PURPOSE_OTHER);
            }
            break;
    }
    p_exec_format->set_is_current_version(1 == p_exec_format->get_version());
    p_exec_format->set_abi(ABI_UNSPECIFIED);                 /* ELF specifies a target architecture rather than an ABI */
    p_exec_format->set_abi_version(0);

    /* Target architecture */
    set_isa(machine_to_isa(p_e_machine));

    /* Read the optional section and segment tables and the sections to which they point. An empty section or segment table is
     * treated as if it doesn't exist. This seems to be compatible with the loader since the 45-byte "tiny" ELF executable
     * stores a zero in the e_shnum member and a completely invalid value in the e_shoff member. */
    if (sectab_rva>0 && get_e_shnum()>0) {
        SgAsmElfSectionTable *tab = new SgAsmElfSectionTable(this);
        tab->set_offset(sectab_rva.get_rva());
        tab->parse();
    }
    if (segtab_rva>0 && get_e_phnum()>0) {
        SgAsmElfSegmentTable *tab = new SgAsmElfSegmentTable(this);
        tab->set_offset(segtab_rva.get_rva());
        tab->parse();
    }

    /* Associate the entry point with a particular section.  This is done after the tables above have been parsed. */
    entry_rva.bind(this);
    add_entry_rva(entry_rva);

    return this;
}
/** Parses an ELF Section Table and constructs and parses all sections reachable from the table.
 *
 *  The section is extended as necessary based on the number of entries and the size of each entry, but only when its
 *  current size is 0 or 1 byte; a larger size is assumed to have been chosen deliberately by the caller and is left alone.
 *
 *  The work is done in these steps:
 *    1. Read every section table entry (plus any per-entry extra bytes).
 *    2. Create and parse the section name string table named by the file header's e_shstrndx, if nonzero.
 *    3. Create and parse the remaining sections, deferring any section whose sh_link (or, for relocation sections,
 *       sh_info) target has not been parsed yet.
 *    4. Link each section to its sh_link target and call finish_parsing() on every section.
 *
 *  @returns This section table.
 *
 *  @throws FormatError if e_shstrndx is not a valid index into the section table, or if the sh_link/sh_info
 *          dependencies between sections can never be satisfied (a section depends on itself or on a cycle). */
SgAsmElfSectionTable *
SgAsmElfSectionTable::parse()
{
    SgAsmGenericSection::parse();

    SgAsmElfFileHeader *fhdr = dynamic_cast<SgAsmElfFileHeader*>(get_header());
    ROSE_ASSERT(fhdr!=NULL);
    ByteOrder sex = fhdr->get_sex();

    size_t ent_size, struct_size, opt_size, nentries;
    calculate_sizes(&ent_size, &struct_size, &opt_size, &nentries);
    ROSE_ASSERT(opt_size==fhdr->get_shextrasz() && nentries==fhdr->get_e_shnum());

    /* If the current size is very small (0 or 1 byte) then we're coming straight from the constructor and the parsing should
     * also extend this section to hold all the entries. Otherwise the caller must have assigned a specific size for a good
     * reason and we should leave that alone.
     * NOTE(review): an earlier comment here said that entries beyond the defined size read as zeros; that depends on the
     * default strictness of read_content_local() -- confirm. */
    if (get_size()<=1 && get_size()<nentries*ent_size)
        extend(nentries*ent_size - get_size());

    /* Read all the section headers. Any word size other than 4 is treated as the 64-bit layout. */
    std::vector<SgAsmElfSectionTableEntry*> entries;
    rose_addr_t offset = 0;
    for (size_t i=0; i<nentries; i++, offset+=ent_size) {
        SgAsmElfSectionTableEntry *shdr = NULL;
        if (4 == fhdr->get_word_size()) {
            SgAsmElfSectionTableEntry::Elf32SectionTableEntry_disk disk;
            read_content_local(offset, &disk, struct_size);
            shdr = new SgAsmElfSectionTableEntry(sex, &disk);
        } else {
            SgAsmElfSectionTableEntry::Elf64SectionTableEntry_disk disk;
            read_content_local(offset, &disk, struct_size);
            shdr = new SgAsmElfSectionTableEntry(sex, &disk);
        }
        if (opt_size>0)
            shdr->get_extra() = read_content_local_ucl(offset+struct_size, opt_size);
        entries.push_back(shdr);
    }

    /* This vector keeps track of which sections have already been parsed. We could get the same information by calling
     * fhdr->get_section_by_id() and passing the entry number since entry numbers and IDs are one and the same in ELF. However,
     * this is a bit easier. */
    std::vector<SgAsmElfSection*> is_parsed;
    is_parsed.resize(entries.size(), NULL);

    /* All sections implicitly depend on the section string table for their names. The index comes straight from the file,
     * so it must be validated before it is used to index the vectors. The ELF escape value SHN_XINDEX is not interpreted
     * here and is therefore rejected as out of range unless the table really has that many entries. */
    SgAsmElfStringSection *section_name_strings=NULL;
    const size_t shstrndx = fhdr->get_e_shstrndx();
    if (shstrndx > 0) {
        if (shstrndx >= entries.size())
            throw FormatError("ELF section name string table index (e_shstrndx) is outside the section table");
        SgAsmElfSectionTableEntry *entry = entries[shstrndx];
        section_name_strings = new SgAsmElfStringSection(fhdr);
        section_name_strings->init_from_section_table(entry, section_name_strings, shstrndx);
        section_name_strings->parse();
        is_parsed[shstrndx] = section_name_strings;
    }

    /* Read all the sections. Some sections depend on other sections, so we read them in such an order that all dependencies
     * are satisfied first. Each pass parses every section whose dependencies are ready; passes repeat until nothing is
     * deferred. */
    while (1) {
        bool try_again = false;                         /* at least one section was deferred during this pass */
        bool made_progress = false;                     /* at least one section was parsed during this pass */

        for (size_t i=0; i<entries.size(); i++) {
            SgAsmElfSectionTableEntry *entry = entries[i];
            ROSE_ASSERT(entry->get_sh_link()<entries.size());

            /* Some sections might reference another section through the sh_link member. */
            bool need_linked = entry->get_sh_link() > 0;
            SgAsmElfSection *linked = need_linked ? is_parsed[entry->get_sh_link()] : NULL;

            /* Relocation sections might have a second linked section stored in sh_info. */
            bool need_info_linked = (entry->get_sh_type() == SgAsmElfSectionTableEntry::SHT_REL ||
                                     entry->get_sh_type() == SgAsmElfSectionTableEntry::SHT_RELA) &&
                                    entry->get_sh_info() > 0;
            ROSE_ASSERT(!need_info_linked || entry->get_sh_info()<entries.size());
            SgAsmElfSection *info_linked = need_info_linked ? is_parsed[entry->get_sh_info()] : NULL;

            if (is_parsed[i]) {
                /* This section has already been parsed. */
                continue;
            }
            if ((need_linked && !linked) || (need_info_linked && !info_linked)) {
                /* Don't parse this section yet because it depends on something that's not parsed yet. */
                try_again = true;
                continue;
            }

            switch (entry->get_sh_type()) {
                case SgAsmElfSectionTableEntry::SHT_NULL:
                    /* Null entry. We still create the section just to hold the section header. */
                    is_parsed[i] = new SgAsmElfSection(fhdr);
                    break;
                case SgAsmElfSectionTableEntry::SHT_NOBITS:
                    /* These types of sections don't occupy any file space (e.g., BSS) */
                    is_parsed[i] = new SgAsmElfSection(fhdr);
                    break;
                case SgAsmElfSectionTableEntry::SHT_DYNAMIC: {
                    SgAsmElfStringSection *strsec = dynamic_cast<SgAsmElfStringSection*>(linked);
                    ROSE_ASSERT(strsec);
                    is_parsed[i] = new SgAsmElfDynamicSection(fhdr, strsec);
                    break;
                }
                case SgAsmElfSectionTableEntry::SHT_DYNSYM:
                case SgAsmElfSectionTableEntry::SHT_SYMTAB: {
                    /* Both kinds of symbol table are handled alike; only the "dynamic" flag differs. */
                    SgAsmElfStringSection *strsec = dynamic_cast<SgAsmElfStringSection*>(linked);
                    ROSE_ASSERT(strsec);
                    SgAsmElfSymbolSection *symsec = new SgAsmElfSymbolSection(fhdr, strsec);
                    symsec->set_is_dynamic(entry->get_sh_type() == SgAsmElfSectionTableEntry::SHT_DYNSYM);
                    is_parsed[i] = symsec;
                    break;
                }
                case SgAsmElfSectionTableEntry::SHT_STRTAB:
                    is_parsed[i] = new SgAsmElfStringSection(fhdr);
                    break;
                case SgAsmElfSectionTableEntry::SHT_REL:
                case SgAsmElfSectionTableEntry::SHT_RELA: {
                    /* Both kinds of relocation section are handled alike; only the "uses addend" flag differs. The
                     * symbol table pointer is passed along even when the cast yields NULL. */
                    SgAsmElfSymbolSection *symbols = dynamic_cast<SgAsmElfSymbolSection*>(linked);
                    SgAsmElfRelocSection *relocsec = new SgAsmElfRelocSection(fhdr, symbols, info_linked);
                    relocsec->set_uses_addend(entry->get_sh_type() == SgAsmElfSectionTableEntry::SHT_RELA);
                    is_parsed[i] = relocsec;
                    break;
                }
                case SgAsmElfSectionTableEntry::SHT_PROGBITS: {
                    if (!section_name_strings) {
                        fprintf(stderr, "SgAsmElfSectionTable::parse(): no string table for section table\n");
                        is_parsed[i] = new SgAsmElfSection(fhdr);
                    } else {
                        /* The section name decides whether this is an exception handling frame section. */
                        std::string section_name = section_name_strings->read_content_local_str(entry->get_sh_name());
                        if (section_name == ".eh_frame") {
                            is_parsed[i] = new SgAsmElfEHFrameSection(fhdr);
                        } else {
                            is_parsed[i] = new SgAsmElfSection(fhdr);
                        }
                    }
                    break;
                }
                case SgAsmElfSectionTableEntry::SHT_GNU_versym: {
                    is_parsed[i] = new SgAsmElfSymverSection(fhdr);
                    break;
                }
                case SgAsmElfSectionTableEntry::SHT_GNU_verdef: {
                    SgAsmElfStringSection *strsec = dynamic_cast<SgAsmElfStringSection*>(linked);
                    ROSE_ASSERT(strsec);
                    is_parsed[i] = new SgAsmElfSymverDefinedSection(fhdr,strsec);
                    break;
                }
                case SgAsmElfSectionTableEntry::SHT_GNU_verneed: {
                    SgAsmElfStringSection *strsec = dynamic_cast<SgAsmElfStringSection*>(linked);
                    ROSE_ASSERT(strsec);
                    is_parsed[i] = new SgAsmElfSymverNeededSection(fhdr,strsec);
                    break;
                }
                default:
                    is_parsed[i] = new SgAsmElfSection(fhdr);
                    break;
            }
            is_parsed[i]->init_from_section_table(entry, section_name_strings, i);
            is_parsed[i]->parse();
            made_progress = true;
        }

        if (!try_again)
            break;

        /* Something was deferred but nothing was parsed, so the next pass would be identical to this one. This happens
         * when a section links to itself or when the links form a cycle. Fail instead of looping forever. */
        if (!made_progress)
            throw FormatError("ELF section table has circular sh_link/sh_info dependencies");
    }

    /* Initialize links between sections */
    for (size_t i = 0; i < entries.size(); i++) {
        SgAsmElfSectionTableEntry *shdr = entries[i];
        if (shdr->get_sh_link() > 0) {
            SgAsmElfSection *source = isSgAsmElfSection(fhdr->get_file()->get_section_by_id(i));
            SgAsmElfSection *target = isSgAsmElfSection(fhdr->get_file()->get_section_by_id(shdr->get_sh_link()));
            assert(source);     /* because we created it above */
            source->set_linked_section(target);
        }
    }

    /* Finish parsing sections now that we have basic info for all the sections. Every slot is non-null here because the
     * loop above only exits normally once no section was deferred. */
    for (size_t i=0; i<is_parsed.size(); i++)
        is_parsed[i]->finish_parsing();

    return this;
}