Example #1
0
/** Initialize the header with information parsed from the file and construct and parse everything that's reachable from the
 *  header. The PE File Header should have been constructed such that SgAsmPEFileHeader::ctor() was called.
 *
 *  The steps are, in order: read and decode the fixed-size COFF file header; read and decode the optional (NT) header as
 *  either the 32-bit or 64-bit layout depending on its magic number; record the magic number, format version and target
 *  architecture; set up the header's mapping attributes; parse the RVA/Size pairs; construct and parse the section table and
 *  (when present) the COFF symbol table; bind the entry point and code/data RVAs to this header; and finally turn the
 *  header-specified tables into generic sections.
 *
 *  Reads that run past the end of the file are zero padded and reported with a warning on stderr rather than treated as
 *  errors.
 *
 *  Throws FormatError if the header does not begin with the bytes 'P', 'E', 0, 0.
 *
 *  Returns this header. */
SgAsmPEFileHeader*
SgAsmPEFileHeader::parse()
{
    SgAsmGenericHeader::parse();

    /* Read header, zero padding if the file isn't large enough */
    PEFileHeader_disk fh;
    if (sizeof(fh)>get_size())
        extend(sizeof(fh)-get_size());
    if (sizeof(fh)!=read_content_local(0, &fh, sizeof fh, false))
        fprintf(stderr, "SgAsmPEFileHeader::parse: warning: short read of PE header at byte 0x%08" PRIx64 "\n", get_offset());

    /* Check magic number before getting too far */
    if (fh.e_magic[0]!='P' || fh.e_magic[1]!='E' || fh.e_magic[2]!='\0' || fh.e_magic[3]!='\0')
        throw FormatError("Bad PE magic number");

    /* Decode COFF file header */
    p_e_cpu_type           = le_to_host(fh.e_cpu_type);
    p_e_nsections          = le_to_host(fh.e_nsections);
    p_e_time               = le_to_host(fh.e_time);
    p_e_coff_symtab        = le_to_host(fh.e_coff_symtab);
    p_e_coff_nsyms         = le_to_host(fh.e_coff_nsyms);
    p_e_nt_hdr_size        = le_to_host(fh.e_nt_hdr_size);
    p_e_flags              = le_to_host(fh.e_flags);

    /* Read the "Optional Header" (optional in the sense that not all files have one, but required for an executable), the
     * size of which is stored in the e_nt_hdr_size of the main PE file header. According to
     * http://www.phreedom.org/solar/code/tinype the Windows loader honors the e_nt_hdr_size even when set to smaller than the
     * smallest possible documented size of the optional header. Also it's possible for the optional header to extend beyond
     * the end of the file, in which case that part should be read as zero.
     *
     * The header is first read using the 32-bit layout because the magic number that selects the layout is itself a member
     * of the optional header. */
    PE32OptHeader_disk oh32;
    rose_addr_t need32 = sizeof(PEFileHeader_disk) + std::min(p_e_nt_hdr_size, (rose_addr_t)(sizeof oh32));
    if (need32>get_size())
        extend(need32-get_size());
    if (sizeof(oh32)!=read_content_local(sizeof fh, &oh32, sizeof oh32, false))
        fprintf(stderr, "SgAsmPEFileHeader::parse: warning: short read of PE Optional Header at byte 0x%08" PRIx64 "\n",
                get_offset() + sizeof(fh));
    p_e_opt_magic = le_to_host(oh32.e_opt_magic);

    /* File format changes from ctor(). An optional header magic of 0x010b selects the 32-bit layout; every other value is
     * treated as the 64-bit layout. */
    p_exec_format->set_purpose(p_e_flags & HF_PROGRAM ? PURPOSE_EXECUTABLE : PURPOSE_LIBRARY);
    p_exec_format->set_word_size(0x010b==p_e_opt_magic? 4 : 8);

    /* Decode the optional header. */
    rose_addr_t entry_rva = 0;
    if (4==p_exec_format->get_word_size()) {
        p_e_lmajor             = le_to_host(oh32.e_lmajor);
        p_e_lminor             = le_to_host(oh32.e_lminor);
        p_e_code_size          = le_to_host(oh32.e_code_size);
        p_e_data_size          = le_to_host(oh32.e_data_size);
        p_e_bss_size           = le_to_host(oh32.e_bss_size);
        entry_rva              = le_to_host(oh32.e_entrypoint_rva);
        p_e_code_rva           = le_to_host(oh32.e_code_rva);
        p_e_data_rva           = le_to_host(oh32.e_data_rva);
        p_base_va              = le_to_host(oh32.e_image_base);
        p_e_section_align      = le_to_host(oh32.e_section_align);
        p_e_file_align         = le_to_host(oh32.e_file_align);
        p_e_os_major           = le_to_host(oh32.e_os_major);
        p_e_os_minor           = le_to_host(oh32.e_os_minor);
        p_e_user_major         = le_to_host(oh32.e_user_major);
        p_e_user_minor         = le_to_host(oh32.e_user_minor);
        p_e_subsys_major       = le_to_host(oh32.e_subsys_major);
        p_e_subsys_minor       = le_to_host(oh32.e_subsys_minor);
        p_e_reserved9          = le_to_host(oh32.e_reserved9);
        p_e_image_size         = le_to_host(oh32.e_image_size);
        p_e_header_size        = le_to_host(oh32.e_header_size);
        p_e_file_checksum      = le_to_host(oh32.e_file_checksum);
        p_e_subsystem          = le_to_host(oh32.e_subsystem);
        p_e_dll_flags          = le_to_host(oh32.e_dll_flags);
        p_e_stack_reserve_size = le_to_host(oh32.e_stack_reserve_size);
        p_e_stack_commit_size  = le_to_host(oh32.e_stack_commit_size);
        p_e_heap_reserve_size  = le_to_host(oh32.e_heap_reserve_size);
        p_e_heap_commit_size   = le_to_host(oh32.e_heap_commit_size);
        p_e_loader_flags       = le_to_host(oh32.e_loader_flags);
        p_e_num_rvasize_pairs  = le_to_host(oh32.e_num_rvasize_pairs);
    } else if (8==p_exec_format->get_word_size()) {
        /* We guessed wrong. This is a 64-bit header, not 32-bit. Re-read it with the 64-bit layout, zero padding at the end
         * of the file exactly as was done for the 32-bit attempt above. */
        PE64OptHeader_disk oh64;
        rose_addr_t need64 = sizeof(PEFileHeader_disk) + std::min(p_e_nt_hdr_size, (rose_addr_t)(sizeof oh64));
        if (need64>get_size())
            extend(need64-get_size());
        if (sizeof(oh64)!=read_content_local(sizeof fh, &oh64, sizeof oh64, false))
            fprintf(stderr, "SgAsmPEFileHeader::parse: warning: short read of PE Optional Header at byte 0x%08" PRIx64 "\n",
                    get_offset() + sizeof(fh));
        p_e_lmajor             = le_to_host(oh64.e_lmajor);
        p_e_lminor             = le_to_host(oh64.e_lminor);
        p_e_code_size          = le_to_host(oh64.e_code_size);
        p_e_data_size          = le_to_host(oh64.e_data_size);
        p_e_bss_size           = le_to_host(oh64.e_bss_size);
        entry_rva              = le_to_host(oh64.e_entrypoint_rva);
        p_e_code_rva           = le_to_host(oh64.e_code_rva);
     // p_e_data_rva           = le_to_host(oh.e_data_rva);             /* not in PE32+ */
        p_base_va              = le_to_host(oh64.e_image_base);
        p_e_section_align      = le_to_host(oh64.e_section_align);
        p_e_file_align         = le_to_host(oh64.e_file_align);
        p_e_os_major           = le_to_host(oh64.e_os_major);
        p_e_os_minor           = le_to_host(oh64.e_os_minor);
        p_e_user_major         = le_to_host(oh64.e_user_major);
        p_e_user_minor         = le_to_host(oh64.e_user_minor);
        p_e_subsys_major       = le_to_host(oh64.e_subsys_major);
        p_e_subsys_minor       = le_to_host(oh64.e_subsys_minor);
        p_e_reserved9          = le_to_host(oh64.e_reserved9);
        p_e_image_size         = le_to_host(oh64.e_image_size);
        p_e_header_size        = le_to_host(oh64.e_header_size);
        p_e_file_checksum      = le_to_host(oh64.e_file_checksum);
        p_e_subsystem          = le_to_host(oh64.e_subsystem);
        p_e_dll_flags          = le_to_host(oh64.e_dll_flags);
        p_e_stack_reserve_size = le_to_host(oh64.e_stack_reserve_size);
        p_e_stack_commit_size  = le_to_host(oh64.e_stack_commit_size);
        p_e_heap_reserve_size  = le_to_host(oh64.e_heap_reserve_size);
        p_e_heap_commit_size   = le_to_host(oh64.e_heap_commit_size);
        p_e_loader_flags       = le_to_host(oh64.e_loader_flags);
        p_e_num_rvasize_pairs  = le_to_host(oh64.e_num_rvasize_pairs);
    } else {
        /* NOTE(review): the word size was set to either 4 or 8 just above, so this branch looks unreachable (assuming
         * get_word_size() returns what set_word_size() stored). It is kept as a safety net; rejecting unknown optional
         * header magic numbers here would change which files parse successfully -- confirm before tightening. */
        throw FormatError("unrecognized Windows PE optional header magic number");
    }

    /* Magic number */
    p_magic.clear();
    for (size_t i = 0; i < sizeof(fh.e_magic); ++i)
        p_magic.push_back(fh.e_magic[i]);

    /* File format. The linker major and minor numbers are packed into the high and low 16 bits of the format version. */
    ROSE_ASSERT(p_e_lmajor <= 0xffff && p_e_lminor <= 0xffff);
    p_exec_format->set_version((p_e_lmajor << 16) | p_e_lminor);
    p_exec_format->set_is_current_version(true); /*FIXME*/

    /* Target architecture */
    switch (p_e_cpu_type) {
      case 0x0000:
        set_isa(ISA_UNSPECIFIED);
        break;
      case 0x014c:
        set_isa(ISA_IA32_386);
        break;
      case 0x014d:
        set_isa(ISA_IA32_486);
        break;
      case 0x014e:
        set_isa(ISA_IA32_Pentium);
        break;
      case 0x0162:
        set_isa(ISA_MIPS_MarkI); /* R2000, R3000 */
        break;
      case 0x0163:
        set_isa(ISA_MIPS_MarkII); /* R6000 */
        break;
      case 0x0166:
        set_isa(ISA_MIPS_MarkIII); /* R4000 */
        break;
      case 0x01a2: /*Hitachi SH3*/
      case 0x01a3: /*Hitachi SH3 with FPU*/
      case 0x01a6: /*Hitachi SH4*/
      case 0x01a8: /*Hitachi SH5*/
        set_isa(ISA_Hitachi_SH);
        break;
      case 0x01c0:
        set_isa(ISA_ARM_Family);
        break;
      case 0x01d3:
        set_isa(ISA_Matsushita_AM33);
        break;
      case 0x01f0: /*w/o FPU*/
      case 0x01f1: /*with FPU*/
        set_isa(ISA_PowerPC);
        break;
      case 0x0200:
        set_isa(ISA_IA64_Family);
        break;
      case 0x0266:
        set_isa(ISA_MIPS_16);
        break;
      case 0x0366:
        set_isa(ISA_MIPS_FPU);
        break;
      case 0x0466:
        set_isa(ISA_MIPS_16FPU);
        break;
      case 0x0ebc:
        set_isa(ISA_EFI_ByteCode);
        break;
      case 0x8664:
        set_isa(ISA_X8664_Family);
        break;
      case 0x9041:
        set_isa(ISA_Mitsubishi_M32R);
        break;
      default:
        fprintf(stderr, "SgAsmPEFileHeader::parse: warning: unrecognized e_cputype = 0x%x (%u)\n", p_e_cpu_type, p_e_cpu_type);
        set_isa(ISA_OTHER);
        break;
    }

    /* The NT loader normally maps this file header at the header's base virtual address. */
    set_mapped_preferred_rva(0);
    set_mapped_actual_va(0); /* will be assigned by BinaryLoader */
    set_mapped_size(p_e_header_size);
    set_mapped_alignment(0);
    set_mapped_rperm(true);
    set_mapped_wperm(false);
    set_mapped_xperm(false);

    /* Entry point. It is bound to a particular section near the end of this function so that if sections are rearranged,
     * extended, etc. the entry point will be updated automatically. */
    add_entry_rva(entry_rva);

    /* The PE File Header has a fixed-size component followed by some number of RVA/Size pairs. The add_rvasize_pairs() will
     * extend  the header and parse the RVA/Size pairs. The count comes straight from the file, so it is clamped first. */
    if (get_e_num_rvasize_pairs() > 1000) {
        fprintf(stderr, "warning: PE File Header contains an unreasonable number of Rva/Size pairs. Limiting to 1000.\n");
        set_e_num_rvasize_pairs(1000);
    }
    add_rvasize_pairs();

    /* Construct the section table and its sections (non-synthesized sections). The specification says that the section table
     * comes after the optional (NT) header, which in turn comes after the fixed part of the PE header. The size of the
     * optional header is indicated in the fixed header. */
    rose_addr_t secttab_offset = get_offset() + sizeof(PEFileHeader_disk) + get_e_nt_hdr_size();
    rose_addr_t secttab_size = get_e_nsections() * sizeof(SgAsmPESectionTableEntry::PESectionTableEntry_disk);
    SgAsmPESectionTable *secttab = new SgAsmPESectionTable(this);
    secttab->set_offset(secttab_offset);
    secttab->set_size(secttab_size);
    secttab->parse();
    set_section_table(secttab);

    /* Parse the COFF symbol table, but only when the header gives both a non-zero offset and a non-zero symbol count. */
    if (get_e_coff_symtab() && get_e_coff_nsyms()) {
        SgAsmCoffSymbolTable *symtab = new SgAsmCoffSymbolTable(this);
        symtab->set_offset(get_e_coff_symtab());
        symtab->parse();
        set_coff_symtab(symtab);
    }

    /* Associate RVAs with particular sections so that if a section's mapping is changed the RVA gets adjusted automatically. */
    ROSE_ASSERT(get_entry_rvas().size()==1);
    get_entry_rvas()[0].bind(this);
    set_e_code_rva(get_e_code_rva().bind(this));
    set_e_data_rva(get_e_data_rva().bind(this));

    /* Turn header-specified tables (RVA/Size pairs) into generic sections */
    create_table_sections();
    return this;
}
Example #2
0
/** Initialize this header with information parsed from the file and construct and parse everything that's reachable from the
 *  header. Since the size of the ELF File Header is determined by the contents of the ELF File Header as stored in the file,
 *  the size of the ELF File Header will be adjusted upward if necessary. The ELF File Header should have been constructed
 *  such that SgAsmElfFileHeader::ctor() was called.
 *
 *  The header is first read using the 32-bit layout and re-read using the 64-bit layout when the file class says so. Reads
 *  that run past the end of the file are zero padded.
 *
 *  Throws FormatError when the magic number is wrong, when the byte order cannot be determined, when the file class is
 *  neither 1 nor 2, or when a non-empty segment or section table declares an entry size smaller than the corresponding
 *  on-disk entry structure.
 *
 *  Returns this header. */
SgAsmElfFileHeader*
SgAsmElfFileHeader::parse()
{
    SgAsmGenericHeader::parse();

    /* Read 32-bit header for now. Might need to re-read as 64-bit later. */
    Elf32FileHeader_disk disk32;
    if (sizeof(disk32)>get_size())
        extend(sizeof(disk32)-get_size());
    read_content_local(0, &disk32, sizeof disk32, false); /*zero pad if we read EOF*/

    /* Check magic number early */
    if (disk32.e_ident_magic[0]!=0x7f || disk32.e_ident_magic[1]!='E' ||
        disk32.e_ident_magic[2]!='L'  || disk32.e_ident_magic[3]!='F')
        throw FormatError("Bad ELF magic number");

    /* File byte order should be 1 or 2. However, we've seen at least one example that left the byte order at zero, implying
     * that it was the native order. We don't have the luxury of decoding the file on the native machine, so in that case we
     * try to infer the byte order by looking at one of the other multi-byte fields of the file. The e_type member is used
     * for this: it is examined exactly as stored (no byte swapping) and whichever of its two bytes looks like the
     * significant one decides the order. The tests below are order dependent. */
    ByteOrder::Endianness sex;
    if (1 == disk32.e_ident_data_encoding) {
        sex = ByteOrder::ORDER_LSB;
    } else if (2==disk32.e_ident_data_encoding) {
        sex = ByteOrder::ORDER_MSB;
    } else if ((disk32.e_type & 0xff00)==0xff00) {
        /* One of the 0xffxx processor-specific flags in native order */
        if ((disk32.e_type & 0x00ff)==0xff)
            throw FormatError("invalid ELF header byte order"); /*ambiguous*/
        sex = ByteOrder::host_order();
    } else if ((disk32.e_type & 0x00ff)==0x00ff) {
        /* One of the 0xffxx processor specific orders in reverse native order */
        sex = ByteOrder::host_order()==ByteOrder::ORDER_LSB ? ByteOrder::ORDER_MSB : ByteOrder::ORDER_LSB;
    } else if ((disk32.e_type & 0xff00)==0) {
        /* One of the low-valued file types in native order */
        if ((disk32.e_type & 0x00ff)==0)
            throw FormatError("invalid ELF header byte order"); /*ambiguous*/
        sex = ByteOrder::host_order();
    } else if ((disk32.e_type & 0x00ff)==0) {
        /* One of the low-valued file types in reverse native order */
        sex = ByteOrder::host_order() == ByteOrder::ORDER_LSB ? ByteOrder::ORDER_MSB : ByteOrder::ORDER_LSB;
    } else {
        /* Ambiguous order */
        throw FormatError("invalid ELF header byte order");
    }
    ROSE_ASSERT(p_exec_format != NULL);
    p_exec_format->set_sex(sex);
    p_e_ident_data_encoding = disk32.e_ident_data_encoding; /*save original value*/

    /* Decode header to native format. The table entry sizes come from the file, so an entry size that is too small is
     * reported as a format error rather than asserted; the "extra" size is whatever each entry has beyond the on-disk
     * structure. */
    rose_rva_t entry_rva, sectab_rva, segtab_rva;
    if (1 == disk32.e_ident_file_class) {
        p_exec_format->set_word_size(4);

        p_e_ident_padding.clear();
        for (size_t i=0; i<sizeof(disk32.e_ident_padding); i++)
             p_e_ident_padding.push_back(disk32.e_ident_padding[i]);

        p_e_ident_file_class    = ByteOrder::disk_to_host(sex, disk32.e_ident_file_class);
        p_e_ident_file_version  = ByteOrder::disk_to_host(sex, disk32.e_ident_file_version);
        p_e_type                = ByteOrder::disk_to_host(sex, disk32.e_type);
        p_e_machine             = ByteOrder::disk_to_host(sex, disk32.e_machine);
        p_exec_format->set_version(ByteOrder::disk_to_host(sex, disk32.e_version));
        entry_rva               = ByteOrder::disk_to_host(sex, disk32.e_entry);
        segtab_rva              = ByteOrder::disk_to_host(sex, disk32.e_phoff);
        sectab_rva              = ByteOrder::disk_to_host(sex, disk32.e_shoff);
        p_e_flags               = ByteOrder::disk_to_host(sex, disk32.e_flags);
        p_e_ehsize              = ByteOrder::disk_to_host(sex, disk32.e_ehsize);

        p_e_phnum               = ByteOrder::disk_to_host(sex, disk32.e_phnum);
        if (p_e_phnum>0) {
            p_phextrasz         = ByteOrder::disk_to_host(sex, disk32.e_phentsize);
            if (p_phextrasz<sizeof(SgAsmElfSegmentTableEntry::Elf32SegmentTableEntry_disk))
                throw FormatError("invalid ELF segment table entry size");
            p_phextrasz -= sizeof(SgAsmElfSegmentTableEntry::Elf32SegmentTableEntry_disk);
        } else {
            p_phextrasz = 0;
        }

        p_e_shnum               = ByteOrder::disk_to_host(sex, disk32.e_shnum);
        if (p_e_shnum>0) {
            p_shextrasz         = ByteOrder::disk_to_host(sex, disk32.e_shentsize);
            if (p_shextrasz<sizeof(SgAsmElfSectionTableEntry::Elf32SectionTableEntry_disk))
                throw FormatError("invalid ELF section table entry size");
            p_shextrasz -= sizeof(SgAsmElfSectionTableEntry::Elf32SectionTableEntry_disk);
        } else {
            p_shextrasz = 0;
        }

        p_e_shstrndx            = ByteOrder::disk_to_host(sex, disk32.e_shstrndx);
    } else if (2 == disk32.e_ident_file_class) {
        /* We guessed wrong. This is a 64-bit header, not 32-bit. */
        p_exec_format->set_word_size(8);
        Elf64FileHeader_disk disk64;
        if (sizeof(disk64)>get_size())
            extend(sizeof(disk64)-get_size());
        read_content_local(0, &disk64, sizeof disk64, false); /*zero pad at EOF*/

        p_e_ident_padding.clear();
        for (size_t i=0; i<sizeof(disk64.e_ident_padding); i++)
             p_e_ident_padding.push_back(disk64.e_ident_padding[i]);

        p_e_ident_file_class    = ByteOrder::disk_to_host(sex, disk64.e_ident_file_class);
        p_e_ident_file_version  = ByteOrder::disk_to_host(sex, disk64.e_ident_file_version);
        p_e_type                = ByteOrder::disk_to_host(sex, disk64.e_type);
        p_e_machine             = ByteOrder::disk_to_host(sex, disk64.e_machine);
        p_exec_format->set_version(ByteOrder::disk_to_host(sex, disk64.e_version));
        entry_rva               = ByteOrder::disk_to_host(sex, disk64.e_entry);
        segtab_rva              = ByteOrder::disk_to_host(sex, disk64.e_phoff);
        sectab_rva              = ByteOrder::disk_to_host(sex, disk64.e_shoff);
        p_e_flags               = ByteOrder::disk_to_host(sex, disk64.e_flags);
        p_e_ehsize              = ByteOrder::disk_to_host(sex, disk64.e_ehsize);

        p_e_phnum               = ByteOrder::disk_to_host(sex, disk64.e_phnum);
        if (p_e_phnum>0) {
            p_phextrasz         = ByteOrder::disk_to_host(sex, disk64.e_phentsize);
            if (p_phextrasz<sizeof(SgAsmElfSegmentTableEntry::Elf64SegmentTableEntry_disk))
                throw FormatError("invalid ELF segment table entry size");
            p_phextrasz -= sizeof(SgAsmElfSegmentTableEntry::Elf64SegmentTableEntry_disk);
        } else {
            p_phextrasz = 0;
        }

        p_e_shnum               = ByteOrder::disk_to_host(sex, disk64.e_shnum);
        if (p_e_shnum>0) {
            p_shextrasz         = ByteOrder::disk_to_host(sex, disk64.e_shentsize);
            if (p_shextrasz<sizeof(SgAsmElfSectionTableEntry::Elf64SectionTableEntry_disk))
                throw FormatError("invalid ELF section table entry size");
            p_shextrasz -= sizeof(SgAsmElfSectionTableEntry::Elf64SectionTableEntry_disk);
        } else {
            p_shextrasz = 0;
        }

        p_e_shstrndx            = ByteOrder::disk_to_host(sex, disk64.e_shstrndx);
    } else {
        throw FormatError("invalid ELF header file class");
    }

    /* Magic number. disk32 and disk64 have header bytes at same offset */
    p_magic.clear();
    for (size_t i=0; i<sizeof(disk32.e_ident_magic); i++)
        p_magic.push_back(disk32.e_ident_magic[i]);

    /* File format. The purpose is derived from the decoded e_type value. */
    p_exec_format->set_family(FAMILY_ELF);
    switch (p_e_type) {
      case 0:
        p_exec_format->set_purpose(PURPOSE_UNSPECIFIED);
        break;
      case 1:
      case 3:
        p_exec_format->set_purpose(PURPOSE_LIBRARY);
        break;
      case 2:
        p_exec_format->set_purpose(PURPOSE_EXECUTABLE);
        break;
      case 4:
        p_exec_format->set_purpose(PURPOSE_CORE_DUMP);
        break;
      default:
        if (p_e_type >= 0xff00 && p_e_type <= 0xffff) {
            p_exec_format->set_purpose(PURPOSE_PROC_SPECIFIC);
        } else {
            p_exec_format->set_purpose(PURPOSE_OTHER);
        }
        break;
    }
    p_exec_format->set_is_current_version(1 == p_exec_format->get_version());
    p_exec_format->set_abi(ABI_UNSPECIFIED);                 /* ELF specifies a target architecture rather than an ABI */
    p_exec_format->set_abi_version(0);

    /* Target architecture */
    set_isa(machine_to_isa(p_e_machine));

    /* Read the optional section and segment tables and the sections to which they point. An empty section or segment table is
     * treated as if it doesn't exist. This seems to be compatible with the loader since the 45-byte "tiny" ELF executable
     * stores a zero in the e_shnum member and a completely invalid value in the e_shoff member. */
    if (sectab_rva>0 && get_e_shnum()>0) {
        SgAsmElfSectionTable *tab = new SgAsmElfSectionTable(this);
        tab->set_offset(sectab_rva.get_rva());
        tab->parse();
    }
    if (segtab_rva>0 && get_e_phnum()>0) {
        SgAsmElfSegmentTable *tab = new SgAsmElfSegmentTable(this);
        tab->set_offset(segtab_rva.get_rva());
        tab->parse();
    }

    /* Associate the entry point with a particular section. This is done after the tables above have been parsed. */
    entry_rva.bind(this);
    add_entry_rva(entry_rva);

    return this;
}