// process segments from interpreter (ld.so or ld-linux.so) static bool read_interp_segments(struct ps_prochandle* ph) { ELF_EHDR interp_ehdr; if (read_elf_header(ph->core->interp_fd, &interp_ehdr) != true) { print_debug("interpreter is not a valid ELF file\n"); return false; } if (read_lib_segments(ph, ph->core->interp_fd, &interp_ehdr, ph->core->ld_base_addr) != true) { print_debug("can't read segments of interpreter\n"); return false; } return true; }
/*
 * Open `filename` with open_exec() and read its ELF header into bin->hdr.
 *
 * bin->fd is set to -1 first, then to the descriptor returned by open_exec().
 * Returns 0 on success, or the negative value returned by open_exec() or
 * read_elf_header() on failure. If only the header read fails, bin->fd still
 * holds the open descriptor.
 */
int open_elf(const char *filename, elf_bin_t *bin)
{
	int rc;

	bin->fd = -1;

	rc = open_exec(filename);
	if (rc < 0)
		return rc;
	bin->fd = rc;

	rc = read_elf_header(bin->fd, &bin->hdr);
	return (rc < 0) ? rc : 0;
}
/*
 * Main entry point of elf-parser.
 *
 * Usage: elf-parser <ELF-file>
 *
 * Opens the file, reads the ELF header and, depending on whether the file is
 * a 32-bit or 64-bit ELF, prints the header, section headers and symbols,
 * saves the .text section and disassembles it.
 *
 * Returns 0 on the normal path and on the usage / open / not-an-ELF paths.
 * Returns 1 when the section-header table cannot be allocated. The file
 * descriptor is not closed explicitly; it is released when the process exits.
 */
int32_t main(int32_t argc, char *argv[])
{
	int32_t fd;
	Elf32_Ehdr eh;		/* elf-header is fixed size */

	if (argc != 2) {
		printf("Usage: elf-parser <ELF-file>\n");
		return 0;
	}

	fd = open(argv[1], O_RDONLY|O_SYNC);
	if (fd < 0) {
		printf("Error %d Unable to open %s\n", fd, argv[1]);
		return 0;
	}

	/* ELF header : at start of file */
	read_elf_header(fd, &eh);
	if (!is_ELF(eh)) {
		return 0;
	}

	if (is64Bit(eh)) {
		Elf64_Ehdr eh64;	/* elf-header is fixed size */
		Elf64_Shdr *sh_tbl;	/* section-header table is variable size */

		read_elf_header64(fd, &eh64);
		print_elf_header64(eh64);

		/* Section header table : */
		sh_tbl = malloc(eh64.e_shentsize * eh64.e_shnum);
		if (!sh_tbl) {
			printf("Failed to allocate %d bytes\n",
					(eh64.e_shentsize * eh64.e_shnum));
			/* Nothing below can work without the table. */
			return 1;
		}
		read_section_header_table64(fd, eh64, sh_tbl);
		print_section_headers64(fd, eh64, sh_tbl);

		/* Symbol tables :
		 * sh_tbl[i].sh_type
		 * |`- SHT_SYMTAB
		 *  `- SHT_DYNSYM
		 */
		print_symbols64(fd, eh64, sh_tbl);

		/* Save .text section as text.S */
		save_text_section64(fd, eh64, sh_tbl);

		/* Disassemble .text section
		 * Logs asm instructions to stdout
		 * Currently supports ARMv7
		 */
		disassemble64(fd, eh64, sh_tbl);

		free(sh_tbl);
	} else {
		Elf32_Shdr *sh_tbl;	/* section-header table is variable size */

		print_elf_header(eh);

		/* Section header table : */
		sh_tbl = malloc(eh.e_shentsize * eh.e_shnum);
		if (!sh_tbl) {
			printf("Failed to allocate %d bytes\n",
					(eh.e_shentsize * eh.e_shnum));
			/* Nothing below can work without the table. */
			return 1;
		}
		read_section_header_table(fd, eh, sh_tbl);
		print_section_headers(fd, eh, sh_tbl);

		/* Symbol tables :
		 * sh_tbl[i].sh_type
		 * |`- SHT_SYMTAB
		 *  `- SHT_DYNSYM
		 */
		print_symbols(fd, eh, sh_tbl);

		/* Save .text section as text.S */
		save_text_section(fd, eh, sh_tbl);

		/* Disassemble .text section
		 * Logs asm instructions to stdout
		 * Currently supports ARMv7
		 */
		disassemble(fd, eh, sh_tbl);

		free(sh_tbl);
	}

	return 0;
}
// the one and only one exposed stuff from this file struct ps_prochandle* Pgrab_core(const char* exec_file, const char* core_file) { ELF_EHDR core_ehdr; ELF_EHDR exec_ehdr; ELF_EHDR lib_ehdr; struct ps_prochandle* ph = (struct ps_prochandle*) calloc(1, sizeof(struct ps_prochandle)); if (ph == NULL) { print_debug("can't allocate ps_prochandle\n"); return NULL; } if ((ph->core = (struct core_data*) calloc(1, sizeof(struct core_data))) == NULL) { free(ph); print_debug("can't allocate ps_prochandle\n"); return NULL; } // initialize ph ph->ops = &core_ops; ph->core->core_fd = -1; ph->core->exec_fd = -1; ph->core->interp_fd = -1; // open the core file if ((ph->core->core_fd = open(core_file, O_RDONLY)) < 0) { print_debug("can't open core file\n"); goto err; } // read core file ELF header if (read_elf_header(ph->core->core_fd, &core_ehdr) != true || core_ehdr.e_type != ET_CORE) { print_debug("core file is not a valid ELF ET_CORE file\n"); goto err; } if ((ph->core->exec_fd = open(exec_file, O_RDONLY)) < 0) { print_debug("can't open executable file\n"); goto err; } if (read_elf_header(ph->core->exec_fd, &exec_ehdr) != true || exec_ehdr.e_type != ET_EXEC) { print_debug("executable file is not a valid ELF ET_EXEC file\n"); goto err; } // process core file segments if (read_core_segments(ph, &core_ehdr) != true) goto err; // process exec file segments if (read_exec_segments(ph, &exec_ehdr) != true) goto err; // exec file is also treated like a shared object for symbol search if (add_lib_info_fd(ph, exec_file, ph->core->exec_fd, (uintptr_t)0 + find_base_address(ph->core->exec_fd, &exec_ehdr)) == NULL) goto err; // allocate and sort maps into map_array, we need to do this // here because read_shared_lib_info needs to read from debuggee // address space if (sort_map_array(ph) != true) goto err; if (read_shared_lib_info(ph) != true) goto err; // sort again because we have added more mappings from shared objects if (sort_map_array(ph) != true) goto err; if 
(init_classsharing_workaround(ph) != true) goto err; return ph; err: Prelease(ph); return NULL; }
// read shared library info from runtime linker's data structures. // This work is done by librtlb_db in Solaris static bool read_shared_lib_info(struct ps_prochandle* ph) { uintptr_t addr = ph->core->dynamic_addr; uintptr_t debug_base; uintptr_t first_link_map_addr; uintptr_t ld_base_addr; uintptr_t link_map_addr; uintptr_t lib_base_diff; uintptr_t lib_base; uintptr_t lib_name_addr; char lib_name[BUF_SIZE]; ELF_DYN dyn; ELF_EHDR elf_ehdr; int lib_fd; // _DYNAMIC has information of the form // [tag] [data] [tag] [data] ..... // Both tag and data are pointer sized. // We look for dynamic info with DT_DEBUG. This has shared object info. // refer to struct r_debug in link.h dyn.d_tag = DT_NULL; while (dyn.d_tag != DT_DEBUG) { if (ps_pdread(ph, (psaddr_t) addr, &dyn, sizeof(ELF_DYN)) != PS_OK) { print_debug("can't read debug info from _DYNAMIC\n"); return false; } addr += sizeof(ELF_DYN); } // we have got Dyn entry with DT_DEBUG debug_base = dyn.d_un.d_ptr; // at debug_base we have struct r_debug. This has first link map in r_map field if (ps_pdread(ph, (psaddr_t) debug_base + FIRST_LINK_MAP_OFFSET, &first_link_map_addr, sizeof(uintptr_t)) != PS_OK) { print_debug("can't read first link map address\n"); return false; } // read ld_base address from struct r_debug if (ps_pdread(ph, (psaddr_t) debug_base + LD_BASE_OFFSET, &ld_base_addr, sizeof(uintptr_t)) != PS_OK) { print_debug("can't read ld base address\n"); return false; } ph->core->ld_base_addr = ld_base_addr; print_debug("interpreter base address is 0x%lx\n", ld_base_addr); // now read segments from interp (i.e ld.so or ld-linux.so) if (read_interp_segments(ph) != true) return false; // after adding interpreter (ld.so) mappings sort again if (sort_map_array(ph) != true) return false; print_debug("first link map is at 0x%lx\n", first_link_map_addr); link_map_addr = first_link_map_addr; while (link_map_addr != 0) { // read library base address of the .so. 
Note that even though <sys/link.h> calls // link_map->l_addr as "base address", this is * not * really base virtual // address of the shared object. This is actually the difference b/w the virtual // address mentioned in shared object and the actual virtual base where runtime // linker loaded it. We use "base diff" in read_lib_segments call below. if (ps_pdread(ph, (psaddr_t) link_map_addr + LINK_MAP_ADDR_OFFSET, &lib_base_diff, sizeof(uintptr_t)) != PS_OK) { print_debug("can't read shared object base address diff\n"); return false; } // read address of the name if (ps_pdread(ph, (psaddr_t) link_map_addr + LINK_MAP_NAME_OFFSET, &lib_name_addr, sizeof(uintptr_t)) != PS_OK) { print_debug("can't read address of shared object name\n"); return false; } // read name of the shared object lib_name[0] = '\0'; if (lib_name_addr != 0 && read_string(ph, (uintptr_t) lib_name_addr, lib_name, sizeof(lib_name)) != true) { print_debug("can't read shared object name\n"); // don't let failure to read the name stop opening the file. If something is really wrong // it will fail later. } if (lib_name[0] != '\0') { // ignore empty lib names lib_fd = pathmap_open(lib_name); if (lib_fd < 0) { print_debug("can't open shared object %s\n", lib_name); // continue with other libraries... } else { if (read_elf_header(lib_fd, &elf_ehdr)) { lib_base = lib_base_diff + find_base_address(lib_fd, &elf_ehdr); print_debug("reading library %s @ 0x%lx [ 0x%lx ]\n", lib_name, lib_base, lib_base_diff); // while adding library mappings we need to use "base difference". if (! read_lib_segments(ph, lib_fd, &elf_ehdr, lib_base_diff)) { print_debug("can't read shared object's segments\n"); close(lib_fd); return false; } add_lib_info_fd(ph, lib_name, lib_fd, lib_base); // Map info is added for the library (lib_name) so // we need to re-sort it before calling the p_pdread. 
if (sort_map_array(ph) != true) return false; } else { print_debug("can't read ELF header for shared object %s\n", lib_name); close(lib_fd); // continue with other libraries... } } } // read next link_map address if (ps_pdread(ph, (psaddr_t) link_map_addr + LINK_MAP_NEXT_OFFSET, &link_map_addr, sizeof(uintptr_t)) != PS_OK) { print_debug("can't read next link in link_map\n"); return false; } } return true; }
/* Map the built-in lisp core sections. NOTE! We need to do this without using malloc because the memory layout is not set until some time after this is done. */ void map_core_sections(const char *exec_name) { int exec_fd; Elf_Shdr sh; /* A section header entry. */ Elf_Shdr strsecent; char nambuf[10]; int soff; int strsecoff; /* File offset to string table section. */ int sections_remaining = 3; int i, j; extern int image_dynamic_space_size, image_static_space_size, image_read_only_space_size; if (!(exec_fd = open(exec_name, O_RDONLY))) { perror("Can't open executable!"); exit(-1); } read_elf_header(exec_fd, &eh); /* Find the section name string section. Save its file offset. */ soff = eh.e_shoff + eh.e_shstrndx * eh.e_shentsize; elseek(exec_fd, soff, __func__); read_section_header_entry(exec_fd, &strsecent); strsecoff = strsecent.sh_offset; for (i = 0; i < eh.e_shnum && sections_remaining > 0; i++) { /* Read an entry from the section header table. */ elseek(exec_fd, eh.e_shoff + i * eh.e_shentsize, __func__); read_section_header_entry(exec_fd, &sh); /* Read the name from the string section. */ elseek(exec_fd, strsecoff + sh.sh_name, __func__); eread(exec_fd, nambuf, 6, __func__); if (sh.sh_type == SHT_PROGBITS) { /* See if this section is one of the lisp core sections. */ for (j = 0; j < 3; j++) { if (!strncmp(nambuf, section_names[j], 6)) { os_vm_address_t addr; /* * Found a core section. Map it! * * Although the segment may contain the correct * address for the start of the segment, we don't * care. We infer the address from the segment * name. (The names better be unique!!!!) This * approach allows for a possibly simpler linking * operation because we don't have to figure out * how to get the linker to give segments the * correct address. 
*/ addr = section_addr[j]; if ((os_vm_address_t) os_map(exec_fd, sh.sh_offset, addr, sh.sh_size) == (os_vm_address_t) -1) { fprintf(stderr, "%s: Can't map section %s\n", __func__, section_names[j]); exit(-1); } switch(j) { case 0: /* Dynamic space. */ /* Dynamic space variables are set in lisp.c. */ image_dynamic_space_size = sh.sh_size; break; case 1: /* Static space. */ image_static_space_size = sh.sh_size; break; case 2: /* Read-only space. */ image_read_only_space_size = sh.sh_size; break; default: /* Should never get here. */ abort(); break; } sections_remaining--; /* Found a core section, don't check the other core section names. */ break; } } } } close(exec_fd); if (sections_remaining != 0) { fprintf(stderr, "Couldn't map all core sections! Exiting!\n"); exit(-1); } }
// read symbol table from given fd. struct symtab* build_symtab(int fd) { ELF_EHDR ehdr; struct symtab* symtab = NULL; // Reading of elf header struct elf_section *scn_cache = NULL; int cnt = 0; ELF_SHDR* shbuf = NULL; ELF_SHDR* cursct = NULL; ELF_PHDR* phbuf = NULL; int symtab_found = 0; int dynsym_found = 0; uint32_t symsection = SHT_SYMTAB; uintptr_t baseaddr = (uintptr_t)-1; lseek(fd, (off_t)0L, SEEK_SET); if (! read_elf_header(fd, &ehdr)) { // not an elf return NULL; } // read ELF header if ((shbuf = read_section_header_table(fd, &ehdr)) == NULL) { goto quit; } baseaddr = find_base_address(fd, &ehdr); scn_cache = calloc(ehdr.e_shnum, sizeof(*scn_cache)); if (scn_cache == NULL) { goto quit; } for (cursct = shbuf, cnt = 0; cnt < ehdr.e_shnum; cnt++) { scn_cache[cnt].c_shdr = cursct; if (cursct->sh_type == SHT_SYMTAB || cursct->sh_type == SHT_STRTAB || cursct->sh_type == SHT_DYNSYM) { if ( (scn_cache[cnt].c_data = read_section_data(fd, &ehdr, cursct)) == NULL) { goto quit; } } if (cursct->sh_type == SHT_SYMTAB) symtab_found++; if (cursct->sh_type == SHT_DYNSYM) dynsym_found++; cursct++; } if (!symtab_found && dynsym_found) symsection = SHT_DYNSYM; for (cnt = 1; cnt < ehdr.e_shnum; cnt++) { ELF_SHDR *shdr = scn_cache[cnt].c_shdr; if (shdr->sh_type == symsection) { ELF_SYM *syms; int j, n; size_t size; // FIXME: there could be multiple data buffers associated with the // same ELF section. Here we can handle only one buffer. See man page // for elf_getdata on Solaris. // guarantee(symtab == NULL, "multiple symtab"); symtab = calloc(1, sizeof(*symtab)); if (symtab == NULL) { goto quit; } // the symbol table syms = (ELF_SYM *)scn_cache[cnt].c_data; // number of symbols n = shdr->sh_size / shdr->sh_entsize; // create hash table, we use berkeley db to // manipulate the hash table. 
symtab->hash_table = dbopen(NULL, O_CREAT | O_RDWR, 0600, DB_HASH, NULL); // guarantee(symtab->hash_table, "unexpected failure: dbopen"); if (symtab->hash_table == NULL) goto bad; // shdr->sh_link points to the section that contains the actual strings // for symbol names. the st_name field in ELF_SYM is just the // string table index. we make a copy of the string table so the // strings will not be destroyed by elf_end. size = scn_cache[shdr->sh_link].c_shdr->sh_size; symtab->strs = malloc(size); if (symtab->strs == NULL) goto bad; memcpy(symtab->strs, scn_cache[shdr->sh_link].c_data, size); // allocate memory for storing symbol offset and size; symtab->num_symbols = n; symtab->symbols = calloc(n , sizeof(*symtab->symbols)); if (symtab->symbols == NULL) goto bad; // copy symbols info our symtab and enter them info the hash table for (j = 0; j < n; j++, syms++) { DBT key, value; char *sym_name = symtab->strs + syms->st_name; // skip non-object and non-function symbols int st_type = ELF_ST_TYPE(syms->st_info); if ( st_type != STT_FUNC && st_type != STT_OBJECT) continue; // skip empty strings and undefined symbols if (*sym_name == '\0' || syms->st_shndx == SHN_UNDEF) continue; symtab->symbols[j].name = sym_name; symtab->symbols[j].offset = syms->st_value - baseaddr; symtab->symbols[j].size = syms->st_size; key.data = sym_name; key.size = strlen(sym_name) + 1; value.data = &(symtab->symbols[j]); value.size = sizeof(symtab_symbol); (*symtab->hash_table->put)(symtab->hash_table, &key, &value, 0); } } } goto quit; bad: destroy_symtab(symtab); symtab = NULL; quit: if (shbuf) free(shbuf); if (phbuf) free(phbuf); if (scn_cache) { for (cnt = 0; cnt < ehdr.e_shnum; cnt++) { if (scn_cache[cnt].c_data != NULL) { free(scn_cache[cnt].c_data); } } free(scn_cache); } return symtab; }
void elf_mod_symload(int strtablen) { Elf_Ehdr ehdr; char *shstrtab; struct elf_section *head, *s; char *symbuf, *strbuf; /* * Seek to the text offset to start loading... */ if (lseek(modfd, 0, SEEK_SET) == -1) err(12, "lseek"); if (read_elf_header(modfd, &ehdr) < 0) return; shstrtab = read_shstring_table(modfd, &ehdr); read_sections(modfd, &ehdr, shstrtab, &head); for (s = head; s; s = s->next) { struct elf_section *p = s; if ((p->type == SHT_SYMTAB) || (p->type == SHT_DYNSYM)) { if (debug) fprintf(stderr, "loading `%s': addr = %p, " "size = %#lx\n", s->name, s->addr, (u_long)s->size); /* * Seek to the file offset to start loading it... */ if (lseek(modfd, p->offset, SEEK_SET) == -1) err(12, "lseek"); symbuf = malloc(p->size); if (symbuf == 0) err(13, "malloc"); if (read(modfd, symbuf, p->size) != p->size) err(14, "read"); loadsym(symbuf, p->size); free(symbuf); } } for (s = head; s; s = s->next) { struct elf_section *p = s; if ((p->type == SHT_STRTAB) && (strcmp(p->name, ".strtab") == 0 )) { if (debug) fprintf(stderr, "loading `%s': addr = %p, " "size = %#lx\n", s->name, s->addr, (u_long)s->size); /* * Seek to the file offset to start loading it... */ if (lseek(modfd, p->offset, SEEK_SET) == -1) err(12, "lseek"); strbuf = malloc(p->size); if (strbuf == 0) err(13, "malloc"); if (read(modfd, strbuf, p->size) != p->size) err(14, "read"); loadsym(strbuf, p->size); free(strbuf); } } free(shstrtab); free_sections(head); return; }
/* load a prelinked module; returns entry point */ void * elf_mod_load(int fd) { Elf_Ehdr ehdr; size_t zero_size = 0; size_t b; ssize_t n; char *shstrtab; struct elf_section *head, *s; char buf[10 * BUFSIZ]; void *addr = NULL; if (read_elf_header(fd, &ehdr) < 0) return NULL; shstrtab = read_shstring_table(fd, &ehdr); read_sections(fd, &ehdr, shstrtab, &head); for (s = head; s; s = s->next) { if (s->type != SHT_STRTAB && s->type != SHT_SYMTAB && s->type != SHT_DYNSYM) { if (debug) fprintf(stderr, "loading `%s': addr = %p, " "size = %#lx\n", s->name, s->addr, (u_long)s->size); if (s->type == SHT_NOBITS) { /* skip some space */ zero_size += s->size; } else { if (addr != NULL) { /* * if there is a gap in the prelinked * module, transfer some empty space. */ zero_size += (char*)s->addr - (char*)addr; } if (zero_size) { loadspace(zero_size); zero_size = 0; } b = s->size; if (lseek(fd, s->offset, SEEK_SET) == -1) err(1, "lseek"); while (b) { n = read(fd, buf, MIN(b, sizeof(buf))); if (n == 0) errx(1, "unexpected EOF"); if (n < 0) err(1, "read"); loadbuf(buf, n); b -= n; } addr = (char*)s->addr + s->size; } } } if (zero_size) loadspace(zero_size); free_sections(head); free(shstrtab); return (void *)ehdr.e_entry; }
/* return size needed by the module */ int elf_mod_sizes(int fd, size_t *modsize, int *strtablen, struct lmc_resrv *resrvp, struct stat *sp) { Elf_Ehdr ehdr; ssize_t off = 0; size_t data_hole = 0; char *shstrtab, *strtab; struct elf_section *head, *s, *stab; if (read_elf_header(fd, &ehdr) < 0) return -1; shstrtab = read_shstring_table(fd, &ehdr); read_sections(fd, &ehdr, shstrtab, &head); for (s = head; s; s = s->next) { /* XXX impossible! */ if (s->type == SHT_STRTAB && s->type == SHT_SYMTAB && s->type == SHT_DYNSYM) continue; if (debug) fprintf(stderr, "%s: addr = %p size = %#lx align = %#lx\n", s->name, s->addr, (u_long)s->size, (u_long)s->align); /* * XXX try to get rid of the hole before the data * section that GNU-ld likes to put there */ if (strcmp(s->name, ".data") == 0 && s->addr > (void *)off) { data_offset = roundup(off, s->align); if (debug) fprintf(stderr, ".data section forced to " "offset %p (was %p)\n", (void *)data_offset, s->addr); /* later remove size of compressed hole from off */ data_hole = (ssize_t)s->addr - data_offset; } off = (ssize_t)s->addr + s->size; } off -= data_hole; /* XXX round to pagesize? */ *modsize = roundup(off, sysconf(_SC_PAGESIZE)); /* get string table length */ strtab = read_string_table(fd, head, strtablen); free(shstrtab); free(strtab); /* get symbol table sections */ get_symtab(&head); stab = head; resrvp->sym_symsize = 0; while (stab) { resrvp->sym_symsize += stab->size; stab = stab->next; } resrvp->sym_size = resrvp->sym_symsize + *strtablen; free_sections(head); return (0); }