internal_function
__libdw_fde_by_offset (Dwarf_CFI *cache, Dwarf_Off offset)
{
  /* Read the CFI entry located at OFFSET in CACHE's data and return the
     interned FDE for it.  Returns NULL when the entry cannot be read, when
     it turns out to be a CIE, or when interning the FDE fails.  */
  Dwarf_CFI_Entry cfi_entry;
  Dwarf_Off following_offset;
  int status = INTUSE(dwarf_next_cfi) (cache->e_ident, &cache->data->d,
				       CFI_IS_EH (cache), offset,
				       &following_offset, &cfi_entry);

  /* A negative status yields NULL without setting an error code here
     (presumably the callee has already recorded one -- confirm).  */
  if (status < 0)
    return NULL;

  /* A positive status, or an entry that is a CIE rather than an FDE,
     is reported as invalid DWARF.  */
  if (status > 0 || unlikely (dwarf_cfi_cie_p (&cfi_entry)))
    {
      __libdw_seterrno (DWARF_E_INVALID_DWARF);
      return NULL;
    }

  /* We have a new FDE to consider.  Both failure values of intern_fde
     are collapsed into NULL for the caller.  */
  struct dwarf_fde *interned = intern_fde (cache, &cfi_entry.fde);
  if (interned == NULL || interned == (void *) -1l)
    return NULL;

  /* If this entry is the one the sequential reader would have read next,
     advance the cache's read position past it.  */
  if (offset == cache->next_offset)
    cache->next_offset = following_offset;

  return interned;
}
/* Load ELF 'filename', parse the .eh_frame contents, and for each entry in the
 * second argument check whether its address is contained in the range of some
 * Frame Description Entry. If it is, fill in the function range of the
 * entry. In other words, try to assign start address and length of function
 * corresponding to each backtrace entry. We'll need that for the disassembly.
 *
 * filename - name of the ELF file, used only in log messages here
 * entries  - list of struct backtrace_entry; matching entries get their
 *            function_initial_loc and function_length members filled in
 * e        - libelf handle of the file
 *
 * Returns a hash table that maps each FDE start address (initial location
 * plus executable base) to the FDE address range, or NULL on error. Keys are
 * heap-allocated and owned by the table; values are stored directly in the
 * pointer.
 *
 * Fails quietly - we should still be able to use the build ids.
 *
 * I wonder if this is really better than parsing eu-readelf text output.
 */
static GHashTable *elf_iterate_fdes(const char *filename, GList *entries, Elf *e)
{
    const unsigned char *e_ident;
    Elf_Data *scn_data;
    GElf_Shdr shdr;
    GElf_Phdr phdr;
    size_t phnum;
    GHashTable *retval = NULL; /* NULL = error */

    e_ident = (unsigned char *)elf_getident(e, NULL);
    if (e_ident == NULL)
    {
        VERB1 log_elf_error("elf_getident", filename);
        return NULL;
    }

    /* Look up the .eh_frame section */
    if (!xelf_section_by_name(e, ".eh_frame", filename, &scn_data, &shdr))
    {
        VERB1 log("Section .eh_frame not found in %s", filename);
        return NULL;
    }

    /* Get the address at which the executable segment is loaded. If the
     * .eh_frame addresses are absolute, this is used to convert them to
     * relative to the beginning of executable segment. We are looking for the
     * first LOAD segment that is executable, I hope this is sufficient.
     */
    if (elf_getphdrnum(e, &phnum) != 0)
    {
        VERB1 log_elf_error("elf_getphdrnum", filename);
        return NULL;
    }

    uintptr_t exec_base = 0;
    bool base_found = false;
    for (size_t i = 0; i < phnum; i++)
    {
        if (gelf_getphdr(e, (int)i, &phdr) != &phdr)
        {
            VERB1 log_elf_error("gelf_getphdr", filename);
            return NULL;
        }

        if (phdr.p_type == PT_LOAD && (phdr.p_flags & PF_X))
        {
            exec_base = (uintptr_t)phdr.p_vaddr;
            base_found = true;
            break;
        }
    }

    if (!base_found)
    {
        VERB1 log("Can't determine executable base for '%s'", filename);
        return NULL;
    }

    VERB2 log("Executable base: %jx", (uintmax_t)exec_base);

    /* We now have a handle to .eh_frame data. We'll use dwarf_next_cfi to
     * iterate through all FDEs looking for those matching the addresses we
     * have.
     * Some info on .eh_frame can be found at http://www.airs.com/blog/archives/460
     * and in DWARF documentation for .debug_frame. The initial_location and
     * address_range decoding is 'inspired' by elfutils source.
     * XXX: If this linear scan is too slow, we can do binary search on
     * .eh_frame_hdr -- see http://www.airs.com/blog/archives/462
     */
    int ret;
    Dwarf_Off cfi_offset;
    Dwarf_Off cfi_offset_next = 0;
    Dwarf_CFI_Entry cfi;

    struct cie_encoding {
        Dwarf_Off cie_offset;
        int ptr_len;
        bool pcrel;
    } *cie;
    GList *cie_list = NULL;

    /* Init hash table
     * keys are pointers to integers which we allocate with malloc
     * values stored directly */
    GHashTable *hash = g_hash_table_new_full(g_int64_hash, g_int64_equal, free, NULL);

    while (1)
    {
        cfi_offset = cfi_offset_next;
        ret = dwarf_next_cfi(e_ident, scn_data, 1, cfi_offset, &cfi_offset_next, &cfi);

        if (ret > 0)
        {
            /* We're at the end. */
            break;
        }

        if (ret < 0)
        {
            /* Error. If cfi_offset_next was updated, we may skip the
             * erroneous cfi. */
            if (cfi_offset_next > cfi_offset)
            {
                continue;
            }
            VERB1 log("dwarf_next_cfi failed for %s: %s", filename, dwarf_errmsg(-1));
            goto ret_free;
        }

        if (dwarf_cfi_cie_p(&cfi))
        {
            /* Current CFI is a CIE. We store its offset and FDE encoding
             * attributes to be used when reading FDEs.
             */

            /* Default FDE encoding (i.e. no R in augmentation string) is
             * DW_EH_PE_absptr.
             */
            cie = btp_mallocz(sizeof(*cie));
            cie->cie_offset = cfi_offset;
            cie->ptr_len = encoded_size(DW_EH_PE_absptr, e_ident);

            /* Search the augmentation data for FDE pointer encoding.
             * Unfortunately, 'P' can come before 'R' (which we are looking
             * for), so we may have to parse the whole thing. See the
             * abovementioned blog post for details.
             */
            const char *aug = cfi.cie.augmentation;
            const uint8_t *augdata = cfi.cie.augmentation_data;
            bool skip_cie = false;
            if (*aug == 'z')
            {
                aug++;
            }
            while (*aug != '\0')
            {
                if (*aug == 'R')
                {
                    cie->ptr_len = encoded_size(*augdata, e_ident);

                    if (cie->ptr_len != 4 && cie->ptr_len != 8)
                    {
                        VERB1 log("Unknown FDE encoding (CIE %jx) in %s",
                                (uintmax_t)cfi_offset, filename);
                        skip_cie = true;
                    }
                    if ((*augdata & 0x70) == DW_EH_PE_pcrel)
                    {
                        cie->pcrel = 1;
                    }
                    break;
                }
                else if (*aug == 'L')
                {
                    /* One byte of augmentation data to step over. */
                    augdata++;
                }
                else if (*aug == 'P')
                {
                    unsigned size = encoded_size(*augdata, e_ident);
                    if (size == 0)
                    {
                        VERB1 log("Unknown size for personality encoding in %s",
                                filename);
                        skip_cie = true;
                        break;
                    }
                    /* Step over the encoding byte and the pointer itself. */
                    augdata += (size + 1);
                }
                else
                {
                    VERB1 log("Unknown augmentation char in %s", filename);
                    skip_cie = true;
                    break;
                }
                aug++;
            }

            if (skip_cie)
            {
                free(cie);
                continue;
            }

            cie_list = g_list_append(cie_list, cie);
        }
        else
        {
            /* Current CFI is an FDE.
             */
            GList *it = cie_list;
            cie = NULL;

            /* Find the CIE data that we should have saved earlier. XXX: We can
             * use hash table/tree to speed up the search, the number of CIEs
             * should usually be very low though. */
            while (it != NULL)
            {
                cie = it->data;

                /* In .eh_frame, CIE_pointer is relative, but libdw converts it
                 * to absolute offset. */
                if (cfi.fde.CIE_pointer == cie->cie_offset)
                {
                    break; /* Found. */
                }

                it = g_list_next(it);
            }

            if (it == NULL)
            {
                VERB1 log("CIE not found for FDE %jx in %s",
                        (uintmax_t)cfi_offset, filename);
                continue;
            }

            /* Read the two numbers we need and if they are PC-relative,
             * compute the offset from VMA base
             */
            uintptr_t initial_location = fde_read_address(cfi.fde.start, cie->ptr_len);
            uintptr_t address_range = fde_read_address(cfi.fde.start + cie->ptr_len, cie->ptr_len);

            if (cie->pcrel)
            {
                /* We need to determine how long is the 'length' (and
                 * consequently CIE id) field of this FDE -- it can be either 4
                 * or 12 bytes long. */
                const uint8_t *fde_bytes = (const uint8_t *)scn_data->d_buf + cfi_offset;
                uintptr_t length = fde_read_address(fde_bytes, 4);
                uintptr_t skip = (length == 0xffffffffUL ? 12 : 4);

                uintptr_t mask = (cie->ptr_len == 4 ? 0xffffffffUL : 0xffffffffffffffffUL);
                initial_location += (uintptr_t)shdr.sh_offset + (uintptr_t)cfi_offset + 2 * skip;
                initial_location &= mask;
            }
            else
            {
                /* Assuming that not pcrel means absolute address (what if the file is a library?).
                 * Convert to text-section-start-relative.
                 */
                initial_location -= exec_base;
            }

            /* Insert the pair into hash. The log statement must come first:
             * g_hash_table_insert() frees the passed key through the table's
             * key destroy function when an equal key is already present, so
             * *key must not be read after the insertion. */
            uintptr_t *key = addr_alloc(initial_location + exec_base);
            VERB3 log("FDE start: 0x%jx length: %u", (uintmax_t)*key, (unsigned)address_range);
            g_hash_table_insert(hash, key, (gpointer)address_range);

            /* Iterate through the backtrace entries and check each address
             * member whether it belongs into the range given by current FDE.
             */
            for (it = entries; it != NULL; it = g_list_next(it))
            {
                struct backtrace_entry *entry = it->data;
                if (initial_location <= entry->build_id_offset
                        && entry->build_id_offset < initial_location + address_range)
                {
                    /* Convert to before-relocation absolute addresses, disassembler uses those. */
                    entry->function_initial_loc = exec_base + initial_location;
                    entry->function_length = address_range;
                    /*TODO: remove the entry from the list to save a bit of time in next iteration?*/
                }
            }
        }
    }

    retval = hash; /* success */

ret_free:
    list_free_with_free(cie_list);
    if (retval == NULL)
        g_hash_table_destroy(hash);

    return retval;
}
/**
 * Reads the .eh_frame section of an ELF file and returns its Frame
 * Description Entries as a singly linked list.
 *
 * @param filename
 *   Path of the ELF file to open.
 * @param error_message
 *   On failure, receives a message built by sr_asprintf (presumably
 *   heap-allocated and to be freed by the caller -- confirm).  It is
 *   not written on success.
 * @returns
 *   Head of the list of parsed FDEs in file order, each carrying the
 *   executable base, the start address relative to it, and the length.
 *   NULL on failure.  NULL is also returned, without an error message,
 *   when the section contains no FDE at all.
 */
struct sr_elf_fde *
sr_elf_get_eh_frame(const char *filename,
                    char **error_message)
{
#ifdef WITH_ELFUTILS
    /* State released on the shared failure path; everything it touches
     * is initialized before the first "goto fail". */
    struct sr_elf_fde *result = NULL, *last = NULL;
    struct cie *cie_list = NULL, *cie_list_last = NULL;
    GElf_Shdr shdr;
    Elf_Data *section_data;
    char *find_section_error_message;
    size_t phnum;
    uint64_t exec_base = 0;
    bool exec_base_found = false;
    Dwarf_Off cfi_offset_next = 0;

    /* Open the input file. */
    int fd = open(filename, O_RDONLY);
    if (fd < 0)
    {
        *error_message = sr_asprintf("Failed to open file %s: %s",
                                     filename,
                                     strerror(errno));
        return NULL;
    }

    /* Initialize libelf on the opened file. */
    Elf *elf = elf_begin(fd, ELF_C_READ, NULL);
    if (!elf)
    {
        *error_message = sr_asprintf("Failed to run elf_begin on file %s: %s",
                                     filename,
                                     elf_errmsg(-1));
        close(fd);
        return NULL;
    }

    unsigned char *e_ident = (unsigned char *)elf_getident(elf, NULL);
    if (!e_ident)
    {
        *error_message = sr_asprintf("elf_getident failed for %s: %s",
                                     filename,
                                     elf_errmsg(-1));
        goto fail;
    }

    /* Look up the .eh_frame section */
    if (!find_elf_section_by_name(elf,
                                  ".eh_frame",
                                  &section_data,
                                  &shdr,
                                  &find_section_error_message))
    {
        *error_message = sr_asprintf("Failed to find .eh_frame section for %s: %s",
                                     filename,
                                     find_section_error_message);
        free(find_section_error_message);
        goto fail;
    }

    /* Get the address at which the executable segment is loaded.  If
     * the .eh_frame addresses are absolute, this is used to convert
     * them to relative to the beginning of executable segment.  We
     * are looking for the first LOAD segment that is executable, I
     * hope this is sufficient.
     */
    if (elf_getphdrnum(elf, &phnum) != 0)
    {
        *error_message = sr_asprintf("elf_getphdrnum failed for %s: %s",
                                     filename,
                                     elf_errmsg(-1));
        goto fail;
    }

    for (size_t i = 0; i < phnum; i++)
    {
        GElf_Phdr phdr;
        if (gelf_getphdr(elf, (int)i, &phdr) != &phdr)
        {
            *error_message = sr_asprintf("gelf_getphdr failed for %s: %s",
                                         filename,
                                         elf_errmsg(-1));
            goto fail;
        }

        if (phdr.p_type == PT_LOAD && (phdr.p_flags & PF_X))
        {
            exec_base = phdr.p_vaddr;
            exec_base_found = true;
            break;
        }
    }

    if (!exec_base_found)
    {
        *error_message = sr_asprintf("Can't determine executable base for %s",
                                     filename);
        goto fail;
    }

    /* We now have a handle to .eh_frame data.  We'll use
     * dwarf_next_cfi to iterate through all FDEs looking for those
     * matching the addresses we have.
     *
     * Some info on .eh_frame can be found at
     * http://www.airs.com/blog/archives/460 and in DWARF
     * documentation for .debug_frame.  The initial_location and
     * address_range decoding is 'inspired' by elfutils source.
     *
     * @todo If this linear scan is too slow, we can do binary search
     * on .eh_frame_hdr -- see http://www.airs.com/blog/archives/462
     */
    while (true)
    {
        Dwarf_CFI_Entry cfi;
        Dwarf_Off cfi_offset = cfi_offset_next;
        int ret = dwarf_next_cfi(e_ident,
                                 section_data,
                                 1,
                                 cfi_offset,
                                 &cfi_offset_next,
                                 &cfi);

        if (ret > 0)
        {
            /* We're at the end. */
            break;
        }

        if (ret < 0)
        {
            /* Error.  If cfi_offset_next was updated, we may skip the
             * erroneous cfi. */
            if (cfi_offset_next > cfi_offset)
                continue;

            *error_message = sr_asprintf("dwarf_next_cfi failed for %s: %s",
                                         filename,
                                         dwarf_errmsg(-1));
            goto fail;
        }

        if (dwarf_cfi_cie_p(&cfi))
        {
            /* Current CFI is a CIE.  We store its offset and FDE encoding
             * attributes to be used when reading FDEs.
             */

            /* Default FDE encoding (i.e. no R in augmentation string) is
             * DW_EH_PE_absptr.
             */
            char *cie_error_message;
            struct cie *cie = read_cie(&cfi,
                                       cfi_offset,
                                       e_ident,
                                       &cie_error_message);
            if (!cie)
            {
                *error_message = sr_asprintf("CIE reading failed for %s: %s",
                                             filename,
                                             cie_error_message);
                free(cie_error_message);
                goto fail;
            }

            /* Append the newly parsed CIE to our list of CIEs. */
            if (cie_list)
            {
                cie_list_last->next = cie;
                cie_list_last = cie;
            }
            else
                cie_list = cie_list_last = cie;
        }
        else
        {
            /* Current CFI is an FDE. */
            struct cie *cie = cie_list;

            /* Find the CIE data that we should have saved earlier. */
            while (cie)
            {
                /* In .eh_frame, CIE_pointer is relative, but libdw converts it
                 * to absolute offset. */
                if (cfi.fde.CIE_pointer == cie->cie_offset)
                    break; /* Found. */

                cie = cie->next;
            }

            if (!cie)
            {
                *error_message = sr_asprintf("CIE not found for FDE %jx in %s",
                                             (uintmax_t)cfi_offset,
                                             filename);
                goto fail;
            }

            /* Read the two numbers we need and if they are PC-relative,
             * compute the offset from VMA base
             */
            uint64_t initial_location = fde_read_address(cfi.fde.start,
                                                         cie->ptr_len);
            uint64_t address_range = fde_read_address(cfi.fde.start + cie->ptr_len,
                                                      cie->ptr_len);

            if (cie->pcrel)
            {
                /* We need to determine how long is the 'length' (and
                 * consequently CIE id) field of this FDE -- it can be
                 * either 4 or 12 bytes long. */
                const uint8_t *fde_bytes =
                    (const uint8_t *)section_data->d_buf + cfi_offset;
                uint64_t length = fde_read_address(fde_bytes, 4);
                uint64_t skip = (length == 0xffffffffUL ? 12 : 4);
                uint64_t mask = (cie->ptr_len == 4 ? 0xffffffffUL : 0xffffffffffffffffUL);
                initial_location += shdr.sh_offset + cfi_offset + 2 * skip;
                initial_location &= mask;
            }
            else
            {
                /* Assuming that not pcrel means absolute address
                 * (what if the file is a library?).  Convert to
                 * text-section-start-relative.
                 */
                initial_location -= exec_base;
            }

            struct sr_elf_fde *fde = sr_malloc(sizeof(struct sr_elf_fde));
            fde->exec_base = exec_base;
            fde->start_address = initial_location;
            fde->length = address_range;
            fde->next = NULL;

            /* Append the newly parsed Frame Description Entry to our
             * list of FDEs.
             */
            if (result)
            {
                last->next = fde;
                last = fde;
            }
            else
                result = last = fde;
        }
    }

    cie_free(cie_list);
    elf_end(elf);
    close(fd);
    return result;

fail:
    /* Shared failure path: drop whatever was parsed so far and release
     * the libelf handle and the file descriptor.  Both lists may still
     * be NULL here, as they could be on the original error paths. */
    cie_free(cie_list);
    sr_elf_eh_frame_free(result);
    elf_end(elf);
    close(fd);
    return NULL;
#else /* WITH_ELFUTILS */
    *error_message = sr_asprintf("satyr compiled without elfutils");
    return NULL;
#endif /* WITH_ELFUTILS */
}
/* Use a binary search table in .eh_frame_hdr format, yield an FDE offset.
   Returns (Dwarf_Off) -1 when no table entry covers ADDRESS or when a
   table value cannot be decoded.  */
static Dwarf_Off
binary_search_fde (Dwarf_CFI *cache, Dwarf_Addr address)
{
  /* Each table entry is a pair of encoded values: start address, FDE.  */
  const size_t entry_size = 2 * encoded_value_size (&cache->data->d,
						    cache->e_ident,
						    cache->search_table_encoding,
						    NULL);

  /* Dummy used by read_encoded_value.  */
  Dwarf_CFI table_ctx =
    {
      .e_ident = cache->e_ident,
      .datarel = cache->search_table_vaddr,
      .frame_vaddr = cache->search_table_vaddr,
    };

  size_t lo = 0;
  size_t hi = cache->search_table_entries;
  while (lo < hi)
    {
      size_t mid = (lo + hi) / 2;
      const uint8_t *cursor = &cache->search_table[mid * entry_size];

      Dwarf_Addr start;
      if (unlikely (read_encoded_value (&table_ctx,
					cache->search_table_encoding,
					&cursor, &start)))
	break;

      if (address < start)
	{
	  hi = mid;
	  continue;
	}

      /* From here on ADDRESS >= START, so this entry is a candidate.  */
      Dwarf_Addr fde;
      if (unlikely (read_encoded_value (&table_ctx,
					cache->search_table_encoding,
					&cursor, &fde)))
	break;

      lo = mid + 1;

      /* If this is the last entry, its upper bound is assumed to be
	 the end of the module.
	 XXX really should be end of containing PT_LOAD segment */
      if (lo < cache->search_table_entries)
	{
	  /* Look at the start address in the following entry.  */
	  Dwarf_Addr end;
	  if (unlikely (read_encoded_value (&table_ctx,
					    cache->search_table_encoding,
					    &cursor, &end)))
	    break;
	  if (address >= end)
	    continue;
	}

      return fde - cache->frame_vaddr;
    }

  return (Dwarf_Off) -1l;
}

/* Find the FDE covering ADDRESS: first among the FDEs already cached in
   the tree, then through the binary search table if there is one, and
   otherwise by reading further CFI entries sequentially.  Returns NULL
   when nothing matches or the data is bad.  */
struct dwarf_fde *
internal_function
__libdw_find_fde (Dwarf_CFI *cache, Dwarf_Addr address)
{
  /* Look for a cached FDE covering this address.  */
  const struct dwarf_fde fde_key = { .start = address, .end = 0 };
  struct dwarf_fde **cached = tfind (&fde_key, &cache->fde_tree, &compare_fde);
  if (cached != NULL)
    return *cached;

  /* Use .eh_frame_hdr binary search table if possible.  */
  if (cache->search_table != NULL)
    {
      Dwarf_Off fde_offset = binary_search_fde (cache, address);
      if (fde_offset == (Dwarf_Off) -1l)
	{
	  /* We found no FDE covering this address.  */
	  __libdw_seterrno (DWARF_E_NO_MATCH);
	  return NULL;
	}

      struct dwarf_fde *hit = __libdw_fde_by_offset (cache, fde_offset);
      if (hit == NULL)
	return NULL;

      /* Sanity check the address range.  */
      if (unlikely (address < hit->start || address >= hit->end))
	{
	  __libdw_seterrno (DWARF_E_INVALID_DWARF);
	  return NULL;
	}

      return hit;
    }

  /* It's not there.  Read more CFI entries until we find it.  */
  for (;;)
    {
      Dwarf_Off entry_offset = cache->next_offset;
      Dwarf_CFI_Entry entry;
      int status = INTUSE(dwarf_next_cfi) (cache->e_ident, &cache->data->d,
					   CFI_IS_EH (cache), entry_offset,
					   &cache->next_offset, &entry);

      /* A positive status ends the scan.  */
      if (status > 0)
	break;

      if (status < 0)
	{
	  /* We couldn't progress past the bogus FDE.  */
	  if (cache->next_offset == entry_offset)
	    break;

	  /* Skip the loser and look at the next entry.  */
	  continue;
	}

      if (dwarf_cfi_cie_p (&entry))
	{
	  /* This is a CIE, not an FDE.  We eagerly intern these
	     because the next FDE will usually refer to this CIE.  */
	  __libdw_intern_cie (cache, entry_offset, &entry.cie);
	  continue;
	}

      /* We have a new FDE to consider.  */
      struct dwarf_fde *candidate = intern_fde (cache, &entry.fde);

      /* Bad FDE, but we can keep looking.  */
      if (candidate == (void *) -1l)
	continue;

      /* Bad data.  */
      if (candidate == NULL)
	return NULL;

      /* Is this the one we're looking for?  */
      if (candidate->start <= address && candidate->end > address)
	return candidate;
    }

  /* We found no FDE covering this address.  */
  __libdw_seterrno (DWARF_E_NO_MATCH);
  return NULL;
}