static inline int canonicalize (Dwfl_Error error) { unsigned int value; switch (error) { default: value = error; if ((value &~ 0xffff) != 0) break; assert (value < nmsgidx); break; case DWFL_E_ERRNO: value = DWFL_E (ERRNO, errno); break; case DWFL_E_LIBELF: value = DWFL_E (LIBELF, elf_errno ()); break; case DWFL_E_LIBDW: value = DWFL_E (LIBDW, INTUSE(dwarf_errno) ()); break; #if 0 DWFL_E_LIBEBL: value = DWFL_E (LIBEBL, ebl_errno ()); break; #endif } return value; }
/* Open libelf FILE->fd and compute the load base of ELF as loaded in MOD.

   If FILE->elf is not set yet, FILE->fd is used (after first trying to open
   FILE->name when no descriptor is available) to create it; CBFAIL is
   returned when no descriptor can be obtained.

   When we return success, FILE->elf and FILE->bias are set up, and
   MOD->e_type has been updated from the ELF header.

   On any later failure (not an ELF file: DWFL_E_BADELF; libelf could not
   read the ELF or program headers: a DWFL_E (LIBELF, ...) code) the Elf
   handle is released, FILE->elf is reset to NULL, and FILE->fd is closed
   and reset to -1, so a failed call leaves no resources behind.  */
static inline Dwfl_Error
open_elf (Dwfl_Module *mod, struct dwfl_file *file)
{
  Dwfl_Error failure;

  if (file->elf == NULL)
    {
      /* If there was a pre-primed file name left that the callback left
	 behind, try to open that file name.  */
      if (file->fd < 0 && file->name != NULL)
	file->fd = TEMP_FAILURE_RETRY (open64 (file->name, O_RDONLY));

      if (file->fd < 0)
	return CBFAIL;

      file->elf = elf_begin (file->fd, ELF_C_READ_MMAP_PRIVATE, NULL);
    }

  if (unlikely (elf_kind (file->elf) != ELF_K_ELF))
    {
      failure = DWFL_E_BADELF;
      goto fail;
    }

  GElf_Ehdr ehdr_mem, *ehdr = gelf_getehdr (file->elf, &ehdr_mem);
  if (ehdr == NULL)
    {
    elf_error:
      /* Fetch the libelf error before tearing the handle down.  */
      failure = DWFL_E (LIBELF, elf_errno ());
      goto fail;
    }

  /* The addresses in an ET_EXEC file are absolute.  The lowest p_vaddr of
     the main file can differ from that of the debug file due to prelink.
     But that does not change addresses that symbols, debuginfo, or
     sh_addr of any program sections refer to.  */
  file->bias = 0;
  if (mod->e_type != ET_EXEC)
    for (uint_fast16_t i = 0; i < ehdr->e_phnum; ++i)
      {
	GElf_Phdr ph_mem;
	GElf_Phdr *ph = gelf_getphdr (file->elf, i, &ph_mem);
	if (ph == NULL)
	  goto elf_error;
	if (ph->p_type == PT_LOAD)
	  {
	    /* Only the first PT_LOAD segment determines the bias: the
	       difference between the aligned module load address and the
	       aligned segment address.  */
	    file->bias = ((mod->low_addr & -ph->p_align)
			  - (ph->p_vaddr & -ph->p_align));
	    break;
	  }
      }

  mod->e_type = ehdr->e_type;

  /* Relocatable Linux kernels are ET_EXEC but act like ET_DYN.  */
  if (mod->e_type == ET_EXEC && file->bias != 0)
    mod->e_type = ET_DYN;

  return DWFL_E_NOERROR;

 fail:
  /* Do not keep a half-usable handle (and its mapping) around once the
     descriptor backing it has been closed.  elf_end accepts NULL.  */
  elf_end (file->elf);
  file->elf = NULL;
  close (file->fd);
  file->fd = -1;
  return failure;
}
/* Find the index in MOD->reloc_info.refs containing *ADDR.

   The section cache is built on demand with cache_sections.  On success the
   index of the matching entry is returned and *ADDR is rewritten to be
   relative to that entry's start address.  On failure -1 is returned; when
   no entry contains the address, the libdwfl error is set to
   DWFL_E (LIBDW, DWARF_E_NO_MATCH).  */
static int
find_section (Dwfl_Module *mod, Dwarf_Addr *addr)
{
  if (unlikely (mod->reloc_info == NULL) && cache_sections (mod) < 0)
    return -1;

  struct dwfl_relocation *sections = mod->reloc_info;

  /* The sections are sorted by address, so we can use binary search.  */
  size_t l = 0, u = sections->count;
  while (l < u)
    {
      /* Midpoint computed without overflowing l + u.  */
      size_t idx = l + (u - l) / 2;

      if (*addr < sections->refs[idx].start)
	u = idx;
      else if (*addr > sections->refs[idx].end)
	l = idx + 1;
      else
	{
	  /* Consider the limit of a section to be inside it, unless it's
	     inside the next one.  A section limit address can appear in
	     line records.  Only look at the next entry when there is one:
	     idx is always a valid index here, so the bound must be checked
	     on idx + 1.  */
	  if (*addr == sections->refs[idx].end
	      && idx + 1 < sections->count
	      && *addr == sections->refs[idx + 1].start)
	    ++idx;

	  *addr -= sections->refs[idx].start;
	  return idx;
	}
    }

  __libdwfl_seterrno (DWFL_E (LIBDW, DWARF_E_NO_MATCH));
  return -1;
}
/* Try to start up libdw on DEBUGFILE.

   For an ET_REL module whose DEBUGFILE has not been relocated yet, the
   debugging sections are relocated first; this needs the section_address
   callback (DWFL_E_NOREL otherwise), the module's ebl backend and its
   symbol table.  Afterwards the file descriptors of the main and debug
   files are closed if libelf reports (ELF_C_FDREAD) that it has read all
   it needs.

   On success MOD->dw is set, MOD->lazycu is set to 1 and DWFL_E_NOERROR is
   returned.  */
static Dwfl_Error
load_dw (Dwfl_Module *mod, struct dwfl_file *debugfile)
{
  if (mod->e_type == ET_REL && !debugfile->relocated)
    {
      const Dwfl_Callbacks *const callbacks = mod->dwfl->callbacks;

      /* Relocating the debugging sections requires section addresses.  */
      if (callbacks->section_address == NULL)
	return DWFL_E_NOREL;

      Dwfl_Error ebl_status = __libdwfl_module_getebl (mod);
      if (ebl_status != DWFL_E_NOERROR)
	return ebl_status;

      /* A symbol table failure is reported before relocation is tried.  */
      find_symtab (mod);
      Dwfl_Error reloc_status = mod->symerr;
      if (reloc_status != DWFL_E_NOERROR)
	return reloc_status;

      reloc_status = __libdwfl_relocate (mod, debugfile->elf, true);
      if (reloc_status != DWFL_E_NOERROR)
	return reloc_status;

      /* Don't keep the file descriptors around.  */
      if (mod->main.fd != -1
	  && elf_cntl (mod->main.elf, ELF_C_FDREAD) == 0)
	{
	  close (mod->main.fd);
	  mod->main.fd = -1;
	}
      if (debugfile->fd != -1
	  && elf_cntl (debugfile->elf, ELF_C_FDREAD) == 0)
	{
	  close (debugfile->fd);
	  debugfile->fd = -1;
	}
    }

  mod->dw = INTUSE(dwarf_begin_elf) (debugfile->elf, DWARF_C_READ, NULL);
  if (mod->dw != NULL)
    {
      /* Until we have iterated through all CU's, we might do lazy
	 lookups.  */
      mod->lazycu = 1;
      return DWFL_E_NOERROR;
    }

  /* "No DWARF" gets its own libdwfl code; anything else is wrapped.  */
  int dw_err = INTUSE(dwarf_errno) ();
  if (dw_err == DWARF_E_NO_DWARF)
    return DWFL_E_NO_DWARF;
  return DWFL_E (LIBDW, dw_err);
}
/* Try to find a symbol table in either MOD->main.elf or MOD->debug.elf.

   The outcome is cached in MOD: on success MOD->symdata, MOD->symstrdata,
   MOD->symxndxdata, MOD->symfile and MOD->syments describe the table and
   MOD->symerr is DWFL_E_NOERROR; on failure MOD->symerr holds the error.
   A second call after either outcome returns immediately.  */
static void
find_symtab (Dwfl_Module *mod)
{
  /* Already done, or a previous failure is cached.  */
  if (mod->symdata != NULL || mod->symerr != DWFL_E_NOERROR)
    return;

  find_file (mod);
  mod->symerr = mod->elferr;
  if (mod->symerr != DWFL_E_NOERROR)
    return;

  /* Start with the main ELF file.  */
  Elf_Scn *symtab_scn = NULL;
  Elf_Scn *shndx_scn = NULL;
  GElf_Word strtab_ndx = 0;
  mod->symerr = load_symtab (&mod->main, &mod->symfile, &symtab_scn,
			     &shndx_scn, &mod->syments, &strtab_ndx);

  if (mod->symerr == DWFL_E_NO_SYMTAB)
    {
      /* Now we have to look for a separate debuginfo file.  */
      mod->symerr = find_debuginfo (mod);
      if (mod->symerr == DWFL_E_NOERROR)
	mod->symerr = load_symtab (&mod->debug, &mod->symfile, &symtab_scn,
				   &shndx_scn, &mod->syments, &strtab_ndx);
      else if (mod->symerr == DWFL_E_CB)
	/* The find_debuginfo hook failed.  */
	mod->symerr = DWFL_E_NO_SYMTAB;
      else
	return;

      if (mod->symerr == DWFL_E_NO_SYMTAB)
	{
	  if (symtab_scn == NULL)
	    {
	      /* Last ditch, look for dynamic symbols without section
		 headers.  */
	      find_dynsym (mod);
	      return;
	    }

	  /* We still have the dynamic symbol table.  */
	  mod->symerr = DWFL_E_NOERROR;
	}
      else if (mod->symerr != DWFL_E_NOERROR)
	return;
    }
  else if (mod->symerr != DWFL_E_NOERROR)
    return;

  /* This does some sanity checks on the string table section.  */
  if (elf_strptr (mod->symfile->elf, strtab_ndx, 0) == NULL)
    goto libelf_failure;

  /* Cache the data; MOD->syments was set above.  */
  mod->symstrdata = elf_getdata (elf_getscn (mod->symfile->elf, strtab_ndx),
				 NULL);
  if (mod->symstrdata == NULL)
    goto libelf_failure;

  if (shndx_scn != NULL)
    {
      mod->symxndxdata = elf_getdata (shndx_scn, NULL);
      if (mod->symxndxdata == NULL)
	goto libelf_failure;
    }
  else
    mod->symxndxdata = NULL;

  mod->symdata = elf_getdata (symtab_scn, NULL);
  if (mod->symdata != NULL)
    return;

 libelf_failure:
  mod->symerr = DWFL_E (LIBELF, elf_errno ());
}
/* Try to find a dynamic symbol table via phdrs.

   Walks the program headers of MOD->main.elf looking for PT_DYNAMIC, reads
   DT_SYMTAB, DT_STRTAB, DT_STRSZ, DT_HASH and DT_GNU_HASH from it, and
   translates those addresses to file offsets with find_offsets.  The number
   of symbols (MOD->syments) is then derived, in order of preference, from
   the .hash table, from the GNU hash table, or from the distance between
   the symbol table and the string table.

   If a symbol count was found, MOD->symdata and MOD->symstrdata are read as
   raw chunks; on success MOD->symfile is set to &MOD->main and MOD->symerr
   to DWFL_E_NOERROR, on failure MOD->symerr gets the libelf error.  If no
   usable PT_DYNAMIC segment yields a symbol count, MOD is left as it was
   apart from any MOD->syments value computed along the way.

   NOTE(review): the result of gelf_getehdr is dereferenced without a NULL
   check; presumably the caller has already read the header successfully --
   confirm.  */
static void
find_dynsym (Dwfl_Module *mod)
{
  GElf_Ehdr ehdr_mem;
  GElf_Ehdr *ehdr = gelf_getehdr (mod->main.elf, &ehdr_mem);

  for (uint_fast16_t i = 0; i < ehdr->e_phnum; ++i)
    {
      GElf_Phdr phdr_mem;
      GElf_Phdr *phdr = gelf_getphdr (mod->main.elf, i, &phdr_mem);
      if (phdr == NULL)
	break;

      if (phdr->p_type == PT_DYNAMIC)
	{
	  /* Examine the dynamic section for the pointers we need.  */

	  Elf_Data *data = elf_getdata_rawchunk (mod->main.elf,
						 phdr->p_offset, phdr->p_filesz,
						 ELF_T_DYN);
	  /* An unreadable dynamic segment is skipped; later program headers
	     are still examined.  */
	  if (data == NULL)
	    continue;

	  /* Indices into ADDRS/OFFS for the tables we care about.  */
	  enum
	    {
	      i_symtab,
	      i_strtab,
	      i_hash,
	      i_gnu_hash,
	      i_max
	    };
	  GElf_Addr addrs[i_max] = { 0, };
	  GElf_Xword strsz = 0;
	  size_t n = data->d_size / gelf_fsize (mod->main.elf,
						ELF_T_DYN, 1, EV_CURRENT);
	  for (size_t j = 0; j < n; ++j)
	    {
	      GElf_Dyn dyn_mem;
	      GElf_Dyn *dyn = gelf_getdyn (data, j, &dyn_mem);
	      /* Inside the switch, `continue' advances to the next dynamic
		 entry.  Only DT_NULL (or an unreadable entry) falls out to
		 the `break' below and ends the scan.  */
	      if (dyn != NULL)
		switch (dyn->d_tag)
		  {
		  case DT_SYMTAB:
		    addrs[i_symtab] = dyn->d_un.d_ptr;
		    continue;

		  case DT_HASH:
		    addrs[i_hash] = dyn->d_un.d_ptr;
		    continue;

		  case DT_GNU_HASH:
		    addrs[i_gnu_hash] = dyn->d_un.d_ptr;
		    continue;

		  case DT_STRTAB:
		    addrs[i_strtab] = dyn->d_un.d_ptr;
		    continue;

		  case DT_STRSZ:
		    strsz = dyn->d_un.d_val;
		    continue;

		  default:
		    continue;

		  case DT_NULL:
		    break;
		  }
	      break;
	    }

	  /* Translate pointers into file offsets.  */
	  GElf_Off offs[i_max] = { 0, };
	  find_offsets (mod->main.elf, ehdr, i_max, addrs, offs);

	  /* Figure out the size of the symbol table.  */

	  if (offs[i_hash] != 0)
	    {
	      /* In the original format, .hash says the size of .dynsym.  */

	      /* The value is read from the second entry of the table (one
		 entry past its start).  */
	      size_t entsz = SH_ENTSIZE_HASH (ehdr);
	      data = elf_getdata_rawchunk (mod->main.elf,
					   offs[i_hash] + entsz, entsz,
					   entsz == 4 ? ELF_T_WORD
					   : ELF_T_XWORD);
	      if (data != NULL)
		mod->syments = (entsz == 4
				? *(const GElf_Word *) data->d_buf
				: *(const GElf_Xword *) data->d_buf);
	    }
	  if (offs[i_gnu_hash] != 0 && mod->syments == 0)
	    {
	      /* In the new format, we can derive it with some work.  */

	      const struct
	      {
		Elf32_Word nbuckets;
		Elf32_Word symndx;
		Elf32_Word maskwords;
		Elf32_Word shift2;
	      } *header;

	      data = elf_getdata_rawchunk (mod->main.elf, offs[i_gnu_hash],
					   sizeof *header, ELF_T_WORD);
	      if (data != NULL)
		{
		  header = data->d_buf;
		  Elf32_Word nbuckets = header->nbuckets;
		  Elf32_Word symndx = header->symndx;
		  /* The buckets follow the header and MASKWORDS mask words.
		     The mask word size is the gelf_getclass value times the
		     size of an Elf32_Word -- presumably relying on
		     ELFCLASS32 == 1 and ELFCLASS64 == 2; confirm against
		     elf.h.  */
		  GElf_Off buckets_at = (offs[i_gnu_hash] + sizeof *header
					 + (gelf_getclass (mod->main.elf)
					    * sizeof (Elf32_Word)
					    * header->maskwords));

		  data = elf_getdata_rawchunk (mod->main.elf, buckets_at,
					       nbuckets * sizeof (Elf32_Word),
					       ELF_T_WORD);
		  if (data != NULL && symndx < nbuckets)
		    {
		      const Elf32_Word *const buckets = data->d_buf;
		      /* Find the largest symbol index any bucket starts
			 at.  */
		      Elf32_Word maxndx = symndx;
		      for (Elf32_Word bucket = 0; bucket < nbuckets; ++bucket)
			if (buckets[bucket] > maxndx)
			  maxndx = buckets[bucket];

		      /* Position in the hash value array (which follows the
			 buckets) of the entry for symbol MAXNDX.  */
		      GElf_Off hasharr_at = (buckets_at
					     + nbuckets * sizeof (Elf32_Word));
		      hasharr_at += (maxndx - symndx) * sizeof (Elf32_Word);
		      /* Walk forward one hash word at a time until a word
			 with its low bit set is found; the symbol count is
			 one past that index.  The walk stops without a
			 result when a word cannot be read.  */
		      do
			{
			  data = elf_getdata_rawchunk (mod->main.elf,
						       hasharr_at,
						       sizeof (Elf32_Word),
						       ELF_T_WORD);
			  if (data != NULL
			      && (*(const Elf32_Word *) data->d_buf & 1u))
			    {
			      mod->syments = maxndx + 1;
			      break;
			    }
			  ++maxndx;
			  hasharr_at += sizeof (Elf32_Word);
			} while (data != NULL);
		    }
		}
	    }
	  /* Fallback: take everything between the symbol table and a string
	     table located after it to be symbol entries.  */
	  if (offs[i_strtab] > offs[i_symtab] && mod->syments == 0)
	    mod->syments = ((offs[i_strtab] - offs[i_symtab])
			    / gelf_fsize (mod->main.elf,
					  ELF_T_SYM, 1, EV_CURRENT));

	  if (mod->syments > 0)
	    {
	      mod->symdata = elf_getdata_rawchunk (mod->main.elf,
						   offs[i_symtab],
						   gelf_fsize (mod->main.elf,
							       ELF_T_SYM,
							       mod->syments,
							       EV_CURRENT),
						   ELF_T_SYM);
	      if (mod->symdata != NULL)
		{
		  mod->symstrdata = elf_getdata_rawchunk (mod->main.elf,
							  offs[i_strtab],
							  strsz,
							  ELF_T_BYTE);
		  /* Symbols without their strings are treated as a
		     failure.  */
		  if (mod->symstrdata == NULL)
		    mod->symdata = NULL;
		}
	      if (mod->symdata == NULL)
		mod->symerr = DWFL_E (LIBELF, elf_errno ());
	      else
		{
		  mod->symfile = &mod->main;
		  mod->symerr = DWFL_E_NOERROR;
		}
	      /* Success or failure, the first PT_DYNAMIC that yields a
		 symbol count ends the search.  */
	      return;
	    }
	}
    }
}
/* Open libelf FILE->fd and compute the load base of ELF as loaded in MOD.
   When we return success, FILE->elf and FILE->vaddr are set up (for
   non-ET_REL files FILE->address_sync as well).

   The file itself is opened by open_elf_file; its error is passed through
   unchanged.  If libelf then fails to read the ELF or program headers, the
   Elf handle is released, FILE->elf is reset to NULL, FILE->fd is closed
   and reset to -1, and a DWFL_E (LIBELF, ...) code is returned.  */
static inline Dwfl_Error
open_elf (Dwfl_Module *mod, struct dwfl_file *file)
{
  Dwfl_Error status = open_elf_file (&file->elf, &file->fd, &file->name);
  if (status != DWFL_E_NOERROR)
    return status;

  GElf_Ehdr header_mem;
  GElf_Ehdr *header = gelf_getehdr (file->elf, &header_mem);
  if (header == NULL)
    goto libelf_failure;

  if (header->e_type != ET_REL)
    {
      /* In any non-ET_REL file, we compute the "synchronization address".

	 We start with the address at the end of the first PT_LOAD segment.
	 When prelink converts REL to RELA in an ET_DYN file, it expands the
	 space between the beginning of the segment and the actual code/data
	 addresses.  Since that change wasn't made in the debug file, the
	 distance from p_vaddr to an address of interest (in an st_value or
	 DWARF data) now differs between the main and debug files.  The
	 distance from address_sync to an address of interest remains
	 consistent.

	 If there are no section headers at all (full stripping), then the
	 end of the first segment is a valid synchronization address.  This
	 cannot happen in a prelinked file, since prelink itself relies on
	 section headers for prelinking and for undoing it.  (If you do full
	 stripping on a prelinked file, then you get what you deserve--you
	 can neither undo the prelinking, nor expect to line it up with a
	 debug file separated before prelinking.)

	 However, when prelink processes an ET_EXEC file, it can do
	 something different.  There it juggles the "special" sections
	 (SHT_DYNSYM et al) to make space for the additional prelink special
	 sections.  Sometimes it will do this by moving a special section
	 like .dynstr after the real program sections in the first PT_LOAD
	 segment--i.e. to the end.  That changes the end address of the
	 segment, so it no longer lines up correctly and is not a valid
	 synchronization address to use.  Because of this, we need to apply
	 a different prelink-savvy means to discover the synchronization
	 address when there is a separate debug file and a prelinked main
	 file.  That is done in find_debuginfo, below.  */

      size_t nsegments;
      if (unlikely (elf_getphdrnum (file->elf, &nsegments) != 0))
	goto libelf_failure;

      file->vaddr = file->address_sync = 0;

      /* Only the first PT_LOAD segment is of interest.  */
      size_t seg = 0;
      while (seg < nsegments)
	{
	  GElf_Phdr segment_mem;
	  GElf_Phdr *segment = gelf_getphdr (file->elf, seg, &segment_mem);
	  if (unlikely (segment == NULL))
	    goto libelf_failure;

	  if (segment->p_type == PT_LOAD)
	    {
	      file->vaddr = segment->p_vaddr & -segment->p_align;
	      file->address_sync = segment->p_vaddr + segment->p_memsz;
	      break;
	    }

	  ++seg;
	}
    }

  /* We only want to set the module e_type explicitly once, derived from
     the main ELF file.  (It might be changed for the kernel, because that
     is special - see below.)  open_elf is always called first for the main
     ELF file, because both find_dw and find_symtab call __libdwfl_getelf
     first to open the main file.  So don't let debug or aux files override
     the module e_type.  The kernel heuristic below could otherwise trigger
     for non-kernel/non-main files, since their phdrs might not match the
     actual load addresses.  */
  if (file != &mod->main)
    assert (mod->main.elf != NULL);
  else
    {
      mod->e_type = header->e_type;

      /* Relocatable Linux kernels are ET_EXEC but act like ET_DYN.  */
      if (mod->e_type == ET_EXEC && file->vaddr != mod->low_addr)
	mod->e_type = ET_DYN;
    }

  return DWFL_E_NOERROR;

 libelf_failure:
  elf_end (file->elf);
  file->elf = NULL;
  close (file->fd);
  file->fd = -1;
  return DWFL_E (LIBELF, elf_errno ());
}
int dwfl_module_getsrc_file (Dwfl_Module *mod, const char *fname, int lineno, int column, Dwfl_Line ***srcsp, size_t *nsrcs) { if (mod == NULL) return -1; if (mod->dw == NULL) { Dwarf_Addr bias; if (INTUSE(dwfl_module_getdwarf) (mod, &bias) == NULL) return -1; } bool is_basename = strchr (fname, '/') == NULL; size_t max_match = *nsrcs ?: ~0u; size_t act_match = *nsrcs; size_t cur_match = 0; Dwfl_Line **match = *nsrcs == 0 ? NULL : *srcsp; struct dwfl_cu *cu = NULL; Dwfl_Error error; while ((error = __libdwfl_nextcu (mod, cu, &cu)) == DWFL_E_NOERROR && cu != NULL && (error = __libdwfl_cu_getsrclines (cu)) == DWFL_E_NOERROR) { inline const char *INTUSE(dwarf_line_file) (const Dwarf_Line *line) { return line->files->info[line->file].name; } inline Dwarf_Line *dwfl_line (const Dwfl_Line *line) { return &dwfl_linecu (line)->die.cu->lines->info[line->idx]; } inline const char *dwfl_line_file (const Dwfl_Line *line) { return INTUSE(dwarf_line_file) (dwfl_line (line)); } /* Search through all the line number records for a matching file and line/column number. If any of the numbers is zero, no match is performed. */ const char *lastfile = NULL; bool lastmatch = false; for (size_t cnt = 0; cnt < cu->die.cu->lines->nlines; ++cnt) { Dwarf_Line *line = &cu->die.cu->lines->info[cnt]; if (unlikely (line->file >= line->files->nfiles)) { __libdwfl_seterrno (DWFL_E (LIBDW, DWARF_E_INVALID_DWARF)); return -1; } else { const char *file = INTUSE(dwarf_line_file) (line); if (file != lastfile) { /* Match the name with the name the user provided. */ lastfile = file; lastmatch = !strcmp (is_basename ? basename (file) : file, fname); } } if (!lastmatch) continue; /* See whether line and possibly column match. */ if (lineno != 0 && (lineno > line->line || (column != 0 && column > line->column))) /* Cannot match. */ continue; /* Determine whether this is the best match so far. 
*/ size_t inner; for (inner = 0; inner < cur_match; ++inner) if (dwfl_line_file (match[inner]) == INTUSE(dwarf_line_file) (line)) break; if (inner < cur_match && (dwfl_line (match[inner])->line != line->line || dwfl_line (match[inner])->line != lineno || (column != 0 && (dwfl_line (match[inner])->column != line->column || dwfl_line (match[inner])->column != column)))) { /* We know about this file already. If this is a better match for the line number, use it. */ if (dwfl_line (match[inner])->line >= line->line && (dwfl_line (match[inner])->line != line->line || dwfl_line (match[inner])->column >= line->column)) /* Use the new line. Otherwise the old one. */ match[inner] = &cu->lines->idx[cnt]; continue; } if (cur_match < max_match) { if (cur_match == act_match) { /* Enlarge the array for the results. */ act_match += 10; Dwfl_Line **newp = realloc (match, act_match * sizeof (Dwfl_Line *)); if (newp == NULL) { free (match); __libdwfl_seterrno (DWFL_E_NOMEM); return -1; } match = newp; } match[cur_match++] = &cu->lines->idx[cnt]; } } } if (cur_match > 0) { assert (*nsrcs == 0 || *srcsp == match); *nsrcs = cur_match; *srcsp = match; return 0; } __libdwfl_seterrno (DWFL_E_NO_MATCH); return -1; }