/* Search an ELF file for a ".gnu_debuglink" section.  */
static const char *
find_debuglink (Elf *elf, GElf_Word *crc)
{
  size_t shstrndx;
  if (elf_getshstrndx (elf, &shstrndx) < 0)
    return NULL;

  Elf_Scn *scn = NULL;
  while ((scn = elf_nextscn (elf, scn)) != NULL)
    {
      GElf_Shdr shdr_mem;
      GElf_Shdr *shdr = gelf_getshdr (scn, &shdr_mem);
      if (shdr == NULL)
	return NULL;

      const char *name = elf_strptr (elf, shstrndx, shdr->sh_name);
      if (name == NULL)
	return NULL;

      if (!strcmp (name, ".gnu_debuglink"))
	break;
    }

  if (scn == NULL)
    return NULL;

  /* Found the .gnu_debuglink section.  Extract its contents.  */
  Elf_Data *rawdata = elf_rawdata (scn, NULL);
  if (rawdata == NULL)
    return NULL;

  Elf_Data crcdata =
    {
      .d_type = ELF_T_WORD,
      .d_buf = crc,
      .d_size = sizeof *crc,
      .d_version = EV_CURRENT,
    };
  Elf_Data conv =
    {
      .d_type = ELF_T_WORD,
      .d_buf = rawdata->d_buf + rawdata->d_size - sizeof *crc,
      .d_size = sizeof *crc,
      .d_version = EV_CURRENT,
    };

  GElf_Ehdr ehdr_mem;
  GElf_Ehdr *ehdr = gelf_getehdr (elf, &ehdr_mem);
  if (ehdr == NULL)
    return NULL;

  Elf_Data *d = gelf_xlatetom (elf, &crcdata, &conv, ehdr->e_ident[EI_DATA]);
  if (d == NULL)
    return NULL;
  assert (d == &crcdata);

  return rawdata->d_buf;
}


/* Find the separate debuginfo file for this module and open libelf on it.
   When we return success, MOD->debug is set up.  */
static Dwfl_Error
find_debuginfo (Dwfl_Module *mod)
{
  if (mod->debug.elf != NULL)
    return DWFL_E_NOERROR;

  GElf_Word debuglink_crc = 0;
  const char *debuglink_file = find_debuglink (mod->main.elf, &debuglink_crc);

  mod->debug.fd = (*mod->dwfl->callbacks->find_debuginfo) (MODCB_ARGS (mod),
							   mod->main.name,
							   debuglink_file,
							   debuglink_crc,
							   &mod->debug.name);
  return open_elf (mod, &mod->debug);
}


/* Try to find a symbol table in FILE.
   Returns DWFL_E_NOERROR if a proper one is found.
   Returns DWFL_E_NO_SYMTAB if not, but still sets results for SHT_DYNSYM.  */
static Dwfl_Error
load_symtab (struct dwfl_file *file, struct dwfl_file **symfile,
	     Elf_Scn **symscn, Elf_Scn **xndxscn,
	     size_t *syments, GElf_Word *strshndx)
{
  bool symtab = false;
  Elf_Scn *scn = NULL;
  while ((scn = elf_nextscn (file->elf, scn)) != NULL)
    {
      GElf_Shdr shdr_mem, *shdr = gelf_getshdr (scn, &shdr_mem);
      if (shdr != NULL)
	switch (shdr->sh_type)
	  {
	  case SHT_SYMTAB:
	    symtab = true;
	    *symscn = scn;
	    *symfile = file;
	    *strshndx = shdr->sh_link;
	    *syments = shdr->sh_size / shdr->sh_entsize;
	    if (*xndxscn != NULL)
	      return DWFL_E_NOERROR;
	    break;

	  case SHT_DYNSYM:
	    if (symtab)
	      break;
	    /* Use this if need be, but keep looking for SHT_SYMTAB.  */
	    *symscn = scn;
	    *symfile = file;
	    *strshndx = shdr->sh_link;
	    *syments = shdr->sh_size / shdr->sh_entsize;
	    break;

	  case SHT_SYMTAB_SHNDX:
	    *xndxscn = scn;
	    if (symtab)
	      return DWFL_E_NOERROR;
	    break;

	  default:
	    break;
	  }
    }

  if (symtab)
    /* We found one, though no SHT_SYMTAB_SHNDX to go with it.  */
    return DWFL_E_NOERROR;

  /* We found no SHT_SYMTAB, so any SHT_SYMTAB_SHNDX was bogus.
     We might have found an SHT_DYNSYM and set *SYMSCN et al though.  */
  *xndxscn = NULL;
  return DWFL_E_NO_SYMTAB;
}


/* Translate addresses into file offsets.
   OFFS[*] start out zero and remain zero if unresolved.  */
static void
find_offsets (Elf *elf, const GElf_Ehdr *ehdr, size_t n,
	      GElf_Addr addrs[n], GElf_Off offs[n])
{
  size_t unsolved = n;
  for (uint_fast16_t i = 0; i < ehdr->e_phnum; ++i)
    {
      GElf_Phdr phdr_mem;
      GElf_Phdr *phdr = gelf_getphdr (elf, i, &phdr_mem);
      if (phdr != NULL && phdr->p_type == PT_LOAD && phdr->p_memsz > 0)
	for (size_t j = 0; j < n; ++j)
	  if (offs[j] == 0
	      && addrs[j] >= phdr->p_vaddr
	      && addrs[j] - phdr->p_vaddr < phdr->p_filesz)
	    {
	      offs[j] = addrs[j] - phdr->p_vaddr + phdr->p_offset;
	      if (--unsolved == 0)
		break;
	    }
    }
}
/* Search an ELF file for a ".gnu_debuglink" section.  */
static const char *
find_debuglink (Elf *elf, GElf_Word *crc)
{
  size_t shstrndx;
  if (elf_getshdrstrndx (elf, &shstrndx) < 0)
    return NULL;

  Elf_Scn *scn = NULL;
  while ((scn = elf_nextscn (elf, scn)) != NULL)
    {
      GElf_Shdr shdr_mem;
      GElf_Shdr *shdr = gelf_getshdr (scn, &shdr_mem);
      if (shdr == NULL)
	return NULL;

      const char *name = elf_strptr (elf, shstrndx, shdr->sh_name);
      if (name == NULL)
	return NULL;

      if (!strcmp (name, ".gnu_debuglink"))
	break;
    }

  if (scn == NULL)
    return NULL;

  /* Found the .gnu_debuglink section.  Extract its contents.  */
  Elf_Data *rawdata = elf_rawdata (scn, NULL);
  if (rawdata == NULL)
    return NULL;

  Elf_Data crcdata =
    {
      .d_type = ELF_T_WORD,
      .d_buf = crc,
      .d_size = sizeof *crc,
      .d_version = EV_CURRENT,
    };
  Elf_Data conv =
    {
      .d_type = ELF_T_WORD,
      .d_buf = rawdata->d_buf + rawdata->d_size - sizeof *crc,
      .d_size = sizeof *crc,
      .d_version = EV_CURRENT,
    };

  GElf_Ehdr ehdr_mem;
  GElf_Ehdr *ehdr = gelf_getehdr (elf, &ehdr_mem);
  if (ehdr == NULL)
    return NULL;

  Elf_Data *d = gelf_xlatetom (elf, &crcdata, &conv, ehdr->e_ident[EI_DATA]);
  if (d == NULL)
    return NULL;
  assert (d == &crcdata);

  return rawdata->d_buf;
}

/* If the main file might have been prelinked, then we need to
   discover the correct synchronization address between the main and
   debug files.  Because of prelink's section juggling, we cannot rely
   on the address_sync computed from PT_LOAD segments (see open_elf).

   We will attempt to discover a synchronization address based on the
   section headers instead.  But finding a section address that is
   safe to use requires identifying which sections are SHT_PROGBITS.
   We can do that in the main file, but in the debug file all the
   allocated sections have been transformed into SHT_NOBITS so we have
   lost the means to match them up correctly.

   The only method left to us is to decode the .gnu.prelink_undo
   section in the prelinked main file.  This shows what the sections
   looked like before prelink juggled them--when they still had a
   direct correspondence to the debug file.  */
static Dwfl_Error
find_prelink_address_sync (Dwfl_Module *mod, struct dwfl_file *file)
{
  /* The magic section is only identified by name.  */
  size_t shstrndx;
  if (elf_getshdrstrndx (mod->main.elf, &shstrndx) < 0)
    return DWFL_E_LIBELF;

  Elf_Scn *scn = NULL;
  while ((scn = elf_nextscn (mod->main.elf, scn)) != NULL)
    {
      GElf_Shdr shdr_mem;
      GElf_Shdr *shdr = gelf_getshdr (scn, &shdr_mem);
      if (unlikely (shdr == NULL))
	return DWFL_E_LIBELF;
      if (shdr->sh_type == SHT_PROGBITS
	  && !(shdr->sh_flags & SHF_ALLOC)
	  && shdr->sh_name != 0)
	{
	  const char *secname = elf_strptr (mod->main.elf, shstrndx,
					    shdr->sh_name);
	  if (unlikely (secname == NULL))
	    return DWFL_E_LIBELF;
	  if (!strcmp (secname, ".gnu.prelink_undo"))
	    break;
	}
    }

  if (scn == NULL)
    /* There was no .gnu.prelink_undo section.  */
    return DWFL_E_NOERROR;

  Elf_Data *undodata = elf_rawdata (scn, NULL);
  if (unlikely (undodata == NULL))
    return DWFL_E_LIBELF;

  /* Decode the section.  It consists of the original ehdr, phdrs,
     and shdrs (but omits section 0).  */

  union
  {
    Elf32_Ehdr e32;
    Elf64_Ehdr e64;
  } ehdr;
  Elf_Data dst =
    {
      .d_buf = &ehdr,
      .d_size = sizeof ehdr,
      .d_type = ELF_T_EHDR,
      .d_version = EV_CURRENT
    };
  Elf_Data src = *undodata;
  src.d_size = gelf_fsize (mod->main.elf, ELF_T_EHDR, 1, EV_CURRENT);
  src.d_type = ELF_T_EHDR;
  if (unlikely (gelf_xlatetom (mod->main.elf, &dst, &src,
			       elf_getident (mod->main.elf, NULL)[EI_DATA])
		== NULL))
    return DWFL_E_LIBELF;

  size_t shentsize = gelf_fsize (mod->main.elf, ELF_T_SHDR, 1, EV_CURRENT);
  size_t phentsize = gelf_fsize (mod->main.elf, ELF_T_PHDR, 1, EV_CURRENT);

  uint_fast16_t phnum;
  uint_fast16_t shnum;
  if (ehdr.e32.e_ident[EI_CLASS] == ELFCLASS32)
    {
      if (ehdr.e32.e_shentsize != shentsize
	  || ehdr.e32.e_phentsize != phentsize)
	return DWFL_E_BAD_PRELINK;
      phnum = ehdr.e32.e_phnum;
      shnum = ehdr.e32.e_shnum;
    }
  else
    {
      if (ehdr.e64.e_shentsize != shentsize
	  || ehdr.e64.e_phentsize != phentsize)
	return DWFL_E_BAD_PRELINK;
      phnum = ehdr.e64.e_phnum;
      shnum = ehdr.e64.e_shnum;
    }

  /* Since prelink does not store the zeroth section header in the undo
     section, it cannot support SHN_XINDEX encoding.  */
  if (unlikely (shnum >= SHN_LORESERVE)
      || unlikely (undodata->d_size != (src.d_size
					+ phnum * phentsize
					+ (shnum - 1) * shentsize)))
    return DWFL_E_BAD_PRELINK;

  /* We look at the allocated SHT_PROGBITS (or SHT_NOBITS) sections.  (Most
     every file will have some SHT_PROGBITS sections, but it's possible to
     have one with nothing but .bss, i.e. SHT_NOBITS.)  The special sections
     that can be moved around have different sh_type values--except for
     .interp, the section that became the PT_INTERP segment.  So we exclude
     the SHT_PROGBITS section whose address matches the PT_INTERP p_vaddr.
     For this reason, we must examine the phdrs first to find PT_INTERP.  */

  GElf_Addr main_interp = 0;
  {
    size_t main_phnum;
    if (unlikely (elf_getphdrnum (mod->main.elf, &main_phnum)))
      return DWFL_E_LIBELF;
    for (size_t i = 0; i < main_phnum; ++i)
      {
	GElf_Phdr phdr;
	if (unlikely (gelf_getphdr (mod->main.elf, i, &phdr) == NULL))
	  return DWFL_E_LIBELF;
	if (phdr.p_type == PT_INTERP)
	  {
	    main_interp = phdr.p_vaddr;
	    break;
	  }
      }
  }

  src.d_buf += src.d_size;
  src.d_type = ELF_T_PHDR;
  src.d_size = phnum * phentsize;

  GElf_Addr undo_interp = 0;
  {
    union
    {
      Elf32_Phdr p32[phnum];
      Elf64_Phdr p64[phnum];
    } phdr;
    dst.d_buf = &phdr;
    dst.d_size = sizeof phdr;
    if (unlikely (gelf_xlatetom (mod->main.elf, &dst, &src,
				 ehdr.e32.e_ident[EI_DATA]) == NULL))
      return DWFL_E_LIBELF;
    if (ehdr.e32.e_ident[EI_CLASS] == ELFCLASS32)
      {
	for (uint_fast16_t i = 0; i < phnum; ++i)
	  if (phdr.p32[i].p_type == PT_INTERP)
	    {
	      undo_interp = phdr.p32[i].p_vaddr;
	      break;
	    }
      }
    else
      {
	for (uint_fast16_t i = 0; i < phnum; ++i)
	  if (phdr.p64[i].p_type == PT_INTERP)
	    {
	      undo_interp = phdr.p64[i].p_vaddr;
	      break;
	    }
      }
  }

  if (unlikely ((main_interp == 0) != (undo_interp == 0)))
    return DWFL_E_BAD_PRELINK;

  src.d_buf += src.d_size;
  src.d_type = ELF_T_SHDR;
  src.d_size = gelf_fsize (mod->main.elf, ELF_T_SHDR, shnum - 1, EV_CURRENT);

  union
  {
    Elf32_Shdr s32[shnum - 1];
    Elf64_Shdr s64[shnum - 1];
  } shdr;
  dst.d_buf = &shdr;
  dst.d_size = sizeof shdr;
  if (unlikely (gelf_xlatetom (mod->main.elf, &dst, &src,
			       ehdr.e32.e_ident[EI_DATA]) == NULL))
    return DWFL_E_LIBELF;

  /* Now we can look at the original section headers of the main file
     before it was prelinked.  First we'll apply our method to the main
     file sections as they are after prelinking, to calculate the
     synchronization address of the main file.  Then we'll apply that
     same method to the saved section headers, to calculate the matching
     synchronization address of the debug file.

     The method is to consider SHF_ALLOC sections that are either
     SHT_PROGBITS or SHT_NOBITS, excluding the section whose sh_addr
     matches the PT_INTERP p_vaddr.  The special sections that can be
     moved by prelink have other types, except for .interp (which
     becomes PT_INTERP).  The "real" sections cannot move as such, but
     .bss can be split into .dynbss and .bss, with the total memory
     image remaining the same but being spread across the two sections.
     So we consider the highest section end, which still matches up.  */

  GElf_Addr highest;

  inline void consider_shdr (GElf_Addr interp,
			     GElf_Word sh_type,
			     GElf_Xword sh_flags,
			     GElf_Addr sh_addr,
			     GElf_Xword sh_size)
  {
    if ((sh_flags & SHF_ALLOC)
	&& ((sh_type == SHT_PROGBITS && sh_addr != interp)
	    || sh_type == SHT_NOBITS))
      {
	const GElf_Addr sh_end = sh_addr + sh_size;
	if (sh_end > highest)
	  highest = sh_end;
      }
  }

  highest = 0;
  scn = NULL;
  while ((scn = elf_nextscn (mod->main.elf, scn)) != NULL)
    {
      GElf_Shdr sh_mem;
      GElf_Shdr *sh = gelf_getshdr (scn, &sh_mem);
      if (unlikely (sh == NULL))
	return DWFL_E_LIBELF;
      consider_shdr (main_interp, sh->sh_type, sh->sh_flags,
		     sh->sh_addr, sh->sh_size);
    }
  if (highest > mod->main.vaddr)
    {
      mod->main.address_sync = highest;

      highest = 0;
      if (ehdr.e32.e_ident[EI_CLASS] == ELFCLASS32)
	for (size_t i = 0; i < shnum - 1; ++i)
	  consider_shdr (undo_interp, shdr.s32[i].sh_type, shdr.s32[i].sh_flags,
			 shdr.s32[i].sh_addr, shdr.s32[i].sh_size);
      else
	for (size_t i = 0; i < shnum - 1; ++i)
	  consider_shdr (undo_interp, shdr.s64[i].sh_type, shdr.s64[i].sh_flags,
			 shdr.s64[i].sh_addr, shdr.s64[i].sh_size);

      if (highest > file->vaddr)
	file->address_sync = highest;
      else
	return DWFL_E_BAD_PRELINK;
    }

  return DWFL_E_NOERROR;
}

/* Find the separate debuginfo file for this module and open libelf on it.
   When we return success, MOD->debug is set up.  */
static Dwfl_Error
find_debuginfo (Dwfl_Module *mod)
{
  if (mod->debug.elf != NULL)
    return DWFL_E_NOERROR;

  GElf_Word debuglink_crc = 0;
  const char *debuglink_file = find_debuglink (mod->main.elf, &debuglink_crc);

  mod->debug.fd = (*mod->dwfl->callbacks->find_debuginfo) (MODCB_ARGS (mod),
							   mod->main.name,
							   debuglink_file,
							   debuglink_crc,
							   &mod->debug.name);
  Dwfl_Error result = open_elf (mod, &mod->debug);
  if (result == DWFL_E_NOERROR && mod->debug.address_sync != 0)
    result = find_prelink_address_sync (mod, &mod->debug);
  return result;
}


/* Try to find a symbol table in FILE.
   Returns DWFL_E_NOERROR if a proper one is found.
   Returns DWFL_E_NO_SYMTAB if not, but still sets results for SHT_DYNSYM.  */
static Dwfl_Error
load_symtab (struct dwfl_file *file, struct dwfl_file **symfile,
	     Elf_Scn **symscn, Elf_Scn **xndxscn,
	     size_t *syments, int *first_global, GElf_Word *strshndx)
{
  bool symtab = false;
  Elf_Scn *scn = NULL;
  while ((scn = elf_nextscn (file->elf, scn)) != NULL)
    {
      GElf_Shdr shdr_mem, *shdr = gelf_getshdr (scn, &shdr_mem);
      if (shdr != NULL)
	switch (shdr->sh_type)
	  {
	  case SHT_SYMTAB:
	    symtab = true;
	    *symscn = scn;
	    *symfile = file;
	    *strshndx = shdr->sh_link;
	    *syments = shdr->sh_size / shdr->sh_entsize;
	    *first_global = shdr->sh_info;
	    if (*xndxscn != NULL)
	      return DWFL_E_NOERROR;
	    break;

	  case SHT_DYNSYM:
	    if (symtab)
	      break;
	    /* Use this if need be, but keep looking for SHT_SYMTAB.  */
	    *symscn = scn;
	    *symfile = file;
	    *strshndx = shdr->sh_link;
	    *syments = shdr->sh_size / shdr->sh_entsize;
	    *first_global = shdr->sh_info;
	    break;

	  case SHT_SYMTAB_SHNDX:
	    *xndxscn = scn;
	    if (symtab)
	      return DWFL_E_NOERROR;
	    break;

	  default:
	    break;
	  }
    }

  if (symtab)
    /* We found one, though no SHT_SYMTAB_SHNDX to go with it.  */
    return DWFL_E_NOERROR;

  /* We found no SHT_SYMTAB, so any SHT_SYMTAB_SHNDX was bogus.
     We might have found an SHT_DYNSYM and set *SYMSCN et al though.  */
  *xndxscn = NULL;
  return DWFL_E_NO_SYMTAB;
}


/* Translate addresses into file offsets.
   OFFS[*] start out zero and remain zero if unresolved.  */
static void
find_offsets (Elf *elf, size_t phnum, size_t n,
	      GElf_Addr addrs[n], GElf_Off offs[n])
{
  size_t unsolved = n;
  for (size_t i = 0; i < phnum; ++i)
    {
      GElf_Phdr phdr_mem;
      GElf_Phdr *phdr = gelf_getphdr (elf, i, &phdr_mem);
      if (phdr != NULL && phdr->p_type == PT_LOAD && phdr->p_memsz > 0)
	for (size_t j = 0; j < n; ++j)
	  if (offs[j] == 0
	      && addrs[j] >= phdr->p_vaddr
	      && addrs[j] - phdr->p_vaddr < phdr->p_filesz)
	    {
	      offs[j] = addrs[j] - phdr->p_vaddr + phdr->p_offset;
	      if (--unsolved == 0)
		break;
	    }
    }
}