From c90801e62906f454eb3d478683cd54bce5d8d7b3 Mon Sep 17 00:00:00 2001
From: Rui Ueyama <ruiu@cs.stanford.edu>
Date: Sat, 16 Apr 2022 13:18:09 +0800
Subject: [PATCH] [ELF] Make --gdb-index work for compressed input debug
 sections

Previously, --gdb-index tried to read bogus compressed data from
input sections if input debug sections were compressed.

Fixes https://github.com/rui314/mold/issues/431
---
 elf/arch-riscv64.cc   |  4 ++--
 elf/input-files.cc    | 10 ++--------
 elf/input-sections.cc | 44 +++++++++++++++++++++++++------------------
 elf/mold.h            | 14 ++++++--------
 elf/output-chunks.cc  |  2 ++
 test/elf/gdb-index.sh | 21 +++++++++++++--------
 6 files changed, 51 insertions(+), 44 deletions(-)
diff --git a/elf/arch-riscv64.cc b/elf/arch-riscv64.cc
index aa5d7b1c..955407f2 100644
--- a/elf/arch-riscv64.cc
+++ b/elf/arch-riscv64.cc
@@ -490,8 +490,8 @@ template <>
 void InputSection<E>::copy_contents_riscv(Context<E> &ctx, u8 *buf) {
   // A non-alloc section isn't relaxed, so just copy it as one big chunk.
   if (!(shdr().sh_flags & SHF_ALLOC)) {
-    if (is_compressed())
-      uncompress(ctx, buf);
+    if (compressed)
+      uncompress_to(ctx, buf);
     else
       memcpy(buf, contents.data(), contents.size());
     return;
diff --git a/elf/input-files.cc b/elf/input-files.cc
index 6980e21a..d44216af 100644
--- a/elf/input-files.cc
+++ b/elf/input-files.cc
@@ -524,16 +524,10 @@ split_section(Context<E> &ctx, InputSection<E> &sec) {
                                                sec.shdr().sh_flags);
   rec->p2align = sec.p2align;
 
-  std::string_view data = sec.contents;
-
   // If thes section contents are compressed, uncompress them.
-  if (sec.is_compressed()) {
-    u8 *buf = new u8[sec.sh_size];
-    sec.uncompress(ctx, buf);
-    data = {(char *)buf, sec.sh_size};
-    ctx.string_pool.emplace_back(buf);
-  }
+  sec.uncompress(ctx);
 
+  std::string_view data = sec.contents;
   const char *begin = data.data();
   u64 entsize = sec.shdr().sh_entsize;
   HyperLogLog estimator;
diff --git a/elf/input-sections.cc b/elf/input-sections.cc
index cfe1c570..81663cdb 100644
--- a/elf/input-sections.cc
+++ b/elf/input-sections.cc
@@ -38,8 +38,6 @@ InputSection<E>::InputSection(Context<E> &ctx, ObjectFile<E> &file,
   if (shndx < file.elf_sections.size())
     contents = {(char *)file.mf->data + shdr().sh_offset, shdr().sh_size};
 
-  bool compressed;
-
   if (name.starts_with(".zdebug")) {
     sh_size = *(ubig64 *)&contents[4];
     p2align = to_p2align(shdr().sh_addralign);
@@ -55,28 +53,37 @@ InputSection<E>::InputSection(Context<E> &ctx, ObjectFile<E> &file,
     compressed = false;
   }
 
-  // Uncompress early if the relocation is REL-type so that we can read
-  // addends from section contents. If RELA-type, we don't need to do this
-  // because addends are in relocations.
-  if (compressed && E::is_rel) {
-    u8 *buf = new u8[sh_size];
-    uncompress(ctx, buf);
-    contents = {(char *)buf, sh_size};
-    ctx.string_pool.emplace_back(buf);
-  }
+  // Sections may have been compressed. We usually uncompress them
+  // directly into the mmap'ed output file, but we want to uncompress
+  // early for REL-type ELF types to read relocation addends from
+  // section contents. For RELA-type, we don't need to do this because
+  // addends are in relocations.
+  if (E::is_rel)
+    uncompress(ctx);
 
   output_section =
     OutputSection<E>::get_instance(ctx, name, shdr().sh_type, shdr().sh_flags);
 }
 
 template <typename E>
-bool InputSection<E>::is_compressed() {
-  return !E::is_rel &&
-         (name().starts_with(".zdebug") || (shdr().sh_flags & SHF_COMPRESSED));
+void InputSection<E>::uncompress(Context<E> &ctx) {
+  if (!compressed || uncompressed)
+    return;
+
+  u8 *buf = new u8[sh_size];
+  uncompress_to(ctx, buf);
+  contents = {(char *)buf, sh_size};
+  ctx.string_pool.emplace_back(buf);
+  uncompressed = true;
 }
 
 template <typename E>
-void InputSection<E>::uncompress(Context<E> &ctx, u8 *buf) {
+void InputSection<E>::uncompress_to(Context<E> &ctx, u8 *buf) {
+  if (!compressed || uncompressed) {
+    memcpy(buf, contents.data(), contents.size());
+    return;
+  }
+
   auto do_uncompress = [&](std::string_view data) {
     unsigned long size = sh_size;
     if (::uncompress(buf, &size, (u8 *)data.data(), data.size()) != Z_OK)
@@ -100,7 +107,8 @@ void InputSection<E>::uncompress(Context<E> &ctx, u8 *buf) {
 
   ElfChdr<E> &hdr = *(ElfChdr<E> *)&contents[0];
   if (hdr.ch_type != ELFCOMPRESS_ZLIB)
-    Fatal(ctx) << *this << ": unsupported compression type";
+    Fatal(ctx) << *this << ": unsupported compression type: 0x"
+               << std::hex << hdr.ch_type;
   do_uncompress(contents.substr(sizeof(ElfChdr<E>)));
 }
 
@@ -213,8 +221,8 @@ void InputSection<E>::write_to(Context<E> &ctx, u8 *buf) {
   // Copy data
   if constexpr (std::is_same_v<E, RISCV64>) {
     copy_contents_riscv(ctx, buf);
-  } else if (is_compressed()) {
-    uncompress(ctx, buf);
+  } else if (compressed) {
+    uncompress_to(ctx, buf);
   } else {
     memcpy(buf, contents.data(), contents.size());
   }
diff --git a/elf/mold.h b/elf/mold.h
index b36446cb..d38fec9a 100644
--- a/elf/mold.h
+++ b/elf/mold.h
@@ -266,8 +266,8 @@ class InputSection {
   InputSection(Context<E> &ctx, ObjectFile<E> &file, std::string_view name,
                i64 shndx);
 
-  bool is_compressed();
-  void uncompress(Context<E> &ctx, u8 *buf);
+  void uncompress(Context<E> &ctx);
+  void uncompress_to(Context<E> &ctx, u8 *buf);
   void scan_relocations(Context<E> &ctx);
   void write_to(Context<E> &ctx, u8 *buf);
   void apply_reloc_alloc(Context<E> &ctx, u8 *base);
@@ -307,10 +307,12 @@ class InputSection {
 
   // For COMDAT de-duplication and garbage collection
   std::atomic_bool is_alive = true;
-  bool killed_by_icf = false;
-
   u8 p2align = 0;
 
+  u8 compressed : 1 = false;
+  u8 uncompressed : 1 = false;
+  u8 killed_by_icf : 1 = false;
+
 private:
   typedef enum : u8 { NONE, ERROR, COPYREL, PLT, CPLT, DYNREL, BASEREL } Action;
 
@@ -1142,10 +1144,6 @@ class ObjectFile : public InputFile<E> {
                        const ElfSym<E> &esym, i64 symidx);
   void merge_visibility(Context<E> &ctx, Symbol<E> &sym, u8 visibility);
 
-  std::pair<std::string_view, const ElfShdr<E> *>
-  uncompress_contents(Context<E> &ctx, const ElfShdr<E> &shdr,
-                      std::string_view name);
-
   bool has_common_symbol = false;
 
   std::string_view symbol_strtab;
diff --git a/elf/output-chunks.cc b/elf/output-chunks.cc
index abb278ef..315172bb 100644
--- a/elf/output-chunks.cc
+++ b/elf/output-chunks.cc
@@ -2407,6 +2407,7 @@ void GdbIndexSection<E>::write_address_areas(Context<E> &ctx) {
 template <typename E>
 std::vector<std::string_view>
 GdbIndexSection<E>::read_compunits(Context<E> &ctx, ObjectFile<E> &file) {
+  file.debug_info->uncompress(ctx);
   std::string_view data = file.debug_info->contents;
   std::vector<std::string_view> vec;
 
@@ -2444,6 +2445,7 @@ GdbIndexSection<E>::read_pubnames(Context<E> &ctx, ObjectFile<E> &file) {
   };
 
   auto read = [&](InputSection<E> &isec) {
+    isec.uncompress(ctx);
     std::string_view contents = isec.contents;
 
     while (!contents.empty()) {