[lld][WebAssembly] Avoid a signed overflow on large sections (#183225)

Wasm section sizes are specified as u32s, and thus can be as large as 4GB. wasm-ld currently stores the offset into a section as an int32_t, which overflows on large sections and results in a crash. This PR fixes the issue by storing the offset as an int64_t, which accommodates any valid wasm section and allows catching even larger sections instead of wrapping around. It also adds extra checks so that unencodable sections fail with an error instead of producing garbage wasm binaries, and adds lit tests to make sure it works. I confirmed the tests fail on main but pass with this fix. This is the same as https://github.com/llvm/llvm-project/pull/178287, except that it deletes the temporary files the tests create and requires that the tests run on a 64-bit platform to avoid OOM issues due to the large binaries they create.
2026-02-25 10:20:57 -08:00 · 2026-02-25 10:20:57 -08:00 · a52f6110ec
commit a52f6110ec
parent d2d862a544
5 changed files with 110 additions and 2 deletions
--- a/lld/test/wasm/large-debug-section.test
+++ b/lld/test/wasm/large-debug-section.test
@ -0,0 +1,31 @@
+# REQUIRES: llvm-64-bits
+# RUN: split-file %s %t
+# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %t/debug1.s -o %t/debug1.o
+# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %t/debug2.s -o %t/debug2.o
+# --no-gc-sections to prevent the linker from optimizing the chunk away, otherwise it produces a tiny output
+# RUN: wasm-ld --no-entry --no-gc-sections %t/debug1.o %t/debug2.o -o %t/combined.wasm
+# RUN: llvm-readobj --sections %t/combined.wasm | FileCheck %s
+# RUN: rm %t/debug1.o %t/debug2.o %t/combined.wasm
+
+# Check that the linker doesn't crash with large debug sections that together exceed 2GB.
+# CHECK: Type: CUSTOM (0x0)
+# Total size: 2214592520 + 134217728 = 2348810248
+# CHECK-NEXT: Size: 2348810248
+# CHECK: Name: .debug_info
+
+# A 2GB + some extra bytes debug section to make sure we go over 2G
+#--- debug1.s
+.section .debug_info,"",@
+  .int32 0xAAAAAAAA
+  .int32 0xBBBBBBBB
+  .zero 2214592504
+  .int32 0xCCCCCCCC
+  .int32 0xDDDDDDDD
+
+#--- debug2.s
+.section .debug_info,"",@
+  .int32 0x11111111
+  .int32 0x22222222
+  .zero 134217712
+  .int32 0x44444444
+  .int32 0x55555555
--- a/lld/test/wasm/large-section.test
+++ b/lld/test/wasm/large-section.test
@ -0,0 +1,37 @@
+# REQUIRES: llvm-64-bits
+# RUN: split-file %s %t
+# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %t/chunk1.s -o %t/chunk1.o
+# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %t/chunk2.s -o %t/chunk2.o
+# --no-gc-sections to prevent the linker from optimizing the chunk away, otherwise it produces a tiny output
+# RUN: wasm-ld --no-entry --no-gc-sections %t/chunk1.o %t/chunk2.o -o %t/combined.wasm
+# RUN: llvm-readobj --sections %t/combined.wasm | FileCheck %s
+# RUN: rm %t/chunk1.o %t/chunk2.o %t/combined.wasm
+
+# Check that the linker doesn't crash with large data sections that together exceed 2GB.
+# CHECK: Type: DATA (0xB)
+# CHECK-NEXT: Size: 2348810260
+
+# A 2GB + some extra bytes of data to make sure we go over 2GB
+#--- chunk1.s
+.section .data.chunk1,"",@
+.globl chunk1_start
+.type chunk1_start,@object
+chunk1_start:
+  .int32 0xAAAAAAAA
+  .int32 0xBBBBBBBB
+  .zero 2214592504
+  .int32 0xCCCCCCCC
+  .int32 0xDDDDDDDD
+.size chunk1_start, 2214592512
+
+#--- chunk2.s
+.section .data.chunk2,"",@
+.globl chunk2_start
+.type chunk2_start,@object
+chunk2_start:
+  .int32 0x11111111
+  .int32 0x22222222
+  .zero 134217712
+  .int32 0x44444444
+  .int32 0x55555555
+.size chunk2_start, 134217728
--- a/lld/test/wasm/section-too-large.test
+++ b/lld/test/wasm/section-too-large.test
@ -0,0 +1,23 @@
+# REQUIRES: llvm-64-bits
+# RUN: split-file %s %t
+# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %t/chunk1.s -o %t/chunk1.o
+# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %t/chunk2.s -o %t/chunk2.o
+# --no-gc-sections to prevent the linker from optimizing the chunk away, otherwise it produces a tiny output
+# RUN: not wasm-ld --no-entry --no-gc-sections %t/chunk1.o %t/chunk2.o -o %t/combined.wasm 2>&1 | FileCheck %s
+# RUN: rm %t/chunk1.o %t/chunk2.o
+
+# Check that the linker fails gracefully when a section exceeds 4GB.
+# CHECK: error: section '.debug_info' too large to encode:
+
+# Two 2GiB chunks that together total 4GiB (2^32 bytes), one byte more than a u32 section size can encode
+#--- chunk1.s
+.section .debug_info,"",@
+  .int32 0xAAAAAAAA
+  .zero 2147483640
+  .int32 0xBBBBBBBB
+
+#--- chunk2.s
+.section .debug_info,"",@
+  .int32 0x11111111
+  .zero 2147483640
+  .int32 0x22222222
--- a/lld/wasm/InputChunks.h
+++ b/lld/wasm/InputChunks.h
@ -97,7 +97,11 @@ public:

  // After assignAddresses is called, this represents the offset from
  // the beginning of the output section this chunk was assigned to.
-  int32_t outSecOff = 0;
+  //
+  // WASM sections can be up to 4GB. We use a larger, signed integer here to
+  // be able to detect section size overflow instead of a silent wrap-around
+  // and corrupted output sections.
+  int64_t outSecOff = 0;

  uint8_t sectionKind : 3;

--- a/lld/wasm/OutputSections.cpp
+++ b/lld/wasm/OutputSections.cpp
@ -59,6 +59,10 @@ void CodeSection::finalizeContents() {
    bodySize += func->getSize();
  }

+  if (bodySize > UINT32_MAX) {
+    error("section too large to encode: " + Twine(bodySize) + " bytes");
+  }
+
  createHeader(bodySize);
 }

@ -157,6 +161,10 @@ void DataSection::finalizeContents() {
    }
  }

+  if (bodySize > UINT32_MAX) {
+    error("section too large to encode: " + Twine(bodySize) + " bytes");
+  }
+
  createHeader(bodySize);
 }

@ -232,7 +240,7 @@ void CustomSection::finalizeInputSections() {
    return;

  mergedSection->finalizeContents();
-  inputSections = std::move(newSections);
+  inputSections = newSections;
 }

 void CustomSection::finalizeContents() {
@ -249,6 +257,11 @@ void CustomSection::finalizeContents() {
    payloadSize += section->getSize();
  }

+  if (payloadSize > UINT32_MAX) {
+    error("section '" + name + "' too large to encode: " + Twine(payloadSize) +
+          " bytes");
+  }
+
  createHeader(payloadSize + nameData.size());
 }