From a52f6110ec8952bca4b7679d32cc2e5422bff1e0 Mon Sep 17 00:00:00 2001 From: Fatih BAKIR Date: Wed, 25 Feb 2026 10:20:57 -0800 Subject: [PATCH] [lld][Webassembly] Avoid a signed overflow on large sections (#183225) wasm section sizes are specified as u32s, and thus can be as large as 4GB. wasm-ld currently stores the offset into a section as an int32_t, which overflows on large sections and results in a crash. This change makes it an int64_t to accommodate any valid wasm section and to allow catching even larger sections instead of wrapping around. This PR fixes the issue by storing the offset as an int64_t and adding extra checks so that unencodable sections cause the link to fail instead of producing garbage wasm binaries. It also adds lit tests to make sure it works. I confirmed the test fails on main but passes with this fix. This is the same as https://github.com/llvm/llvm-project/pull/178287 but deletes the temporary files the tests create and requires that the tests run on a 64-bit platform to avoid OOM issues due to the large binaries they create. 
--- lld/test/wasm/large-debug-section.test | 31 +++++++++++++++++++++ lld/test/wasm/large-section.test | 37 ++++++++++++++++++++++++++ lld/test/wasm/section-too-large.test | 23 ++++++++++++++++ lld/wasm/InputChunks.h | 6 ++++- lld/wasm/OutputSections.cpp | 15 ++++++++++- 5 files changed, 110 insertions(+), 2 deletions(-) create mode 100644 lld/test/wasm/large-debug-section.test create mode 100644 lld/test/wasm/large-section.test create mode 100644 lld/test/wasm/section-too-large.test diff --git a/lld/test/wasm/large-debug-section.test b/lld/test/wasm/large-debug-section.test new file mode 100644 index 000000000000..8690594c30b1 --- /dev/null +++ b/lld/test/wasm/large-debug-section.test @@ -0,0 +1,31 @@ +# REQUIRES: llvm-64-bits +# RUN: split-file %s %t +# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %t/debug1.s -o %t/debug1.o +# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %t/debug2.s -o %t/debug2.o +# --no-gc-sections to prevent the linker from optimizing the chunk away, otherwise it produces a tiny output +# RUN: wasm-ld --no-entry --no-gc-sections %t/debug1.o %t/debug2.o -o %t/combined.wasm +# RUN: llvm-readobj --sections %t/combined.wasm | FileCheck %s +# RUN: rm %t/debug1.o %t/debug2.o %t/combined.wasm + +# Check that the linker doesn't crash with large debug sections that together exceed 2GB. 
+# CHECK: Type: CUSTOM (0x0) +# Total size: 2214592520 + 134217728 = 2348810248 +# CHECK-NEXT: Size: 2348810248 +# CHECK: Name: .debug_info + +# A 2GB + some extra bytes debug section to make sure we go over 2G +#--- debug1.s +.section .debug_info,"",@ + .int32 0xAAAAAAAA + .int32 0xBBBBBBBB + .zero 2214592504 + .int32 0xCCCCCCCC + .int32 0xDDDDDDDD + +#--- debug2.s +.section .debug_info,"",@ + .int32 0x11111111 + .int32 0x22222222 + .zero 134217712 + .int32 0x44444444 + .int32 0x55555555 diff --git a/lld/test/wasm/large-section.test b/lld/test/wasm/large-section.test new file mode 100644 index 000000000000..2e9d5f580e88 --- /dev/null +++ b/lld/test/wasm/large-section.test @@ -0,0 +1,37 @@ +# REQUIRES: llvm-64-bits +# RUN: split-file %s %t +# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %t/chunk1.s -o %t/chunk1.o +# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %t/chunk2.s -o %t/chunk2.o +# --no-gc-sections to prevent the linker from optimizing the chunk away, otherwise it produces a tiny output +# RUN: wasm-ld --no-entry --no-gc-sections %t/chunk1.o %t/chunk2.o -o %t/combined.wasm +# RUN: llvm-readobj --sections %t/combined.wasm | FileCheck %s +# RUN: rm %t/chunk1.o %t/chunk2.o %t/combined.wasm + +# Check that the linker doesn't crash with large data sections that together exceed 2GB. 
+# CHECK: Type: DATA (0xB) +# CHECK-NEXT: Size: 2348810260 + +# A 2GB + some extra bytes of data to make sure we go over 2G +#--- chunk1.s +.section .data.chunk1,"",@ +.globl chunk1_start +.type chunk1_start,@object +chunk1_start: + .int32 0xAAAAAAAA + .int32 0xBBBBBBBB + .zero 2214592504 + .int32 0xCCCCCCCC + .int32 0xDDDDDDDD +.size chunk1_start, 2214592512 + +#--- chunk2.s +.section .data.chunk2,"",@ +.globl chunk2_start +.type chunk2_start,@object +chunk2_start: + .int32 0x11111111 + .int32 0x22222222 + .zero 134217712 + .int32 0x44444444 + .int32 0x55555555 +.size chunk2_start, 134217728 diff --git a/lld/test/wasm/section-too-large.test b/lld/test/wasm/section-too-large.test new file mode 100644 index 000000000000..741fb18e28c5 --- /dev/null +++ b/lld/test/wasm/section-too-large.test @@ -0,0 +1,23 @@ +# REQUIRES: llvm-64-bits +# RUN: split-file %s %t +# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %t/chunk1.s -o %t/chunk1.o +# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %t/chunk2.s -o %t/chunk2.o +# --no-gc-sections to prevent the linker from optimizing the chunk away, otherwise it produces a tiny output +# RUN: not wasm-ld --no-entry --no-gc-sections %t/chunk1.o %t/chunk2.o -o %t/combined.wasm 2>&1 | FileCheck %s +# RUN: rm %t/chunk1.o %t/chunk2.o + +# Check that the linker fails gracefully when a section exceeds 4GB. +# CHECK: error: section '.debug_info' too large to encode: + +# Two chunks that together exceed 4GB +#--- chunk1.s +.section .debug_info,"",@ + .int32 0xAAAAAAAA + .zero 2147483640 + .int32 0xBBBBBBBB + +#--- chunk2.s +.section .debug_info,"",@ + .int32 0x11111111 + .zero 2147483640 + .int32 0x22222222 diff --git a/lld/wasm/InputChunks.h b/lld/wasm/InputChunks.h index 1fe78d76631f..e772894d9545 100644 --- a/lld/wasm/InputChunks.h +++ b/lld/wasm/InputChunks.h @@ -97,7 +97,11 @@ public: // After assignAddresses is called, this represents the offset from // the beginning of the output section this chunk was assigned to. 
- int32_t outSecOff = 0; + // + // WASM sections can be up to 4GB. We use a larger, signed integer here to + // be able to detect section size overflow instead of a silent wrap-around + // and corrupted output sections. + int64_t outSecOff = 0; uint8_t sectionKind : 3; diff --git a/lld/wasm/OutputSections.cpp b/lld/wasm/OutputSections.cpp index d6348e459d31..454dc7293408 100644 --- a/lld/wasm/OutputSections.cpp +++ b/lld/wasm/OutputSections.cpp @@ -59,6 +59,10 @@ void CodeSection::finalizeContents() { bodySize += func->getSize(); } + if (bodySize > UINT32_MAX) { + error("section too large to encode: " + Twine(bodySize) + " bytes"); + } + createHeader(bodySize); } @@ -157,6 +161,10 @@ void DataSection::finalizeContents() { } } + if (bodySize > UINT32_MAX) { + error("section too large to encode: " + Twine(bodySize) + " bytes"); + } + createHeader(bodySize); } @@ -232,7 +240,7 @@ void CustomSection::finalizeInputSections() { return; mergedSection->finalizeContents(); - inputSections = std::move(newSections); + inputSections = newSections; } void CustomSection::finalizeContents() { @@ -249,6 +257,11 @@ void CustomSection::finalizeContents() { payloadSize += section->getSize(); } + if (payloadSize > UINT32_MAX) { + error("section '" + name + "' too large to encode: " + Twine(payloadSize) + + " bytes"); + } + createHeader(payloadSize + nameData.size()); }