[lld][WebAssembly] Avoid a signed overflow on large sections (#183225)

wasm section sizes are specified as u32s, and thus can be as large as
4GB. wasm-ld currently stores the offset into a section as an int32_t,
which overflows on large sections and results in a crash. This change
makes it an int64_t to accommodate any valid wasm section and to allow
catching even larger sections instead of wrapping around.

This PR fixes the issue by storing the offset as an int64_t, adding
extra checks so that un-encodable sections fail with an error instead of
producing garbage wasm binaries, and adding lit tests to make sure it
works. I confirmed the test fails on main but passes with this fix.

This is the same as https://github.com/llvm/llvm-project/pull/178287 but
deletes the temporary files the tests create and requires that the tests
run on a 64-bit platform to avoid OOM issues due to the large binaries
they create.
This commit is contained in:
Fatih BAKIR 2026-02-25 10:20:57 -08:00 committed by GitHub
parent d2d862a544
commit a52f6110ec
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 110 additions and 2 deletions

View File

@ -0,0 +1,31 @@
# REQUIRES: llvm-64-bits
# RUN: split-file %s %t
# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %t/debug1.s -o %t/debug1.o
# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %t/debug2.s -o %t/debug2.o
# --no-gc-sections to prevent the linker from optimizing the chunk away, otherwise it produces a tiny output
# RUN: wasm-ld --no-entry --no-gc-sections %t/debug1.o %t/debug2.o -o %t/combined.wasm
# RUN: llvm-readobj --sections %t/combined.wasm | FileCheck %s
# RUN: rm %t/debug1.o %t/debug2.o %t/combined.wasm
# Check that the linker doesn't crash with large debug sections that together exceed 2GB.
# CHECK: Type: CUSTOM (0x0)
# Total size: 2214592520 + 134217728 = 2348810248
# CHECK-NEXT: Size: 2348810248
# CHECK: Name: .debug_info
# A 2GB + some extra bytes debug section to make sure we go over 2G
#--- debug1.s
.section .debug_info,"",@
.int32 0xAAAAAAAA
.int32 0xBBBBBBBB
.zero 2214592504
.int32 0xCCCCCCCC
.int32 0xDDDDDDDD
#--- debug2.s
.section .debug_info,"",@
.int32 0x11111111
.int32 0x22222222
.zero 134217712
.int32 0x44444444
.int32 0x55555555

View File

@ -0,0 +1,37 @@
# REQUIRES: llvm-64-bits
# RUN: split-file %s %t
# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %t/chunk1.s -o %t/chunk1.o
# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %t/chunk2.s -o %t/chunk2.o
# --no-gc-sections to prevent the linker from optimizing the chunk away, otherwise it produces a tiny output
# RUN: wasm-ld --no-entry --no-gc-sections %t/chunk1.o %t/chunk2.o -o %t/combined.wasm
# RUN: llvm-readobj --sections %t/combined.wasm | FileCheck %s
# RUN: rm %t/chunk1.o %t/chunk2.o %t/combined.wasm
# Check that the linker doesn't crash with large data sections that together exceed 2GB.
# CHECK: Type: DATA (0xB)
# CHECK-NEXT: Size: 2348810260
# A 2GB + some extra bytes of data to make sure we go over 2G
#--- chunk1.s
.section .data.chunk1,"",@
.globl chunk1_start
.type chunk1_start,@object
chunk1_start:
.int32 0xAAAAAAAA
.int32 0xBBBBBBBB
.zero 2214592504
.int32 0xCCCCCCCC
.int32 0xDDDDDDDD
.size chunk1_start, 2214592512
#--- chunk2.s
.section .data.chunk2,"",@
.globl chunk2_start
.type chunk2_start,@object
chunk2_start:
.int32 0x11111111
.int32 0x22222222
.zero 134217712
.int32 0x44444444
.int32 0x55555555
.size chunk2_start, 134217728

View File

@ -0,0 +1,23 @@
# REQUIRES: llvm-64-bits
# RUN: split-file %s %t
# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %t/chunk1.s -o %t/chunk1.o
# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %t/chunk2.s -o %t/chunk2.o
# --no-gc-sections to prevent the linker from optimizing the chunk away, otherwise it produces a tiny output
# RUN: not wasm-ld --no-entry --no-gc-sections %t/chunk1.o %t/chunk2.o -o %t/combined.wasm 2>&1 | FileCheck %s
# RUN: rm %t/chunk1.o %t/chunk2.o
# Check that the linker fails gracefully when a section exceeds 4GB.
# CHECK: error: section '.debug_info' too large to encode:
# Two 2GB chunks whose combined size (4GB) is larger than the maximum u32 section size
#--- chunk1.s
.section .debug_info,"",@
.int32 0xAAAAAAAA
.zero 2147483640
.int32 0xBBBBBBBB
#--- chunk2.s
.section .debug_info,"",@
.int32 0x11111111
.zero 2147483640
.int32 0x22222222

View File

@ -97,7 +97,11 @@ public:
// After assignAddresses is called, this represents the offset from
// the beginning of the output section this chunk was assigned to.
int32_t outSecOff = 0;
//
// WASM sections can be up to 4GB. We use a larger, signed integer here to
// be able to detect section size overflow instead of a silent wrap-around
// and corrupted output sections.
int64_t outSecOff = 0;
uint8_t sectionKind : 3;

View File

@ -59,6 +59,10 @@ void CodeSection::finalizeContents() {
bodySize += func->getSize();
}
if (bodySize > UINT32_MAX) {
error("section too large to encode: " + Twine(bodySize) + " bytes");
}
createHeader(bodySize);
}
@ -157,6 +161,10 @@ void DataSection::finalizeContents() {
}
}
if (bodySize > UINT32_MAX) {
error("section too large to encode: " + Twine(bodySize) + " bytes");
}
createHeader(bodySize);
}
@ -232,7 +240,7 @@ void CustomSection::finalizeInputSections() {
return;
mergedSection->finalizeContents();
inputSections = std::move(newSections);
inputSections = newSections;
}
void CustomSection::finalizeContents() {
@ -249,6 +257,11 @@ void CustomSection::finalizeContents() {
payloadSize += section->getSize();
}
if (payloadSize > UINT32_MAX) {
error("section '" + name + "' too large to encode: " + Twine(payloadSize) +
" bytes");
}
createHeader(payloadSize + nameData.size());
}