Dan Gohman e665e781dc
[SelectionDAG] Use the nuw flag when expanding loads. (#119288)
When expanding a load into two loads, use nuw for the add that computes
the offset from the base of the second load, because the original load
doesn't straddle the address space.

It turns out there's already a dedicated helper function for doing this,
`getObjectPtrOffset`.

This is in target-independent code, however in practice it only seems to
affect WebAssembly code, because WebAssembly load and store
instructions' constant offsets don't perform wrapping, so constant
folding often depends on the nuw flag being present.

This was noticed in the development of #119204.
2024-12-10 06:28:09 -08:00

344 lines
13 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-keep-registers | FileCheck %s
; Test a subset of compiler-rt/libm libcalls expected to be emitted by the wasm backend
target triple = "wasm32-unknown-unknown"
; fp128 intrinsics exercised by @fp128libcalls.
declare fp128 @llvm.sqrt.f128(fp128)
declare fp128 @llvm.floor.f128(fp128)
declare fp128 @llvm.trunc.f128(fp128)
declare fp128 @llvm.nearbyint.f128(fp128)
declare fp128 @llvm.pow.f128(fp128, fp128)
declare fp128 @llvm.powi.f128.i32(fp128, i32)
; f64 intrinsics exercised by @f64libcalls.
declare double @llvm.tan.f64(double)
declare double @llvm.cos.f64(double)
declare double @llvm.log10.f64(double)
declare double @llvm.pow.f64(double, double)
declare double @llvm.powi.f64.i32(double, i32)
declare double @llvm.log.f64(double)
declare double @llvm.exp.f64(double)
declare double @llvm.exp10.f64(double)
declare double @llvm.ldexp.f64.i32(double, i32)
declare {double, i32} @llvm.frexp.f64.i32(double)
declare i32 @llvm.lround(double)
; External function with no body in this file; @f64libcalls passes the i32
; exponent produced by frexp to it.
declare void @escape_value(i32)
; Chains fp128 arithmetic and math intrinsics so that each operation feeds the
; next, and verifies the libcall emitted for every step:
;   fadd -> __addtf3, fmul -> __multf3, fdiv -> __divtf3,
;   sqrt -> sqrtl, floor -> floorl, pow -> powl, powi -> __powitf2,
;   trunc -> truncl, nearbyint -> nearbyintl.
; In the expected output each fp128 value is carried as a pair of i64s, every
; libcall receives a pointer into the 144-byte stack frame as its first
; argument, and the function's own result is written through local 0 (the
; expected function type has an extra leading i32 and no return value).
define fp128 @fp128libcalls(fp128 %x, fp128 %y, i32 %z) {
; compiler-rt call
; CHECK-LABEL: fp128libcalls:
; CHECK: .functype fp128libcalls (i32, i64, i64, i64, i64, i32) -> ()
; CHECK-NEXT: .local i32
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: global.get $push18=, __stack_pointer
; CHECK-NEXT: i32.const $push19=, 144
; CHECK-NEXT: i32.sub $push39=, $pop18, $pop19
; CHECK-NEXT: local.tee $push38=, 6, $pop39
; CHECK-NEXT: global.set __stack_pointer, $pop38
; CHECK-NEXT: local.get $push40=, 6
; CHECK-NEXT: i32.const $push36=, 128
; CHECK-NEXT: i32.add $push37=, $pop40, $pop36
; CHECK-NEXT: local.get $push44=, 1
; CHECK-NEXT: local.get $push43=, 2
; CHECK-NEXT: local.get $push42=, 3
; CHECK-NEXT: local.get $push41=, 4
; CHECK-NEXT: call __addtf3, $pop37, $pop44, $pop43, $pop42, $pop41
; CHECK-NEXT: local.get $push45=, 6
; CHECK-NEXT: i32.const $push34=, 112
; CHECK-NEXT: i32.add $push35=, $pop45, $pop34
; CHECK-NEXT: local.get $push46=, 6
; CHECK-NEXT: i64.load $push1=, 128($pop46)
; CHECK-NEXT: local.get $push47=, 6
; CHECK-NEXT: i64.load $push0=, 136($pop47)
; CHECK-NEXT: local.get $push49=, 3
; CHECK-NEXT: local.get $push48=, 4
; CHECK-NEXT: call __multf3, $pop35, $pop1, $pop0, $pop49, $pop48
; CHECK-NEXT: local.get $push50=, 6
; CHECK-NEXT: i32.const $push32=, 96
; CHECK-NEXT: i32.add $push33=, $pop50, $pop32
; CHECK-NEXT: local.get $push51=, 6
; CHECK-NEXT: i64.load $push3=, 112($pop51)
; CHECK-NEXT: local.get $push52=, 6
; CHECK-NEXT: i64.load $push2=, 120($pop52)
; CHECK-NEXT: local.get $push54=, 3
; CHECK-NEXT: local.get $push53=, 4
; CHECK-NEXT: call __divtf3, $pop33, $pop3, $pop2, $pop54, $pop53
; CHECK-NEXT: local.get $push55=, 6
; CHECK-NEXT: i32.const $push30=, 80
; CHECK-NEXT: i32.add $push31=, $pop55, $pop30
; CHECK-NEXT: local.get $push56=, 6
; CHECK-NEXT: i64.load $push5=, 96($pop56)
; CHECK-NEXT: local.get $push57=, 6
; CHECK-NEXT: i64.load $push4=, 104($pop57)
; CHECK-NEXT: call sqrtl, $pop31, $pop5, $pop4
; CHECK-NEXT: local.get $push58=, 6
; CHECK-NEXT: i32.const $push28=, 64
; CHECK-NEXT: i32.add $push29=, $pop58, $pop28
; CHECK-NEXT: local.get $push59=, 6
; CHECK-NEXT: i64.load $push7=, 80($pop59)
; CHECK-NEXT: local.get $push60=, 6
; CHECK-NEXT: i64.load $push6=, 88($pop60)
; CHECK-NEXT: call floorl, $pop29, $pop7, $pop6
; CHECK-NEXT: local.get $push61=, 6
; CHECK-NEXT: i32.const $push26=, 48
; CHECK-NEXT: i32.add $push27=, $pop61, $pop26
; CHECK-NEXT: local.get $push62=, 6
; CHECK-NEXT: i64.load $push9=, 64($pop62)
; CHECK-NEXT: local.get $push63=, 6
; CHECK-NEXT: i64.load $push8=, 72($pop63)
; CHECK-NEXT: local.get $push65=, 3
; CHECK-NEXT: local.get $push64=, 4
; CHECK-NEXT: call powl, $pop27, $pop9, $pop8, $pop65, $pop64
; CHECK-NEXT: local.get $push66=, 6
; CHECK-NEXT: i32.const $push24=, 32
; CHECK-NEXT: i32.add $push25=, $pop66, $pop24
; CHECK-NEXT: local.get $push67=, 6
; CHECK-NEXT: i64.load $push11=, 48($pop67)
; CHECK-NEXT: local.get $push68=, 6
; CHECK-NEXT: i64.load $push10=, 56($pop68)
; CHECK-NEXT: local.get $push69=, 5
; CHECK-NEXT: call __powitf2, $pop25, $pop11, $pop10, $pop69
; CHECK-NEXT: local.get $push70=, 6
; CHECK-NEXT: i32.const $push22=, 16
; CHECK-NEXT: i32.add $push23=, $pop70, $pop22
; CHECK-NEXT: local.get $push71=, 6
; CHECK-NEXT: i64.load $push13=, 32($pop71)
; CHECK-NEXT: local.get $push72=, 6
; CHECK-NEXT: i64.load $push12=, 40($pop72)
; CHECK-NEXT: call truncl, $pop23, $pop13, $pop12
; CHECK-NEXT: local.get $push75=, 6
; CHECK-NEXT: local.get $push73=, 6
; CHECK-NEXT: i64.load $push15=, 16($pop73)
; CHECK-NEXT: local.get $push74=, 6
; CHECK-NEXT: i64.load $push14=, 24($pop74)
; CHECK-NEXT: call nearbyintl, $pop75, $pop15, $pop14
; CHECK-NEXT: local.get $push77=, 0
; CHECK-NEXT: local.get $push76=, 6
; CHECK-NEXT: i64.load $push16=, 8($pop76)
; CHECK-NEXT: i64.store 8($pop77), $pop16
; CHECK-NEXT: local.get $push79=, 0
; CHECK-NEXT: local.get $push78=, 6
; CHECK-NEXT: i64.load $push17=, 0($pop78)
; CHECK-NEXT: i64.store 0($pop79), $pop17
; CHECK-NEXT: local.get $push80=, 6
; CHECK-NEXT: i32.const $push20=, 144
; CHECK-NEXT: i32.add $push21=, $pop80, $pop20
; CHECK-NEXT: global.set __stack_pointer, $pop21
; CHECK-NEXT: return
; Arithmetic on fp128: expected as __addtf3, __multf3 and __divtf3, in that order.
%a = fadd fp128 %x, %y
%b = fmul fp128 %a, %y
%c = fdiv fp128 %b, %y
; libm calls
; The 'l'-suffixed (long double) libm entry points are expected here, except
; for powi, which is expected as __powitf2.
%d = call fp128 @llvm.sqrt.f128(fp128 %c)
%e = call fp128 @llvm.floor.f128(fp128 %d)
%f = call fp128 @llvm.pow.f128(fp128 %e, fp128 %y)
%g = call fp128 @llvm.powi.f128.i32(fp128 %f, i32 %z)
%h = call fp128 @llvm.trunc.f128(fp128 %g)
%i = call fp128 @llvm.nearbyint.f128(fp128 %h)
ret fp128 %i
}
; Verifies i128 lowering: the add is expanded inline into two i64.add
; instructions plus a carry computed with i64.lt_u, while mul and urem are
; emitted as calls to __multi3 and __umodti3. As the expected function type
; shows, each i128 is split into two i64s and the result is written through
; the leading i32 parameter (local 0) rather than returned directly.
define i128 @i128libcalls(i128 %x, i128 %y) {
; Basic ops should be expanded
; CHECK-LABEL: i128libcalls:
; CHECK: .functype i128libcalls (i32, i64, i64, i64, i64) -> ()
; CHECK-NEXT: .local i32, i64
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: global.get $push8=, __stack_pointer
; CHECK-NEXT: i32.const $push9=, 32
; CHECK-NEXT: i32.sub $push17=, $pop8, $pop9
; CHECK-NEXT: local.tee $push16=, 5, $pop17
; CHECK-NEXT: global.set __stack_pointer, $pop16
; CHECK-NEXT: local.get $push18=, 5
; CHECK-NEXT: i32.const $push12=, 16
; CHECK-NEXT: i32.add $push13=, $pop18, $pop12
; CHECK-NEXT: local.get $push20=, 1
; CHECK-NEXT: local.get $push19=, 3
; CHECK-NEXT: i64.add $push15=, $pop20, $pop19
; CHECK-NEXT: local.tee $push14=, 6, $pop15
; CHECK-NEXT: local.get $push22=, 2
; CHECK-NEXT: local.get $push21=, 4
; CHECK-NEXT: i64.add $push0=, $pop22, $pop21
; CHECK-NEXT: local.get $push24=, 6
; CHECK-NEXT: local.get $push23=, 1
; CHECK-NEXT: i64.lt_u $push1=, $pop24, $pop23
; CHECK-NEXT: i64.extend_i32_u $push2=, $pop1
; CHECK-NEXT: i64.add $push3=, $pop0, $pop2
; CHECK-NEXT: local.get $push26=, 3
; CHECK-NEXT: local.get $push25=, 4
; CHECK-NEXT: call __multi3, $pop13, $pop14, $pop3, $pop26, $pop25
; CHECK-NEXT: local.get $push31=, 5
; CHECK-NEXT: local.get $push27=, 5
; CHECK-NEXT: i64.load $push5=, 16($pop27)
; CHECK-NEXT: local.get $push28=, 5
; CHECK-NEXT: i64.load $push4=, 24($pop28)
; CHECK-NEXT: local.get $push30=, 3
; CHECK-NEXT: local.get $push29=, 4
; CHECK-NEXT: call __umodti3, $pop31, $pop5, $pop4, $pop30, $pop29
; CHECK-NEXT: local.get $push33=, 0
; CHECK-NEXT: local.get $push32=, 5
; CHECK-NEXT: i64.load $push6=, 8($pop32)
; CHECK-NEXT: i64.store 8($pop33), $pop6
; CHECK-NEXT: local.get $push35=, 0
; CHECK-NEXT: local.get $push34=, 5
; CHECK-NEXT: i64.load $push7=, 0($pop34)
; CHECK-NEXT: i64.store 0($pop35), $pop7
; CHECK-NEXT: local.get $push36=, 5
; CHECK-NEXT: i32.const $push10=, 32
; CHECK-NEXT: i32.add $push11=, $pop36, $pop10
; CHECK-NEXT: global.set __stack_pointer, $pop11
; CHECK-NEXT: return
; Expanded inline (no libcall expected).
%a = add i128 %x, %y
; Expected as a call to __multi3.
%b = mul i128 %a, %y
; Expected as a call to __umodti3.
%c = urem i128 %b, %y
ret i128 %c
}
; Chains f64 math intrinsics and verifies the libm/compiler-rt symbol emitted
; for each: tan, cos, log10, pow, __powidf2 (powi), log, exp, exp10, cbrt,
; lround, ldexp and frexp. In the expected output frexp receives a pointer to
; a stack slot (offset 12 in the 16-byte frame) for its i32 result, which is
; then loaded and passed to escape_value.
define double @f64libcalls(double %x, double %y, i32 %z) {
; CHECK-LABEL: f64libcalls:
; CHECK: .functype f64libcalls (f64, f64, i32) -> (f64)
; CHECK-NEXT: .local i32
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: global.get $push12=, __stack_pointer
; CHECK-NEXT: i32.const $push13=, 16
; CHECK-NEXT: i32.sub $push19=, $pop12, $pop13
; CHECK-NEXT: local.tee $push18=, 3, $pop19
; CHECK-NEXT: global.set __stack_pointer, $pop18
; CHECK-NEXT: local.get $push23=, 0
; CHECK-NEXT: local.get $push20=, 0
; CHECK-NEXT: call $push0=, tan, $pop20
; CHECK-NEXT: call $push1=, cos, $pop0
; CHECK-NEXT: call $push2=, log10, $pop1
; CHECK-NEXT: local.get $push21=, 1
; CHECK-NEXT: call $push3=, pow, $pop2, $pop21
; CHECK-NEXT: local.get $push22=, 2
; CHECK-NEXT: call $push4=, __powidf2, $pop3, $pop22
; CHECK-NEXT: call $push5=, log, $pop4
; CHECK-NEXT: call $push6=, exp, $pop5
; CHECK-NEXT: call $push7=, exp10, $pop6
; CHECK-NEXT: call $push8=, cbrt, $pop7
; CHECK-NEXT: call $push9=, lround, $pop8
; CHECK-NEXT: call $push10=, ldexp, $pop23, $pop9
; CHECK-NEXT: local.get $push24=, 3
; CHECK-NEXT: i32.const $push16=, 12
; CHECK-NEXT: i32.add $push17=, $pop24, $pop16
; CHECK-NEXT: call $push25=, frexp, $pop10, $pop17
; CHECK-NEXT: local.set 0, $pop25
; CHECK-NEXT: local.get $push26=, 3
; CHECK-NEXT: i32.load $push11=, 12($pop26)
; CHECK-NEXT: call escape_value, $pop11
; CHECK-NEXT: local.get $push27=, 3
; CHECK-NEXT: i32.const $push14=, 16
; CHECK-NEXT: i32.add $push15=, $pop27, $pop14
; CHECK-NEXT: global.set __stack_pointer, $pop15
; CHECK-NEXT: local.get $push28=, 0
; CHECK-NEXT: return $pop28
%k = call double @llvm.tan.f64(double %x)
%a = call double @llvm.cos.f64(double %k)
%b = call double @llvm.log10.f64(double %a)
%c = call double @llvm.pow.f64(double %b, double %y)
%d = call double @llvm.powi.f64.i32(double %c, i32 %z)
%e = call double @llvm.log.f64(double %d)
%f = call double @llvm.exp.f64(double %e)
%g = call double @llvm.exp10.f64(double %f)
; 0x3FD5555555555555 is the double closest to 1/3; with the 'fast' flag this
; pow is expected to be emitted as a call to cbrt.
%h = call fast double @llvm.pow.f64(double %g, double 0x3FD5555555555555)
%i = call i32 @llvm.lround(double %h)
%j = call double @llvm.ldexp.f64.i32(double %x, i32 %i);
; frexp yields {fraction, exponent}; the fraction is returned and the exponent
; is handed to escape_value so that it is used.
%result = call {double, i32} @llvm.frexp.f64.i32(double %j)
%result.0 = extractvalue { double, i32 } %result, 0
%result.1 = extractvalue { double, i32 } %result, 1
call void @escape_value(i32 %result.1)
ret double %result.0
}
; fcmp ord and unord (RTLIB::O_F32 / RTLIB::UO_F32 etc) are a special case (see
; comment in WebAssemblyRuntimeLibcallSignatures.cpp) so check them separately.
; no libcalls are needed for f32 and f64
; Verifies that unordered/ordered comparisons on f64 need no libcall: the
; expected output contains only inline f64.ne / f64.eq self-comparisons of each
; operand, combined with i32.or (uno) and i32.and (ord).
define i1 @unordd(double %x, double %y) {
; CHECK-LABEL: unordd:
; CHECK: .functype unordd (f64, f64) -> (i32)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get $push8=, 0
; CHECK-NEXT: local.get $push7=, 0
; CHECK-NEXT: f64.ne $push4=, $pop8, $pop7
; CHECK-NEXT: local.get $push10=, 1
; CHECK-NEXT: local.get $push9=, 1
; CHECK-NEXT: f64.ne $push3=, $pop10, $pop9
; CHECK-NEXT: i32.or $push5=, $pop4, $pop3
; CHECK-NEXT: local.get $push12=, 0
; CHECK-NEXT: local.get $push11=, 0
; CHECK-NEXT: f64.eq $push1=, $pop12, $pop11
; CHECK-NEXT: local.get $push14=, 1
; CHECK-NEXT: local.get $push13=, 1
; CHECK-NEXT: f64.eq $push0=, $pop14, $pop13
; CHECK-NEXT: i32.and $push2=, $pop1, $pop0
; CHECK-NEXT: i32.xor $push6=, $pop5, $pop2
; CHECK-NEXT: return $pop6
; Both predicates are computed and xor'ed together so that each one
; contributes to the returned value.
%a = fcmp uno double %x, %y
%b = fcmp ord double %x, %y
%c = xor i1 %a, %b
ret i1 %c
}
; f32 counterpart of @unordd: unordered/ordered comparisons are expected to be
; emitted inline as f32.ne / f32.eq self-comparisons combined with i32.or and
; i32.and, with no libcall.
define i1 @unordf(float %x, float %y) {
; CHECK-LABEL: unordf:
; CHECK: .functype unordf (f32, f32) -> (i32)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get $push8=, 0
; CHECK-NEXT: local.get $push7=, 0
; CHECK-NEXT: f32.ne $push4=, $pop8, $pop7
; CHECK-NEXT: local.get $push10=, 1
; CHECK-NEXT: local.get $push9=, 1
; CHECK-NEXT: f32.ne $push3=, $pop10, $pop9
; CHECK-NEXT: i32.or $push5=, $pop4, $pop3
; CHECK-NEXT: local.get $push12=, 0
; CHECK-NEXT: local.get $push11=, 0
; CHECK-NEXT: f32.eq $push1=, $pop12, $pop11
; CHECK-NEXT: local.get $push14=, 1
; CHECK-NEXT: local.get $push13=, 1
; CHECK-NEXT: f32.eq $push0=, $pop14, $pop13
; CHECK-NEXT: i32.and $push2=, $pop1, $pop0
; CHECK-NEXT: i32.xor $push6=, $pop5, $pop2
; CHECK-NEXT: return $pop6
; Both predicates are computed and xor'ed together so that each one
; contributes to the returned value.
%a = fcmp uno float %x, %y
%b = fcmp ord float %x, %y
%c = xor i1 %a, %b
ret i1 %c
}
; Verifies that an unordered comparison on fp128 is emitted as a call to
; __unordtf2 (each fp128 operand passed as two i64s), with the i1 result
; derived by comparing the call's return value against zero using i32.ne.
define i1 @unordt(fp128 %x, fp128 %y) {
; CHECK-LABEL: unordt:
; CHECK: .functype unordt (i64, i64, i64, i64) -> (i32)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get $push6=, 0
; CHECK-NEXT: local.get $push5=, 1
; CHECK-NEXT: local.get $push4=, 2
; CHECK-NEXT: local.get $push3=, 3
; CHECK-NEXT: call $push1=, __unordtf2, $pop6, $pop5, $pop4, $pop3
; CHECK-NEXT: i32.const $push0=, 0
; CHECK-NEXT: i32.ne $push2=, $pop1, $pop0
; CHECK-NEXT: return $pop2
%a = fcmp uno fp128 %x, %y
ret i1 %a
}
; Verifies that an ordered comparison on fp128 reuses the __unordtf2 libcall
; and inverts its result with i32.eqz, rather than calling a separate
; "ordered" routine.
define i1 @ordt(fp128 %x, fp128 %y) {
; CHECK-LABEL: ordt:
; CHECK: .functype ordt (i64, i64, i64, i64) -> (i32)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get $push5=, 0
; CHECK-NEXT: local.get $push4=, 1
; CHECK-NEXT: local.get $push3=, 2
; CHECK-NEXT: local.get $push2=, 3
; CHECK-NEXT: call $push0=, __unordtf2, $pop5, $pop4, $pop3, $pop2
; CHECK-NEXT: i32.eqz $push1=, $pop0
; CHECK-NEXT: return $pop1
%a = fcmp ord fp128 %x, %y
ret i1 %a
}