From 63e776604779f52a792a866577650967fe19f49f Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Thu, 14 Aug 2025 16:15:22 +0200 Subject: [PATCH] [SystemZ] Allow forming overflow op for i128 (#153557) Allow matching i128 overflow pattern into UADDO, which then allows use of vaccq. --- llvm/lib/Target/SystemZ/SystemZISelLowering.h | 2 +- .../CodeGen/SystemZ/atomicrmw-ops-i128.ll | 13 +- llvm/test/CodeGen/SystemZ/int-cmp-65.ll | 259 +++++++++++++++--- 3 files changed, 225 insertions(+), 49 deletions(-) diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h index 1866962e1758..707887c59bd6 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h @@ -523,7 +523,7 @@ public: bool MathUsed) const override { // Form add and sub with overflow intrinsics regardless of any extra // users of the math result. - return VT == MVT::i32 || VT == MVT::i64; + return VT == MVT::i32 || VT == MVT::i64 || VT == MVT::i128; } bool shouldConsiderGEPOffsetSplit() const override { return true; } diff --git a/llvm/test/CodeGen/SystemZ/atomicrmw-ops-i128.ll b/llvm/test/CodeGen/SystemZ/atomicrmw-ops-i128.ll index c088f6d862e7..9271dc73e272 100644 --- a/llvm/test/CodeGen/SystemZ/atomicrmw-ops-i128.ll +++ b/llvm/test/CodeGen/SystemZ/atomicrmw-ops-i128.ll @@ -363,10 +363,11 @@ define i128 @atomicrmw_uinc_wrap(ptr %src, i128 %b) { define i128 @atomicrmw_udec_wrap(ptr %src, i128 %b) { ; CHECK-LABEL: atomicrmw_udec_wrap: ; CHECK: # %bb.0: +; CHECK-NEXT: larl %r1, .LCPI12_0 ; CHECK-NEXT: vl %v0, 0(%r4), 3 ; CHECK-NEXT: vl %v3, 0(%r3), 4 -; CHECK-NEXT: vgbm %v1, 65535 -; CHECK-NEXT: vgbm %v2, 0 +; CHECK-NEXT: vl %v1, 0(%r1), 3 +; CHECK-NEXT: vgbm %v2, 65535 ; CHECK-NEXT: j .LBB12_2 ; CHECK-NEXT: .LBB12_1: # %atomicrmw.start ; CHECK-NEXT: # in Loop: Header=BB12_2 Depth=1 @@ -379,6 +380,9 @@ define i128 @atomicrmw_udec_wrap(ptr %src, i128 %b) { ; CHECK-NEXT: je .LBB12_8 ; CHECK-NEXT: .LBB12_2: # %atomicrmw.start ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vscbiq %v4, %v3, %v1 +; CHECK-NEXT: vlgvf %r0, %v4, 3 +; CHECK-NEXT: xilf %r0, 1 ; CHECK-NEXT: veclg %v0, %v3 ; CHECK-NEXT: jlh .LBB12_4 ; CHECK-NEXT: # %bb.3: # %atomicrmw.start @@ -390,12 +394,11 @@ define i128 @atomicrmw_udec_wrap(ptr %src, i128 %b) { ; CHECK-NEXT: jl .LBB12_6 ; CHECK-NEXT: # %bb.5: # %atomicrmw.start ; CHECK-NEXT: # in Loop: Header=BB12_2 Depth=1 -; CHECK-NEXT: vaq %v4, %v3, %v1 +; CHECK-NEXT: vaq %v4, %v3, %v2 ; CHECK-NEXT: .LBB12_6: # %atomicrmw.start ; CHECK-NEXT: # in Loop: Header=BB12_2 Depth=1 -; CHECK-NEXT: vceqgs %v5, %v3, %v2 ; CHECK-NEXT: vlr %v5, %v0 -; CHECK-NEXT: je .LBB12_1 +; CHECK-NEXT: cijlh %r0, 0, .LBB12_1 ; CHECK-NEXT: # %bb.7: # %atomicrmw.start ; CHECK-NEXT: # in Loop: Header=BB12_2 Depth=1 ; CHECK-NEXT: vlr %v5, %v4 diff --git a/llvm/test/CodeGen/SystemZ/int-cmp-65.ll b/llvm/test/CodeGen/SystemZ/int-cmp-65.ll index b06ab3c1fa3d..f1d850200fe4 100644 --- a/llvm/test/CodeGen/SystemZ/int-cmp-65.ll +++ b/llvm/test/CodeGen/SystemZ/int-cmp-65.ll @@ -1,42 +1,110 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; Test usage of VACC/VSCBI. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s --check-prefix=BASELINE +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s --check-prefix=Z13 define i128 @i128_subc_1(i128 %a, i128 %b) unnamed_addr { -; CHECK-LABEL: i128_subc_1: -; CHECK: # %bb.0: -; CHECK-NEXT: vl %v0, 0(%r4), 3 -; CHECK-NEXT: vl %v1, 0(%r3), 3 -; CHECK-NEXT: vscbiq %v0, %v1, %v0 -; CHECK-NEXT: vst %v0, 0(%r2), 3 -; CHECK-NEXT: br %r14 +; BASELINE-LABEL: i128_subc_1: +; BASELINE: # %bb.0: +; BASELINE-NEXT: stmg %r14, %r15, 112(%r15) +; BASELINE-NEXT: .cfi_offset %r14, -48 +; BASELINE-NEXT: .cfi_offset %r15, -40 +; BASELINE-NEXT: lg %r5, 0(%r4) +; BASELINE-NEXT: lg %r14, 0(%r3) +; BASELINE-NEXT: lg %r1, 8(%r3) +; BASELINE-NEXT: clgr %r14, %r5 +; BASELINE-NEXT: ipm %r0 +; BASELINE-NEXT: clg %r1, 8(%r4) +; BASELINE-NEXT: ipm %r1 +; BASELINE-NEXT: cgrjlh %r14, %r5, .LBB0_2 +; BASELINE-NEXT: # %bb.1: +; BASELINE-NEXT: xilf %r1, 4294967295 +; BASELINE-NEXT: risbg %r0, %r1, 63, 191, 36 +; BASELINE-NEXT: j .LBB0_3 +; BASELINE-NEXT: .LBB0_2: +; BASELINE-NEXT: xilf %r0, 4294967295 +; BASELINE-NEXT: risbg %r0, %r0, 63, 191, 36 +; BASELINE-NEXT: .LBB0_3: +; BASELINE-NEXT: llgfr %r0, %r0 +; BASELINE-NEXT: stg %r0, 8(%r2) +; BASELINE-NEXT: mvghi 0(%r2), 0 +; BASELINE-NEXT: lmg %r14, %r15, 112(%r15) +; BASELINE-NEXT: br %r14 +; +; Z13-LABEL: i128_subc_1: +; Z13: # %bb.0: +; Z13-NEXT: vl %v0, 0(%r4), 3 +; Z13-NEXT: vl %v1, 0(%r3), 3 +; Z13-NEXT: vscbiq %v0, %v1, %v0 +; Z13-NEXT: vst %v0, 0(%r2), 3 +; Z13-NEXT: br %r14 %cmp = icmp uge i128 %a, %b %ext = zext i1 %cmp to i128 ret i128 %ext } define i128 @i128_subc_2(i128 %a, i128 %b) unnamed_addr { -; CHECK-LABEL: i128_subc_2: -; CHECK: # %bb.0: -; CHECK-NEXT: vl %v0, 0(%r3), 3 -; CHECK-NEXT: vl %v1, 0(%r4), 3 -; CHECK-NEXT: vscbiq %v0, %v1, %v0 -; CHECK-NEXT: vst %v0, 0(%r2), 3 -; CHECK-NEXT: br %r14 +; BASELINE-LABEL: i128_subc_2: +; BASELINE: # %bb.0: +; BASELINE-NEXT: stmg %r14, %r15, 112(%r15) +; BASELINE-NEXT: .cfi_offset %r14, -48 +; BASELINE-NEXT: .cfi_offset %r15, -40 +; BASELINE-NEXT: lg %r5, 0(%r4) +; BASELINE-NEXT: lg %r14, 0(%r3) +; BASELINE-NEXT: lg %r0, 8(%r3) +; BASELINE-NEXT: clgr %r14, %r5 +; BASELINE-NEXT: ipm %r1 +; BASELINE-NEXT: clg %r0, 8(%r4) +; BASELINE-NEXT: ipm %r0 +; BASELINE-NEXT: cgrjlh %r14, %r5, .LBB1_2 +; BASELINE-NEXT: # %bb.1: +; BASELINE-NEXT: afi %r0, -536870912 +; BASELINE-NEXT: srl %r0, 31 +; BASELINE-NEXT: j .LBB1_3 +; BASELINE-NEXT: .LBB1_2: +; BASELINE-NEXT: afi %r1, -536870912 +; BASELINE-NEXT: srl %r1, 31 +; BASELINE-NEXT: lr %r0, %r1 +; BASELINE-NEXT: .LBB1_3: +; BASELINE-NEXT: llgfr %r0, %r0 +; BASELINE-NEXT: stg %r0, 8(%r2) +; BASELINE-NEXT: mvghi 0(%r2), 0 +; BASELINE-NEXT: lmg %r14, %r15, 112(%r15) +; BASELINE-NEXT: br %r14 +; +; Z13-LABEL: i128_subc_2: +; Z13: # %bb.0: +; Z13-NEXT: vl %v0, 0(%r3), 3 +; Z13-NEXT: vl %v1, 0(%r4), 3 +; Z13-NEXT: vscbiq %v0, %v1, %v0 +; Z13-NEXT: vst %v0, 0(%r2), 3 +; Z13-NEXT: br %r14 %cmp = icmp ule i128 %a, %b %ext = zext i1 %cmp to i128 ret i128 %ext } define i128 @i128_addc_1(i128 %a, i128 %b) { -; CHECK-LABEL: i128_addc_1: -; CHECK: # %bb.0: -; CHECK-NEXT: vl %v0, 0(%r4), 3 -; CHECK-NEXT: vl %v1, 0(%r3), 3 -; CHECK-NEXT: vaccq %v0, %v1, %v0 -; CHECK-NEXT: vst %v0, 0(%r2), 3 -; CHECK-NEXT: br %r14 +; BASELINE-LABEL: i128_addc_1: +; BASELINE: # %bb.0: +; BASELINE-NEXT: lg %r0, 8(%r3) +; BASELINE-NEXT: lg %r1, 0(%r3) +; BASELINE-NEXT: alg %r0, 8(%r4) +; BASELINE-NEXT: alcg %r1, 0(%r4) +; BASELINE-NEXT: ipm %r0 +; BASELINE-NEXT: risbg %r0, %r0, 63, 191, 35 +; BASELINE-NEXT: stg %r0, 8(%r2) +; BASELINE-NEXT: mvghi 0(%r2), 0 +; BASELINE-NEXT: br %r14 +; +; Z13-LABEL: i128_addc_1: +; Z13: # %bb.0: +; Z13-NEXT: vl %v0, 0(%r4), 3 +; Z13-NEXT: vl %v1, 0(%r3), 3 +; Z13-NEXT: vaccq %v0, %v1, %v0 +; Z13-NEXT: vst %v0, 0(%r2), 3 +; Z13-NEXT: br %r14 %sum = add i128 %a, %b %cmp = icmp ult i128 %sum, %a %ext = zext i1 %cmp to i128 @@ -44,13 +112,25 @@ define i128 @i128_addc_1(i128 %a, i128 %b) { } define i128 @i128_addc_2(i128 %a, i128 %b) { -; CHECK-LABEL: i128_addc_2: -; CHECK: # %bb.0: -; CHECK-NEXT: vl %v0, 0(%r4), 3 -; CHECK-NEXT: vl %v1, 0(%r3), 3 -; CHECK-NEXT: vaccq %v0, %v1, %v0 -; CHECK-NEXT: vst %v0, 0(%r2), 3 -; CHECK-NEXT: br %r14 +; BASELINE-LABEL: i128_addc_2: +; BASELINE: # %bb.0: +; BASELINE-NEXT: lg %r0, 8(%r3) +; BASELINE-NEXT: lg %r1, 0(%r3) +; BASELINE-NEXT: alg %r0, 8(%r4) +; BASELINE-NEXT: alcg %r1, 0(%r4) +; BASELINE-NEXT: ipm %r0 +; BASELINE-NEXT: risbg %r0, %r0, 63, 191, 35 +; BASELINE-NEXT: stg %r0, 8(%r2) +; BASELINE-NEXT: mvghi 0(%r2), 0 +; BASELINE-NEXT: br %r14 +; +; Z13-LABEL: i128_addc_2: +; Z13: # %bb.0: +; Z13-NEXT: vl %v0, 0(%r4), 3 +; Z13-NEXT: vl %v1, 0(%r3), 3 +; Z13-NEXT: vaccq %v0, %v1, %v0 +; Z13-NEXT: vst %v0, 0(%r2), 3 +; Z13-NEXT: br %r14 %sum = add i128 %a, %b %cmp = icmp ult i128 %sum, %b %ext = zext i1 %cmp to i128 @@ -58,13 +138,25 @@ define i128 @i128_addc_2(i128 %a, i128 %b) { } define i128 @i128_addc_3(i128 %a, i128 %b) { -; CHECK-LABEL: i128_addc_3: -; CHECK: # %bb.0: -; CHECK-NEXT: vl %v0, 0(%r4), 3 -; CHECK-NEXT: vl %v1, 0(%r3), 3 -; CHECK-NEXT: vaccq %v0, %v1, %v0 -; CHECK-NEXT: vst %v0, 0(%r2), 3 -; CHECK-NEXT: br %r14 +; BASELINE-LABEL: i128_addc_3: +; BASELINE: # %bb.0: +; BASELINE-NEXT: lg %r0, 8(%r3) +; BASELINE-NEXT: lg %r1, 0(%r3) +; BASELINE-NEXT: alg %r0, 8(%r4) +; BASELINE-NEXT: alcg %r1, 0(%r4) +; BASELINE-NEXT: ipm %r0 +; BASELINE-NEXT: risbg %r0, %r0, 63, 191, 35 +; BASELINE-NEXT: stg %r0, 8(%r2) +; BASELINE-NEXT: mvghi 0(%r2), 0 +; BASELINE-NEXT: br %r14 +; +; Z13-LABEL: i128_addc_3: +; Z13: # %bb.0: +; Z13-NEXT: vl %v0, 0(%r4), 3 +; Z13-NEXT: vl %v1, 0(%r3), 3 +; Z13-NEXT: vaccq %v0, %v1, %v0 +; Z13-NEXT: vst %v0, 0(%r2), 3 +; Z13-NEXT: br %r14 %sum = add i128 %a, %b %cmp = icmp ugt i128 %a, %sum %ext = zext i1 %cmp to i128 @@ -72,16 +164,97 @@ define i128 @i128_addc_3(i128 %a, i128 %b) { } define i128 @i128_addc_4(i128 %a, i128 %b) { -; CHECK-LABEL: i128_addc_4: -; CHECK: # %bb.0: -; CHECK-NEXT: vl %v0, 0(%r4), 3 -; CHECK-NEXT: vl %v1, 0(%r3), 3 -; CHECK-NEXT: vaccq %v0, %v1, %v0 -; CHECK-NEXT: vst %v0, 0(%r2), 3 -; CHECK-NEXT: br %r14 +; BASELINE-LABEL: i128_addc_4: +; BASELINE: # %bb.0: +; BASELINE-NEXT: lg %r0, 8(%r3) +; BASELINE-NEXT: lg %r1, 0(%r3) +; BASELINE-NEXT: alg %r0, 8(%r4) +; BASELINE-NEXT: alcg %r1, 0(%r4) +; BASELINE-NEXT: ipm %r0 +; BASELINE-NEXT: risbg %r0, %r0, 63, 191, 35 +; BASELINE-NEXT: stg %r0, 8(%r2) +; BASELINE-NEXT: mvghi 0(%r2), 0 +; BASELINE-NEXT: br %r14 +; +; Z13-LABEL: i128_addc_4: +; Z13: # %bb.0: +; Z13-NEXT: vl %v0, 0(%r4), 3 +; Z13-NEXT: vl %v1, 0(%r3), 3 +; Z13-NEXT: vaccq %v0, %v1, %v0 +; Z13-NEXT: vst %v0, 0(%r2), 3 +; Z13-NEXT: br %r14 %sum = add i128 %a, %b %cmp = icmp ugt i128 %b, %sum %ext = zext i1 %cmp to i128 ret i128 %ext } +define i128 @i128_addc_xor(i128 %a, i128 %b) { +; BASELINE-LABEL: i128_addc_xor: +; BASELINE: # %bb.0: +; BASELINE-NEXT: lg %r0, 8(%r4) +; BASELINE-NEXT: lg %r1, 0(%r4) +; BASELINE-NEXT: alg %r0, 8(%r3) +; BASELINE-NEXT: alcg %r1, 0(%r3) +; BASELINE-NEXT: ipm %r0 +; BASELINE-NEXT: risbg %r0, %r0, 63, 191, 35 +; BASELINE-NEXT: stg %r0, 8(%r2) +; BASELINE-NEXT: mvghi 0(%r2), 0 +; BASELINE-NEXT: br %r14 +; +; Z13-LABEL: i128_addc_xor: +; Z13: # %bb.0: +; Z13-NEXT: vl %v0, 0(%r3), 3 +; Z13-NEXT: vl %v1, 0(%r4), 3 +; Z13-NEXT: vaccq %v0, %v1, %v0 +; Z13-NEXT: vst %v0, 0(%r2), 3 +; Z13-NEXT: br %r14 + %b.not = xor i128 %b, -1 + %cmp = icmp ugt i128 %a, %b.not + %ext = zext i1 %cmp to i128 + ret i128 %ext +} + +define i128 @i128_addc_xor_inv(i128 %a, i128 %b) { +; BASELINE-LABEL: i128_addc_xor_inv: +; BASELINE: # %bb.0: +; BASELINE-NEXT: stmg %r14, %r15, 112(%r15) +; BASELINE-NEXT: .cfi_offset %r14, -48 +; BASELINE-NEXT: .cfi_offset %r15, -40 +; BASELINE-NEXT: lg %r5, 0(%r3) +; BASELINE-NEXT: lghi %r14, -1 +; BASELINE-NEXT: xg %r14, 0(%r4) +; BASELINE-NEXT: lghi %r1, -1 +; BASELINE-NEXT: xg %r1, 8(%r4) +; BASELINE-NEXT: clgr %r5, %r14 +; BASELINE-NEXT: ipm %r0 +; BASELINE-NEXT: clg %r1, 8(%r3) +; BASELINE-NEXT: ipm %r1 +; BASELINE-NEXT: cgrjlh %r5, %r14, .LBB7_2 +; BASELINE-NEXT: # %bb.1: +; BASELINE-NEXT: xilf %r1, 4294967295 +; BASELINE-NEXT: risbg %r0, %r1, 63, 191, 36 +; BASELINE-NEXT: j .LBB7_3 +; BASELINE-NEXT: .LBB7_2: +; BASELINE-NEXT: afi %r0, -536870912 +; BASELINE-NEXT: srl %r0, 31 +; BASELINE-NEXT: .LBB7_3: +; BASELINE-NEXT: llgfr %r0, %r0 +; BASELINE-NEXT: stg %r0, 8(%r2) +; BASELINE-NEXT: mvghi 0(%r2), 0 +; BASELINE-NEXT: lmg %r14, %r15, 112(%r15) +; BASELINE-NEXT: br %r14 +; +; Z13-LABEL: i128_addc_xor_inv: +; Z13: # %bb.0: +; Z13-NEXT: vl %v1, 0(%r4), 3 +; Z13-NEXT: vl %v0, 0(%r3), 3 +; Z13-NEXT: vno %v1, %v1, %v1 +; Z13-NEXT: vscbiq %v0, %v1, %v0 +; Z13-NEXT: vst %v0, 0(%r2), 3 +; Z13-NEXT: br %r14 + %b.not = xor i128 %b, -1 + %cmp = icmp ule i128 %a, %b.not + %ext = zext i1 %cmp to i128 + ret i128 %ext +}