llvm-project/llvm/test/CodeGen/AMDGPU/bfi_int.r600.ll
Matt Arsenault eb88e793ff AMDGPU: Add some additional test coverage for BFI matching
Try to stress constant bus restriction enforcement since some of these
are broken for GlobalISel. Split the r600 test because some of these
cases don't compile (and all the ones using return values are
discarded).
2022-01-26 15:06:50 -05:00

238 lines
7.7 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -enable-var-scope -check-prefixes=R600 %s
; BFI_INT Definition pattern from ISA docs
; (y & x) | (z & ~x)
;
; Each result bit comes from %y where the matching bit of %x is set and from
; %z where it is clear. The assertions below expect the whole expression to be
; emitted as a single BFI_INT, next to an LSHR by the literal 2 (presumably
; the address computation for the store - confirm against the r600 backend).
define amdgpu_kernel void @bfi_def(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) {
; R600-LABEL: bfi_def:
; R600: ; %bb.0: ; %entry
; R600-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[]
; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1
; R600-NEXT: CF_END
; R600-NEXT: PAD
; R600-NEXT: ALU clause starting at 4:
; R600-NEXT: LSHR * T0.X, KC0[2].Y, literal.x,
; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00)
; R600-NEXT: BFI_INT * T1.X, KC0[2].Z, KC0[2].W, KC0[3].X,
entry:
; %0 = ~x (xor with all ones)
%0 = xor i32 %x, -1
; %1 = z & ~x
%1 = and i32 %z, %0
; %2 = y & x
%2 = and i32 %y, %x
; %3 = (z & ~x) | (y & x)
%3 = or i32 %1, %2
store i32 %3, i32 addrspace(1)* %out
ret void
}
; SHA-256 Ch function
; z ^ (x & (y ^ z))
;
; Where a bit of %x is set the result bit is z ^ y ^ z = y, otherwise it is z,
; so this is the same bit select as (y & x) | (z & ~x) written with xor/and.
; The assertions below expect a single BFI_INT and no separate XOR or AND.
define amdgpu_kernel void @bfi_sha256_ch(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) {
; R600-LABEL: bfi_sha256_ch:
; R600: ; %bb.0: ; %entry
; R600-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[]
; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1
; R600-NEXT: CF_END
; R600-NEXT: PAD
; R600-NEXT: ALU clause starting at 4:
; R600-NEXT: LSHR * T0.X, KC0[2].Y, literal.x,
; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00)
; R600-NEXT: BFI_INT * T1.X, KC0[2].Z, KC0[2].W, KC0[3].X,
entry:
; %0 = y ^ z
%0 = xor i32 %y, %z
; %1 = x & (y ^ z)
%1 = and i32 %x, %0
; %2 = z ^ (x & (y ^ z))
%2 = xor i32 %z, %1
store i32 %2, i32 addrspace(1)* %out
ret void
}
; SHA-256 Ma function
; ((x & z) | (y & (x | z)))
;
; This is the bitwise majority of %x, %y and %z: a result bit is set when at
; least two of the three input bits are set. The assertions below expect it to
; be emitted as one XOR_INT whose result (PV.W) is the first operand of one
; BFI_INT, with no separate AND or OR instructions.
define amdgpu_kernel void @bfi_sha256_ma(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) {
; R600-LABEL: bfi_sha256_ma:
; R600: ; %bb.0: ; %entry
; R600-NEXT: ALU 3, @4, KC0[CB0:0-32], KC1[]
; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
; R600-NEXT: CF_END
; R600-NEXT: PAD
; R600-NEXT: ALU clause starting at 4:
; R600-NEXT: XOR_INT * T0.W, KC0[2].Z, KC0[2].W,
; R600-NEXT: BFI_INT * T0.X, PV.W, KC0[3].X, KC0[2].W,
; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00)
entry:
; %0 = x & z
%0 = and i32 %x, %z
; %1 = x | z
%1 = or i32 %x, %z
; %2 = y & (x | z)
%2 = and i32 %y, %1
; %3 = (x & z) | (y & (x | z))
%3 = or i32 %0, %2
store i32 %3, i32 addrspace(1)* %out
ret void
}
; Vector (<2 x i32>) bit select written as ((a ^ mask) & b) ^ mask.
; Each result bit is taken from %a where the matching bit of %b is set and
; from %mask where it is clear.
; This is a non-kernel function that returns its result; the assertions below
; expect only CF_END and PAD, i.e. no ALU instructions are emitted for it on
; this target.
define <2 x i32> @v_bitselect_v2i32_pat1(<2 x i32> %a, <2 x i32> %b, <2 x i32> %mask) {
; R600-LABEL: v_bitselect_v2i32_pat1:
; R600: ; %bb.0:
; R600-NEXT: CF_END
; R600-NEXT: PAD
; a ^ mask
%xor.0 = xor <2 x i32> %a, %mask
; (a ^ mask) & b
%and = and <2 x i32> %xor.0, %b
; ((a ^ mask) & b) ^ mask
%bitselect = xor <2 x i32> %and, %mask
ret <2 x i32> %bitselect
}
; 64-bit bit select written in and/or form: (a & b) | (~a & mask).
; Each result bit is taken from %b where the matching bit of %a is set and
; from %mask where it is clear.
; This is a non-kernel function that returns its result; the assertions below
; expect only CF_END and PAD, i.e. no ALU instructions are emitted for it on
; this target.
define i64 @v_bitselect_i64_pat_0(i64 %a, i64 %b, i64 %mask) {
; R600-LABEL: v_bitselect_i64_pat_0:
; R600: ; %bb.0:
; R600-NEXT: CF_END
; R600-NEXT: PAD
; a & b
%and0 = and i64 %a, %b
; ~a (xor with all ones)
%not.a = xor i64 %a, -1
; ~a & mask
%and1 = and i64 %not.a, %mask
; (a & b) | (~a & mask)
%bitselect = or i64 %and0, %and1
ret i64 %bitselect
}
; 64-bit bit select written in xor/and form: ((a ^ mask) & b) ^ mask.
; Each result bit is taken from %a where the matching bit of %b is set and
; from %mask where it is clear.
; This is a non-kernel function that returns its result; the assertions below
; expect only CF_END and PAD, i.e. no ALU instructions are emitted for it on
; this target.
define i64 @v_bitselect_i64_pat_1(i64 %a, i64 %b, i64 %mask) {
; R600-LABEL: v_bitselect_i64_pat_1:
; R600: ; %bb.0:
; R600-NEXT: CF_END
; R600-NEXT: PAD
; a ^ mask
%xor.0 = xor i64 %a, %mask
; (a ^ mask) & b
%and = and i64 %xor.0, %b
; ((a ^ mask) & b) ^ mask
%bitselect = xor i64 %and, %mask
ret i64 %bitselect
}
; 64-bit bit select written in xor/and form: ((a ^ mask) & b) ^ mask.
; NOTE(review): the IR body here is instruction-for-instruction identical to
; v_bitselect_i64_pat_1, so this function adds no new pattern coverage as
; written - confirm whether a different operand arrangement was intended.
; This is a non-kernel function that returns its result; the assertions below
; expect only CF_END and PAD, i.e. no ALU instructions are emitted for it on
; this target.
define i64 @v_bitselect_i64_pat_2(i64 %a, i64 %b, i64 %mask) {
; R600-LABEL: v_bitselect_i64_pat_2:
; R600: ; %bb.0:
; R600-NEXT: CF_END
; R600-NEXT: PAD
; a ^ mask
%xor.0 = xor i64 %a, %mask
; (a ^ mask) & b
%and = and i64 %xor.0, %b
; ((a ^ mask) & b) ^ mask
%bitselect = xor i64 %and, %mask
ret i64 %bitselect
}
; 64-bit form of the SHA-256 Ma pattern: (x & z) | (y & (x | z)), the bitwise
; majority of %x, %y and %z.
; This is a non-kernel function that returns its result; the assertions below
; expect only CF_END and PAD, i.e. no ALU instructions are emitted for it on
; this target.
define i64 @v_bfi_sha256_ma_i64(i64 %x, i64 %y, i64 %z) {
; R600-LABEL: v_bfi_sha256_ma_i64:
; R600: ; %bb.0: ; %entry
; R600-NEXT: CF_END
; R600-NEXT: PAD
entry:
; x & z
%and0 = and i64 %x, %z
; x | z
%or0 = or i64 %x, %z
; y & (x | z)
%and1 = and i64 %y, %or0
; (x & z) | (y & (x | z))
%or1 = or i64 %and0, %and1
ret i64 %or1
}
; Kernel form of the 64-bit and/or bit select: (a & b) | (~a & mask), with
; every operand a kernel argument. The result is incremented by 10 and stored
; through an undef pointer, so the select has an arithmetic user rather than
; being stored directly (presumably to exercise the case where the result
; feeds further scalar code - confirm).
; The assertions below expect two BFI_INT instructions, each preceded by a MOV
; of one constant-buffer operand into a register, followed by an
; ADDC_UINT/ADD_INT sequence using the literal 10. Presumably the two BFI_INTs
; are the low and high 32-bit halves and the MOVs exist to satisfy an operand
; restriction on constant-buffer reads - confirm against the backend.
define amdgpu_kernel void @s_bitselect_i64_pat_0(i64 %a, i64 %b, i64 %mask) {
; R600-LABEL: s_bitselect_i64_pat_0:
; R600: ; %bb.0:
; R600-NEXT: ALU 9, @4, KC0[CB0:0-32], KC1[]
; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
; R600-NEXT: CF_END
; R600-NEXT: PAD
; R600-NEXT: ALU clause starting at 4:
; R600-NEXT: MOV * T0.W, KC0[3].Y,
; R600-NEXT: BFI_INT * T0.W, KC0[2].Y, KC0[2].W, PV.W,
; R600-NEXT: MOV * T1.W, KC0[3].Z,
; R600-NEXT: BFI_INT T1.W, KC0[2].Z, KC0[3].X, PV.W,
; R600-NEXT: ADDC_UINT * T2.W, T0.W, literal.x,
; R600-NEXT: 10(1.401298e-44), 0(0.000000e+00)
; R600-NEXT: ADD_INT * T0.Y, PV.W, PS,
; R600-NEXT: ADD_INT T0.X, T0.W, literal.x,
; R600-NEXT: MOV * T1.X, literal.y,
; R600-NEXT: 10(1.401298e-44), 0(0.000000e+00)
; a & b
%and0 = and i64 %a, %b
; ~a (xor with all ones)
%not.a = xor i64 %a, -1
; ~a & mask
%and1 = and i64 %not.a, %mask
; (a & b) | (~a & mask)
%bitselect = or i64 %and0, %and1
; Extra arithmetic user of the select result.
%scalar.use = add i64 %bitselect, 10
; The destination is undef: only the generated ALU code matters to this test.
store i64 %scalar.use, i64 addrspace(1)* undef
ret void
}
; Kernel form of the 64-bit xor/and bit select: ((a ^ mask) & b) ^ mask, with
; every operand a kernel argument. The result is incremented by 10 and stored
; through an undef pointer, so the select has an arithmetic user rather than
; being stored directly.
; The assertions below expect two BFI_INT instructions, each preceded by a MOV
; of one constant-buffer operand into a register, followed by an
; ADDC_UINT/ADD_INT sequence using the literal 10. Compared with
; s_bitselect_i64_pat_0, the first two BFI_INT operands appear in the opposite
; order, matching the different roles %a and %b play in this form.
define amdgpu_kernel void @s_bitselect_i64_pat_1(i64 %a, i64 %b, i64 %mask) {
; R600-LABEL: s_bitselect_i64_pat_1:
; R600: ; %bb.0:
; R600-NEXT: ALU 9, @4, KC0[CB0:0-32], KC1[]
; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
; R600-NEXT: CF_END
; R600-NEXT: PAD
; R600-NEXT: ALU clause starting at 4:
; R600-NEXT: MOV * T0.W, KC0[3].Y,
; R600-NEXT: BFI_INT * T0.W, KC0[2].W, KC0[2].Y, PV.W,
; R600-NEXT: MOV * T1.W, KC0[3].Z,
; R600-NEXT: BFI_INT T1.W, KC0[3].X, KC0[2].Z, PV.W,
; R600-NEXT: ADDC_UINT * T2.W, T0.W, literal.x,
; R600-NEXT: 10(1.401298e-44), 0(0.000000e+00)
; R600-NEXT: ADD_INT * T0.Y, PV.W, PS,
; R600-NEXT: ADD_INT T0.X, T0.W, literal.x,
; R600-NEXT: MOV * T1.X, literal.y,
; R600-NEXT: 10(1.401298e-44), 0(0.000000e+00)
; a ^ mask
%xor.0 = xor i64 %a, %mask
; (a ^ mask) & b
%and = and i64 %xor.0, %b
; ((a ^ mask) & b) ^ mask
%bitselect = xor i64 %and, %mask
; Extra arithmetic user of the select result.
%scalar.use = add i64 %bitselect, 10
; The destination is undef: only the generated ALU code matters to this test.
store i64 %scalar.use, i64 addrspace(1)* undef
ret void
}
; Kernel form of the 64-bit xor/and bit select: ((a ^ mask) & b) ^ mask, with
; every operand a kernel argument. The result is incremented by 10 and stored
; through an undef pointer, so the select has an arithmetic user rather than
; being stored directly.
; NOTE(review): both the IR body and the expected output here are identical to
; s_bitselect_i64_pat_1, so this function adds no new pattern coverage as
; written - confirm whether a different operand arrangement was intended.
define amdgpu_kernel void @s_bitselect_i64_pat_2(i64 %a, i64 %b, i64 %mask) {
; R600-LABEL: s_bitselect_i64_pat_2:
; R600: ; %bb.0:
; R600-NEXT: ALU 9, @4, KC0[CB0:0-32], KC1[]
; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
; R600-NEXT: CF_END
; R600-NEXT: PAD
; R600-NEXT: ALU clause starting at 4:
; R600-NEXT: MOV * T0.W, KC0[3].Y,
; R600-NEXT: BFI_INT * T0.W, KC0[2].W, KC0[2].Y, PV.W,
; R600-NEXT: MOV * T1.W, KC0[3].Z,
; R600-NEXT: BFI_INT T1.W, KC0[3].X, KC0[2].Z, PV.W,
; R600-NEXT: ADDC_UINT * T2.W, T0.W, literal.x,
; R600-NEXT: 10(1.401298e-44), 0(0.000000e+00)
; R600-NEXT: ADD_INT * T0.Y, PV.W, PS,
; R600-NEXT: ADD_INT T0.X, T0.W, literal.x,
; R600-NEXT: MOV * T1.X, literal.y,
; R600-NEXT: 10(1.401298e-44), 0(0.000000e+00)
; a ^ mask
%xor.0 = xor i64 %a, %mask
; (a ^ mask) & b
%and = and i64 %xor.0, %b
; ((a ^ mask) & b) ^ mask
%bitselect = xor i64 %and, %mask
; Extra arithmetic user of the select result.
%scalar.use = add i64 %bitselect, 10
; The destination is undef: only the generated ALU code matters to this test.
store i64 %scalar.use, i64 addrspace(1)* undef
ret void
}
; Kernel form of the 64-bit SHA-256 Ma pattern: (x & z) | (y & (x | z)), the
; bitwise majority of %x, %y and %z, with every operand a kernel argument.
; The result is incremented by 10 and stored through an undef pointer, so the
; pattern has an arithmetic user rather than being stored directly.
; The assertions below expect two XOR_INT/BFI_INT pairs (presumably one per
; 32-bit half - confirm), each BFI_INT taking the preceding XOR_INT result
; (PV.W or PS) as its first operand, followed by an ADDC_UINT/ADD_INT sequence
; using the literal 10.
define amdgpu_kernel void @s_bfi_sha256_ma_i64(i64 %x, i64 %y, i64 %z) {
; R600-LABEL: s_bfi_sha256_ma_i64:
; R600: ; %bb.0: ; %entry
; R600-NEXT: ALU 9, @4, KC0[CB0:0-32], KC1[]
; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
; R600-NEXT: CF_END
; R600-NEXT: PAD
; R600-NEXT: ALU clause starting at 4:
; R600-NEXT: XOR_INT * T0.W, KC0[2].Y, KC0[2].W,
; R600-NEXT: BFI_INT T0.W, PV.W, KC0[3].Y, KC0[2].W,
; R600-NEXT: XOR_INT * T1.W, KC0[2].Z, KC0[3].X,
; R600-NEXT: BFI_INT T1.W, PS, KC0[3].Z, KC0[3].X,
; R600-NEXT: ADDC_UINT * T2.W, PV.W, literal.x,
; R600-NEXT: 10(1.401298e-44), 0(0.000000e+00)
; R600-NEXT: ADD_INT * T0.Y, PV.W, PS,
; R600-NEXT: ADD_INT T0.X, T0.W, literal.x,
; R600-NEXT: MOV * T1.X, literal.y,
; R600-NEXT: 10(1.401298e-44), 0(0.000000e+00)
entry:
; x & z
%and0 = and i64 %x, %z
; x | z
%or0 = or i64 %x, %z
; y & (x | z)
%and1 = and i64 %y, %or0
; (x & z) | (y & (x | z))
%or1 = or i64 %and0, %and1
; Extra arithmetic user of the majority result.
%scalar.use = add i64 %or1, 10
; The destination is undef: only the generated ALU code matters to this test.
store i64 %scalar.use, i64 addrspace(1)* undef
ret void
}