Fix the ABI on old subtargets to match new subtargets, packing 16-bit element subvectors into 32-bit registers. Previously these would be scalarized and promoted to i32/float. Note this only changes the vector cases. Scalar i16/half are still promoted to i32/float for now. I've unsuccessfully tried to make that switch in the past, so leave that for later. This will help with the removal of softPromoteHalfType.
265 lines
10 KiB
LLVM
265 lines
10 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
|
|
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri < %s | FileCheck -check-prefixes=GCN,GFX7 %s
|
|
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8 %s
|
|
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
|
|
|
|
; Monotonic atomic i8 load through a generic `ptr` argument, returned as i8.
; The shared check block expects a single flat_load_ubyte with glc and no
; extra extension instruction.
define i8 @atomic_load_flat_monotonic_i8(ptr %ptr) {
; GCN-LABEL: atomic_load_flat_monotonic_i8:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: flat_load_ubyte v0, v[0:1] glc
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
  %load = load atomic i8, ptr %ptr monotonic, align 1
  ret i8 %load
}
|
|
|
|
; Monotonic atomic i8 load followed by a zext to i32.
; The checks expect the zero-extension to be folded into flat_load_ubyte, with
; no separate extend instruction.
define i32 @atomic_load_flat_monotonic_i8_zext_to_i32(ptr %ptr) {
; GCN-LABEL: atomic_load_flat_monotonic_i8_zext_to_i32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: flat_load_ubyte v0, v[0:1] glc
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
  %load = load atomic i8, ptr %ptr monotonic, align 1
  %ext = zext i8 %load to i32
  ret i32 %ext
}
|
|
|
|
; Monotonic atomic i8 load followed by a sext to i32.
; The checks expect the sign-extension to be folded into flat_load_sbyte, with
; no separate extend instruction.
define i32 @atomic_load_flat_monotonic_i8_sext_to_i32(ptr %ptr) {
; GCN-LABEL: atomic_load_flat_monotonic_i8_sext_to_i32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: flat_load_sbyte v0, v[0:1] glc
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
  %load = load atomic i8, ptr %ptr monotonic, align 1
  %ext = sext i8 %load to i32
  ret i32 %ext
}
|
|
|
|
; Monotonic atomic i8 load followed by a zext to i16 (returned as i16).
; The checks expect a single flat_load_ubyte and no separate extend.
define i16 @atomic_load_flat_monotonic_i8_zext_to_i16(ptr %ptr) {
; GCN-LABEL: atomic_load_flat_monotonic_i8_zext_to_i16:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: flat_load_ubyte v0, v[0:1] glc
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
  %load = load atomic i8, ptr %ptr monotonic, align 1
  %ext = zext i8 %load to i16
  ret i16 %ext
}
|
|
|
|
; Monotonic atomic i8 load followed by a sext to i16 (returned as i16).
; The checks expect a single flat_load_sbyte and no separate extend.
define i16 @atomic_load_flat_monotonic_i8_sext_to_i16(ptr %ptr) {
; GCN-LABEL: atomic_load_flat_monotonic_i8_sext_to_i16:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: flat_load_sbyte v0, v[0:1] glc
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
  %load = load atomic i8, ptr %ptr monotonic, align 1
  %ext = sext i8 %load to i16
  ret i16 %ext
}
|
|
|
|
; Monotonic atomic i16 load through a generic `ptr` argument, returned as i16.
; The checks expect a single flat_load_ushort with glc.
define i16 @atomic_load_flat_monotonic_i16(ptr %ptr) {
; GCN-LABEL: atomic_load_flat_monotonic_i16:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: flat_load_ushort v0, v[0:1] glc
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
  %load = load atomic i16, ptr %ptr monotonic, align 2
  ret i16 %load
}
|
|
|
|
; Monotonic atomic i16 load followed by a zext to i32.
; The checks expect the zero-extension to be folded into flat_load_ushort.
define i32 @atomic_load_flat_monotonic_i16_zext_to_i32(ptr %ptr) {
; GCN-LABEL: atomic_load_flat_monotonic_i16_zext_to_i32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: flat_load_ushort v0, v[0:1] glc
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
  %load = load atomic i16, ptr %ptr monotonic, align 2
  %ext = zext i16 %load to i32
  ret i32 %ext
}
|
|
|
|
; Monotonic atomic i16 load followed by a sext to i32.
; The checks expect the sign-extension to be folded into flat_load_sshort.
define i32 @atomic_load_flat_monotonic_i16_sext_to_i32(ptr %ptr) {
; GCN-LABEL: atomic_load_flat_monotonic_i16_sext_to_i32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: flat_load_sshort v0, v[0:1] glc
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
  %load = load atomic i16, ptr %ptr monotonic, align 2
  %ext = sext i16 %load to i32
  ret i32 %ext
}
|
|
|
|
; Monotonic atomic half load through a generic `ptr` argument, returned as half.
; The checks expect the same flat_load_ushort selection as the i16 case.
define half @atomic_load_flat_monotonic_f16(ptr %ptr) {
; GCN-LABEL: atomic_load_flat_monotonic_f16:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: flat_load_ushort v0, v[0:1] glc
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
  %load = load atomic half, ptr %ptr monotonic, align 2
  ret half %load
}
|
|
|
|
; Monotonic atomic bfloat load through a generic `ptr` argument, returned as
; bfloat. The checks expect the same flat_load_ushort selection as the i16 case.
define bfloat @atomic_load_flat_monotonic_bf16(ptr %ptr) {
; GCN-LABEL: atomic_load_flat_monotonic_bf16:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: flat_load_ushort v0, v[0:1] glc
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
  %load = load atomic bfloat, ptr %ptr monotonic, align 2
  ret bfloat %load
}
|
|
|
|
; Monotonic atomic half load, bitcast to i16 and zero-extended to i32.
; The checks expect the bitcast and zext to fold away, leaving only
; flat_load_ushort.
define i32 @atomic_load_flat_monotonic_f16_zext_to_i32(ptr %ptr) {
; GCN-LABEL: atomic_load_flat_monotonic_f16_zext_to_i32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: flat_load_ushort v0, v[0:1] glc
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
  %load = load atomic half, ptr %ptr monotonic, align 2
  %cast = bitcast half %load to i16
  %ext = zext i16 %cast to i32
  ret i32 %ext
}
|
|
|
|
; Monotonic atomic bfloat load, bitcast to i16 and zero-extended to i32.
; The checks expect the bitcast and zext to fold away, leaving only
; flat_load_ushort.
define i32 @atomic_load_flat_monotonic_bf16_zext_to_i32(ptr %ptr) {
; GCN-LABEL: atomic_load_flat_monotonic_bf16_zext_to_i32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: flat_load_ushort v0, v[0:1] glc
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
  %load = load atomic bfloat, ptr %ptr monotonic, align 2
  %cast = bitcast bfloat %load to i16
  %ext = zext i16 %cast to i32
  ret i32 %ext
}
|
|
|
|
; Monotonic atomic i16 load, zero-extended to i32 and shifted into the high
; 16 bits. The checks expect flat_load_ushort followed by an explicit
; v_lshlrev_b32 by 16 on every subtarget (no d16_hi load is selected here).
define i32 @atomic_load_flat_monotonic_i16_d16_hi_shift(ptr %ptr) {
; GCN-LABEL: atomic_load_flat_monotonic_i16_d16_hi_shift:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: flat_load_ushort v0, v[0:1] glc
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GCN-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
  %load = load atomic i16, ptr %ptr monotonic, align 2
  %ext = zext i16 %load to i32
  %shl = shl i32 %ext, 16
  ret i32 %shl
}
|
|
|
|
; Monotonic atomic i16 load inserted into element 1 (the high half) of a
; <2 x i16> argument.
; With -mcpu=kaveri the low half of v2 is masked with 0xffff and ORed with the
; loaded value shifted left by 16; with -mcpu=fiji the same merge uses an SDWA
; v_or; with -mcpu=gfx900 a flat_load_short_d16_hi writes straight into v2.
; NOTE(review): the gfx900 checks contain both a flat_load_ushort into v3 (not
; read again in the checked sequence) and the flat_load_short_d16_hi for the
; single IR load. That looks like a redundant second memory access - confirm
; whether it is intended before treating this output as the desired codegen.
define <2 x i16> @atomic_load_flat_monotonic_i16_d16_hi_vector_insert(ptr %ptr, <2 x i16> %vec) {
; GFX7-LABEL: atomic_load_flat_monotonic_i16_d16_hi_vector_insert:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: flat_load_ushort v0, v[0:1] glc
; GFX7-NEXT: v_and_b32_e32 v1, 0xffff, v2
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX7-NEXT: v_or_b32_e32 v0, v1, v0
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: atomic_load_flat_monotonic_i16_d16_hi_vector_insert:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: flat_load_ushort v0, v[0:1] glc
; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX8-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: atomic_load_flat_monotonic_i16_d16_hi_vector_insert:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: flat_load_ushort v3, v[0:1] glc
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: flat_load_short_d16_hi v2, v[0:1] glc
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, v2
; GFX9-NEXT: s_setpc_b64 s[30:31]
  %load = load atomic i16, ptr %ptr monotonic, align 2
  %insert = insertelement <2 x i16> %vec, i16 %load, i32 1
  ret <2 x i16> %insert
}
|
|
|
|
; Monotonic atomic i16 load zero-extended to i32 and ORed into the low half of
; a word whose high half is the zero-extended i16 argument %high shifted left
; by 16.
; With -mcpu=kaveri and -mcpu=fiji the checks expect v_and / v_lshlrev / v_or;
; with -mcpu=gfx900 the shift and OR are combined into one v_lshl_or_b32.
define i32 @atomic_load_flat_monotonic_i16_d16_lo_or(ptr %ptr, i16 %high) {
; GFX7-LABEL: atomic_load_flat_monotonic_i16_d16_lo_or:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: flat_load_ushort v0, v[0:1] glc
; GFX7-NEXT: v_and_b32_e32 v1, 0xffff, v2
; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_or_b32_e32 v0, v1, v0
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: atomic_load_flat_monotonic_i16_d16_lo_or:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: flat_load_ushort v0, v[0:1] glc
; GFX8-NEXT: v_and_b32_e32 v1, 0xffff, v2
; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_or_b32_e32 v0, v1, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: atomic_load_flat_monotonic_i16_d16_lo_or:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: flat_load_ushort v0, v[0:1] glc
; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v2
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_lshl_or_b32 v0, v1, 16, v0
; GFX9-NEXT: s_setpc_b64 s[30:31]
  %load = load atomic i16, ptr %ptr monotonic, align 2
  %ext = zext i16 %load to i32
  %high.ext = zext i16 %high to i32
  %shl = shl i32 %high.ext, 16
  %or = or i32 %shl, %ext
  ret i32 %or
}
|
|
|
|
; Monotonic atomic i16 load inserted into element 0 (the low half) of a
; <2 x i16> argument.
; With -mcpu=kaveri and -mcpu=fiji the high half of v2 is kept with a
; 0xffff0000 mask and ORed with the loaded value; with -mcpu=gfx900 a
; flat_load_short_d16 writes straight into v2.
; NOTE(review): the gfx900 checks contain both a flat_load_ushort into v3 (not
; read again in the checked sequence) and the flat_load_short_d16 for the
; single IR load. That looks like a redundant second memory access - confirm
; whether it is intended before treating this output as the desired codegen.
define <2 x i16> @atomic_load_flat_monotonic_i16_d16_lo_vector_insert(ptr %ptr, <2 x i16> %vec) {
; GFX7-LABEL: atomic_load_flat_monotonic_i16_d16_lo_vector_insert:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: flat_load_ushort v0, v[0:1] glc
; GFX7-NEXT: v_and_b32_e32 v1, 0xffff0000, v2
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_or_b32_e32 v0, v1, v0
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: atomic_load_flat_monotonic_i16_d16_lo_vector_insert:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: flat_load_ushort v0, v[0:1] glc
; GFX8-NEXT: v_and_b32_e32 v1, 0xffff0000, v2
; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_or_b32_e32 v0, v1, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: atomic_load_flat_monotonic_i16_d16_lo_vector_insert:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: flat_load_ushort v3, v[0:1] glc
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: flat_load_short_d16 v2, v[0:1] glc
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, v2
; GFX9-NEXT: s_setpc_b64 s[30:31]
  %load = load atomic i16, ptr %ptr monotonic, align 2
  %insert = insertelement <2 x i16> %vec, i16 %load, i32 0
  ret <2 x i16> %insert
}
|