
Adds a new pass that removes functions if they use features that are not supported by the current GPU. This change is aimed at preventing crashes when building code at O0 that uses idioms such as `if (ISA_VERSION >= N) intrinsic_a(); else intrinsic_b();`, where ISA_VERSION is not constexpr and intrinsic_a is not selectable on older targets. This pattern is used all over the ROCm device libs, and the main motivation for this change is to allow code using the ROCm device libs to be built at O0.

Note: the feature-checking logic is done ad hoc in the pass. No other pass needs (or will need, in the foreseeable future) similar feature-checking logic, so I did not see a need to generalize it yet. It can (and probably should) be generalized later and moved to a TargetInfo-like class or helper file.

Reviewed By: arsenm, Joe_Nash

Differential Revision: https://reviews.llvm.org/D139000
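For illustration only, here is a minimal HIP/C-style sketch of the device-library idiom described above. The names `device_isa_version`, `intrinsic_a`, and `intrinsic_b` are hypothetical placeholders, not actual ROCm device-lib symbols; the point is that at O0 neither branch is folded away, so code requiring the newer target reaches instruction selection even when compiling for an older GPU.

/* Hypothetical sketch only: all names below are illustrative placeholders,
 * not actual ROCm device-lib symbols. */
extern int device_isa_version(void);  /* runtime value, not constexpr */
float intrinsic_a(float x);           /* implemented with instructions only
                                         selectable on newer GPUs */
float intrinsic_b(float x);           /* fallback available on all targets */

float helper(float x) {
  /* At O0 this branch is not folded, so the newer-target code path is still
   * compiled when targeting an older GPU; the new pass removes functions
   * that require unsupported features instead of letting the backend crash
   * during instruction selection. */
  if (device_isa_version() >= 900)
    return intrinsic_a(x);
  return intrinsic_b(x);
}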
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -amdgpu-enable-remove-incompatible-functions=0 -mtriple=amdgcn-amd-amdhsa -stop-after=legalizer -o - %s | FileCheck %s

; Make sure legalizer info doesn't assert on dummy targets

define i16 @vop3p_add_i16(i16 %arg0) #0 {
  ; CHECK-LABEL: name: vop3p_add_i16
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT: liveins: $vgpr0
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
  ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC]]
  ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16)
  ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
  ; CHECK-NEXT: SI_RETURN implicit $vgpr0
  %add = add i16 %arg0, %arg0
  ret i16 %add
}

define <2 x i16> @vop3p_add_v2i16(<2 x i16> %arg0) #0 {
  ; CHECK-LABEL: name: vop3p_add_v2i16
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT: liveins: $vgpr0
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
  ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
  ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
  ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
  ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
  ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
  ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
  ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
  ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC2]]
  ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[TRUNC3]]
  ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ADD]](s16)
  ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ADD1]](s16)
  ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
  ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
  ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
  ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>)
  ; CHECK-NEXT: SI_RETURN implicit $vgpr0
  %add = add <2 x i16> %arg0, %arg0
  ret <2 x i16> %add
}

define i16 @halfinsts_add_i16(i16 %arg0) #1 {
  ; CHECK-LABEL: name: halfinsts_add_i16
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT: liveins: $vgpr0
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY]]
  ; CHECK-NEXT: $vgpr0 = COPY [[ADD]](s32)
  ; CHECK-NEXT: SI_RETURN implicit $vgpr0
  %add = add i16 %arg0, %arg0
  ret i16 %add
}

define <2 x i16> @halfinsts_add_v2i16(<2 x i16> %arg0) #1 {
  ; CHECK-LABEL: name: halfinsts_add_v2i16
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY]]
  ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[COPY1]]
  ; CHECK-NEXT: $vgpr0 = COPY [[ADD]](s32)
  ; CHECK-NEXT: $vgpr1 = COPY [[ADD1]](s32)
  ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
  %add = add <2 x i16> %arg0, %arg0
  ret <2 x i16> %add
}

attributes #0 = { "target-features"="+vop3p" }
attributes #0 = { "target-features"="+16-bit-insts" }
|