When replacing certain AMDGPU library calls with constant data vectors, the existing implementation only handled single and double-precision floats. This change extends the functionality to also support half-precision floats. Additionally, it refactors the function responsible for generating constant float data vectors to improve readability and reduce code duplication. In tandem with this refactoring, the patch relaxes the check for constant data vectors to include any constant of vector type. This allows other constant vectors to be processed, such as those created from constant aggregate zeros (e.g. `<2 x float> zeroinitializer`). --------- Signed-off-by: Steffen Holst Larsen <sholstla@amd.com>
70 lines
2.4 KiB
LLVM
70 lines
2.4 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
|
|
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s
|
|
|
|
; Scalar f32 case: calls @_Z4acosf with the constant argument 0.0.
; The CHECK lines expect the call to be gone after the pass, leaving a direct
; return of the float constant 0x3FF921FB60000000 (approximately pi/2).
define float @test_tdo_scalar_f32_acos() {
|
|
; CHECK-LABEL: define float @test_tdo_scalar_f32_acos() {
|
|
; CHECK-NEXT: [[ENTRY:.*:]]
|
|
; CHECK-NEXT: ret float 0x3FF921FB60000000
|
|
;
|
|
entry:
|
|
%c = call float @_Z4acosf(float 0.000000e+00)
|
|
ret float %c
|
|
}
|
|
|
|
; Vector f32 case: calls @_Z4acosDv4_f with the constant vector
; <0.0, -0.0, 1.0, -1.0>. The CHECK lines expect the call to be replaced by a
; directly returned constant vector whose lanes are approximately
; <pi/2, pi/2, 0, pi>.
; NOTE(review): the function name says "v2" but the operand and result type is
; <4 x float> -- confirm whether the name should read "v4".
define <4 x float> @test_tdo_v2_f32_acos() {
|
|
; CHECK-LABEL: define <4 x float> @test_tdo_v2_f32_acos() {
|
|
; CHECK-NEXT: [[ENTRY:.*:]]
|
|
; CHECK-NEXT: ret <4 x float> <float 0x3FF921FB60000000, float 0x3FF921FB60000000, float 0.000000e+00, float 0x400921FB60000000>
|
|
;
|
|
entry:
|
|
%c = call <4 x float> @_Z4acosDv4_f(<4 x float> <float 0.000000e+00, float -0.000000e+00, float 1.000000e+00, float -1.000000e+00>)
|
|
ret <4 x float> %c
|
|
}
|
|
|
|
; Scalar f16 case: calls @_Z4acosDh with the constant argument 0.0.
; The CHECK lines expect the call to be gone after the pass, leaving a direct
; return of the half constant 0xH3E48 (approximately pi/2 in half precision).
define half @test_tdo_scalar_f16_acos() {
|
|
; CHECK-LABEL: define half @test_tdo_scalar_f16_acos() {
|
|
; CHECK-NEXT: [[ENTRY:.*:]]
|
|
; CHECK-NEXT: ret half 0xH3E48
|
|
;
|
|
entry:
|
|
%c = call half @_Z4acosDh(half 0.000000e+00)
|
|
ret half %c
|
|
}
|
|
|
|
; Vector f16 case: calls @_Z4acosDv4_Dh with the constant vector
; <0.0, -0.0, 1.0, -1.0>. The CHECK lines expect the call to be replaced by a
; directly returned constant vector <0xH3E48, 0xH3E48, 0xH0000, 0xH4248>,
; i.e. approximately <pi/2, pi/2, 0, pi> in half precision.
; NOTE(review): the function name says "v2" but the operand and result type is
; <4 x half> -- confirm whether the name should read "v4".
define <4 x half> @test_tdo_v2_f16_acos() {
|
|
; CHECK-LABEL: define <4 x half> @test_tdo_v2_f16_acos() {
|
|
; CHECK-NEXT: [[ENTRY:.*:]]
|
|
; CHECK-NEXT: ret <4 x half> <half 0xH3E48, half 0xH3E48, half 0xH0000, half 0xH4248>
|
|
;
|
|
entry:
|
|
%c = call <4 x half> @_Z4acosDv4_Dh(<4 x half> <half 0.000000e+00, half -0.000000e+00, half 1.000000e+00, half -1.000000e+00>)
|
|
ret <4 x half> %c
|
|
}
|
|
|
|
; Scalar f64 case: calls @_Z4acosd with the constant argument 0.0.
; The CHECK lines expect the call to be gone after the pass, leaving a direct
; return of the double constant 0x3FF921FB54442D18 (approximately pi/2).
define double @test_tdo_scalar_f64_acos() {
|
|
; CHECK-LABEL: define double @test_tdo_scalar_f64_acos() {
|
|
; CHECK-NEXT: [[ENTRY:.*:]]
|
|
; CHECK-NEXT: ret double 0x3FF921FB54442D18
|
|
;
|
|
entry:
|
|
%c = call double @_Z4acosd(double 0.000000e+00)
|
|
ret double %c
|
|
}
|
|
|
|
; Vector f64 case: calls @_Z4acosDv4_d with the constant vector
; <0.0, -0.0, 1.0, -1.0>. The CHECK lines expect the call to be replaced by a
; directly returned constant vector whose lanes are approximately
; <pi/2, pi/2, 0, pi>.
; NOTE(review): the function name says "v2" but the operand and result type is
; <4 x double> -- confirm whether the name should read "v4".
define <4 x double> @test_tdo_v2_f64_acos() {
|
|
; CHECK-LABEL: define <4 x double> @test_tdo_v2_f64_acos() {
|
|
; CHECK-NEXT: [[ENTRY:.*:]]
|
|
; CHECK-NEXT: ret <4 x double> <double 0x3FF921FB54442D18, double 0x3FF921FB54442D18, double 0.000000e+00, double 0x400921FB54442D18>
|
|
;
|
|
entry:
|
|
%c = call <4 x double> @_Z4acosDv4_d(<4 x double> <double 0.000000e+00, double -0.000000e+00, double 1.000000e+00, double -1.000000e+00>)
|
|
ret <4 x double> %c
|
|
}
|
|
|
|
; External declarations of the mangled `acos` library functions called by the
; tests above: one scalar and one <4 x T> vector variant for each of float,
; half, and double. No bodies are provided in this file.
declare float @_Z4acosf(float)
|
|
declare <4 x float> @_Z4acosDv4_f(<4 x float>)
|
|
declare half @_Z4acosDh(half)
|
|
declare <4 x half> @_Z4acosDv4_Dh(<4 x half>)
|
|
declare double @_Z4acosd(double)
|
|
declare <4 x double> @_Z4acosDv4_d(<4 x double>)
|