llvm-project/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-tdo-acos.ll
Steffen Larsen 5b156a4372
[AMDGPU] Add half vector support for table-driven libcall optimization (#178638)
When replacing certain AMDGPU library calls with constant data vectors,
the existing implementation only handled single and double-precision
floats. This change extends the functionality to also support
half-precision floats.

Additionally, it refactors the function responsible for generating
constant float data vectors to improve readability and reduce code
duplication. In tandem with this refactoring, the patch relaxes the
check for constant data vectors to include any constant of vector type.
This allows other constant vectors to be processed, such as those
created from constant aggregate zeros (e.g. `<2 x float>
zeroinitializer`).

---------

Signed-off-by: Steffen Holst Larsen <sholstla@amd.com>
2026-03-04 08:42:44 +01:00

70 lines
2.4 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s
; Scalar f32 case: the acos library call on the constant 0.0 is expected to be
; replaced by the constant 0x3FF921FB60000000 (pi/2 at single precision), so
; only the ret remains.
define float @test_tdo_scalar_f32_acos() {
; CHECK-LABEL: define float @test_tdo_scalar_f32_acos() {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: ret float 0x3FF921FB60000000
;
entry:
  %acos_of_zero = call float @_Z4acosf(float 0.000000e+00)
  ret float %acos_of_zero
}
; Vector f32 case: the constant operand <0.0, -0.0, 1.0, -1.0> is expected to
; be replaced lane by lane with <pi/2, pi/2, 0.0, pi> at single precision.
; NOTE(review): the function name says "v2" but the operand has four lanes;
; the name is kept as-is.
define <4 x float> @test_tdo_v2_f32_acos() {
; CHECK-LABEL: define <4 x float> @test_tdo_v2_f32_acos() {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: ret <4 x float> <float 0x3FF921FB60000000, float 0x3FF921FB60000000, float 0.000000e+00, float 0x400921FB60000000>
;
entry:
  %acos_lanes = call <4 x float> @_Z4acosDv4_f(<4 x float> <float 0.000000e+00, float -0.000000e+00, float 1.000000e+00, float -1.000000e+00>)
  ret <4 x float> %acos_lanes
}
; Scalar f16 case: the acos library call on the constant 0.0 is expected to be
; replaced by the half constant 0xH3E48 (pi/2 at half precision).
define half @test_tdo_scalar_f16_acos() {
; CHECK-LABEL: define half @test_tdo_scalar_f16_acos() {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: ret half 0xH3E48
;
entry:
  %acos_of_zero = call half @_Z4acosDh(half 0.000000e+00)
  ret half %acos_of_zero
}
; Vector f16 case: the constant operand <0.0, -0.0, 1.0, -1.0> is expected to
; be replaced lane by lane with <0xH3E48, 0xH3E48, 0xH0000, 0xH4248>, i.e.
; <pi/2, pi/2, 0.0, pi> at half precision.
; NOTE(review): the function name says "v2" but the operand has four lanes;
; the name is kept as-is.
define <4 x half> @test_tdo_v2_f16_acos() {
; CHECK-LABEL: define <4 x half> @test_tdo_v2_f16_acos() {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: ret <4 x half> <half 0xH3E48, half 0xH3E48, half 0xH0000, half 0xH4248>
;
entry:
  %acos_lanes = call <4 x half> @_Z4acosDv4_Dh(<4 x half> <half 0.000000e+00, half -0.000000e+00, half 1.000000e+00, half -1.000000e+00>)
  ret <4 x half> %acos_lanes
}
; Scalar f64 case: the acos library call on the constant 0.0 is expected to be
; replaced by the constant 0x3FF921FB54442D18 (pi/2 at double precision).
define double @test_tdo_scalar_f64_acos() {
; CHECK-LABEL: define double @test_tdo_scalar_f64_acos() {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: ret double 0x3FF921FB54442D18
;
entry:
  %acos_of_zero = call double @_Z4acosd(double 0.000000e+00)
  ret double %acos_of_zero
}
; Vector f64 case: the constant operand <0.0, -0.0, 1.0, -1.0> is expected to
; be replaced lane by lane with <pi/2, pi/2, 0.0, pi> at double precision.
; NOTE(review): the function name says "v2" but the operand has four lanes;
; the name is kept as-is.
define <4 x double> @test_tdo_v2_f64_acos() {
; CHECK-LABEL: define <4 x double> @test_tdo_v2_f64_acos() {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: ret <4 x double> <double 0x3FF921FB54442D18, double 0x3FF921FB54442D18, double 0.000000e+00, double 0x400921FB54442D18>
;
entry:
  %acos_lanes = call <4 x double> @_Z4acosDv4_d(<4 x double> <double 0.000000e+00, double -0.000000e+00, double 1.000000e+00, double -1.000000e+00>)
  ret <4 x double> %acos_lanes
}
; External declarations of the acos library entry points called by the tests
; above: one scalar and one four-lane vector variant each for float, half and
; double. The symbol names appear to be Itanium-mangled overloads of acos.
; 32-bit float variants.
declare float @_Z4acosf(float)
declare <4 x float> @_Z4acosDv4_f(<4 x float>)
; 16-bit half variants.
declare half @_Z4acosDh(half)
declare <4 x half> @_Z4acosDv4_Dh(<4 x half>)
; 64-bit double variants.
declare double @_Z4acosd(double)
declare <4 x double> @_Z4acosDv4_d(<4 x double>)