
Fixes #152220. - Adds `dx_imad` and `dx_umad` to `isTargetIntrinsicTriviallyScalarizable` - Adds tests that confirm the intrinsic is now scalarizing
158 lines
7.4 KiB
LLVM
158 lines
7.4 KiB
LLVM
; RUN: opt -S -scalarizer -dxil-op-lower < %s | FileCheck %s
|
|
|
|
; Make sure dxil operation function calls for round are generated for float and half.
|
|
|
|
target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64"
|
|
target triple = "dxil-pc-shadermodel6.7-library"
|
|
; Function Attrs: noinline nounwind optnone
|
|
define noundef i16 @umad_ushort(i16 noundef %p0, i16 noundef %p1, i16 noundef %p2) #0 {
|
|
entry:
|
|
; CHECK: call i16 @dx.op.tertiary.i16(i32 49, i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}}) #[[#ATTR:]]
|
|
%p2.addr = alloca i16, align 2
|
|
%p1.addr = alloca i16, align 2
|
|
%p0.addr = alloca i16, align 2
|
|
store i16 %p2, ptr %p2.addr, align 2
|
|
store i16 %p1, ptr %p1.addr, align 2
|
|
store i16 %p0, ptr %p0.addr, align 2
|
|
%0 = load i16, ptr %p0.addr, align 2
|
|
%1 = load i16, ptr %p1.addr, align 2
|
|
%2 = load i16, ptr %p2.addr, align 2
|
|
%dx.umad = call i16 @llvm.dx.umad.i16(i16 %0, i16 %1, i16 %2)
|
|
ret i16 %dx.umad
|
|
}
|
|
|
|
; Function Attrs: nocallback nofree nosync nounwind willreturn
|
|
declare i16 @llvm.dx.umad.i16(i16, i16, i16) #1
|
|
|
|
; Function Attrs: noinline nounwind optnone
|
|
define noundef i32 @umad_uint(i32 noundef %p0, i32 noundef %p1, i32 noundef %p2) #0 {
|
|
entry:
|
|
; CHECK: call i32 @dx.op.tertiary.i32(i32 49, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) #[[#ATTR]]
|
|
%p2.addr = alloca i32, align 4
|
|
%p1.addr = alloca i32, align 4
|
|
%p0.addr = alloca i32, align 4
|
|
store i32 %p2, ptr %p2.addr, align 4
|
|
store i32 %p1, ptr %p1.addr, align 4
|
|
store i32 %p0, ptr %p0.addr, align 4
|
|
%0 = load i32, ptr %p0.addr, align 4
|
|
%1 = load i32, ptr %p1.addr, align 4
|
|
%2 = load i32, ptr %p2.addr, align 4
|
|
%dx.umad = call i32 @llvm.dx.umad.i32(i32 %0, i32 %1, i32 %2)
|
|
ret i32 %dx.umad
|
|
}
|
|
|
|
; Function Attrs: nocallback nofree nosync nounwind willreturn
|
|
declare i32 @llvm.dx.umad.i32(i32, i32, i32) #1
|
|
|
|
; Function Attrs: noinline nounwind optnone
|
|
define noundef i64 @umad_uint64(i64 noundef %p0, i64 noundef %p1, i64 noundef %p2) #0 {
|
|
entry:
|
|
; CHECK: call i64 @dx.op.tertiary.i64(i32 49, i64 %{{.*}}, i64 %{{.*}}, i64 %{{.*}}) #[[#ATTR]]
|
|
%p2.addr = alloca i64, align 8
|
|
%p1.addr = alloca i64, align 8
|
|
%p0.addr = alloca i64, align 8
|
|
store i64 %p2, ptr %p2.addr, align 8
|
|
store i64 %p1, ptr %p1.addr, align 8
|
|
store i64 %p0, ptr %p0.addr, align 8
|
|
%0 = load i64, ptr %p0.addr, align 8
|
|
%1 = load i64, ptr %p1.addr, align 8
|
|
%2 = load i64, ptr %p2.addr, align 8
|
|
%dx.umad = call i64 @llvm.dx.umad.i64(i64 %0, i64 %1, i64 %2)
|
|
ret i64 %dx.umad
|
|
}
|
|
|
|
; Function Attrs: nocallback nofree nosync nounwind willreturn
|
|
declare i64 @llvm.dx.umad.i64(i64, i64, i64) #1
|
|
|
|
; Function Attrs: noinline nounwind optnone
|
|
define noundef <4 x i16> @umad_uint16_t4(<4 x i16> noundef %p0, <4 x i16> noundef %p1, <4 x i16> noundef %p2) #0 {
|
|
entry:
|
|
; CHECK: extractelement <4 x i16> %p0, i64 0
|
|
; CHECK: extractelement <4 x i16> %p1, i64 0
|
|
; CHECK: extractelement <4 x i16> %p2, i64 0
|
|
; CHECK: call i16 @dx.op.tertiary.i16(i32 49, i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}}) #[[#ATTR]]
|
|
; CHECK: extractelement <4 x i16> %p0, i64 1
|
|
; CHECK: extractelement <4 x i16> %p1, i64 1
|
|
; CHECK: extractelement <4 x i16> %p2, i64 1
|
|
; CHECK: call i16 @dx.op.tertiary.i16(i32 49, i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}}) #[[#ATTR]]
|
|
; CHECK: extractelement <4 x i16> %p0, i64 2
|
|
; CHECK: extractelement <4 x i16> %p1, i64 2
|
|
; CHECK: extractelement <4 x i16> %p2, i64 2
|
|
; CHECK: call i16 @dx.op.tertiary.i16(i32 49, i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}}) #[[#ATTR]]
|
|
; CHECK: extractelement <4 x i16> %p0, i64 3
|
|
; CHECK: extractelement <4 x i16> %p1, i64 3
|
|
; CHECK: extractelement <4 x i16> %p2, i64 3
|
|
; CHECK: call i16 @dx.op.tertiary.i16(i32 49, i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}}) #[[#ATTR]]
|
|
; CHECK: insertelement <4 x i16> poison, i16 %{{.*}}, i64 0
|
|
; CHECK: insertelement <4 x i16> %{{.*}}, i16 %{{.*}}, i64 1
|
|
; CHECK: insertelement <4 x i16> %{{.*}}, i16 %{{.*}}, i64 2
|
|
; CHECK: insertelement <4 x i16> %{{.*}}, i16 %{{.*}}, i64 3
|
|
%dx.umad = call <4 x i16> @llvm.dx.umad.v4i16(<4 x i16> %p0, <4 x i16> %p1, <4 x i16> %p2)
|
|
ret <4 x i16> %dx.umad
|
|
}
|
|
|
|
; Function Attrs: nocallback nofree nosync nounwind willreturn
|
|
declare <4 x i16> @llvm.dx.umad.v4i16(<4 x i16>, <4 x i16>, <4 x i16>) #1
|
|
|
|
; Function Attrs: noinline nounwind optnone
|
|
define noundef <4 x i32> @umad_uint4(<4 x i32> noundef %p0, <4 x i32> noundef %p1, <4 x i32> noundef %p2) #0 {
|
|
entry:
|
|
; CHECK: extractelement <4 x i32> %p0, i64 0
|
|
; CHECK: extractelement <4 x i32> %p1, i64 0
|
|
; CHECK: extractelement <4 x i32> %p2, i64 0
|
|
; CHECK: call i32 @dx.op.tertiary.i32(i32 49, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) #[[#ATTR]]
|
|
; CHECK: extractelement <4 x i32> %p0, i64 1
|
|
; CHECK: extractelement <4 x i32> %p1, i64 1
|
|
; CHECK: extractelement <4 x i32> %p2, i64 1
|
|
; CHECK: call i32 @dx.op.tertiary.i32(i32 49, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) #[[#ATTR]]
|
|
; CHECK: extractelement <4 x i32> %p0, i64 2
|
|
; CHECK: extractelement <4 x i32> %p1, i64 2
|
|
; CHECK: extractelement <4 x i32> %p2, i64 2
|
|
; CHECK: call i32 @dx.op.tertiary.i32(i32 49, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) #[[#ATTR]]
|
|
; CHECK: extractelement <4 x i32> %p0, i64 3
|
|
; CHECK: extractelement <4 x i32> %p1, i64 3
|
|
; CHECK: extractelement <4 x i32> %p2, i64 3
|
|
; CHECK: call i32 @dx.op.tertiary.i32(i32 49, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) #[[#ATTR]]
|
|
; CHECK: insertelement <4 x i32> poison, i32 %{{.*}}, i64 0
|
|
; CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i64 1
|
|
; CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i64 2
|
|
; CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i64 3
|
|
%dx.umad = call <4 x i32> @llvm.dx.umad.v4i32(<4 x i32> %p0, <4 x i32> %p1, <4 x i32> %p2)
|
|
ret <4 x i32> %dx.umad
|
|
}
|
|
|
|
; Function Attrs: nocallback nofree nosync nounwind willreturn
|
|
declare <4 x i32> @llvm.dx.umad.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) #1
|
|
|
|
; Function Attrs: noinline nounwind optnone
|
|
define noundef <4 x i64> @umad_uint64_t4(<4 x i64> noundef %p0, <4 x i64> noundef %p1, <4 x i64> noundef %p2) #0 {
|
|
entry:
|
|
; CHECK: extractelement <4 x i64> %p0, i64 0
|
|
; CHECK: extractelement <4 x i64> %p1, i64 0
|
|
; CHECK: extractelement <4 x i64> %p2, i64 0
|
|
; CHECK: call i64 @dx.op.tertiary.i64(i32 49, i64 %{{.*}}, i64 %{{.*}}, i64 %{{.*}}) #[[#ATTR]]
|
|
; CHECK: extractelement <4 x i64> %p0, i64 1
|
|
; CHECK: extractelement <4 x i64> %p1, i64 1
|
|
; CHECK: extractelement <4 x i64> %p2, i64 1
|
|
; CHECK: call i64 @dx.op.tertiary.i64(i32 49, i64 %{{.*}}, i64 %{{.*}}, i64 %{{.*}}) #[[#ATTR]]
|
|
; CHECK: extractelement <4 x i64> %p0, i64 2
|
|
; CHECK: extractelement <4 x i64> %p1, i64 2
|
|
; CHECK: extractelement <4 x i64> %p2, i64 2
|
|
; CHECK: call i64 @dx.op.tertiary.i64(i32 49, i64 %{{.*}}, i64 %{{.*}}, i64 %{{.*}}) #[[#ATTR]]
|
|
; CHECK: extractelement <4 x i64> %p0, i64 3
|
|
; CHECK: extractelement <4 x i64> %p1, i64 3
|
|
; CHECK: extractelement <4 x i64> %p2, i64 3
|
|
; CHECK: call i64 @dx.op.tertiary.i64(i32 49, i64 %{{.*}}, i64 %{{.*}}, i64 %{{.*}}) #[[#ATTR]]
|
|
; CHECK: insertelement <4 x i64> poison, i64 %{{.*}}, i64 0
|
|
; CHECK: insertelement <4 x i64> %{{.*}}, i64 %{{.*}}, i64 1
|
|
; CHECK: insertelement <4 x i64> %{{.*}}, i64 %{{.*}}, i64 2
|
|
; CHECK: insertelement <4 x i64> %{{.*}}, i64 %{{.*}}, i64 3
|
|
%dx.umad = call <4 x i64> @llvm.dx.umad.v4i64(<4 x i64> %p0, <4 x i64> %p1, <4 x i64> %p2)
|
|
ret <4 x i64> %dx.umad
|
|
}
|
|
|
|
; Function Attrs: nocallback nofree nosync nounwind willreturn
|
|
declare <4 x i64> @llvm.dx.umad.v4i64(<4 x i64>, <4 x i64>, <4 x i64>) #1
|
|
|
|
; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
|