Farzon Lotfi 643b31dbe8
[HLSL] implement mad intrinsic (#83826)
This change implements #83736
The dot product lowering needs a tertiary multiply-add operation. DXIL
has three mad opcodes for `fmad` (46), `imad` (48), and `umad` (49). Dot
product in DXIL only uses `imad`/`umad`, but for completeness, and
because the HLSL `mad` intrinsic requires it, `fmad` was also included.
Two new intrinsics needed to be created to complete this change;
the `fmad` case is already supported by LLVM via the `fmuladd` intrinsic.

- `hlsl_intrinsics.h` - exposed mad api call.
- `Builtins.td` - exposed a `mad` builtin.
- `Sema.h` - make `tertiary` calls check for float types optional. 
- `CGBuiltin.cpp` - pick the intrinsic for signed/unsigned & float; also
reuse `int_fmuladd`.
- `SemaChecking.cpp` - type checks for `__builtin_hlsl_mad`. 
- `IntrinsicsDirectX.td` - create the two new intrinsics for
`imad`/`umad`.
- `DXIL.td` - create the llvm intrinsic to  `DXIL` opcode mapping.

---------

Co-authored-by: Farzon Lotfi <farzon@farzon.com>
2024-03-05 12:23:26 -05:00

66 lines
2.5 KiB
LLVM

; RUN: opt -S -dxil-op-lower < %s | FileCheck %s
; Make sure dxil operation function calls for imad are generated for i16, i32, and i64.
; CHECK:call i16 @dx.op.tertiary.i16(i32 48, i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}})
; CHECK:call i32 @dx.op.tertiary.i32(i32 48, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
; CHECK:call i64 @dx.op.tertiary.i64(i32 48, i64 %{{.*}}, i64 %{{.*}}, i64 %{{.*}})
target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64"
target triple = "dxil-pc-shadermodel6.7-library"
; Function Attrs: noinline nounwind optnone
; Signed 16-bit mad: computes p0 * p1 + p2 via llvm.dx.imad, which
; -dxil-op-lower maps to dx.op.tertiary.i16 with DXIL opcode 48 (Imad).
; The alloca/store/load boilerplate is unoptimized (-O0-style) frontend
; output preserved so the test matches what clang emits.
define noundef i16 @imad_short(i16 noundef %p0, i16 noundef %p1, i16 noundef %p2) #0 {
entry:
%p2.addr = alloca i16, align 2
%p1.addr = alloca i16, align 2
%p0.addr = alloca i16, align 2
store i16 %p2, ptr %p2.addr, align 2
store i16 %p1, ptr %p1.addr, align 2
store i16 %p0, ptr %p0.addr, align 2
%0 = load i16, ptr %p0.addr, align 2
%1 = load i16, ptr %p1.addr, align 2
%2 = load i16, ptr %p2.addr, align 2
; This call must lower to the i16 CHECK line at the top of the file.
%dx.imad = call i16 @llvm.dx.imad.i16(i16 %0, i16 %1, i16 %2)
ret i16 %dx.imad
}
; Function Attrs: nocallback nofree nosync nounwind willreturn
declare i16 @llvm.dx.imad.i16(i16, i16, i16) #1
; Function Attrs: noinline nounwind optnone
; Signed 32-bit mad: computes p0 * p1 + p2 via llvm.dx.imad, which
; -dxil-op-lower maps to dx.op.tertiary.i32 with DXIL opcode 48 (Imad).
; Same unoptimized alloca/store/load shape as the i16 variant.
define noundef i32 @imad_int(i32 noundef %p0, i32 noundef %p1, i32 noundef %p2) #0 {
entry:
%p2.addr = alloca i32, align 4
%p1.addr = alloca i32, align 4
%p0.addr = alloca i32, align 4
store i32 %p2, ptr %p2.addr, align 4
store i32 %p1, ptr %p1.addr, align 4
store i32 %p0, ptr %p0.addr, align 4
%0 = load i32, ptr %p0.addr, align 4
%1 = load i32, ptr %p1.addr, align 4
%2 = load i32, ptr %p2.addr, align 4
; This call must lower to the i32 CHECK line at the top of the file.
%dx.imad = call i32 @llvm.dx.imad.i32(i32 %0, i32 %1, i32 %2)
ret i32 %dx.imad
}
; Function Attrs: nocallback nofree nosync nounwind willreturn
declare i32 @llvm.dx.imad.i32(i32, i32, i32) #1
; Function Attrs: noinline nounwind optnone
; Signed 64-bit mad: computes p0 * p1 + p2 via llvm.dx.imad, which
; -dxil-op-lower maps to dx.op.tertiary.i64 with DXIL opcode 48 (Imad).
; Same unoptimized alloca/store/load shape as the i16/i32 variants.
define noundef i64 @imad_int64(i64 noundef %p0, i64 noundef %p1, i64 noundef %p2) #0 {
entry:
%p2.addr = alloca i64, align 8
%p1.addr = alloca i64, align 8
%p0.addr = alloca i64, align 8
store i64 %p2, ptr %p2.addr, align 8
store i64 %p1, ptr %p1.addr, align 8
store i64 %p0, ptr %p0.addr, align 8
%0 = load i64, ptr %p0.addr, align 8
%1 = load i64, ptr %p1.addr, align 8
%2 = load i64, ptr %p2.addr, align 8
; This call must lower to the i64 CHECK line at the top of the file.
%dx.imad = call i64 @llvm.dx.imad.i64(i64 %0, i64 %1, i64 %2)
ret i64 %dx.imad
}
; Function Attrs: nocallback nofree nosync nounwind willreturn
declare i64 @llvm.dx.imad.i64(i64, i64, i64) #1