
This change implements #83736. The dot product lowering needs a tertiary multiply-add operation. DXIL has three mad opcodes: `fmad` (46), `imad` (48), and `umad` (49). Dot product in DXIL only uses `imad`/`umad`, but for completeness — and because the HLSL `mad` intrinsic requires it — `fmad` was also included. Two new intrinsics needed to be created to complete this change; the `fmad` case is already supported by LLVM via the `fmuladd` intrinsic. - `hlsl_intrinsics.h` - exposed the mad API call. - `Builtins.td` - exposed a `mad` builtin. - `Sema.h` - made the `tertiary` calls' float-type check optional. - `CGBuiltin.cpp` - pick the intrinsic for signed/unsigned & float; also reuse `int_fmuladd`. - `SemaChecking.cpp` - type checks for `__builtin_hlsl_mad`. - `IntrinsicsDirectX.td` - create the two new intrinsics for `imad`/`umad`. - `DXIL.td` - create the LLVM intrinsic to DXIL opcode mapping. --------- Co-authored-by: Farzon Lotfi <farzon@farzon.com>
66 lines
2.5 KiB
LLVM
66 lines
2.5 KiB
LLVM
; RUN: opt -S -dxil-op-lower < %s | FileCheck %s
|
|
|
|
; Make sure dxil operation function calls for imad are generated for i16, i32, and i64.
|
|
; CHECK:call i16 @dx.op.tertiary.i16(i32 48, i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}})
|
|
; CHECK:call i32 @dx.op.tertiary.i32(i32 48, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
|
|
; CHECK:call i64 @dx.op.tertiary.i64(i32 48, i64 %{{.*}}, i64 %{{.*}}, i64 %{{.*}})
|
|
|
|
target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64"
|
|
target triple = "dxil-pc-shadermodel6.7-library"
|
|
; Function Attrs: noinline nounwind optnone
define noundef i16 @imad_short(i16 noundef %p0, i16 noundef %p1, i16 noundef %p2) #0 {
entry:
  ; Unoptimized (-O0 style) codegen: spill each incoming argument to a stack
  ; slot, then reload before use.
  %slot0 = alloca i16, align 2
  %slot1 = alloca i16, align 2
  %slot2 = alloca i16, align 2
  store i16 %p2, ptr %slot2, align 2
  store i16 %p1, ptr %slot1, align 2
  store i16 %p0, ptr %slot0, align 2
  %a = load i16, ptr %slot0, align 2
  %b = load i16, ptr %slot1, align 2
  %c = load i16, ptr %slot2, align 2
  ; i16 signed multiply-add via the DirectX imad intrinsic; dxil-op-lower
  ; should turn this into @dx.op.tertiary.i16 (opcode 48, see CHECK above).
  %dx.imad = call i16 @llvm.dx.imad.i16(i16 %a, i16 %b, i16 %c)
  ret i16 %dx.imad
}
|
|
|
|
; Function Attrs: nocallback nofree nosync nounwind willreturn
|
|
; DirectX signed integer multiply-add intrinsic, i16 overload (a * b + c per
; the commit description above); lowered to DXIL opcode 48.
declare i16 @llvm.dx.imad.i16(i16, i16, i16) #1
|
|
|
|
; Function Attrs: noinline nounwind optnone
define noundef i32 @imad_int(i32 noundef %p0, i32 noundef %p1, i32 noundef %p2) #0 {
entry:
  ; Unoptimized (-O0 style) codegen: spill each incoming argument to a stack
  ; slot, then reload before use.
  %slot0 = alloca i32, align 4
  %slot1 = alloca i32, align 4
  %slot2 = alloca i32, align 4
  store i32 %p2, ptr %slot2, align 4
  store i32 %p1, ptr %slot1, align 4
  store i32 %p0, ptr %slot0, align 4
  %a = load i32, ptr %slot0, align 4
  %b = load i32, ptr %slot1, align 4
  %c = load i32, ptr %slot2, align 4
  ; i32 signed multiply-add via the DirectX imad intrinsic; dxil-op-lower
  ; should turn this into @dx.op.tertiary.i32 (opcode 48, see CHECK above).
  %dx.imad = call i32 @llvm.dx.imad.i32(i32 %a, i32 %b, i32 %c)
  ret i32 %dx.imad
}
|
|
|
|
; Function Attrs: nocallback nofree nosync nounwind willreturn
|
|
; DirectX signed integer multiply-add intrinsic, i32 overload (a * b + c per
; the commit description above); lowered to DXIL opcode 48.
declare i32 @llvm.dx.imad.i32(i32, i32, i32) #1
|
|
|
|
; Function Attrs: noinline nounwind optnone
define noundef i64 @imad_int64(i64 noundef %p0, i64 noundef %p1, i64 noundef %p2) #0 {
entry:
  ; Unoptimized (-O0 style) codegen: spill each incoming argument to a stack
  ; slot, then reload before use.
  %slot0 = alloca i64, align 8
  %slot1 = alloca i64, align 8
  %slot2 = alloca i64, align 8
  store i64 %p2, ptr %slot2, align 8
  store i64 %p1, ptr %slot1, align 8
  store i64 %p0, ptr %slot0, align 8
  %a = load i64, ptr %slot0, align 8
  %b = load i64, ptr %slot1, align 8
  %c = load i64, ptr %slot2, align 8
  ; i64 signed multiply-add via the DirectX imad intrinsic; dxil-op-lower
  ; should turn this into @dx.op.tertiary.i64 (opcode 48, see CHECK above).
  %dx.imad = call i64 @llvm.dx.imad.i64(i64 %a, i64 %b, i64 %c)
  ret i64 %dx.imad
}
|
|
|
|
; Function Attrs: nocallback nofree nosync nounwind willreturn
|
|
; DirectX signed integer multiply-add intrinsic, i64 overload (a * b + c per
; the commit description above); lowered to DXIL opcode 48.
declare i64 @llvm.dx.imad.i64(i64, i64, i64) #1
|