
This change implements lowering for #70076, #70100, #70072, & #70102. `CGBuiltin.cpp` - simplify `lerp` intrinsic; `IntrinsicsDirectX.td` - simplify `lerp` intrinsic; `SemaChecking.cpp` - remove unnecessary check; `DXILIntrinsicExpansion.*` - add intrinsic to instruction expansion cases; `DXILOpLowering.cpp` - make sure `DXILIntrinsicExpansion` happens first; `DirectX.h` - changes to support new pass; `DirectXTargetMachine.cpp` - changes to support new pass. Why `any` and `lerp` as instruction expansion just for DXIL? - In SPIR-V there is an [OpAny](https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#OpAny). - SPIR-V has a GLSL lerp extension via [Fmix](https://registry.khronos.org/SPIR-V/specs/1.0/GLSL.std.450.html#FMix). Why `exp` as instruction expansion? - We have an `exp2` opcode and `exp` reuses that opcode, so instruction expansion is a convenient way to do the preprocessing. - Further, SPIR-V has a GLSL exp extension via [Exp](https://registry.khronos.org/SPIR-V/specs/1.0/GLSL.std.450.html#Exp) and [Exp2](https://registry.khronos.org/SPIR-V/specs/1.0/GLSL.std.450.html#Exp2). Why `rcp` as instruction expansion? This one is a bit of the odd man out and might have to move to `CGBuiltin.cpp` when we better understand the SPIR-V requirements. However, I included it because it seems like [fast math mode has an AllowRecip flag](https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#_fp_fast_math_mode) which lets you compute the reciprocal without performing the division. We don't have that in DXIL, so I thought to include it.
18 lines
732 B
LLVM
18 lines
732 B
LLVM
; RUN: opt -S -dxil-intrinsic-expansion < %s | FileCheck %s
|
|
|
|
; Make sure llvm.exp on <4 x float> is expanded into an fmul by log2(e) followed by a call to llvm.exp2.
|
|
|
|
; CHECK-LABEL: exp_float4
|
|
; CHECK: fmul <4 x float> <float 0x3FF7154760000000, float 0x3FF7154760000000, float 0x3FF7154760000000, float 0x3FF7154760000000>, %{{.*}}
|
|
; CHECK: call <4 x float> @llvm.exp2.v4f32(<4 x float> %{{.*}})
|
|
; Test input for the exp expansion: takes a <4 x float> argument, passes it to
; llvm.exp.v4f32, and returns the result. The CHECK lines in this file expect
; the call to be rewritten as an fmul followed by llvm.exp2.v4f32.
define noundef <4 x float> @exp_float4(<4 x float> noundef %p0) {
|
|
entry:
|
|
; Stack slot for the incoming argument.
%p0.addr = alloca <4 x float>, align 16
|
|
; Spill the argument to the slot and reload it below.
; NOTE(review): this looks like unoptimized front-end output - confirm.
store <4 x float> %p0, ptr %p0.addr, align 16
|
|
%0 = load <4 x float>, ptr %p0.addr, align 16
|
|
; The intrinsic call that the pass under test is expected to expand.
%elt.exp = call <4 x float> @llvm.exp.v4f32(<4 x float> %0)
|
|
ret <4 x float> %elt.exp
|
|
}
|
|
|
|
; Declaration of the <4 x float> overload of the llvm.exp intrinsic called by @exp_float4.
declare <4 x float> @llvm.exp.v4f32(<4 x float>)
|