This PR adds support for the `QuadReadAcrossY` intrinsic in HLSL, with codegen for both the DirectX and SPIR-V backends. Resolves https://github.com/llvm/llvm-project/issues/99176.
- [x] Implement the `QuadReadAcrossY` clang builtin
- [x] Link the `QuadReadAcrossY` clang builtin with `hlsl_intrinsics.h`
- [x] Add sema checks for `QuadReadAcrossY` to `CheckHLSLBuiltinFunctionCall` in `SemaChecking.cpp`
- [x] Add codegen for `QuadReadAcrossY` to `EmitHLSLBuiltinExpr` in `CGBuiltin.cpp`
- [x] Add codegen tests to `clang/test/CodeGenHLSL/builtins/QuadReadAcrossY.hlsl`
- [x] Add sema tests to `clang/test/SemaHLSL/BuiltIns/QuadReadAcrossY-errors.hlsl`
- [x] Create the `int_dx_QuadReadAcrossY` intrinsic in `IntrinsicsDirectX.td`
- [x] Create the `DXILOpMapping` of `int_dx_QuadReadAcrossY` to `123` in `DXIL.td`
- [x] Create the `QuadReadAcrossY.ll` and `QuadReadAcrossY_errors.ll` tests in `llvm/test/CodeGen/DirectX/`
- [x] Create the `int_spv_QuadReadAcrossY` intrinsic in `IntrinsicsSPIRV.td`
- [x] In `SPIRVInstructionSelector.cpp`, create the `QuadReadAcrossY` lowering and map it to `int_spv_QuadReadAcrossY` in `SPIRVInstructionSelector::selectIntrinsic`
- [x] Create the SPIR-V backend test case in `llvm/test/CodeGen/SPIRV/hlsl-intrinsics/QuadReadAcrossY.ll`
93 lines
2.8 KiB
C++
93 lines
2.8 KiB
C++
//===- DirectXTargetTransformInfo.cpp - DirectX TTI ---------------*- C++
|
|
//-*-===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
///
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "DirectXTargetTransformInfo.h"
|
|
#include "llvm/IR/Intrinsics.h"
|
|
#include "llvm/IR/IntrinsicsDirectX.h"
|
|
|
|
using namespace llvm;
|
|
|
|
/// Reports whether operand \p ScalarOpdIdx of the DirectX intrinsic \p ID is
/// a scalar operand.
///
/// Only one (intrinsic, operand) pair is recognised here:
/// `dx_wave_readlane` with operand index 1.
/// NOTE(review): operand 1 is presumably the lane index, which would stay
/// scalar even when the value operand is a vector; confirm against the
/// intrinsic definition.
///
/// \returns true for `dx_wave_readlane` at operand index 1, false for every
/// other combination.
bool DirectXTTIImpl::isTargetIntrinsicWithScalarOpAtArg(
    Intrinsic::ID ID, unsigned ScalarOpdIdx) const {
  return ID == Intrinsic::dx_wave_readlane && ScalarOpdIdx == 1;
}
|
|
|
|
/// Reports whether the DirectX intrinsic \p ID is overloaded on the type found
/// at position \p OpdIdx.
///
/// The intrinsics listed in the switch are overloaded on operand 0. Every
/// other intrinsic is treated as overloaded on its return type, which is
/// represented by the index -1.
///
/// \returns true when \p OpdIdx equals the overload position selected for
/// \p ID, false otherwise.
bool DirectXTTIImpl::isTargetIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID,
                                                            int OpdIdx) const {
  // All DX intrinsics are overloaded on return type (index -1) unless
  // specified otherwise.
  int OverloadIdx = -1;

  switch (ID) {
  case Intrinsic::dx_asdouble:
  case Intrinsic::dx_firstbitlow:
  case Intrinsic::dx_firstbitshigh:
  case Intrinsic::dx_firstbituhigh:
  case Intrinsic::dx_isinf:
  case Intrinsic::dx_isnan:
  case Intrinsic::dx_legacyf16tof32:
  case Intrinsic::dx_legacyf32tof16:
  case Intrinsic::dx_wave_all_equal:
    // These are overloaded on their first operand instead.
    OverloadIdx = 0;
    break;
  default:
    break;
  }

  return OpdIdx == OverloadIdx;
}
|
|
|
|
/// Reports whether the DirectX intrinsic \p ID is on this target's list of
/// trivially scalarizable intrinsics.
///
/// \returns true for every intrinsic enumerated in the switch below, false for
/// any other intrinsic ID.
bool DirectXTTIImpl::isTargetIntrinsicTriviallyScalarizable(
    Intrinsic::ID ID) const {
  bool IsScalarizable = false;

  switch (ID) {
  // Math, bit-scan, classification and conversion intrinsics.
  case Intrinsic::dx_asdouble:
  case Intrinsic::dx_firstbitlow:
  case Intrinsic::dx_firstbitshigh:
  case Intrinsic::dx_firstbituhigh:
  case Intrinsic::dx_frac:
  case Intrinsic::dx_isinf:
  case Intrinsic::dx_isnan:
  case Intrinsic::dx_legacyf16tof32:
  case Intrinsic::dx_legacyf32tof16:
  case Intrinsic::dx_rsqrt:
  case Intrinsic::dx_saturate:
  case Intrinsic::dx_splitdouble:
  // Wave intrinsics.
  case Intrinsic::dx_wave_all_equal:
  case Intrinsic::dx_wave_readlane:
  case Intrinsic::dx_wave_reduce_or:
  case Intrinsic::dx_wave_reduce_xor:
  case Intrinsic::dx_wave_reduce_and:
  case Intrinsic::dx_wave_reduce_max:
  case Intrinsic::dx_wave_reduce_min:
  case Intrinsic::dx_wave_reduce_sum:
  case Intrinsic::dx_wave_product:
  case Intrinsic::dx_wave_prefix_sum:
  case Intrinsic::dx_wave_prefix_product:
  case Intrinsic::dx_wave_reduce_umax:
  case Intrinsic::dx_wave_reduce_umin:
  case Intrinsic::dx_wave_reduce_usum:
  case Intrinsic::dx_wave_uproduct:
  case Intrinsic::dx_wave_prefix_usum:
  case Intrinsic::dx_wave_prefix_uproduct:
  // Quad intrinsics.
  case Intrinsic::dx_quad_read_across_x:
  case Intrinsic::dx_quad_read_across_y:
  // imad / umad.
  case Intrinsic::dx_imad:
  case Intrinsic::dx_umad:
  // ddx / ddy intrinsics, coarse and fine.
  case Intrinsic::dx_ddx_coarse:
  case Intrinsic::dx_ddy_coarse:
  case Intrinsic::dx_ddx_fine:
  case Intrinsic::dx_ddy_fine:
    IsScalarizable = true;
    break;
  default:
    break;
  }

  return IsScalarizable;
}
|