
Add a prologue to the kernel entry to handle cases where code designed for kernarg preloading is executed on hardware equipped with incompatible firmware. If the hardware has compatible firmware, the 256 bytes at the start of the kernel entry are skipped; this skipping is done automatically by hardware that supports the feature. A pass is added that is intended to run at the very end of the pipeline, so that no optimization can assume the prologue is a real predecessor block of the actual code start. In reality, the function has two possible entry points: (1) the optimized path that supports kernarg preloading, which begins at an offset of 256 bytes; and (2) the backward-compatible entry point, which starts at offset 0.
26 lines
845 B
C++
26 lines
845 B
C++
//===- AMDGPUPreloadKernargProlog.h ----------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
|
|
|
|
#ifndef LLVM_LIB_TARGET_AMDGPU_PRELOAD_KERNARG_PROLOG_H
|
|
#define LLVM_LIB_TARGET_AMDGPU_PRELOAD_KERNARG_PROLOG_H
|
|
|
|
#include "llvm/CodeGen/MachinePassManager.h"
|
|
|
|
namespace llvm {
|
|
|
|
class AMDGPUPreloadKernArgPrologPass
|
|
: public PassInfoMixin<AMDGPUPreloadKernArgPrologPass> {
|
|
public:
|
|
PreservedAnalyses run(MachineFunction &MF,
|
|
MachineFunctionAnalysisManager &AM);
|
|
};
|
|
|
|
} // end namespace llvm
|
|
|
|
#endif // LLVM_LIB_TARGET_AMDGPU_PRELOAD_KERNARG_PROLOG_H
|