[mlir][ROCDL] Wrap asyncmark and wait.asyncmark intrinsics (#181054)
(See the op-level and LLVM documentation for details, which are not repeated here. In short, these are the general operations for compiler-operated tracking of asynchronous operations, which frees programmers from having to deal with all the different counters, allows certain optimizations, and doesn't require precise alias analysis.) ----- Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
0377416e36
commit
2ada4b8fb0
@ -590,6 +590,61 @@ def ROCDL_WaitTensorcntOp: ROCDL_ConcreteNonMemIntrOp<"s.wait.tensorcnt", [], 0,
|
||||
let assemblyFormat = "$count attr-dict";
|
||||
}
|
||||
|
||||
// `rocdl.asyncmark`: intrinsic-wrapper op declared with an empty argument list
// and an empty result list; its custom assembly is just the op name plus an
// optional attribute dictionary.
// NOTE(review): the trailing `0` template argument is presumably the number of
// results expected by ROCDL_ConcreteNonMemIntrOp - confirm against that class.
def ROCDL_AsyncmarkOp : ROCDL_ConcreteNonMemIntrOp<"asyncmark", [], 0>,
|
||||
Arguments<(ins)> {
|
||||
let summary = "Mark the end of a group of asynchronous operations";
|
||||
let description = [{
|
||||
This operation, in conjunction with `rocdl.wait.asyncmark`, forms the
|
||||
compiler-provided framework for tracking explicitly asynchronous
|
||||
memory operations, such as copies to LDS that use async intrinsics
|
||||
and gfx1250's tensor loads.
|
||||
|
||||
Details of its behavior can be found in
|
||||
[the LLVM documentation on async tracking](/llvm/docs/AMDGPUAsyncOperations.rst).
|
||||
|
||||
See `rocdl.wait.asyncmark`'s documentation for a usage example.
|
||||
|
||||
Available on gfx9 and later.
|
||||
}];
|
||||
let results = (outs);
|
||||
let assemblyFormat = "attr-dict";
|
||||
}
|
||||
|
||||
// `rocdl.wait.asyncmark`: intrinsic-wrapper op taking a single 16-bit integer
// attribute, `count`, and producing no results; its custom assembly prints the
// count followed by an optional attribute dictionary.
// NOTE(review): the `[0], ["count"]` template arguments presumably tell
// ROCDL_ConcreteNonMemIntrOp that intrinsic argument 0 is an immediate taken
// from the `count` attribute - confirm against that class.
def ROCDL_WaitAsyncmarkOp: ROCDL_ConcreteNonMemIntrOp<"wait.asyncmark", [], 0, [0], ["count"]>,
|
||||
Arguments<(ins I16Attr:$count)> {
|
||||
let summary = "Wait until N or fewer async operation groups are unexecuted";
|
||||
let description = [{
|
||||
This operation, along with `rocdl.asyncmark`, forms the compiler-provided
|
||||
framework for explicitly tracking asynchronous operations.
|
||||
|
||||
At the point where a `rocdl.wait.asyncmark` operation is executed, all async operations
|
||||
that belong to any async group (as established by `rocdl.asyncmark` in program order),
|
||||
other than the `count` most recently established groups, will have finished executing.
|
||||
|
||||
For more detail, including on how this mechanism composes with function calls,
|
||||
see [the LLVM documentation on async tracking](/llvm/docs/AMDGPUAsyncOperations.rst).
|
||||
|
||||
Available on gfx9 and later.
|
||||
|
||||
Example:
|
||||
```mlir
|
||||
rocdl.tensor.load.to.lds ...
|
||||
rocdl.global.async.load.to.lds ...
|
||||
|
||||
rocdl.asyncmark
|
||||
|
||||
rocdl.tensor.load.to.lds ...
|
||||
rocdl.global.async.load.to.lds ...
|
||||
|
||||
rocdl.asyncmark
|
||||
|
||||
rocdl.wait.asyncmark 1 // First group of loads completes after this
|
||||
```
|
||||
}];
|
||||
let results = (outs);
|
||||
let assemblyFormat = "$count attr-dict";
|
||||
}
|
||||
|
||||
def ROCDL_SetPrioOp : ROCDL_ConcreteNonMemIntrOp<"s.setprio", [], 0, [0], ["priority"]>,
|
||||
Arguments<(ins I16Attr:$priority)> {
|
||||
let assemblyFormat = "$priority attr-dict";
|
||||
|
||||
@ -1256,6 +1256,20 @@ llvm.func @rocdl.s.wait.tensorcnt() {
|
||||
llvm.return
|
||||
}
|
||||
|
||||
// Checks that the operand-less op appears in the tool output in its custom
// assembly form (op name only).
llvm.func @rocdl.asyncmark() {
|
||||
// CHECK-LABEL: rocdl.asyncmark
|
||||
// CHECK: rocdl.asyncmark
|
||||
rocdl.asyncmark
|
||||
llvm.return
|
||||
}
|
||||
|
||||
// Checks that the op appears in the tool output with its count attribute
// printed inline (here a count of 0).
llvm.func @rocdl.wait.asyncmark() {
|
||||
// CHECK-LABEL: rocdl.wait.asyncmark
|
||||
// CHECK: rocdl.wait.asyncmark 0
|
||||
rocdl.wait.asyncmark 0
|
||||
llvm.return
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
llvm.func @rocdl.readfirstlane(%src : f32) -> f32 {
|
||||
|
||||
@ -391,6 +391,20 @@ llvm.func @rocdl.s.wait.tensorcnt() {
|
||||
llvm.return
|
||||
}
|
||||
|
||||
// Checks that the op is emitted as an argument-less void call to the
// `llvm.amdgcn.asyncmark` intrinsic, directly after the function label.
llvm.func @rocdl.asyncmark() {
|
||||
// CHECK-LABEL: rocdl.asyncmark
|
||||
// CHECK-NEXT: call void @llvm.amdgcn.asyncmark()
|
||||
rocdl.asyncmark
|
||||
llvm.return
|
||||
}
|
||||
|
||||
// Checks that the op is emitted as a void call to the
// `llvm.amdgcn.wait.asyncmark` intrinsic, with the count attribute passed as
// an i16 constant argument.
llvm.func @rocdl.wait.asyncmark() {
|
||||
// CHECK-LABEL: rocdl.wait.asyncmark
|
||||
// CHECK-NEXT: call void @llvm.amdgcn.wait.asyncmark(i16 0)
|
||||
rocdl.wait.asyncmark 0
|
||||
llvm.return
|
||||
}
|
||||
|
||||
llvm.func @rocdl.setprio() {
|
||||
// CHECK: call void @llvm.amdgcn.s.setprio(i16 0)
|
||||
rocdl.s.setprio 0
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user