Akshay Deodhar 9ccf038615
[NVPTX] Support for fence.acquire and fence.release (#124865)
Adds codegen support for fence.acquire and fence.release, a script and
generated tests for all possible legal fences, and cleans up some
tablegen rules.
2025-02-04 14:20:00 -08:00

57 lines
1.8 KiB
Python

# For manual use only; not run as part of the lit tests. Generates the following tests:
# fence-nocluster.ll, fence-cluster.ll
from string import Template
from itertools import product
# LLVM IR template for a single test function containing exactly one fence.
# Placeholders:
#   ${ordering}   - fence ordering; used in the function name and on the fence
#   ${ptx_scope}  - scope name that only appears in the function name
#   ${llvm_scope} - syncscope string placed on the fence instruction
# The text starts and ends with a newline, so each rendered function is
# separated from its neighbours by blank lines.
fence_func = Template(
    "\n"
    "define void @fence_${ordering}_${ptx_scope}() {\n"
    'fence syncscope("${llvm_scope}") ${ordering}\n'
    "ret void\n"
    "}\n"
)
# Pair of lit RUN lines emitted once per (sm, ptx) target.
#   line 1: pipes llc output into FileCheck using the check prefix SM${sm}
#   line 2: guarded by `%if ptxas`, pipes llc output into %ptxas-verify
# There is no trailing newline; the caller supplies it when printing.
# NOTE(review): line 1 uses -march=nvptx64 while line 2 uses -march=nvptx;
# confirm that the difference is intentional.
run_statement = Template(
    "\n".join(
        (
            "; RUN: llc < %s -march=nvptx64 -mcpu=sm_${sm} -mattr=+ptx${ptx} | FileCheck %s --check-prefix=SM${sm}",
            "; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_${sm} -mattr=+ptx${ptx} | %ptxas-verify %}",
        )
    )
)
# Targets to generate RUN lines for, as (sm, ptx) pairs: the first number is
# substituted into -mcpu=sm_<sm>, the second into -mattr=+ptx<ptx>.
TESTS = [
    (30, 50),
    (70, 60),
    (90, 87),
]

# LLVM syncscope name -> PTX scope name used in the generated function names.
# The empty string is the key that maps to "sys".
SCOPE_LLVM_TO_PTX = {
    "": "sys",
    "block": "cta",
    "cluster": "cluster",
    "device": "gpu",
}

# Every LLVM scope except "cluster", in the same order as the mapping above.
LLVM_SCOPES_NO_CLUSTER = [name for name in SCOPE_LLVM_TO_PTX if name != "cluster"]

# Fence orderings to generate a test function for.
ORDERINGS = [
    "acquire",
    "release",
    "acq_rel",
    "seq_cst",
]
if __name__ == "__main__":
    # Writes two .ll files into the current working directory, overwriting
    # any existing files with the same names.

    # Non-cluster scopes are supported on SM30, SM70 and SM90: emit the RUN
    # lines for every target in TESTS first, then one function per
    # (ordering, scope) combination.
    with open("fence-nocluster.ll", "w") as out:
        for arch, isa in TESTS:
            out.write(run_statement.substitute(sm=arch, ptx=isa) + "\n")
        no_cluster_funcs = [
            fence_func.substitute(
                ordering=order,
                llvm_scope=scope,
                ptx_scope=SCOPE_LLVM_TO_PTX[scope],
            )
            for order, scope in product(ORDERINGS, LLVM_SCOPES_NO_CLUSTER)
        ]
        for text in no_cluster_funcs:
            out.write(text + "\n")

    # The cluster scope is only supported on SM90, so this file carries a
    # single pair of RUN lines followed by one function per ordering.
    with open("fence-cluster.ll", "w") as out:
        out.write(run_statement.substitute(sm=90, ptx=87) + "\n")
        cluster_ptx = SCOPE_LLVM_TO_PTX["cluster"]
        for order in ORDERINGS:
            rendered = fence_func.substitute(
                ordering=order,
                llvm_scope="cluster",
                ptx_scope=cluster_ptx,
            )
            out.write(rendered + "\n")