Krzysztof Drewniak 704808c275
[mlir][affine] Add static basis support to affine.delinearize (#113846)
This commit makes `affine.delinearize_index` join other indexing operators,
like `vector.extract`, which store a mixed static/dynamic set of sizes,
offsets, or such. In this case, the `basis` (the set of values that will
be used to decompose the linear index) is now stored as an array of
index attributes where the basis is statically known, eliminating the
need to create constants.

This commit also adds copies of the delinearize utility in the affine
dialect to allow it to take an array of `OpFoldResult`s and extends the
DynamicIndexList parser/printer to allow specifying the delimiters in
tablegen (this is needed to avoid breaking existing syntax).

---------

Co-authored-by: Jakub Kuderski <kubakuderski@gmail.com>
2024-11-04 14:59:13 -06:00

266 lines
9.9 KiB
Python

# RUN: %PYTHON %s | FileCheck %s
from mlir.ir import *
from mlir.dialects import func
from mlir.dialects import arith
from mlir.dialects import memref
from mlir.dialects import affine
import mlir.extras.types as T
def constructAndPrintInModule(f):
    """Run ``f`` inside a freshly created module and print the result.

    A banner line containing ``f.__name__`` is printed first. ``f`` is then
    called with an active context, an unknown default location, and the
    insertion point set to the body of a new module; afterwards the module
    is printed.

    Intended for use as a decorator: ``f`` is executed immediately when it is
    decorated and is returned unchanged.
    """
    print("\nTEST:", f.__name__)
    with Context():
        with Location.unknown():
            module = Module.create()
            body_ip = InsertionPoint(module.body)
            with body_ip:
                f()
            print(module)
    return f
# CHECK-LABEL: TEST: testAffineStoreOp
@constructAndPrintInModule
def testAffineStoreOp():
    """Build a function that stores an f32 constant through an affine map.

    The generated function takes one index argument, allocates a 12x12 f32
    memref, stores a constant into it using a map with one dimension and one
    symbol, and returns the memref.
    """
    f32 = F32Type.get()
    index_type = IndexType.get()
    memref_type_out = MemRefType.get([12, 12], f32)

    # CHECK: func.func @affine_store_test(%[[ARG0:.*]]: index) -> memref<12x12xf32> {
    @func.FuncOp.from_py_func(index_type)
    def affine_store_test(arg0):
        # CHECK: %[[O_VAR:.*]] = memref.alloc() : memref<12x12xf32>
        mem = memref.AllocOp(memref_type_out, [], []).result

        # One dimension and one symbol; the store below supplies arg0 for both.
        d0 = AffineDimExpr.get(0)
        s0 = AffineSymbolExpr.get(0)
        store_map = AffineMap.get(1, 1, [s0 * 3, d0 + s0 + 1])

        # CHECK: %[[A1:.*]] = arith.constant 2.100000e+00 : f32
        a1 = arith.ConstantOp(f32, 2.1)

        # The buffer is matched through the captured O_VAR instead of a
        # hard-coded SSA name, so the pattern does not depend on how the
        # printer happens to name the allocation.
        # CHECK: affine.store %[[A1]], %[[O_VAR]][symbol(%[[ARG0]]) * 3, %[[ARG0]] + symbol(%[[ARG0]]) + 1] : memref<12x12xf32>
        affine.AffineStoreOp(a1, mem, indices=[arg0, arg0], map=store_map)

        return mem
# CHECK-LABEL: TEST: testAffineDelinearizeInfer
@constructAndPrintInModule
def testAffineDelinearizeInfer():
    """Create a delinearize op from a static basis without passing result types.

    The op is built from an index constant, an empty list, and the static
    list ``[2, 3]``; the expected output has two index results.
    """
    # CHECK: %[[C1:.*]] = arith.constant 1 : index
    c1 = arith.ConstantOp(T.index(), 1)
    # C1 is used here (not captured again) so the operand is verified to be
    # the constant created above.
    # CHECK: %{{.*}}:2 = affine.delinearize_index %[[C1]] into (2, 3) : index, index
    affine.AffineDelinearizeIndexOp(c1, [], [2, 3])
# CHECK-LABEL: TEST: testAffineLoadOp
@constructAndPrintInModule
def testAffineLoadOp():
    """Build a function that loads an f32 value through an affine map.

    The generated function takes a 10x10 f32 memref and an index, loads one
    element using a map with one dimension and one symbol, and returns it.
    """
    elem_type = F32Type.get()
    idx_type = IndexType.get()
    memref_type_in = MemRefType.get([10, 10], elem_type)

    # CHECK: func.func @affine_load_test(%[[I_VAR:.*]]: memref<10x10xf32>, %[[ARG0:.*]]: index) -> f32 {
    @func.FuncOp.from_py_func(memref_type_in, idx_type)
    def affine_load_test(I, arg0):
        dim0 = AffineDimExpr.get(0)
        sym0 = AffineSymbolExpr.get(0)
        first_expr = sym0 * 3
        second_expr = dim0 + sym0 + 1
        access_map = AffineMap.get(1, 1, [first_expr, second_expr])

        # The same index argument is supplied for both the dimension and the symbol.
        # CHECK: {{.*}} = affine.load %[[I_VAR]][symbol(%[[ARG0]]) * 3, %[[ARG0]] + symbol(%[[ARG0]]) + 1] : memref<10x10xf32>
        return affine.AffineLoadOp(
            elem_type, I, indices=[arg0, arg0], map=access_map
        )
# CHECK-LABEL: TEST: testAffineForOp
@constructAndPrintInModule
def testAffineForOp():
    """Build an affine loop with map-based bounds, a step, and one iter_arg.

    The generated function takes a 1024-element f32 memref. The loop's lower
    and upper bounds are affine maps with explicit operands, its step is 2,
    and it carries one f32 value that is updated by adding the element loaded
    at the induction variable.
    """
    f32 = F32Type.get()
    index_type = IndexType.get()
    memref_type = MemRefType.get([1024], f32)

    # CHECK: #[[MAP0:.*]] = affine_map<(d0)[s0] -> (0, d0 + s0)>
    # CHECK: #[[MAP1:.*]] = affine_map<(d0, d1) -> (d0 - 2, d1 * 32)>
    # CHECK: func.func @affine_for_op_test(%[[BUFFER:.*]]: memref<1024xf32>) {
    @func.FuncOp.from_py_func(memref_type)
    def affine_for_op_test(buffer):
        def index_constant(value):
            """Create an index-typed constant op holding ``value``."""
            return arith.ConstantOp(index_type, value)

        # CHECK: %[[C1:.*]] = arith.constant 1 : index
        c1 = index_constant(1)
        # CHECK: %[[C2:.*]] = arith.constant 2 : index
        c2 = index_constant(2)
        # CHECK: %[[C3:.*]] = arith.constant 3 : index
        c3 = index_constant(3)
        # CHECK: %[[C9:.*]] = arith.constant 9 : index
        c9 = index_constant(9)
        # CHECK: %[[AC0:.*]] = arith.constant 0.000000e+00 : f32
        initial_sum = arith.ConstantOp(f32, 0.0)

        # Affine expressions and maps used for the loop bounds.
        zero = AffineConstantExpr.get(0)
        d0, d1 = AffineDimExpr.get(0), AffineDimExpr.get(1)
        s0 = AffineSymbolExpr.get(0)
        lower_map = AffineMap.get(1, 1, [zero, d0 + s0])
        upper_map = AffineMap.get(2, 0, [d0 - 2, 32 * d1])

        # CHECK: %0 = affine.for %[[INDVAR:.*]] = max #[[MAP0]](%[[C2]])[%[[C3]]] to min #[[MAP1]](%[[C9]], %[[C1]]) step 2 iter_args(%[[SUM0:.*]] = %[[AC0]]) -> (f32) {
        loop = affine.AffineForOp(
            lower_map,
            upper_map,
            2,
            iter_args=[initial_sum],
            lower_bound_operands=[c2, c3],
            upper_bound_operands=[c9, c1],
        )

        with InsertionPoint(loop.body):
            # CHECK: %[[TMP:.*]] = memref.load %[[BUFFER]][%[[INDVAR]]] : memref<1024xf32>
            loaded = memref.LoadOp(buffer, [loop.induction_variable])
            carried = loop.inner_iter_args[0]
            affine.AffineYieldOp([arith.AddFOp(carried, loaded)])
# CHECK-LABEL: TEST: testAffineForOpErrors
@constructAndPrintInModule
def testAffineForOpErrors():
    """Verify that invalid lower-bound arguments to AffineForOp are rejected.

    Every case must raise ``ValueError`` with the exact expected message. A
    call that unexpectedly succeeds is reported as a failure instead of being
    silently accepted.
    """
    c1 = arith.ConstantOp(T.index(), 1)
    c2 = arith.ConstantOp(T.index(), 2)
    c3 = arith.ConstantOp(T.index(), 3)

    def expect_lower_bound_error(lower_bound, lower_bound_operands, message):
        """Assert that building a loop with these lower-bound arguments fails.

        The upper bound is always ``c2`` with no operands and the step is 1.

        Raises:
            AssertionError: if no ``ValueError`` is raised, or if its first
                argument differs from ``message``.
        """
        try:
            affine.AffineForOp(
                lower_bound,
                c2,
                1,
                lower_bound_operands=lower_bound_operands,
                upper_bound_operands=[],
            )
        except ValueError as e:
            assert e.args[0] == message
        else:
            raise AssertionError(f"expected ValueError: {message}")

    # A concrete value as lower bound together with lower bound operands.
    expect_lower_bound_error(
        c1,
        [c3],
        "Either a concrete lower bound or an AffineMap in combination with lower bound operands, but not both, is supported.",
    )

    # A constant map combined with two operands.
    expect_lower_bound_error(
        AffineMap.get_constant(1),
        [c3, c3],
        "Wrong number of lower bound operands passed to AffineForOp; Expected 0, got 2.",
    )

    # An op with two results used as the lower bound. The op is created
    # outside the guarded call so only AffineForOp is expected to raise.
    two_indices = affine.AffineDelinearizeIndexOp(c1, [], [1, 1])
    expect_lower_bound_error(
        two_indices,
        [],
        "Only a single concrete value is supported for lower bound.",
    )

    # A float is not an accepted lower bound type.
    expect_lower_bound_error(
        1.0,
        [],
        "lower bound must be int | ResultValueT | AffineMap.",
    )
# CHECK-LABEL: TEST: testForSugar
@constructAndPrintInModule
def testForSugar():
    """Exercise the ``affine.for_`` loop helper with different bound kinds.

    Five functions are generated, each taking two index arguments and a
    10-element index memref. They cover value/value, value/integer,
    integer/value and integer/integer bounds, plus an integer-bounded loop
    that carries the memref through ``iter_args``.

    The label directive placed above the decorator keeps the patterns in this
    test from being matched against the output of the preceding test.
    """
    memref_t = T.memref(10, T.index())
    # Local alias so the loops below read like ordinary Python range loops;
    # it shadows the builtin only inside this test.
    range = affine.for_

    # CHECK: #[[$ATTR_2:.+]] = affine_map<(d0) -> (d0)>

    # CHECK-LABEL: func.func @range_loop_1(
    # CHECK-SAME: %[[VAL_0:.*]]: index, %[[VAL_1:.*]]: index, %[[VAL_2:.*]]: memref<10xindex>) {
    # CHECK: affine.for %[[VAL_3:.*]] = #[[$ATTR_2]](%[[VAL_0]]) to #[[$ATTR_2]](%[[VAL_1]]) {
    # CHECK: %[[VAL_4:.*]] = arith.addi %[[VAL_3]], %[[VAL_3]] : index
    # CHECK: memref.store %[[VAL_4]], %[[VAL_2]]{{\[}}%[[VAL_3]]] : memref<10xindex>
    # CHECK: }
    # CHECK: return
    # CHECK: }
    @func.FuncOp.from_py_func(T.index(), T.index(), memref_t)
    def range_loop_1(lb, ub, memref_v):
        for i in range(lb, ub, step=1):
            add = arith.addi(i, i)
            memref.store(add, memref_v, [i])

            affine.yield_([])

    # CHECK-LABEL: func.func @range_loop_2(
    # CHECK-SAME: %[[VAL_0:.*]]: index, %[[VAL_1:.*]]: index, %[[VAL_2:.*]]: memref<10xindex>) {
    # CHECK: affine.for %[[VAL_3:.*]] = #[[$ATTR_2]](%[[VAL_0]]) to 10 {
    # CHECK: %[[VAL_4:.*]] = arith.addi %[[VAL_3]], %[[VAL_3]] : index
    # CHECK: memref.store %[[VAL_4]], %[[VAL_2]]{{\[}}%[[VAL_3]]] : memref<10xindex>
    # CHECK: }
    # CHECK: return
    # CHECK: }
    @func.FuncOp.from_py_func(T.index(), T.index(), memref_t)
    def range_loop_2(lb, ub, memref_v):
        for i in range(lb, 10, step=1):
            add = arith.addi(i, i)
            memref.store(add, memref_v, [i])

            affine.yield_([])

    # CHECK-LABEL: func.func @range_loop_3(
    # CHECK-SAME: %[[VAL_0:.*]]: index, %[[VAL_1:.*]]: index, %[[VAL_2:.*]]: memref<10xindex>) {
    # CHECK: affine.for %[[VAL_3:.*]] = 0 to #[[$ATTR_2]](%[[VAL_1]]) {
    # CHECK: %[[VAL_4:.*]] = arith.addi %[[VAL_3]], %[[VAL_3]] : index
    # CHECK: memref.store %[[VAL_4]], %[[VAL_2]]{{\[}}%[[VAL_3]]] : memref<10xindex>
    # CHECK: }
    # CHECK: return
    # CHECK: }
    @func.FuncOp.from_py_func(T.index(), T.index(), memref_t)
    def range_loop_3(lb, ub, memref_v):
        for i in range(0, ub, step=1):
            add = arith.addi(i, i)
            memref.store(add, memref_v, [i])

            affine.yield_([])

    # CHECK-LABEL: func.func @range_loop_4(
    # CHECK-SAME: %[[VAL_0:.*]]: index, %[[VAL_1:.*]]: index, %[[VAL_2:.*]]: memref<10xindex>) {
    # CHECK: affine.for %[[VAL_3:.*]] = 0 to 10 {
    # CHECK: %[[VAL_4:.*]] = arith.addi %[[VAL_3]], %[[VAL_3]] : index
    # CHECK: memref.store %[[VAL_4]], %[[VAL_2]]{{\[}}%[[VAL_3]]] : memref<10xindex>
    # CHECK: }
    # CHECK: return
    # CHECK: }
    @func.FuncOp.from_py_func(T.index(), T.index(), memref_t)
    def range_loop_4(lb, ub, memref_v):
        for i in range(0, 10, step=1):
            add = arith.addi(i, i)
            memref.store(add, memref_v, [i])

            affine.yield_([])

    # CHECK-LABEL: func.func @range_loop_8(
    # CHECK-SAME: %[[VAL_0:.*]]: index, %[[VAL_1:.*]]: index, %[[VAL_2:.*]]: memref<10xindex>) {
    # CHECK: %[[VAL_3:.*]] = affine.for %[[VAL_4:.*]] = 0 to 10 iter_args(%[[VAL_5:.*]] = %[[VAL_2]]) -> (memref<10xindex>) {
    # CHECK: %[[VAL_6:.*]] = arith.addi %[[VAL_4]], %[[VAL_4]] : index
    # CHECK: memref.store %[[VAL_6]], %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref<10xindex>
    # CHECK: affine.yield %[[VAL_5]] : memref<10xindex>
    # CHECK: }
    # CHECK: return
    # CHECK: }
    @func.FuncOp.from_py_func(T.index(), T.index(), memref_t)
    def range_loop_8(lb, ub, memref_v):
        # With iter_args the helper yields the induction variable together
        # with the loop-carried value.
        for i, it in range(0, 10, iter_args=[memref_v]):
            add = arith.addi(i, i)
            memref.store(add, it, [i])

            affine.yield_([it])