llvm-project/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.cpp
Andrzej Warzynski 447bb5bee4 [mlir][ArmSME] Introduce new lowering layer (Vector -> ArmSME)
At the moment, the lowering from the Vector dialect to SME looks like
this:

  * Vector --> SME LLVM IR intrinsics

This patch introduces a new lowering layer between the Vector dialect
and the Arm SME extension:

  * Vector --> ArmSME dialect (custom Ops) --> SME LLVM IR intrinsics.

This is motivated by 2 considerations:
1. Storing `ZA` to memory (e.g. `vector.transfer_write`) requires an
   `scf.for` loop over all rows of `ZA`. Similar logic will apply to
   "load to ZA from memory". This is a rather complex transformation and
   a custom Op seems justified.
2. As discussed in [1], we need to prevent the LLVM type converter from
   having to convert types unsupported in LLVM, e.g.
   `vector<[16]x[16]xi8>`. A dedicated abstraction layer with custom Ops
   opens a path to some fine tuning (e.g. custom type converters) that
   will allow us to avoid this.

To facilitate this change, two new custom SME Ops are introduced:

  * `TileStoreOp`, and
  * `ZeroOp`.

Note that no new functionality is added - these Ops merely model what's
already supported. In particular, the following tile size is assumed
(dimension and element size are fixed):

  * `vector<[16]x[16]xi8>`

The new lowering layer is introduced via a conversion pass between the
Vector and the SME dialects. You can use the `-convert-vector-to-sme`
flag to run it. The following function:
```
func.func @example(%arg0 : memref<?x?xi8>) {
  // (...)
  %cst = arith.constant dense<0> : vector<[16]x[16]xi8>
  vector.transfer_write %cst, %arg0 : vector<[16]x[16]xi8>, memref<?x?xi8>
  return
}
```
would be lowered to:
```
  func.func @example(%arg0: memref<?x?xi8>) {
    // (...)
    %0 = arm_sme.zero : vector<[16]x[16]xi8>
    arm_sme.tile_store %arg0[%c0, %c0], %0 : memref<?x?xi8>, vector<[16]x[16]xi8>
    return
  }
```

Later, a mechanism will be introduced to guarantee that `arm_sme.zero`
and `arm_sme.tile_store` operate on the same virtual tile. For `i8`
elements this is not required as there is only one tile.

In order to lower the above output to LLVM, use
  * `-convert-vector-to-llvm="enable-arm-sme"`.

[1] https://github.com/openxla/iree/issues/14294

Reviewed By: WanderAway

Differential Revision: https://reviews.llvm.org/D154867
2023-07-18 08:04:59 +00:00

126 lines
4.9 KiB
C++

//===- ConvertVectorToLLVMPass.cpp - Vector to LLVM conversion pass -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "mlir/Conversion/VectorToLLVM/ConvertVectorToLLVM.h"
#include "mlir/Conversion/LLVMCommon/ConversionTarget.h"
#include "mlir/Conversion/LLVMCommon/TypeConverter.h"
#include "mlir/Dialect/AMX/AMXDialect.h"
#include "mlir/Dialect/AMX/Transforms.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/ArmNeon/ArmNeonDialect.h"
#include "mlir/Dialect/ArmSME/IR/ArmSME.h"
#include "mlir/Dialect/ArmSME/Transforms/Transforms.h"
#include "mlir/Dialect/ArmSVE/ArmSVEDialect.h"
#include "mlir/Dialect/ArmSVE/Transforms.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/Vector/Transforms/LoweringPatterns.h"
#include "mlir/Dialect/Vector/Transforms/VectorRewritePatterns.h"
#include "mlir/Dialect/X86Vector/Transforms.h"
#include "mlir/Dialect/X86Vector/X86VectorDialect.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
namespace mlir {
#define GEN_PASS_DEF_CONVERTVECTORTOLLVMPASS
#include "mlir/Conversion/Passes.h.inc"
} // namespace mlir
using namespace mlir;
using namespace mlir::vector;
namespace {
/// Pass implementation built on the generated ConvertVectorToLLVMPassBase.
/// The conversion itself lives in runOnOperation(), defined out of line.
struct LowerVectorToLLVMPass
    : public impl::ConvertVectorToLLVMPassBase<LowerVectorToLLVMPass> {
  using Base::Base;

  void runOnOperation() override;

  /// Registers the dialects this pass depends on. This is overridden by hand
  /// so that the architecture-specific dialects are only registered when the
  /// corresponding pass option is set.
  void getDependentDialects(DialectRegistry &registry) const override {
    // Registered unconditionally, in this order.
    registry.insert<LLVM::LLVMDialect, arith::ArithDialect,
                    memref::MemRefDialect>();

    // Registered only when the matching option is enabled.
    if (armNeon) {
      registry.insert<arm_neon::ArmNeonDialect>();
    }
    if (armSVE) {
      registry.insert<arm_sve::ArmSVEDialect>();
    }
    if (armSME) {
      registry.insert<arm_sme::ArmSMEDialect>();
    }
    if (amx) {
      registry.insert<amx::AMXDialect>();
    }
    if (x86Vector) {
      registry.insert<x86vector::X86VectorDialect>();
    }
  }
};
} // namespace
/// Lowers the operation this pass runs on in two stages:
///   1. A greedy rewrite that applies vector-to-vector canonicalization and
///      the broadcast / contract / mask / shape_cast / transpose / transfer
///      lowering patterns.
///   2. A partial conversion against an LLVM conversion target, extended with
///      the legality rules and patterns of each architecture-specific dialect
///      whose pass option is enabled.
/// Signals pass failure if the partial conversion fails.
void LowerVectorToLLVMPass::runOnOperation() {
  // Perform progressive lowering of operations on slices and
  // all contraction operations. Also applies folding and DCE.
  {
    RewritePatternSet patterns(&getContext());
    populateVectorToVectorCanonicalizationPatterns(patterns);
    populateVectorBroadcastLoweringPatterns(patterns);
    populateVectorContractLoweringPatterns(patterns, VectorTransformsOptions());
    populateVectorMaskOpLoweringPatterns(patterns);
    populateVectorShapeCastLoweringPatterns(patterns);
    populateVectorTransposeLoweringPatterns(patterns,
                                            VectorTransformsOptions());
    // Vector transfer ops with rank > 1 should be lowered with VectorToSCF.
    populateVectorTransferLoweringPatterns(patterns, /*maxTransferRank=*/1);
    // The result of the greedy driver is explicitly discarded; this stage does
    // not signal pass failure.
    (void)applyPatternsAndFoldGreedily(getOperation(), std::move(patterns));
  }

  // Convert to the LLVM IR dialect.
  LowerToLLVMOptions options(&getContext());
  options.useOpaquePointers = useOpaquePointers;
  LLVMTypeConverter converter(&getContext(), options);
  RewritePatternSet patterns(&getContext());
  populateVectorMaskMaterializationPatterns(patterns, force32BitVectorIndices);
  populateVectorTransferLoweringPatterns(patterns);
  // Register the matrix conversion patterns exactly once; a second call would
  // only add duplicate copies of the same patterns to the set.
  populateVectorToLLVMMatrixConversionPatterns(converter, patterns);
  populateVectorToLLVMConversionPatterns(
      converter, patterns, reassociateFPReductions, force32BitVectorIndices);

  // Architecture specific augmentations.
  LLVMConversionTarget target(getContext());
  target.addLegalDialect<arith::ArithDialect>();
  target.addLegalDialect<memref::MemRefDialect>();
  target.addLegalOp<UnrealizedConversionCastOp>();
  if (armNeon) {
    // TODO: we may or may not want to include in-dialect lowering to
    // LLVM-compatible operations here. So far, all operations in the dialect
    // can be translated to LLVM IR so there is no conversion necessary.
    target.addLegalDialect<arm_neon::ArmNeonDialect>();
  }
  if (armSVE) {
    configureArmSVELegalizeForExportTarget(target);
    populateArmSVELegalizeForLLVMExportPatterns(converter, patterns);
  }
  if (armSME) {
    configureArmSMELegalizeForExportTarget(target);
    populateArmSMELegalizeForLLVMExportPatterns(converter, patterns);
  }
  if (amx) {
    configureAMXLegalizeForExportTarget(target);
    populateAMXLegalizeForLLVMExportPatterns(converter, patterns);
  }
  if (x86Vector) {
    configureX86VectorLegalizeForExportTarget(target);
    populateX86VectorLegalizeForLLVMExportPatterns(converter, patterns);
  }

  // Partial conversion: ops not covered by the target's legality rules are
  // left in place rather than causing a failure.
  if (failed(
          applyPartialConversion(getOperation(), target, std::move(patterns))))
    signalPassFailure();
}