//===- VectorAnalysis.cpp - Analysis for Vectorization --------------------===// // // Copyright 2019 The MLIR Authors. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // ============================================================================= #include "mlir/Analysis/VectorAnalysis.h" #include "mlir/Analysis/AffineAnalysis.h" #include "mlir/Analysis/LoopAnalysis.h" #include "mlir/IR/Builders.h" #include "mlir/IR/BuiltinOps.h" #include "mlir/IR/Instructions.h" #include "mlir/StandardOps/StandardOps.h" #include "mlir/SuperVectorOps/SuperVectorOps.h" #include "mlir/Support/Functional.h" #include "mlir/Support/STLExtras.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/SetVector.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" /// /// Implements Analysis functions specific to vectors which support /// the vectorization and vectorization materialization passes. /// using namespace mlir; #define DEBUG_TYPE "vector-analysis" using llvm::dbgs; using llvm::SetVector; Optional> mlir::shapeRatio(ArrayRef superShape, ArrayRef subShape) { if (superShape.size() < subShape.size()) { return Optional>(); } // Starting from the end, compute the integer divisors. // Set the boolean `divides` if integral division is not possible. std::vector result; result.reserve(superShape.size()); bool divides = true; auto divide = [÷s, &result](int superSize, int subSize) { assert(superSize > 0 && "superSize must be > 0"); assert(subSize > 0 && "subSize must be > 0"); divides &= (superSize % subSize == 0); result.push_back(superSize / subSize); }; functional::zipApply( divide, SmallVector{superShape.rbegin(), superShape.rend()}, SmallVector{subShape.rbegin(), subShape.rend()}); // If integral division does not occur, return and let the caller decide. if (!divides) { return None; } // At this point we computed the ratio (in reverse) for the common // size. Fill with the remaining entries from the super-vector shape (still in // reverse). int commonSize = subShape.size(); std::copy(superShape.rbegin() + commonSize, superShape.rend(), std::back_inserter(result)); assert(result.size() == superShape.size() && "super to sub shape ratio is not of the same size as the super rank"); // Reverse again to get it back in the proper order and return. return SmallVector{result.rbegin(), result.rend()}; } Optional> mlir::shapeRatio(VectorType superVectorType, VectorType subVectorType) { assert(superVectorType.getElementType() == subVectorType.getElementType() && "vector types must be of the same elemental type"); return shapeRatio(superVectorType.getShape(), subVectorType.getShape()); } /// Constructs a permutation map from memref indices to vector dimension. /// /// The implementation uses the knowledge of the mapping of enclosing loop to /// vector dimension. `enclosingLoopToVectorDim` carries this information as a /// map with: /// - keys representing "vectorized enclosing loops"; /// - values representing the corresponding vector dimension. /// The algorithm traverses "vectorized enclosing loops" and extracts the /// at-most-one MemRef index that is invariant along said loop. This index is /// guaranteed to be at most one by construction: otherwise the MemRef is not /// vectorizable. /// If this invariant index is found, it is added to the permutation_map at the /// proper vector dimension. /// If no index is found to be invariant, 0 is added to the permutation_map and /// corresponds to a vector broadcast along that dimension. /// /// Examples can be found in the documentation of `makePermutationMap`, in the /// header file. static AffineMap makePermutationMap( MLIRContext *context, llvm::iterator_range indices, const DenseMap &enclosingLoopToVectorDim) { using functional::makePtrDynCaster; using functional::map; auto unwrappedIndices = map(makePtrDynCaster(), indices); SmallVector perm(enclosingLoopToVectorDim.size(), getAffineConstantExpr(0, context)); for (auto kvp : enclosingLoopToVectorDim) { assert(kvp.second < perm.size()); auto invariants = getInvariantAccesses(*kvp.first, unwrappedIndices); unsigned numIndices = unwrappedIndices.size(); unsigned countInvariantIndices = 0; for (unsigned dim = 0; dim < numIndices; ++dim) { if (!invariants.count(unwrappedIndices[dim])) { assert(perm[kvp.second] == getAffineConstantExpr(0, context) && "permutationMap already has an entry along dim"); perm[kvp.second] = getAffineDimExpr(dim, context); } else { ++countInvariantIndices; } } assert((countInvariantIndices == numIndices || countInvariantIndices == numIndices - 1) && "Vectorization prerequisite violated: at most 1 index may be " "invariant wrt a vectorized loop"); } return AffineMap::get(unwrappedIndices.size(), 0, perm, {}); } /// Implementation detail that walks up the parents and records the ones with /// the specified type. /// TODO(ntv): could also be implemented as a collect parents followed by a /// filter and made available outside this file. template static SetVector getParentsOfType(Instruction *inst) { SetVector res; auto *current = inst; while (auto *parent = current->getParentInst()) { auto *typedParent = dyn_cast(parent); if (typedParent) { assert(res.count(typedParent) == 0 && "Already inserted"); res.insert(typedParent); } current = parent; } return res; } /// Returns the enclosing ForInst, from closest to farthest. static SetVector getEnclosingforInsts(Instruction *inst) { return getParentsOfType(inst); } AffineMap mlir::makePermutationMap(OperationInst *opInst, const DenseMap &loopToVectorDim) { DenseMap enclosingLoopToVectorDim; auto enclosingLoops = getEnclosingforInsts(opInst); for (auto *forInst : enclosingLoops) { auto it = loopToVectorDim.find(forInst); if (it != loopToVectorDim.end()) { enclosingLoopToVectorDim.insert(*it); } } if (auto load = opInst->dyn_cast()) { return ::makePermutationMap(opInst->getContext(), load->getIndices(), enclosingLoopToVectorDim); } auto store = opInst->cast(); return ::makePermutationMap(opInst->getContext(), store->getIndices(), enclosingLoopToVectorDim); } bool mlir::matcher::operatesOnSuperVectors(const OperationInst &opInst, VectorType subVectorType) { // First, extract the vector type and ditinguish between: // a. ops that *must* lower a super-vector (i.e. vector_transfer_read, // vector_transfer_write); and // b. ops that *may* lower a super-vector (all other ops). // The ops that *may* lower a super-vector only do so if the super-vector to // sub-vector ratio exists. The ops that *must* lower a super-vector are // explicitly checked for this property. /// TODO(ntv): there should be a single function for all ops to do this so we /// do not have to special case. Maybe a trait, or just a method, unclear atm. bool mustDivide = false; VectorType superVectorType; if (auto read = opInst.dyn_cast()) { superVectorType = read->getResultType(); mustDivide = true; } else if (auto write = opInst.dyn_cast()) { superVectorType = write->getVectorType(); mustDivide = true; } else if (opInst.getNumResults() == 0) { if (!opInst.isa()) { opInst.emitError("NYI: assuming only return instructions can have 0 " " results at this point"); } return false; } else if (opInst.getNumResults() == 1) { if (auto v = opInst.getResult(0)->getType().dyn_cast()) { superVectorType = v; } else { // Not a vector type. return false; } } else { // Not a vector_transfer and has more than 1 result, fail hard for now to // wake us up when something changes. opInst.emitError("NYI: instruction has more than 1 result"); return false; } // Get the ratio. auto ratio = shapeRatio(superVectorType, subVectorType); // Sanity check. assert((ratio.hasValue() || !mustDivide) && "vector_transfer instruction in which super-vector size is not an" " integer multiple of sub-vector size"); // This catches cases that are not strictly necessary to have multiplicity but // still aren't divisible by the sub-vector shape. // This could be useful information if we wanted to reshape at the level of // the vector type (but we would have to look at the compute and distinguish // between parallel, reduction and possibly other cases. if (!ratio.hasValue()) { return false; } return true; } namespace { /// A `SingleResultAffineNormalizer` is a helper class that is not visible to /// the user and supports renumbering operands of single-result AffineApplyOp. /// This operates on the assumption that only single-result unbounded AffineMap /// are used for all operands. /// This acts as a reindexing map of Value* to positional dims or symbols and /// allows simplifications such as: /// /// ```mlir /// %1 = affine_apply (d0, d1) -> (d0 - d1) (%0, %0) /// ``` /// /// into: /// /// ```mlir /// %1 = affine_apply () -> (0) /// ``` struct SingleResultAffineNormalizer { SingleResultAffineNormalizer(AffineMap map, ArrayRef operands); /// Returns the single result, unbounded, AffineMap resulting from /// normalization. AffineMap getAffineMap() { return AffineMap::get(reorderedDims.size(), reorderedSymbols.size(), {expr}, {}); } SmallVector getOperands() { SmallVector res(reorderedDims); res.append(reorderedSymbols.begin(), reorderedSymbols.end()); return res; } private: /// Helper function to insert `v` into the coordinate system of the current /// SingleResultAffineNormalizer (i.e. in the proper `xxxValueToPosition` and /// the proper `reorderedXXX`). /// Returns the AffineDimExpr or AffineSymbolExpr with the correponding /// renumbered position. template DimOrSymbol renumberOneIndex(Value *v); /// Given an `other` normalizer, this rewrites `other.expr` in the coordinate /// system of the current SingleResultAffineNormalizer. /// Returns the rewritten AffineExpr. AffineExpr renumber(const SingleResultAffineNormalizer &other); /// Given an `app` with single result and unbounded AffineMap, this rewrites /// the app's map single result AffineExpr in the coordinate system of the /// current SingleResultAffineNormalizer. /// Returns the rewritten AffineExpr. AffineExpr renumber(AffineApplyOp *app); /// Maps of Value* to position in the `expr`. DenseMap dimValueToPosition; DenseMap symValueToPosition; /// Ordered dims and symbols matching positional dims and symbols in `expr`. SmallVector reorderedDims; SmallVector reorderedSymbols; AffineExpr expr; }; } // namespace template static DimOrSymbol make(unsigned position, MLIRContext *context); template <> AffineDimExpr make(unsigned position, MLIRContext *context) { return getAffineDimExpr(position, context).cast(); } template <> AffineSymbolExpr make(unsigned position, MLIRContext *context) { return getAffineSymbolExpr(position, context).cast(); } template DimOrSymbol SingleResultAffineNormalizer::renumberOneIndex(Value *v) { static_assert(std::is_same::value || std::is_same::value, "renumber(...) or renumber(...) " "required"); DenseMap &pos = std::is_same::value ? symValueToPosition : dimValueToPosition; DenseMap::iterator iterPos; bool inserted = false; std::tie(iterPos, inserted) = pos.insert(std::make_pair(v, pos.size())); if (inserted) { std::is_same::value ? reorderedDims.push_back(v) : reorderedSymbols.push_back(v); } return make(iterPos->second, v->getFunction()->getContext()); } AffineExpr SingleResultAffineNormalizer::renumber( const SingleResultAffineNormalizer &other) { SmallVector dimRemapping, symRemapping; for (auto kvp : other.dimValueToPosition) { if (dimRemapping.size() <= kvp.second) dimRemapping.resize(kvp.second + 1); dimRemapping[kvp.second] = renumberOneIndex(kvp.first); } for (auto kvp : other.symValueToPosition) { if (symRemapping.size() <= kvp.second) symRemapping.resize(kvp.second + 1); symRemapping[kvp.second] = renumberOneIndex(kvp.first); } return other.expr.replaceDimsAndSymbols(dimRemapping, symRemapping); } AffineExpr SingleResultAffineNormalizer::renumber(AffineApplyOp *app) { // Sanity check, single result AffineApplyOp if one wants to use this. assert(app->getNumResults() == 1 && "Not a single result AffineApplyOp"); assert(app->getAffineMap().getRangeSizes().empty() && "Non-empty range sizes"); // Create the SingleResultAffineNormalizer for the operands of this // AffineApplyOp and combine it with the current SingleResultAffineNormalizer. using ValueTy = decltype(*(app->getOperands().begin())); SingleResultAffineNormalizer normalizer( app->getAffineMap(), functional::map([](ValueTy v) { return static_cast(v); }, app->getOperands())); // We know this is a single result AffineMap, we need to append a // renumbered AffineExpr. return renumber(normalizer); } SingleResultAffineNormalizer::SingleResultAffineNormalizer( AffineMap map, ArrayRef operands) { assert(map.getNumResults() == 1 && "Single-result map expected"); assert(map.getRangeSizes().empty() && "Unbounded map expected"); assert(map.getNumInputs() == operands.size() && "number of operands does not match the number of map inputs"); if (operands.empty()) { return; } auto *context = operands[0]->getFunction()->getContext(); SmallVector exprs; for (auto en : llvm::enumerate(operands)) { auto *t = en.value(); assert(t->getType().isIndex()); if (auto inst = t->getDefiningInst()) { if (auto app = inst->dyn_cast()) { // Sanity check, AffineApplyOp must always be composed by construction // and there can only ever be a dependence chain of 1 AffineApply. So we // can never get a second AffineApplyOp. // This also guarantees we can build another // SingleResultAffineNormalizer here that does not recurse a second // time. for (auto *pred : app->getOperands()) { assert(!pred->getDefiningInst() || !pred->getDefiningInst()->isa() && "AffineApplyOp chain of length > 1"); (void)pred; } exprs.push_back(renumber(app)); } else if (auto constant = inst->dyn_cast()) { // Constants remain constants. auto affineConstant = inst->cast(); exprs.push_back( getAffineConstantExpr(affineConstant->getValue(), context)); } else { // DimOp, top of the function symbols are all symbols. exprs.push_back(renumberOneIndex(t)); } } else if (en.index() < map.getNumDims()) { assert(isa(t) && "ForInst expected for AffineDimExpr"); exprs.push_back(renumberOneIndex(t)); } else { assert(!isa(t) && "unexpectd ForInst for a AffineSymbolExpr"); exprs.push_back(renumberOneIndex(t)); } } auto exprsMap = AffineMap::get(dimValueToPosition.size(), symValueToPosition.size(), exprs, {}); expr = composeWithUnboundedMap(map.getResult(0), exprsMap); LLVM_DEBUG(map.getResult(0).print(dbgs() << "\nCompose expr: ")); LLVM_DEBUG(exprsMap.print(dbgs() << "\nWith map: ")); LLVM_DEBUG(expr.print(dbgs() << "\nResult: ")); } OpPointer mlir::makeNormalizedAffineApply(FuncBuilder *b, Location loc, AffineMap map, ArrayRef operands) { SingleResultAffineNormalizer normalizer(map, operands); return b->create(loc, normalizer.getAffineMap(), normalizer.getOperands()); }