Fix typo "tranpose" (#124929)
This commit is contained in:
parent
27e01d1d74
commit
aa2952165c
@ -8,7 +8,7 @@
|
||||
*/
|
||||
#ifndef __IMMINTRIN_H
|
||||
#error \
|
||||
"Never use <amxtf32tranposeintrin.h> directly; include <immintrin.h> instead."
|
||||
"Never use <amxtf32transposeintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif // __IMMINTRIN_H
|
||||
|
||||
#ifndef __AMX_TF32TRANSPOSEINTRIN_H
|
||||
|
@ -2269,7 +2269,7 @@ OpRef HvxSelector::perfect(ShuffleMask SM, OpRef Va, ResultStack &Results) {
|
||||
// For example, with the inputs as above, the result will be:
|
||||
// 0 8 2 A 4 C 6 E
|
||||
// 1 9 3 B 5 D 7 F
|
||||
// Now, this result can be tranposed again, but with the group size of 2:
|
||||
// Now, this result can be transposed again, but with the group size of 2:
|
||||
// 08 19 4C 5D
|
||||
// 2A 3B 6E 7F
|
||||
// If we then transpose that result, but with the group size of 4, we get:
|
||||
|
@ -71,7 +71,7 @@ For example, a pattern that transform
|
||||
outs(%init1 : tensor<2x1x3xf32>)
|
||||
dimensions = [1, 0, 2]
|
||||
%out = linalg.transpose
|
||||
ins(%tranpose: tensor<2x1x3xf32>)
|
||||
ins(%transpose: tensor<2x1x3xf32>)
|
||||
outs(%init2 : tensor<3x1x2xf32>)
|
||||
permutation = [2, 1, 0]
|
||||
```
|
||||
|
@ -1007,7 +1007,7 @@ def PackTransposeOp : Op<Transform_Dialect, "structured.pack_transpose", [
|
||||
|
||||
This operation may produce a silenceableFailure if the transpose spec is
|
||||
ill-formed (i.e. `outer_perm` or `inner_perm` are not permutations of the
|
||||
proper rank) or if the tranposition of all involved operations fails for any
|
||||
proper rank) or if the transposition of all involved operations fails for any
|
||||
reason.
|
||||
|
||||
This operation returns 3 handles, one to the transformed LinalgOp, one to
|
||||
|
@ -2779,7 +2779,7 @@ def Vector_MatmulOp : Vector_Op<"matrix_multiply", [Pure,
|
||||
"`:` `(` type($lhs) `,` type($rhs) `)` `->` type($res)";
|
||||
}
|
||||
|
||||
/// Vector dialect matrix tranposition op that operates on flattened 1-D
|
||||
/// Vector dialect matrix transposition op that operates on flattened 1-D
|
||||
/// MLIR vectors. This is the counterpart of llvm.matrix.transpose in MLIR.
|
||||
/// This may seem redundant with vector.transpose but it serves the purposes of
|
||||
/// more progressive lowering and localized type conversion on the path:
|
||||
@ -2799,7 +2799,7 @@ def Vector_FlatTransposeOp : Vector_Op<"flat_transpose", [Pure,
|
||||
let description = [{
|
||||
This is the counterpart of llvm.matrix.transpose in MLIR. It serves
|
||||
the purposes of more progressive lowering and localized type conversion.
|
||||
Higher levels typically lower matrix tranpositions into 'vector.transpose'
|
||||
Higher levels typically lower matrix transpositions into 'vector.transpose'
|
||||
operations. Subsequent rewriting rule progressively lower these operations
|
||||
into 'vector.flat_transpose' operations to bring the operations closer
|
||||
to the hardware ISA.
|
||||
|
@ -332,7 +332,7 @@ struct SplatOpToArmSMELowering : public OpRewritePattern<vector::SplatOp> {
|
||||
/// %transposed_src = arm_sme.tile_load %alloca[%c0, %c0]
|
||||
/// layout<vertical> : memref<?x?xi32>, vector<[4]x[4]xi32>
|
||||
///
|
||||
/// NOTE: Tranposing via memory is obviously expensive, the current intention
|
||||
/// NOTE: Transposing via memory is obviously expensive, the current intention
|
||||
/// is to avoid the transpose if possible, this is therefore intended as a
|
||||
/// fallback and to provide base support for Vector ops. If it turns out
|
||||
/// transposes can't be avoided then this should be replaced with a more optimal
|
||||
|
@ -179,7 +179,7 @@ struct TransferReadLowering : public OpRewritePattern<vector::TransferReadOp> {
|
||||
if (isTransposeLoad &&
|
||||
elementType.getIntOrFloatBitWidth() < minTransposeBitWidth)
|
||||
return rewriter.notifyMatchFailure(
|
||||
readOp, "Unsupported data type for tranposition");
|
||||
readOp, "Unsupported data type for transposition");
|
||||
|
||||
// If load is transposed, get the base shape for the tensor descriptor.
|
||||
SmallVector<int64_t> descShape(vecTy.getShape());
|
||||
|
@ -304,7 +304,7 @@ struct LegalizeTransferReadOpsByDecomposition
|
||||
kMatchFailureNonPermutationMap);
|
||||
|
||||
// Note: For 2D vector types the only non-identity permutation is a simple
|
||||
// tranpose [1, 0].
|
||||
// transpose [1, 0].
|
||||
bool transposed = !permutationMap.isIdentity();
|
||||
|
||||
auto loc = readOp.getLoc();
|
||||
@ -352,7 +352,7 @@ struct LegalizeTransferWriteOpsByDecomposition
|
||||
kMatchFailureNonPermutationMap);
|
||||
|
||||
// Note: For 2D vector types the only non-identity permutation is a simple
|
||||
// tranpose [1, 0].
|
||||
// transpose [1, 0].
|
||||
bool transposed = !permutationMap.isIdentity();
|
||||
|
||||
auto loc = writeOp.getLoc();
|
||||
|
@ -935,7 +935,7 @@ LogicalResult NVVM::WgmmaMmaAsyncOp::verify() {
|
||||
// Check transpose (only available for f16/bf16)
|
||||
// Matrices A should be stored in row-major and B in column-major.
|
||||
// Only f16/bf16 matrices can be stored in either column-major or row-major
|
||||
// by setting the tranpose value(imm-trans-a,imm-trans-b) in PTX code.
|
||||
// by setting the transpose value(imm-trans-a,imm-trans-b) in PTX code.
|
||||
if ((typeA != WGMMATypes::f16 && typeA != WGMMATypes::bf16) &&
|
||||
(getLayoutA() == mlir::NVVM::MMALayout::col ||
|
||||
getLayoutB() == mlir::NVVM::MMALayout::row)) {
|
||||
|
@ -147,7 +147,7 @@ linalg::isaBroadcastOpInterface(GenericOp op) {
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// TranposeOpInterface implementation
|
||||
// TransposeOpInterface implementation
|
||||
//===----------------------------------------------------------------------===//
|
||||
std::optional<SmallVector<int64_t>>
|
||||
linalg::isaTransposeOpInterface(GenericOp op) {
|
||||
|
@ -138,7 +138,7 @@ FailureOr<Operation *> transposeConv2D(RewriterBase &rewriter,
|
||||
linalg::Conv2DNhwcHwcfQOp>(rewriter, op);
|
||||
}
|
||||
|
||||
void populateTranposeConv2DPatterns(RewritePatternSet &patterns) {
|
||||
void populateTransposeConv2DPatterns(RewritePatternSet &patterns) {
|
||||
MLIRContext *context = patterns.getContext();
|
||||
patterns.insert<
|
||||
ConvConverter<linalg::Conv2DNhwcFhwcOp, linalg::Conv2DNhwcHwcfOp>,
|
||||
|
@ -1269,7 +1269,7 @@ struct LinalgOpRewriter : public OpRewritePattern<linalg::GenericOp> {
|
||||
AffineExpr i, j, k;
|
||||
bindDims(getContext(), i, j, k);
|
||||
|
||||
// TODO: more robust patterns, tranposed versions, more kernels,
|
||||
// TODO: more robust patterns, transposed versions, more kernels,
|
||||
// identify alpha and beta and pass them to the CUDA calls.
|
||||
|
||||
// Recognize a SpMV kernel.
|
||||
|
@ -1488,7 +1488,7 @@ private:
|
||||
|
||||
/// Try to fold in place to extract(source, extractPosition) and return the
|
||||
/// folded result. Return null if folding is not possible (e.g. due to an
|
||||
/// internal tranposition in the result).
|
||||
/// internal transposition in the result).
|
||||
Value tryToFoldExtractOpInPlace(Value source);
|
||||
|
||||
ExtractOp extractOp;
|
||||
@ -1582,7 +1582,7 @@ ExtractFromInsertTransposeChainState::handleInsertOpWithPrefixPos(Value &res) {
|
||||
|
||||
/// Try to fold in place to extract(source, extractPosition) and return the
|
||||
/// folded result. Return null if folding is not possible (e.g. due to an
|
||||
/// internal tranposition in the result).
|
||||
/// internal transposition in the result).
|
||||
Value ExtractFromInsertTransposeChainState::tryToFoldExtractOpInPlace(
|
||||
Value source) {
|
||||
// TODO: Canonicalization for dynamic position not implemented yet.
|
||||
|
@ -377,18 +377,18 @@ mlir::vector::castAwayContractionLeadingOneDim(vector::ContractionOp contractOp,
|
||||
int64_t orginalZeroDim = it.value().getDimPosition(0);
|
||||
if (orginalZeroDim != dimToDrop) {
|
||||
// There are two reasons to be in this path, 1. We need to
|
||||
// tranpose the operand to make the dim to be dropped
|
||||
// transpose the operand to make the dim to be dropped
|
||||
// leading. 2. The dim to be dropped does not exist and in
|
||||
// that case we dont want to add a unit tranpose but we must
|
||||
// that case we dont want to add a unit transpose but we must
|
||||
// check all the indices to make sure this is the case.
|
||||
bool tranposeNeeded = false;
|
||||
bool transposeNeeded = false;
|
||||
SmallVector<int64_t> perm;
|
||||
SmallVector<AffineExpr> transposeResults;
|
||||
|
||||
for (int64_t i = 0, e = map.getNumResults(); i < e; ++i) {
|
||||
int64_t currDim = map.getDimPosition(i);
|
||||
if (currDim == dimToDrop) {
|
||||
tranposeNeeded = true;
|
||||
transposeNeeded = true;
|
||||
perm.insert(perm.begin(), i);
|
||||
auto targetExpr = rewriter.getAffineDimExpr(currDim);
|
||||
transposeResults.insert(transposeResults.begin(), targetExpr);
|
||||
@ -413,9 +413,9 @@ mlir::vector::castAwayContractionLeadingOneDim(vector::ContractionOp contractOp,
|
||||
}
|
||||
}
|
||||
|
||||
// Do the tranpose now if needed so that we can drop the
|
||||
// Do the transpose now if needed so that we can drop the
|
||||
// correct dim using extract later.
|
||||
if (tranposeNeeded) {
|
||||
if (transposeNeeded) {
|
||||
map = AffineMap::get(map.getNumDims(), 0, transposeResults,
|
||||
contractOp.getContext());
|
||||
if (transposeNonOuterUnitDims) {
|
||||
@ -474,7 +474,7 @@ namespace {
|
||||
|
||||
/// Turns vector.contract on vector with leading 1 dimensions into
|
||||
/// vector.extract followed by vector.contract on vector without leading
|
||||
/// 1 dimensions. Also performs tranpose of lhs and rhs operands if required
|
||||
/// 1 dimensions. Also performs transpose of lhs and rhs operands if required
|
||||
/// prior to extract.
|
||||
struct CastAwayContractionLeadingOneDim
|
||||
: public MaskableOpRewritePattern<vector::ContractionOp> {
|
||||
|
@ -1792,11 +1792,11 @@ struct DropUnitDimsFromTransposeOp final
|
||||
auto dropDimsShapeCast = rewriter.create<vector::ShapeCastOp>(
|
||||
loc, sourceTypeWithoutUnitDims, op.getVector());
|
||||
// Create the new transpose.
|
||||
auto tranposeWithoutUnitDims =
|
||||
auto transposeWithoutUnitDims =
|
||||
rewriter.create<vector::TransposeOp>(loc, dropDimsShapeCast, newPerm);
|
||||
// Restore the unit dims via shape cast.
|
||||
rewriter.replaceOpWithNewOp<vector::ShapeCastOp>(
|
||||
op, op.getResultVectorType(), tranposeWithoutUnitDims);
|
||||
op, op.getResultVectorType(), transposeWithoutUnitDims);
|
||||
|
||||
return success();
|
||||
}
|
||||
|
@ -232,11 +232,11 @@ func.func @vector_reduction(%v : vector<8xf32>) -> f32 {
|
||||
// CHECK: %[[add3:.*]] = arith.addf %[[add2]], %[[r3]]
|
||||
// CHECK: return %[[add3]]
|
||||
|
||||
func.func @vector_tranpose(%v : vector<2x4x3x8xf32>) -> vector<2x3x8x4xf32> {
|
||||
func.func @vector_transpose(%v : vector<2x4x3x8xf32>) -> vector<2x3x8x4xf32> {
|
||||
%t = vector.transpose %v, [0, 2, 3, 1] : vector<2x4x3x8xf32> to vector<2x3x8x4xf32>
|
||||
return %t : vector<2x3x8x4xf32>
|
||||
}
|
||||
// CHECK-LABEL: func @vector_tranpose
|
||||
// CHECK-LABEL: func @vector_transpose
|
||||
// CHECK: %[[VI:.*]] = arith.constant dense<0.000000e+00> : vector<2x3x8x4xf32>
|
||||
// CHECK: %[[E0:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [0, 0, 0, 0], sizes = [1, 2, 3, 4], strides = [1, 1, 1, 1]} : vector<2x4x3x8xf32> to vector<1x2x3x4xf32>
|
||||
// CHECK: %[[T0:.*]] = vector.transpose %[[E0]], [0, 2, 3, 1] : vector<1x2x3x4xf32> to vector<1x3x4x2xf32>
|
||||
|
Loading…
x
Reference in New Issue
Block a user