[AArch64] Initial Ampere1B scheduling model (#81341)
The Ampere1B core is enabled with a new scheduling/pipeline model, as it provides significant updates over the Ampere1 core: it reduces latencies on many instructions, has some micro-ops reassigned between the XY and X units, and provides modelling for the instructions added since Ampere1 and Ampere1A. As this is the first model implementing the CSSC instructions, we add HasCSSC to the UnsupportedFeatures of all other models (that have CompleteModel set). Test cases are added under llvm-mca; these showed that the FullFP16 feature was missing, so it is added as part of this commit. This *adds tests and additional fixes* compared to the reverted #81338.
This commit is contained in:
parent
43c7eb5d7b
commit
dd1897c6cb
@ -837,6 +837,7 @@ include "AArch64SchedA64FX.td"
|
||||
include "AArch64SchedThunderX3T110.td"
|
||||
include "AArch64SchedTSV110.td"
|
||||
include "AArch64SchedAmpere1.td"
|
||||
include "AArch64SchedAmpere1B.td"
|
||||
include "AArch64SchedNeoverseN1.td"
|
||||
include "AArch64SchedNeoverseN2.td"
|
||||
include "AArch64SchedNeoverseV1.td"
|
||||
@ -1555,7 +1556,7 @@ def ProcessorFeatures {
|
||||
FeatureMTE, FeatureSSBS, FeatureRandGen,
|
||||
FeatureSB, FeatureSM4, FeatureSHA2,
|
||||
FeatureSHA3, FeatureAES, FeatureCSSC,
|
||||
FeatureWFxT];
|
||||
FeatureWFxT, FeatureFullFP16];
|
||||
|
||||
// ETE and TRBE are future architecture extensions. We temporarily enable them
|
||||
// by default for users targeting generic AArch64. The extensions do not
|
||||
@ -1723,7 +1724,7 @@ def : ProcessorModel<"ampere1", Ampere1Model, ProcessorFeatures.Ampere1,
|
||||
def : ProcessorModel<"ampere1a", Ampere1Model, ProcessorFeatures.Ampere1A,
|
||||
[TuneAmpere1A]>;
|
||||
|
||||
def : ProcessorModel<"ampere1b", Ampere1Model, ProcessorFeatures.Ampere1B,
|
||||
def : ProcessorModel<"ampere1b", Ampere1BModel, ProcessorFeatures.Ampere1B,
|
||||
[TuneAmpere1B]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -29,7 +29,7 @@ def CortexA53Model : SchedMachineModel {
|
||||
list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
|
||||
PAUnsupported.F,
|
||||
SMEUnsupported.F,
|
||||
[HasMTE]);
|
||||
[HasMTE, HasCSSC]);
|
||||
}
|
||||
|
||||
|
||||
|
@ -34,7 +34,7 @@ def CortexA57Model : SchedMachineModel {
|
||||
list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
|
||||
PAUnsupported.F,
|
||||
SMEUnsupported.F,
|
||||
[HasMTE]);
|
||||
[HasMTE, HasCSSC]);
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -22,7 +22,8 @@ def A64FXModel : SchedMachineModel {
|
||||
|
||||
list<Predicate> UnsupportedFeatures = !listconcat(SMEUnsupported.F, SVEUnsupported.F,
|
||||
[HasMTE, HasMatMulInt8, HasBF16,
|
||||
HasPAuth, HasPAuthLR, HasCPA]);
|
||||
HasPAuth, HasPAuthLR, HasCPA,
|
||||
HasCSSC]);
|
||||
let FullInstRWOverlapCheck = 0;
|
||||
}
|
||||
|
||||
|
1149
llvm/lib/Target/AArch64/AArch64SchedAmpere1B.td
Normal file
1149
llvm/lib/Target/AArch64/AArch64SchedAmpere1B.td
Normal file
File diff suppressed because it is too large
Load Diff
@ -21,7 +21,7 @@ def CycloneModel : SchedMachineModel {
|
||||
list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
|
||||
PAUnsupported.F,
|
||||
SMEUnsupported.F,
|
||||
[HasMTE]);
|
||||
[HasMTE, HasCSSC]);
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -27,7 +27,7 @@ def ExynosM3Model : SchedMachineModel {
|
||||
list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
|
||||
PAUnsupported.F,
|
||||
SMEUnsupported.F,
|
||||
[HasMTE]);
|
||||
[HasMTE, HasCSSC]);
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -27,7 +27,7 @@ def ExynosM4Model : SchedMachineModel {
|
||||
list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
|
||||
PAUnsupported.F,
|
||||
SMEUnsupported.F,
|
||||
[HasMTE]);
|
||||
[HasMTE, HasCSSC]);
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -27,7 +27,7 @@ def ExynosM5Model : SchedMachineModel {
|
||||
list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
|
||||
PAUnsupported.F,
|
||||
SMEUnsupported.F,
|
||||
[HasMTE]);
|
||||
[HasMTE, HasCSSC]);
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -26,7 +26,7 @@ def FalkorModel : SchedMachineModel {
|
||||
list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
|
||||
PAUnsupported.F,
|
||||
SMEUnsupported.F,
|
||||
[HasMTE]);
|
||||
[HasMTE, HasCSSC]);
|
||||
// FIXME: Remove when all errors have been fixed.
|
||||
let FullInstRWOverlapCheck = 0;
|
||||
}
|
||||
|
@ -30,7 +30,7 @@ def KryoModel : SchedMachineModel {
|
||||
list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
|
||||
PAUnsupported.F,
|
||||
SMEUnsupported.F,
|
||||
[HasMTE]);
|
||||
[HasMTE, HasCSSC]);
|
||||
// FIXME: Remove when all errors have been fixed.
|
||||
let FullInstRWOverlapCheck = 0;
|
||||
}
|
||||
|
@ -25,7 +25,7 @@ def NeoverseN1Model : SchedMachineModel {
|
||||
list<Predicate> UnsupportedFeatures = !listconcat(PAUnsupported.F,
|
||||
SMEUnsupported.F,
|
||||
SVEUnsupported.F,
|
||||
[HasMTE]);
|
||||
[HasMTE, HasCSSC]);
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -19,7 +19,7 @@ def NeoverseN2Model : SchedMachineModel {
|
||||
let CompleteModel = 1;
|
||||
|
||||
list<Predicate> UnsupportedFeatures = !listconcat(SMEUnsupported.F,
|
||||
[HasSVE2p1, HasPAuthLR, HasCPA]);
|
||||
[HasSVE2p1, HasPAuthLR, HasCPA, HasCSSC]);
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -28,7 +28,8 @@ def NeoverseV1Model : SchedMachineModel {
|
||||
|
||||
list<Predicate> UnsupportedFeatures = !listconcat(SVE2Unsupported.F,
|
||||
SMEUnsupported.F,
|
||||
[HasMTE, HasCPA]);
|
||||
[HasMTE, HasCPA,
|
||||
HasCSSC]);
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -22,7 +22,8 @@ def NeoverseV2Model : SchedMachineModel {
|
||||
let CompleteModel = 1;
|
||||
|
||||
list<Predicate> UnsupportedFeatures = !listconcat(SMEUnsupported.F,
|
||||
[HasSVE2p1, HasCPA]);
|
||||
[HasSVE2p1, HasCPA,
|
||||
HasCSSC]);
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -27,7 +27,7 @@ def TSV110Model : SchedMachineModel {
|
||||
list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
|
||||
PAUnsupported.F,
|
||||
SMEUnsupported.F,
|
||||
[HasMTE]);
|
||||
[HasMTE, HasCSSC]);
|
||||
}
|
||||
|
||||
// Define each kind of processor resource and number available on the TSV110,
|
||||
|
@ -28,7 +28,7 @@ def ThunderXT8XModel : SchedMachineModel {
|
||||
list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
|
||||
PAUnsupported.F,
|
||||
SMEUnsupported.F,
|
||||
[HasMTE]);
|
||||
[HasMTE, HasCSSC]);
|
||||
// FIXME: Remove when all errors have been fixed.
|
||||
let FullInstRWOverlapCheck = 0;
|
||||
}
|
||||
|
@ -28,7 +28,7 @@ def ThunderX2T99Model : SchedMachineModel {
|
||||
list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
|
||||
PAUnsupported.F,
|
||||
SMEUnsupported.F,
|
||||
[HasMTE]);
|
||||
[HasMTE, HasCSSC]);
|
||||
// FIXME: Remove when all errors have been fixed.
|
||||
let FullInstRWOverlapCheck = 0;
|
||||
}
|
||||
|
@ -27,7 +27,7 @@ def ThunderX3T110Model : SchedMachineModel {
|
||||
list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
|
||||
PAUnsupported.F,
|
||||
SMEUnsupported.F,
|
||||
[HasMTE]);
|
||||
[HasMTE, HasCSSC]);
|
||||
// FIXME: Remove when all errors have been fixed.
|
||||
let FullInstRWOverlapCheck = 0;
|
||||
}
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,76 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
|
||||
# RUN: llvm-mca -mtriple=aarch64 -mcpu=ampere1b -instruction-tables < %s | FileCheck %s
|
||||
|
||||
abs w1, w2
|
||||
abs x2, x3
|
||||
cnt w3, w4
|
||||
cnt x4, x5
|
||||
ctz w5, w6
|
||||
ctz x6, x7
|
||||
smax w7, w8, w9
|
||||
smax x8, x9, x10
|
||||
umax w9, w10, w11
|
||||
umax x10, x11, x12
|
||||
smin w11, w12, w13
|
||||
smin w12, w13, w14
|
||||
umin w13, w14, w15
|
||||
umin x14, x15, x16
|
||||
|
||||
# CHECK: Instruction Info:
|
||||
# CHECK-NEXT: [1]: #uOps
|
||||
# CHECK-NEXT: [2]: Latency
|
||||
# CHECK-NEXT: [3]: RThroughput
|
||||
# CHECK-NEXT: [4]: MayLoad
|
||||
# CHECK-NEXT: [5]: MayStore
|
||||
# CHECK-NEXT: [6]: HasSideEffects (U)
|
||||
|
||||
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
|
||||
# CHECK-NEXT: 1 1 0.25 abs w1, w2
|
||||
# CHECK-NEXT: 1 1 0.25 abs x2, x3
|
||||
# CHECK-NEXT: 1 3 1.00 cnt w3, w4
|
||||
# CHECK-NEXT: 1 3 1.00 cnt x4, x5
|
||||
# CHECK-NEXT: 1 1 0.50 ctz w5, w6
|
||||
# CHECK-NEXT: 1 1 0.50 ctz x6, x7
|
||||
# CHECK-NEXT: 2 1 0.50 smax w7, w8, w9
|
||||
# CHECK-NEXT: 2 1 0.50 smax x8, x9, x10
|
||||
# CHECK-NEXT: 2 1 0.50 umax w9, w10, w11
|
||||
# CHECK-NEXT: 2 1 0.50 umax x10, x11, x12
|
||||
# CHECK-NEXT: 2 1 0.50 smin w11, w12, w13
|
||||
# CHECK-NEXT: 2 1 0.50 smin w12, w13, w14
|
||||
# CHECK-NEXT: 2 1 0.50 umin w13, w14, w15
|
||||
# CHECK-NEXT: 2 1 0.50 umin x14, x15, x16
|
||||
|
||||
# CHECK: Resources:
|
||||
# CHECK-NEXT: [0.0] - Ampere1BUnitA
|
||||
# CHECK-NEXT: [0.1] - Ampere1BUnitA
|
||||
# CHECK-NEXT: [1.0] - Ampere1BUnitB
|
||||
# CHECK-NEXT: [1.1] - Ampere1BUnitB
|
||||
# CHECK-NEXT: [2] - Ampere1BUnitBS
|
||||
# CHECK-NEXT: [3.0] - Ampere1BUnitL
|
||||
# CHECK-NEXT: [3.1] - Ampere1BUnitL
|
||||
# CHECK-NEXT: [4.0] - Ampere1BUnitS
|
||||
# CHECK-NEXT: [4.1] - Ampere1BUnitS
|
||||
# CHECK-NEXT: [5] - Ampere1BUnitX
|
||||
# CHECK-NEXT: [6] - Ampere1BUnitY
|
||||
# CHECK-NEXT: [7] - Ampere1BUnitZ
|
||||
|
||||
# CHECK: Resource pressure per iteration:
|
||||
# CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2] [3.0] [3.1] [4.0] [4.1] [5] [6] [7]
|
||||
# CHECK-NEXT: 6.50 6.50 3.50 3.50 2.00 - - - - - - -
|
||||
|
||||
# CHECK: Resource pressure by instruction:
|
||||
# CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2] [3.0] [3.1] [4.0] [4.1] [5] [6] [7] Instructions:
|
||||
# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - abs w1, w2
|
||||
# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - abs x2, x3
|
||||
# CHECK-NEXT: - - - - 1.00 - - - - - - - cnt w3, w4
|
||||
# CHECK-NEXT: - - - - 1.00 - - - - - - - cnt x4, x5
|
||||
# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - ctz w5, w6
|
||||
# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - ctz x6, x7
|
||||
# CHECK-NEXT: 0.75 0.75 0.25 0.25 - - - - - - - - smax w7, w8, w9
|
||||
# CHECK-NEXT: 0.75 0.75 0.25 0.25 - - - - - - - - smax x8, x9, x10
|
||||
# CHECK-NEXT: 0.75 0.75 0.25 0.25 - - - - - - - - umax w9, w10, w11
|
||||
# CHECK-NEXT: 0.75 0.75 0.25 0.25 - - - - - - - - umax x10, x11, x12
|
||||
# CHECK-NEXT: 0.75 0.75 0.25 0.25 - - - - - - - - smin w11, w12, w13
|
||||
# CHECK-NEXT: 0.75 0.75 0.25 0.25 - - - - - - - - smin w12, w13, w14
|
||||
# CHECK-NEXT: 0.75 0.75 0.25 0.25 - - - - - - - - umin w13, w14, w15
|
||||
# CHECK-NEXT: 0.75 0.75 0.25 0.25 - - - - - - - - umin x14, x15, x16
|
@ -0,0 +1,349 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
|
||||
# RUN: llvm-mca -mtriple=aarch64 -mcpu=ampere1b -instruction-tables < %s | FileCheck %s
|
||||
|
||||
irg x0, x1
|
||||
irg sp, x1
|
||||
irg x0, sp
|
||||
irg x0, x1, x2
|
||||
irg sp, x1, x2
|
||||
addg x0, x1, #0, #1
|
||||
addg sp, x2, #32, #3
|
||||
addg x0, sp, #64, #5
|
||||
addg x3, x4, #1008, #6
|
||||
addg x5, x6, #112, #15
|
||||
subg x0, x1, #0, #1
|
||||
subg sp, x2, #32, #3
|
||||
subg x0, sp, #64, #5
|
||||
subg x3, x4, #1008, #6
|
||||
subg x5, x6, #112, #15
|
||||
gmi x0, x1, x2
|
||||
gmi x3, sp, x4
|
||||
gmi xzr, x0, x30
|
||||
gmi x30, x0, xzr
|
||||
subp x0, x1, x2
|
||||
subps x0, x1, x2
|
||||
subp x0, sp, sp
|
||||
subps x0, sp, sp
|
||||
subps xzr, x0, x1
|
||||
subps xzr, sp, sp
|
||||
stg x0, [x1, #-4096]
|
||||
stg x1, [x2, #4080]
|
||||
stg x2, [sp, #16]
|
||||
stg x3, [x1]
|
||||
stg sp, [x1]
|
||||
stzg x0, [x1, #-4096]
|
||||
stzg x1, [x2, #4080]
|
||||
stzg x2, [sp, #16]
|
||||
stzg x3, [x1]
|
||||
stzg sp, [x1]
|
||||
stg x0, [x1, #-4096]!
|
||||
stg x1, [x2, #4080]!
|
||||
stg x2, [sp, #16]!
|
||||
stg sp, [sp, #16]!
|
||||
stzg x0, [x1, #-4096]!
|
||||
stzg x1, [x2, #4080]!
|
||||
stzg x2, [sp, #16]!
|
||||
stzg sp, [sp, #16]!
|
||||
stg x0, [x1], #-4096
|
||||
stg x1, [x2], #4080
|
||||
stg x2, [sp], #16
|
||||
stg sp, [sp], #16
|
||||
stzg x0, [x1], #-4096
|
||||
stzg x1, [x2], #4080
|
||||
stzg x2, [sp], #16
|
||||
stzg sp, [sp], #16
|
||||
st2g x0, [x1, #-4096]
|
||||
st2g x1, [x2, #4080]
|
||||
st2g x2, [sp, #16]
|
||||
st2g x3, [x1]
|
||||
st2g sp, [x1]
|
||||
stz2g x0, [x1, #-4096]
|
||||
stz2g x1, [x2, #4080]
|
||||
stz2g x2, [sp, #16]
|
||||
stz2g x3, [x1]
|
||||
stz2g sp, [x1]
|
||||
st2g x0, [x1, #-4096]!
|
||||
st2g x1, [x2, #4080]!
|
||||
st2g x2, [sp, #16]!
|
||||
st2g sp, [sp, #16]!
|
||||
stz2g x0, [x1, #-4096]!
|
||||
stz2g x1, [x2, #4080]!
|
||||
stz2g x2, [sp, #16]!
|
||||
stz2g sp, [sp, #16]!
|
||||
st2g x0, [x1], #-4096
|
||||
st2g x1, [x2], #4080
|
||||
st2g x2, [sp], #16
|
||||
st2g sp, [sp], #16
|
||||
stz2g x0, [x1], #-4096
|
||||
stz2g x1, [x2], #4080
|
||||
stz2g x2, [sp], #16
|
||||
stz2g sp, [sp], #16
|
||||
stgp x0, x1, [x2, #-1024]
|
||||
stgp x0, x1, [x2, #1008]
|
||||
stgp x0, x1, [sp, #16]
|
||||
stgp xzr, x1, [x2, #16]
|
||||
stgp x0, xzr, [x2, #16]
|
||||
stgp x0, xzr, [x2]
|
||||
stgp x0, x1, [x2, #-1024]!
|
||||
stgp x0, x1, [x2, #1008]!
|
||||
stgp x0, x1, [sp, #16]!
|
||||
stgp xzr, x1, [x2, #16]!
|
||||
stgp x0, xzr, [x2, #16]!
|
||||
stgp x0, x1, [x2], #-1024
|
||||
stgp x0, x1, [x2], #1008
|
||||
stgp x0, x1, [sp], #16
|
||||
stgp xzr, x1, [x2], #16
|
||||
stgp x0, xzr, [x2], #16
|
||||
ldg x0, [x1]
|
||||
ldg x2, [sp, #-4096]
|
||||
ldg x3, [x4, #4080]
|
||||
ldgm x0, [x1]
|
||||
ldgm x1, [sp]
|
||||
ldgm xzr, [x2]
|
||||
stgm x0, [x1]
|
||||
stgm x1, [sp]
|
||||
stgm xzr, [x2]
|
||||
stzgm x0, [x1]
|
||||
stzgm x1, [sp]
|
||||
stzgm xzr, [x2]
|
||||
|
||||
# CHECK: Instruction Info:
|
||||
# CHECK-NEXT: [1]: #uOps
|
||||
# CHECK-NEXT: [2]: Latency
|
||||
# CHECK-NEXT: [3]: RThroughput
|
||||
# CHECK-NEXT: [4]: MayLoad
|
||||
# CHECK-NEXT: [5]: MayStore
|
||||
# CHECK-NEXT: [6]: HasSideEffects (U)
|
||||
|
||||
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
|
||||
# CHECK-NEXT: 2 1 1.00 U irg x0, x1
|
||||
# CHECK-NEXT: 2 1 1.00 U irg sp, x1
|
||||
# CHECK-NEXT: 2 1 1.00 U irg x0, sp
|
||||
# CHECK-NEXT: 2 1 1.00 U irg x0, x1, x2
|
||||
# CHECK-NEXT: 2 1 1.00 U irg sp, x1, x2
|
||||
# CHECK-NEXT: 1 1 0.50 addg x0, x1, #0, #1
|
||||
# CHECK-NEXT: 1 1 0.50 addg sp, x2, #32, #3
|
||||
# CHECK-NEXT: 1 1 0.50 addg x0, sp, #64, #5
|
||||
# CHECK-NEXT: 1 1 0.50 addg x3, x4, #1008, #6
|
||||
# CHECK-NEXT: 1 1 0.50 addg x5, x6, #112, #15
|
||||
# CHECK-NEXT: 1 1 0.50 U subg x0, x1, #0, #1
|
||||
# CHECK-NEXT: 1 1 0.50 U subg sp, x2, #32, #3
|
||||
# CHECK-NEXT: 1 1 0.50 U subg x0, sp, #64, #5
|
||||
# CHECK-NEXT: 1 1 0.50 U subg x3, x4, #1008, #6
|
||||
# CHECK-NEXT: 1 1 0.50 U subg x5, x6, #112, #15
|
||||
# CHECK-NEXT: 1 1 0.25 gmi x0, x1, x2
|
||||
# CHECK-NEXT: 1 1 0.25 gmi x3, sp, x4
|
||||
# CHECK-NEXT: 1 1 0.25 gmi xzr, x0, x30
|
||||
# CHECK-NEXT: 1 1 0.25 gmi x30, x0, xzr
|
||||
# CHECK-NEXT: 1 1 0.25 subp x0, x1, x2
|
||||
# CHECK-NEXT: 1 1 0.25 U subps x0, x1, x2
|
||||
# CHECK-NEXT: 1 1 0.25 subp x0, sp, sp
|
||||
# CHECK-NEXT: 1 1 0.25 U subps x0, sp, sp
|
||||
# CHECK-NEXT: 1 1 0.25 U subps xzr, x0, x1
|
||||
# CHECK-NEXT: 1 1 0.25 U subps xzr, sp, sp
|
||||
# CHECK-NEXT: 1 1 0.50 * stg x0, [x1, #-4096]
|
||||
# CHECK-NEXT: 1 1 0.50 * stg x1, [x2, #4080]
|
||||
# CHECK-NEXT: 1 1 0.50 * stg x2, [sp, #16]
|
||||
# CHECK-NEXT: 1 1 0.50 * stg x3, [x1]
|
||||
# CHECK-NEXT: 1 1 0.50 * stg sp, [x1]
|
||||
# CHECK-NEXT: 1 1 0.50 * stzg x0, [x1, #-4096]
|
||||
# CHECK-NEXT: 1 1 0.50 * stzg x1, [x2, #4080]
|
||||
# CHECK-NEXT: 1 1 0.50 * stzg x2, [sp, #16]
|
||||
# CHECK-NEXT: 1 1 0.50 * stzg x3, [x1]
|
||||
# CHECK-NEXT: 1 1 0.50 * stzg sp, [x1]
|
||||
# CHECK-NEXT: 1 1 0.50 * U stg x0, [x1, #-4096]!
|
||||
# CHECK-NEXT: 1 1 0.50 * U stg x1, [x2, #4080]!
|
||||
# CHECK-NEXT: 1 1 0.50 * U stg x2, [sp, #16]!
|
||||
# CHECK-NEXT: 1 1 0.50 * U stg sp, [sp, #16]!
|
||||
# CHECK-NEXT: 1 1 0.50 * U stzg x0, [x1, #-4096]!
|
||||
# CHECK-NEXT: 1 1 0.50 * U stzg x1, [x2, #4080]!
|
||||
# CHECK-NEXT: 1 1 0.50 * U stzg x2, [sp, #16]!
|
||||
# CHECK-NEXT: 1 1 0.50 * U stzg sp, [sp, #16]!
|
||||
# CHECK-NEXT: 1 1 0.50 * U stg x0, [x1], #-4096
|
||||
# CHECK-NEXT: 1 1 0.50 * U stg x1, [x2], #4080
|
||||
# CHECK-NEXT: 1 1 0.50 * U stg x2, [sp], #16
|
||||
# CHECK-NEXT: 1 1 0.50 * U stg sp, [sp], #16
|
||||
# CHECK-NEXT: 1 1 0.50 * U stzg x0, [x1], #-4096
|
||||
# CHECK-NEXT: 1 1 0.50 * U stzg x1, [x2], #4080
|
||||
# CHECK-NEXT: 1 1 0.50 * U stzg x2, [sp], #16
|
||||
# CHECK-NEXT: 1 1 0.50 * U stzg sp, [sp], #16
|
||||
# CHECK-NEXT: 2 1 1.00 * st2g x0, [x1, #-4096]
|
||||
# CHECK-NEXT: 2 1 1.00 * st2g x1, [x2, #4080]
|
||||
# CHECK-NEXT: 2 1 1.00 * st2g x2, [sp, #16]
|
||||
# CHECK-NEXT: 2 1 1.00 * st2g x3, [x1]
|
||||
# CHECK-NEXT: 2 1 1.00 * st2g sp, [x1]
|
||||
# CHECK-NEXT: 2 1 1.00 * stz2g x0, [x1, #-4096]
|
||||
# CHECK-NEXT: 2 1 1.00 * stz2g x1, [x2, #4080]
|
||||
# CHECK-NEXT: 2 1 1.00 * stz2g x2, [sp, #16]
|
||||
# CHECK-NEXT: 2 1 1.00 * stz2g x3, [x1]
|
||||
# CHECK-NEXT: 2 1 1.00 * stz2g sp, [x1]
|
||||
# CHECK-NEXT: 2 1 1.00 * U st2g x0, [x1, #-4096]!
|
||||
# CHECK-NEXT: 2 1 1.00 * U st2g x1, [x2, #4080]!
|
||||
# CHECK-NEXT: 2 1 1.00 * U st2g x2, [sp, #16]!
|
||||
# CHECK-NEXT: 2 1 1.00 * U st2g sp, [sp, #16]!
|
||||
# CHECK-NEXT: 2 1 1.00 * U stz2g x0, [x1, #-4096]!
|
||||
# CHECK-NEXT: 2 1 1.00 * U stz2g x1, [x2, #4080]!
|
||||
# CHECK-NEXT: 2 1 1.00 * U stz2g x2, [sp, #16]!
|
||||
# CHECK-NEXT: 2 1 1.00 * U stz2g sp, [sp, #16]!
|
||||
# CHECK-NEXT: 2 1 1.00 * U st2g x0, [x1], #-4096
|
||||
# CHECK-NEXT: 2 1 1.00 * U st2g x1, [x2], #4080
|
||||
# CHECK-NEXT: 2 1 1.00 * U st2g x2, [sp], #16
|
||||
# CHECK-NEXT: 2 1 1.00 * U st2g sp, [sp], #16
|
||||
# CHECK-NEXT: 2 1 1.00 * U stz2g x0, [x1], #-4096
|
||||
# CHECK-NEXT: 2 1 1.00 * U stz2g x1, [x2], #4080
|
||||
# CHECK-NEXT: 2 1 1.00 * U stz2g x2, [sp], #16
|
||||
# CHECK-NEXT: 2 1 1.00 * U stz2g sp, [sp], #16
|
||||
# CHECK-NEXT: 2 1 1.00 * stgp x0, x1, [x2, #-1024]
|
||||
# CHECK-NEXT: 2 1 1.00 * stgp x0, x1, [x2, #1008]
|
||||
# CHECK-NEXT: 2 1 1.00 * stgp x0, x1, [sp, #16]
|
||||
# CHECK-NEXT: 2 1 1.00 * stgp xzr, x1, [x2, #16]
|
||||
# CHECK-NEXT: 2 1 1.00 * stgp x0, xzr, [x2, #16]
|
||||
# CHECK-NEXT: 2 1 1.00 * stgp x0, xzr, [x2]
|
||||
# CHECK-NEXT: 2 1 1.00 * stgp x0, x1, [x2, #-1024]!
|
||||
# CHECK-NEXT: 2 1 1.00 * stgp x0, x1, [x2, #1008]!
|
||||
# CHECK-NEXT: 2 1 1.00 * stgp x0, x1, [sp, #16]!
|
||||
# CHECK-NEXT: 2 1 1.00 * stgp xzr, x1, [x2, #16]!
|
||||
# CHECK-NEXT: 2 1 1.00 * stgp x0, xzr, [x2, #16]!
|
||||
# CHECK-NEXT: 2 1 1.00 * stgp x0, x1, [x2], #-1024
|
||||
# CHECK-NEXT: 2 1 1.00 * stgp x0, x1, [x2], #1008
|
||||
# CHECK-NEXT: 2 1 1.00 * stgp x0, x1, [sp], #16
|
||||
# CHECK-NEXT: 2 1 1.00 * stgp xzr, x1, [x2], #16
|
||||
# CHECK-NEXT: 2 1 1.00 * stgp x0, xzr, [x2], #16
|
||||
# CHECK-NEXT: 2 4 0.50 * ldg x0, [x1]
|
||||
# CHECK-NEXT: 2 4 0.50 * ldg x2, [sp, #-4096]
|
||||
# CHECK-NEXT: 2 4 0.50 * ldg x3, [x4, #4080]
|
||||
# CHECK-NEXT: 2 4 0.50 * U ldgm x0, [x1]
|
||||
# CHECK-NEXT: 2 4 0.50 * U ldgm x1, [sp]
|
||||
# CHECK-NEXT: 2 4 0.50 * U ldgm xzr, [x2]
|
||||
# CHECK-NEXT: 1 1 0.50 U stgm x0, [x1]
|
||||
# CHECK-NEXT: 1 1 0.50 U stgm x1, [sp]
|
||||
# CHECK-NEXT: 1 1 0.50 U stgm xzr, [x2]
|
||||
# CHECK-NEXT: 1 1 0.50 U stzgm x0, [x1]
|
||||
# CHECK-NEXT: 1 1 0.50 U stzgm x1, [sp]
|
||||
# CHECK-NEXT: 1 1 0.50 U stzgm xzr, [x2]
|
||||
|
||||
# CHECK: Resources:
|
||||
# CHECK-NEXT: [0.0] - Ampere1BUnitA
|
||||
# CHECK-NEXT: [0.1] - Ampere1BUnitA
|
||||
# CHECK-NEXT: [1.0] - Ampere1BUnitB
|
||||
# CHECK-NEXT: [1.1] - Ampere1BUnitB
|
||||
# CHECK-NEXT: [2] - Ampere1BUnitBS
|
||||
# CHECK-NEXT: [3.0] - Ampere1BUnitL
|
||||
# CHECK-NEXT: [3.1] - Ampere1BUnitL
|
||||
# CHECK-NEXT: [4.0] - Ampere1BUnitS
|
||||
# CHECK-NEXT: [4.1] - Ampere1BUnitS
|
||||
# CHECK-NEXT: [5] - Ampere1BUnitX
|
||||
# CHECK-NEXT: [6] - Ampere1BUnitY
|
||||
# CHECK-NEXT: [7] - Ampere1BUnitZ
|
||||
|
||||
# CHECK: Resource pressure per iteration:
|
||||
# CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2] [3.0] [3.1] [4.0] [4.1] [5] [6] [7]
|
||||
# CHECK-NEXT: 2.50 2.50 13.00 13.00 5.00 3.00 3.00 58.00 58.00 - - -
|
||||
|
||||
# CHECK: Resource pressure by instruction:
|
||||
# CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2] [3.0] [3.1] [4.0] [4.1] [5] [6] [7] Instructions:
|
||||
# CHECK-NEXT: - - 0.50 0.50 1.00 - - - - - - - irg x0, x1
|
||||
# CHECK-NEXT: - - 0.50 0.50 1.00 - - - - - - - irg sp, x1
|
||||
# CHECK-NEXT: - - 0.50 0.50 1.00 - - - - - - - irg x0, sp
|
||||
# CHECK-NEXT: - - 0.50 0.50 1.00 - - - - - - - irg x0, x1, x2
|
||||
# CHECK-NEXT: - - 0.50 0.50 1.00 - - - - - - - irg sp, x1, x2
|
||||
# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - addg x0, x1, #0, #1
|
||||
# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - addg sp, x2, #32, #3
|
||||
# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - addg x0, sp, #64, #5
|
||||
# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - addg x3, x4, #1008, #6
|
||||
# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - addg x5, x6, #112, #15
|
||||
# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - subg x0, x1, #0, #1
|
||||
# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - subg sp, x2, #32, #3
|
||||
# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - subg x0, sp, #64, #5
|
||||
# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - subg x3, x4, #1008, #6
|
||||
# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - subg x5, x6, #112, #15
|
||||
# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - gmi x0, x1, x2
|
||||
# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - gmi x3, sp, x4
|
||||
# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - gmi xzr, x0, x30
|
||||
# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - gmi x30, x0, xzr
|
||||
# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - subp x0, x1, x2
|
||||
# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - subps x0, x1, x2
|
||||
# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - subp x0, sp, sp
|
||||
# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - subps x0, sp, sp
|
||||
# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - subps xzr, x0, x1
|
||||
# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - subps xzr, sp, sp
|
||||
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stg x0, [x1, #-4096]
|
||||
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stg x1, [x2, #4080]
|
||||
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stg x2, [sp, #16]
|
||||
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stg x3, [x1]
|
||||
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stg sp, [x1]
|
||||
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stzg x0, [x1, #-4096]
|
||||
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stzg x1, [x2, #4080]
|
||||
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stzg x2, [sp, #16]
|
||||
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stzg x3, [x1]
|
||||
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stzg sp, [x1]
|
||||
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stg x0, [x1, #-4096]!
|
||||
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stg x1, [x2, #4080]!
|
||||
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stg x2, [sp, #16]!
|
||||
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stg sp, [sp, #16]!
|
||||
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stzg x0, [x1, #-4096]!
|
||||
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stzg x1, [x2, #4080]!
|
||||
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stzg x2, [sp, #16]!
|
||||
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stzg sp, [sp, #16]!
|
||||
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stg x0, [x1], #-4096
|
||||
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stg x1, [x2], #4080
|
||||
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stg x2, [sp], #16
|
||||
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stg sp, [sp], #16
|
||||
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stzg x0, [x1], #-4096
|
||||
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stzg x1, [x2], #4080
|
||||
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stzg x2, [sp], #16
|
||||
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stzg sp, [sp], #16
|
||||
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - st2g x0, [x1, #-4096]
|
||||
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - st2g x1, [x2, #4080]
|
||||
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - st2g x2, [sp, #16]
|
||||
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - st2g x3, [x1]
|
||||
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - st2g sp, [x1]
|
||||
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - stz2g x0, [x1, #-4096]
|
||||
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - stz2g x1, [x2, #4080]
|
||||
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - stz2g x2, [sp, #16]
|
||||
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - stz2g x3, [x1]
|
||||
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - stz2g sp, [x1]
|
||||
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - st2g x0, [x1, #-4096]!
|
||||
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - st2g x1, [x2, #4080]!
|
||||
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - st2g x2, [sp, #16]!
|
||||
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - st2g sp, [sp, #16]!
|
||||
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - stz2g x0, [x1, #-4096]!
|
||||
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - stz2g x1, [x2, #4080]!
|
||||
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - stz2g x2, [sp, #16]!
|
||||
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - stz2g sp, [sp, #16]!
|
||||
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - st2g x0, [x1], #-4096
|
||||
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - st2g x1, [x2], #4080
|
||||
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - st2g x2, [sp], #16
|
||||
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - st2g sp, [sp], #16
|
||||
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - stz2g x0, [x1], #-4096
|
||||
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - stz2g x1, [x2], #4080
|
||||
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - stz2g x2, [sp], #16
|
||||
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - stz2g sp, [sp], #16
|
||||
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - stgp x0, x1, [x2, #-1024]
|
||||
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - stgp x0, x1, [x2, #1008]
|
||||
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - stgp x0, x1, [sp, #16]
|
||||
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - stgp xzr, x1, [x2, #16]
|
||||
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - stgp x0, xzr, [x2, #16]
|
||||
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - stgp x0, xzr, [x2]
|
||||
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - stgp x0, x1, [x2, #-1024]!
|
||||
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - stgp x0, x1, [x2, #1008]!
|
||||
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - stgp x0, x1, [sp, #16]!
|
||||
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - stgp xzr, x1, [x2, #16]!
|
||||
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - stgp x0, xzr, [x2, #16]!
|
||||
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - stgp x0, x1, [x2], #-1024
|
||||
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - stgp x0, x1, [x2], #1008
|
||||
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - stgp x0, x1, [sp], #16
|
||||
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - stgp xzr, x1, [x2], #16
|
||||
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - stgp x0, xzr, [x2], #16
|
||||
# CHECK-NEXT: - - 0.50 0.50 - 0.50 0.50 - - - - - ldg x0, [x1]
|
||||
# CHECK-NEXT: - - 0.50 0.50 - 0.50 0.50 - - - - - ldg x2, [sp, #-4096]
|
||||
# CHECK-NEXT: - - 0.50 0.50 - 0.50 0.50 - - - - - ldg x3, [x4, #4080]
|
||||
# CHECK-NEXT: - - 0.50 0.50 - 0.50 0.50 - - - - - ldgm x0, [x1]
|
||||
# CHECK-NEXT: - - 0.50 0.50 - 0.50 0.50 - - - - - ldgm x1, [sp]
|
||||
# CHECK-NEXT: - - 0.50 0.50 - 0.50 0.50 - - - - - ldgm xzr, [x2]
|
||||
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stgm x0, [x1]
|
||||
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stgm x1, [sp]
|
||||
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stgm xzr, [x2]
|
||||
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stzgm x0, [x1]
|
||||
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stzgm x1, [sp]
|
||||
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stzgm xzr, [x2]
|
3235
llvm/test/tools/llvm-mca/AArch64/Ampere/Ampere1B/neon-instructions.s
Normal file
3235
llvm/test/tools/llvm-mca/AArch64/Ampere/Ampere1B/neon-instructions.s
Normal file
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,31 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
|
||||
# RUN: llvm-mca -march=aarch64 -mcpu=ampere1b -resource-pressure=false < %s | FileCheck %s
|
||||
|
||||
add w0, w1, w2, lsl #0
|
||||
sub x3, x4, x5, lsl #1
|
||||
adds x6, x7, x8, lsr #2
|
||||
subs x9, x10, x11, asr #3
|
||||
|
||||
# CHECK: Iterations: 100
|
||||
# CHECK-NEXT: Instructions: 400
|
||||
# CHECK-NEXT: Total Cycles: 156
|
||||
# CHECK-NEXT: Total uOps: 600
|
||||
|
||||
# CHECK: Dispatch Width: 12
|
||||
# CHECK-NEXT: uOps Per Cycle: 3.85
|
||||
# CHECK-NEXT: IPC: 2.56
|
||||
# CHECK-NEXT: Block RThroughput: 1.0
|
||||
|
||||
# CHECK: Instruction Info:
|
||||
# CHECK-NEXT: [1]: #uOps
|
||||
# CHECK-NEXT: [2]: Latency
|
||||
# CHECK-NEXT: [3]: RThroughput
|
||||
# CHECK-NEXT: [4]: MayLoad
|
||||
# CHECK-NEXT: [5]: MayStore
|
||||
# CHECK-NEXT: [6]: HasSideEffects (U)
|
||||
|
||||
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
|
||||
# CHECK-NEXT: 1 1 0.25 add w0, w1, w2
|
||||
# CHECK-NEXT: 1 1 0.25 sub x3, x4, x5, lsl #1
|
||||
# CHECK-NEXT: 2 2 0.50 adds x6, x7, x8, lsr #2
|
||||
# CHECK-NEXT: 2 2 0.50 subs x9, x10, x11, asr #3
|
Loading…
x
Reference in New Issue
Block a user