[AArch64] Initial Ampere1B scheduling model (#81341)

The Ampere1B core is enabled with a new scheduling/pipeline model, as it
provides significant updates over the Ampere1 core; it reduces latencies
on many instructions, has some micro-ops reassigned between the XY and X
units, and provides modelling for the instructions added since Ampere1
and Ampere1A.

As this is the first model implementing the CSSC instructions, we add
HasCSSC to the UnsupportedFeatures of all other models that have
CompleteModel set.
    
Testcases are added under llvm-mca: these showed that the FullFP16 feature
was missing from the processor feature list, so it is added as part of this
commit.

This *adds tests and additional fixes* compared to the reverted #81338.
This commit is contained in:
Philipp Tomsich 2024-02-14 06:23:14 -08:00 committed by GitHub
parent 43c7eb5d7b
commit dd1897c6cb
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
24 changed files with 8587 additions and 19 deletions

View File

@ -837,6 +837,7 @@ include "AArch64SchedA64FX.td"
include "AArch64SchedThunderX3T110.td"
include "AArch64SchedTSV110.td"
include "AArch64SchedAmpere1.td"
include "AArch64SchedAmpere1B.td"
include "AArch64SchedNeoverseN1.td"
include "AArch64SchedNeoverseN2.td"
include "AArch64SchedNeoverseV1.td"
@ -1555,7 +1556,7 @@ def ProcessorFeatures {
FeatureMTE, FeatureSSBS, FeatureRandGen,
FeatureSB, FeatureSM4, FeatureSHA2,
FeatureSHA3, FeatureAES, FeatureCSSC,
FeatureWFxT];
FeatureWFxT, FeatureFullFP16];
// ETE and TRBE are future architecture extensions. We temporarily enable them
// by default for users targeting generic AArch64. The extensions do not
@ -1723,7 +1724,7 @@ def : ProcessorModel<"ampere1", Ampere1Model, ProcessorFeatures.Ampere1,
def : ProcessorModel<"ampere1a", Ampere1Model, ProcessorFeatures.Ampere1A,
[TuneAmpere1A]>;
def : ProcessorModel<"ampere1b", Ampere1Model, ProcessorFeatures.Ampere1B,
def : ProcessorModel<"ampere1b", Ampere1BModel, ProcessorFeatures.Ampere1B,
[TuneAmpere1B]>;
//===----------------------------------------------------------------------===//

View File

@ -29,7 +29,7 @@ def CortexA53Model : SchedMachineModel {
list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
PAUnsupported.F,
SMEUnsupported.F,
[HasMTE]);
[HasMTE, HasCSSC]);
}

View File

@ -34,7 +34,7 @@ def CortexA57Model : SchedMachineModel {
list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
PAUnsupported.F,
SMEUnsupported.F,
[HasMTE]);
[HasMTE, HasCSSC]);
}
//===----------------------------------------------------------------------===//

View File

@ -22,7 +22,8 @@ def A64FXModel : SchedMachineModel {
list<Predicate> UnsupportedFeatures = !listconcat(SMEUnsupported.F, SVEUnsupported.F,
[HasMTE, HasMatMulInt8, HasBF16,
HasPAuth, HasPAuthLR, HasCPA]);
HasPAuth, HasPAuthLR, HasCPA,
HasCSSC]);
let FullInstRWOverlapCheck = 0;
}

File diff suppressed because it is too large Load Diff

View File

@ -21,7 +21,7 @@ def CycloneModel : SchedMachineModel {
list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
PAUnsupported.F,
SMEUnsupported.F,
[HasMTE]);
[HasMTE, HasCSSC]);
}
//===----------------------------------------------------------------------===//

View File

@ -27,7 +27,7 @@ def ExynosM3Model : SchedMachineModel {
list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
PAUnsupported.F,
SMEUnsupported.F,
[HasMTE]);
[HasMTE, HasCSSC]);
}
//===----------------------------------------------------------------------===//

View File

@ -27,7 +27,7 @@ def ExynosM4Model : SchedMachineModel {
list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
PAUnsupported.F,
SMEUnsupported.F,
[HasMTE]);
[HasMTE, HasCSSC]);
}
//===----------------------------------------------------------------------===//

View File

@ -27,7 +27,7 @@ def ExynosM5Model : SchedMachineModel {
list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
PAUnsupported.F,
SMEUnsupported.F,
[HasMTE]);
[HasMTE, HasCSSC]);
}
//===----------------------------------------------------------------------===//

View File

@ -26,7 +26,7 @@ def FalkorModel : SchedMachineModel {
list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
PAUnsupported.F,
SMEUnsupported.F,
[HasMTE]);
[HasMTE, HasCSSC]);
// FIXME: Remove when all errors have been fixed.
let FullInstRWOverlapCheck = 0;
}

View File

@ -30,7 +30,7 @@ def KryoModel : SchedMachineModel {
list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
PAUnsupported.F,
SMEUnsupported.F,
[HasMTE]);
[HasMTE, HasCSSC]);
// FIXME: Remove when all errors have been fixed.
let FullInstRWOverlapCheck = 0;
}

View File

@ -25,7 +25,7 @@ def NeoverseN1Model : SchedMachineModel {
list<Predicate> UnsupportedFeatures = !listconcat(PAUnsupported.F,
SMEUnsupported.F,
SVEUnsupported.F,
[HasMTE]);
[HasMTE, HasCSSC]);
}
//===----------------------------------------------------------------------===//

View File

@ -19,7 +19,7 @@ def NeoverseN2Model : SchedMachineModel {
let CompleteModel = 1;
list<Predicate> UnsupportedFeatures = !listconcat(SMEUnsupported.F,
[HasSVE2p1, HasPAuthLR, HasCPA]);
[HasSVE2p1, HasPAuthLR, HasCPA, HasCSSC]);
}
//===----------------------------------------------------------------------===//

View File

@ -28,7 +28,8 @@ def NeoverseV1Model : SchedMachineModel {
list<Predicate> UnsupportedFeatures = !listconcat(SVE2Unsupported.F,
SMEUnsupported.F,
[HasMTE, HasCPA]);
[HasMTE, HasCPA,
HasCSSC]);
}
//===----------------------------------------------------------------------===//

View File

@ -22,7 +22,8 @@ def NeoverseV2Model : SchedMachineModel {
let CompleteModel = 1;
list<Predicate> UnsupportedFeatures = !listconcat(SMEUnsupported.F,
[HasSVE2p1, HasCPA]);
[HasSVE2p1, HasCPA,
HasCSSC]);
}
//===----------------------------------------------------------------------===//

View File

@ -27,7 +27,7 @@ def TSV110Model : SchedMachineModel {
list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
PAUnsupported.F,
SMEUnsupported.F,
[HasMTE]);
[HasMTE, HasCSSC]);
}
// Define each kind of processor resource and number available on the TSV110,

View File

@ -28,7 +28,7 @@ def ThunderXT8XModel : SchedMachineModel {
list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
PAUnsupported.F,
SMEUnsupported.F,
[HasMTE]);
[HasMTE, HasCSSC]);
// FIXME: Remove when all errors have been fixed.
let FullInstRWOverlapCheck = 0;
}

View File

@ -28,7 +28,7 @@ def ThunderX2T99Model : SchedMachineModel {
list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
PAUnsupported.F,
SMEUnsupported.F,
[HasMTE]);
[HasMTE, HasCSSC]);
// FIXME: Remove when all errors have been fixed.
let FullInstRWOverlapCheck = 0;
}

View File

@ -27,7 +27,7 @@ def ThunderX3T110Model : SchedMachineModel {
list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
PAUnsupported.F,
SMEUnsupported.F,
[HasMTE]);
[HasMTE, HasCSSC]);
// FIXME: Remove when all errors have been fixed.
let FullInstRWOverlapCheck = 0;
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,76 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=aarch64 -mcpu=ampere1b -instruction-tables < %s | FileCheck %s
# Each mnemonic below is exercised once with 32-bit (w) registers and once
# with 64-bit (x) registers, so both register widths are covered.
abs w1, w2
abs x2, x3
cnt w3, w4
cnt x4, x5
ctz w5, w6
ctz x6, x7
smax w7, w8, w9
smax x8, x9, x10
umax w9, w10, w11
umax x10, x11, x12
smin w11, w12, w13
smin x12, x13, x14
umin w13, w14, w15
umin x14, x15, x16
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 1 0.25 abs w1, w2
# CHECK-NEXT: 1 1 0.25 abs x2, x3
# CHECK-NEXT: 1 3 1.00 cnt w3, w4
# CHECK-NEXT: 1 3 1.00 cnt x4, x5
# CHECK-NEXT: 1 1 0.50 ctz w5, w6
# CHECK-NEXT: 1 1 0.50 ctz x6, x7
# CHECK-NEXT: 2 1 0.50 smax w7, w8, w9
# CHECK-NEXT: 2 1 0.50 smax x8, x9, x10
# CHECK-NEXT: 2 1 0.50 umax w9, w10, w11
# CHECK-NEXT: 2 1 0.50 umax x10, x11, x12
# CHECK-NEXT: 2 1 0.50 smin w11, w12, w13
# CHECK-NEXT: 2 1 0.50 smin x12, x13, x14
# CHECK-NEXT: 2 1 0.50 umin w13, w14, w15
# CHECK-NEXT: 2 1 0.50 umin x14, x15, x16
# CHECK: Resources:
# CHECK-NEXT: [0.0] - Ampere1BUnitA
# CHECK-NEXT: [0.1] - Ampere1BUnitA
# CHECK-NEXT: [1.0] - Ampere1BUnitB
# CHECK-NEXT: [1.1] - Ampere1BUnitB
# CHECK-NEXT: [2] - Ampere1BUnitBS
# CHECK-NEXT: [3.0] - Ampere1BUnitL
# CHECK-NEXT: [3.1] - Ampere1BUnitL
# CHECK-NEXT: [4.0] - Ampere1BUnitS
# CHECK-NEXT: [4.1] - Ampere1BUnitS
# CHECK-NEXT: [5] - Ampere1BUnitX
# CHECK-NEXT: [6] - Ampere1BUnitY
# CHECK-NEXT: [7] - Ampere1BUnitZ
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2] [3.0] [3.1] [4.0] [4.1] [5] [6] [7]
# CHECK-NEXT: 6.50 6.50 3.50 3.50 2.00 - - - - - - -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2] [3.0] [3.1] [4.0] [4.1] [5] [6] [7] Instructions:
# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - abs w1, w2
# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - abs x2, x3
# CHECK-NEXT: - - - - 1.00 - - - - - - - cnt w3, w4
# CHECK-NEXT: - - - - 1.00 - - - - - - - cnt x4, x5
# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - ctz w5, w6
# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - ctz x6, x7
# CHECK-NEXT: 0.75 0.75 0.25 0.25 - - - - - - - - smax w7, w8, w9
# CHECK-NEXT: 0.75 0.75 0.25 0.25 - - - - - - - - smax x8, x9, x10
# CHECK-NEXT: 0.75 0.75 0.25 0.25 - - - - - - - - umax w9, w10, w11
# CHECK-NEXT: 0.75 0.75 0.25 0.25 - - - - - - - - umax x10, x11, x12
# CHECK-NEXT: 0.75 0.75 0.25 0.25 - - - - - - - - smin w11, w12, w13
# CHECK-NEXT: 0.75 0.75 0.25 0.25 - - - - - - - - smin x12, x13, x14
# CHECK-NEXT: 0.75 0.75 0.25 0.25 - - - - - - - - umin w13, w14, w15
# CHECK-NEXT: 0.75 0.75 0.25 0.25 - - - - - - - - umin x14, x15, x16

View File

@ -0,0 +1,349 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=aarch64 -mcpu=ampere1b -instruction-tables < %s | FileCheck %s
# Instruction-table checks for -mcpu=ampere1b covering irg, addg, subg, gmi,
# subp/subps, stg/stzg, st2g/stz2g, stgp, ldg, ldgm, stgm and stzgm, including
# the offset, pre-indexed and post-indexed store forms listed below.
# NOTE(review): these look like the memory-tagging (MTE) instructions - confirm.
irg x0, x1
irg sp, x1
irg x0, sp
irg x0, x1, x2
irg sp, x1, x2
addg x0, x1, #0, #1
addg sp, x2, #32, #3
addg x0, sp, #64, #5
addg x3, x4, #1008, #6
addg x5, x6, #112, #15
subg x0, x1, #0, #1
subg sp, x2, #32, #3
subg x0, sp, #64, #5
subg x3, x4, #1008, #6
subg x5, x6, #112, #15
gmi x0, x1, x2
gmi x3, sp, x4
gmi xzr, x0, x30
gmi x30, x0, xzr
subp x0, x1, x2
subps x0, x1, x2
subp x0, sp, sp
subps x0, sp, sp
subps xzr, x0, x1
subps xzr, sp, sp
stg x0, [x1, #-4096]
stg x1, [x2, #4080]
stg x2, [sp, #16]
stg x3, [x1]
stg sp, [x1]
stzg x0, [x1, #-4096]
stzg x1, [x2, #4080]
stzg x2, [sp, #16]
stzg x3, [x1]
stzg sp, [x1]
stg x0, [x1, #-4096]!
stg x1, [x2, #4080]!
stg x2, [sp, #16]!
stg sp, [sp, #16]!
stzg x0, [x1, #-4096]!
stzg x1, [x2, #4080]!
stzg x2, [sp, #16]!
stzg sp, [sp, #16]!
stg x0, [x1], #-4096
stg x1, [x2], #4080
stg x2, [sp], #16
stg sp, [sp], #16
stzg x0, [x1], #-4096
stzg x1, [x2], #4080
stzg x2, [sp], #16
stzg sp, [sp], #16
st2g x0, [x1, #-4096]
st2g x1, [x2, #4080]
st2g x2, [sp, #16]
st2g x3, [x1]
st2g sp, [x1]
stz2g x0, [x1, #-4096]
stz2g x1, [x2, #4080]
stz2g x2, [sp, #16]
stz2g x3, [x1]
stz2g sp, [x1]
st2g x0, [x1, #-4096]!
st2g x1, [x2, #4080]!
st2g x2, [sp, #16]!
st2g sp, [sp, #16]!
stz2g x0, [x1, #-4096]!
stz2g x1, [x2, #4080]!
stz2g x2, [sp, #16]!
stz2g sp, [sp, #16]!
st2g x0, [x1], #-4096
st2g x1, [x2], #4080
st2g x2, [sp], #16
st2g sp, [sp], #16
stz2g x0, [x1], #-4096
stz2g x1, [x2], #4080
stz2g x2, [sp], #16
stz2g sp, [sp], #16
stgp x0, x1, [x2, #-1024]
stgp x0, x1, [x2, #1008]
stgp x0, x1, [sp, #16]
stgp xzr, x1, [x2, #16]
stgp x0, xzr, [x2, #16]
stgp x0, xzr, [x2]
stgp x0, x1, [x2, #-1024]!
stgp x0, x1, [x2, #1008]!
stgp x0, x1, [sp, #16]!
stgp xzr, x1, [x2, #16]!
stgp x0, xzr, [x2, #16]!
stgp x0, x1, [x2], #-1024
stgp x0, x1, [x2], #1008
stgp x0, x1, [sp], #16
stgp xzr, x1, [x2], #16
stgp x0, xzr, [x2], #16
ldg x0, [x1]
ldg x2, [sp, #-4096]
ldg x3, [x4, #4080]
ldgm x0, [x1]
ldgm x1, [sp]
ldgm xzr, [x2]
stgm x0, [x1]
stgm x1, [sp]
stgm xzr, [x2]
stzgm x0, [x1]
stzgm x1, [sp]
stzgm xzr, [x2]
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 2 1 1.00 U irg x0, x1
# CHECK-NEXT: 2 1 1.00 U irg sp, x1
# CHECK-NEXT: 2 1 1.00 U irg x0, sp
# CHECK-NEXT: 2 1 1.00 U irg x0, x1, x2
# CHECK-NEXT: 2 1 1.00 U irg sp, x1, x2
# CHECK-NEXT: 1 1 0.50 addg x0, x1, #0, #1
# CHECK-NEXT: 1 1 0.50 addg sp, x2, #32, #3
# CHECK-NEXT: 1 1 0.50 addg x0, sp, #64, #5
# CHECK-NEXT: 1 1 0.50 addg x3, x4, #1008, #6
# CHECK-NEXT: 1 1 0.50 addg x5, x6, #112, #15
# CHECK-NEXT: 1 1 0.50 U subg x0, x1, #0, #1
# CHECK-NEXT: 1 1 0.50 U subg sp, x2, #32, #3
# CHECK-NEXT: 1 1 0.50 U subg x0, sp, #64, #5
# CHECK-NEXT: 1 1 0.50 U subg x3, x4, #1008, #6
# CHECK-NEXT: 1 1 0.50 U subg x5, x6, #112, #15
# CHECK-NEXT: 1 1 0.25 gmi x0, x1, x2
# CHECK-NEXT: 1 1 0.25 gmi x3, sp, x4
# CHECK-NEXT: 1 1 0.25 gmi xzr, x0, x30
# CHECK-NEXT: 1 1 0.25 gmi x30, x0, xzr
# CHECK-NEXT: 1 1 0.25 subp x0, x1, x2
# CHECK-NEXT: 1 1 0.25 U subps x0, x1, x2
# CHECK-NEXT: 1 1 0.25 subp x0, sp, sp
# CHECK-NEXT: 1 1 0.25 U subps x0, sp, sp
# CHECK-NEXT: 1 1 0.25 U subps xzr, x0, x1
# CHECK-NEXT: 1 1 0.25 U subps xzr, sp, sp
# CHECK-NEXT: 1 1 0.50 * stg x0, [x1, #-4096]
# CHECK-NEXT: 1 1 0.50 * stg x1, [x2, #4080]
# CHECK-NEXT: 1 1 0.50 * stg x2, [sp, #16]
# CHECK-NEXT: 1 1 0.50 * stg x3, [x1]
# CHECK-NEXT: 1 1 0.50 * stg sp, [x1]
# CHECK-NEXT: 1 1 0.50 * stzg x0, [x1, #-4096]
# CHECK-NEXT: 1 1 0.50 * stzg x1, [x2, #4080]
# CHECK-NEXT: 1 1 0.50 * stzg x2, [sp, #16]
# CHECK-NEXT: 1 1 0.50 * stzg x3, [x1]
# CHECK-NEXT: 1 1 0.50 * stzg sp, [x1]
# CHECK-NEXT: 1 1 0.50 * U stg x0, [x1, #-4096]!
# CHECK-NEXT: 1 1 0.50 * U stg x1, [x2, #4080]!
# CHECK-NEXT: 1 1 0.50 * U stg x2, [sp, #16]!
# CHECK-NEXT: 1 1 0.50 * U stg sp, [sp, #16]!
# CHECK-NEXT: 1 1 0.50 * U stzg x0, [x1, #-4096]!
# CHECK-NEXT: 1 1 0.50 * U stzg x1, [x2, #4080]!
# CHECK-NEXT: 1 1 0.50 * U stzg x2, [sp, #16]!
# CHECK-NEXT: 1 1 0.50 * U stzg sp, [sp, #16]!
# CHECK-NEXT: 1 1 0.50 * U stg x0, [x1], #-4096
# CHECK-NEXT: 1 1 0.50 * U stg x1, [x2], #4080
# CHECK-NEXT: 1 1 0.50 * U stg x2, [sp], #16
# CHECK-NEXT: 1 1 0.50 * U stg sp, [sp], #16
# CHECK-NEXT: 1 1 0.50 * U stzg x0, [x1], #-4096
# CHECK-NEXT: 1 1 0.50 * U stzg x1, [x2], #4080
# CHECK-NEXT: 1 1 0.50 * U stzg x2, [sp], #16
# CHECK-NEXT: 1 1 0.50 * U stzg sp, [sp], #16
# CHECK-NEXT: 2 1 1.00 * st2g x0, [x1, #-4096]
# CHECK-NEXT: 2 1 1.00 * st2g x1, [x2, #4080]
# CHECK-NEXT: 2 1 1.00 * st2g x2, [sp, #16]
# CHECK-NEXT: 2 1 1.00 * st2g x3, [x1]
# CHECK-NEXT: 2 1 1.00 * st2g sp, [x1]
# CHECK-NEXT: 2 1 1.00 * stz2g x0, [x1, #-4096]
# CHECK-NEXT: 2 1 1.00 * stz2g x1, [x2, #4080]
# CHECK-NEXT: 2 1 1.00 * stz2g x2, [sp, #16]
# CHECK-NEXT: 2 1 1.00 * stz2g x3, [x1]
# CHECK-NEXT: 2 1 1.00 * stz2g sp, [x1]
# CHECK-NEXT: 2 1 1.00 * U st2g x0, [x1, #-4096]!
# CHECK-NEXT: 2 1 1.00 * U st2g x1, [x2, #4080]!
# CHECK-NEXT: 2 1 1.00 * U st2g x2, [sp, #16]!
# CHECK-NEXT: 2 1 1.00 * U st2g sp, [sp, #16]!
# CHECK-NEXT: 2 1 1.00 * U stz2g x0, [x1, #-4096]!
# CHECK-NEXT: 2 1 1.00 * U stz2g x1, [x2, #4080]!
# CHECK-NEXT: 2 1 1.00 * U stz2g x2, [sp, #16]!
# CHECK-NEXT: 2 1 1.00 * U stz2g sp, [sp, #16]!
# CHECK-NEXT: 2 1 1.00 * U st2g x0, [x1], #-4096
# CHECK-NEXT: 2 1 1.00 * U st2g x1, [x2], #4080
# CHECK-NEXT: 2 1 1.00 * U st2g x2, [sp], #16
# CHECK-NEXT: 2 1 1.00 * U st2g sp, [sp], #16
# CHECK-NEXT: 2 1 1.00 * U stz2g x0, [x1], #-4096
# CHECK-NEXT: 2 1 1.00 * U stz2g x1, [x2], #4080
# CHECK-NEXT: 2 1 1.00 * U stz2g x2, [sp], #16
# CHECK-NEXT: 2 1 1.00 * U stz2g sp, [sp], #16
# CHECK-NEXT: 2 1 1.00 * stgp x0, x1, [x2, #-1024]
# CHECK-NEXT: 2 1 1.00 * stgp x0, x1, [x2, #1008]
# CHECK-NEXT: 2 1 1.00 * stgp x0, x1, [sp, #16]
# CHECK-NEXT: 2 1 1.00 * stgp xzr, x1, [x2, #16]
# CHECK-NEXT: 2 1 1.00 * stgp x0, xzr, [x2, #16]
# CHECK-NEXT: 2 1 1.00 * stgp x0, xzr, [x2]
# CHECK-NEXT: 2 1 1.00 * stgp x0, x1, [x2, #-1024]!
# CHECK-NEXT: 2 1 1.00 * stgp x0, x1, [x2, #1008]!
# CHECK-NEXT: 2 1 1.00 * stgp x0, x1, [sp, #16]!
# CHECK-NEXT: 2 1 1.00 * stgp xzr, x1, [x2, #16]!
# CHECK-NEXT: 2 1 1.00 * stgp x0, xzr, [x2, #16]!
# CHECK-NEXT: 2 1 1.00 * stgp x0, x1, [x2], #-1024
# CHECK-NEXT: 2 1 1.00 * stgp x0, x1, [x2], #1008
# CHECK-NEXT: 2 1 1.00 * stgp x0, x1, [sp], #16
# CHECK-NEXT: 2 1 1.00 * stgp xzr, x1, [x2], #16
# CHECK-NEXT: 2 1 1.00 * stgp x0, xzr, [x2], #16
# CHECK-NEXT: 2 4 0.50 * ldg x0, [x1]
# CHECK-NEXT: 2 4 0.50 * ldg x2, [sp, #-4096]
# CHECK-NEXT: 2 4 0.50 * ldg x3, [x4, #4080]
# CHECK-NEXT: 2 4 0.50 * U ldgm x0, [x1]
# CHECK-NEXT: 2 4 0.50 * U ldgm x1, [sp]
# CHECK-NEXT: 2 4 0.50 * U ldgm xzr, [x2]
# CHECK-NEXT: 1 1 0.50 U stgm x0, [x1]
# CHECK-NEXT: 1 1 0.50 U stgm x1, [sp]
# CHECK-NEXT: 1 1 0.50 U stgm xzr, [x2]
# CHECK-NEXT: 1 1 0.50 U stzgm x0, [x1]
# CHECK-NEXT: 1 1 0.50 U stzgm x1, [sp]
# CHECK-NEXT: 1 1 0.50 U stzgm xzr, [x2]
# CHECK: Resources:
# CHECK-NEXT: [0.0] - Ampere1BUnitA
# CHECK-NEXT: [0.1] - Ampere1BUnitA
# CHECK-NEXT: [1.0] - Ampere1BUnitB
# CHECK-NEXT: [1.1] - Ampere1BUnitB
# CHECK-NEXT: [2] - Ampere1BUnitBS
# CHECK-NEXT: [3.0] - Ampere1BUnitL
# CHECK-NEXT: [3.1] - Ampere1BUnitL
# CHECK-NEXT: [4.0] - Ampere1BUnitS
# CHECK-NEXT: [4.1] - Ampere1BUnitS
# CHECK-NEXT: [5] - Ampere1BUnitX
# CHECK-NEXT: [6] - Ampere1BUnitY
# CHECK-NEXT: [7] - Ampere1BUnitZ
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2] [3.0] [3.1] [4.0] [4.1] [5] [6] [7]
# CHECK-NEXT: 2.50 2.50 13.00 13.00 5.00 3.00 3.00 58.00 58.00 - - -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2] [3.0] [3.1] [4.0] [4.1] [5] [6] [7] Instructions:
# CHECK-NEXT: - - 0.50 0.50 1.00 - - - - - - - irg x0, x1
# CHECK-NEXT: - - 0.50 0.50 1.00 - - - - - - - irg sp, x1
# CHECK-NEXT: - - 0.50 0.50 1.00 - - - - - - - irg x0, sp
# CHECK-NEXT: - - 0.50 0.50 1.00 - - - - - - - irg x0, x1, x2
# CHECK-NEXT: - - 0.50 0.50 1.00 - - - - - - - irg sp, x1, x2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - addg x0, x1, #0, #1
# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - addg sp, x2, #32, #3
# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - addg x0, sp, #64, #5
# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - addg x3, x4, #1008, #6
# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - addg x5, x6, #112, #15
# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - subg x0, x1, #0, #1
# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - subg sp, x2, #32, #3
# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - subg x0, sp, #64, #5
# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - subg x3, x4, #1008, #6
# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - subg x5, x6, #112, #15
# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - gmi x0, x1, x2
# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - gmi x3, sp, x4
# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - gmi xzr, x0, x30
# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - gmi x30, x0, xzr
# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - subp x0, x1, x2
# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - subps x0, x1, x2
# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - subp x0, sp, sp
# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - subps x0, sp, sp
# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - subps xzr, x0, x1
# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - subps xzr, sp, sp
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stg x0, [x1, #-4096]
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stg x1, [x2, #4080]
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stg x2, [sp, #16]
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stg x3, [x1]
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stg sp, [x1]
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stzg x0, [x1, #-4096]
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stzg x1, [x2, #4080]
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stzg x2, [sp, #16]
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stzg x3, [x1]
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stzg sp, [x1]
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stg x0, [x1, #-4096]!
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stg x1, [x2, #4080]!
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stg x2, [sp, #16]!
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stg sp, [sp, #16]!
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stzg x0, [x1, #-4096]!
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stzg x1, [x2, #4080]!
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stzg x2, [sp, #16]!
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stzg sp, [sp, #16]!
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stg x0, [x1], #-4096
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stg x1, [x2], #4080
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stg x2, [sp], #16
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stg sp, [sp], #16
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stzg x0, [x1], #-4096
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stzg x1, [x2], #4080
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stzg x2, [sp], #16
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stzg sp, [sp], #16
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - st2g x0, [x1, #-4096]
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - st2g x1, [x2, #4080]
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - st2g x2, [sp, #16]
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - st2g x3, [x1]
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - st2g sp, [x1]
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - stz2g x0, [x1, #-4096]
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - stz2g x1, [x2, #4080]
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - stz2g x2, [sp, #16]
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - stz2g x3, [x1]
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - stz2g sp, [x1]
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - st2g x0, [x1, #-4096]!
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - st2g x1, [x2, #4080]!
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - st2g x2, [sp, #16]!
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - st2g sp, [sp, #16]!
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - stz2g x0, [x1, #-4096]!
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - stz2g x1, [x2, #4080]!
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - stz2g x2, [sp, #16]!
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - stz2g sp, [sp, #16]!
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - st2g x0, [x1], #-4096
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - st2g x1, [x2], #4080
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - st2g x2, [sp], #16
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - st2g sp, [sp], #16
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - stz2g x0, [x1], #-4096
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - stz2g x1, [x2], #4080
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - stz2g x2, [sp], #16
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - stz2g sp, [sp], #16
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - stgp x0, x1, [x2, #-1024]
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - stgp x0, x1, [x2, #1008]
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - stgp x0, x1, [sp, #16]
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - stgp xzr, x1, [x2, #16]
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - stgp x0, xzr, [x2, #16]
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - stgp x0, xzr, [x2]
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - stgp x0, x1, [x2, #-1024]!
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - stgp x0, x1, [x2, #1008]!
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - stgp x0, x1, [sp, #16]!
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - stgp xzr, x1, [x2, #16]!
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - stgp x0, xzr, [x2, #16]!
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - stgp x0, x1, [x2], #-1024
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - stgp x0, x1, [x2], #1008
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - stgp x0, x1, [sp], #16
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - stgp xzr, x1, [x2], #16
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - stgp x0, xzr, [x2], #16
# CHECK-NEXT: - - 0.50 0.50 - 0.50 0.50 - - - - - ldg x0, [x1]
# CHECK-NEXT: - - 0.50 0.50 - 0.50 0.50 - - - - - ldg x2, [sp, #-4096]
# CHECK-NEXT: - - 0.50 0.50 - 0.50 0.50 - - - - - ldg x3, [x4, #4080]
# CHECK-NEXT: - - 0.50 0.50 - 0.50 0.50 - - - - - ldgm x0, [x1]
# CHECK-NEXT: - - 0.50 0.50 - 0.50 0.50 - - - - - ldgm x1, [sp]
# CHECK-NEXT: - - 0.50 0.50 - 0.50 0.50 - - - - - ldgm xzr, [x2]
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stgm x0, [x1]
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stgm x1, [sp]
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stgm xzr, [x2]
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stzgm x0, [x1]
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stzgm x1, [sp]
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stzgm xzr, [x2]

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,31 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -march=aarch64 -mcpu=ampere1b -resource-pressure=false < %s | FileCheck %s
# Summary and instruction-info checks for add/sub/adds/subs with a shifted
# register operand on -mcpu=ampere1b; the resource pressure view is disabled
# on the RUN line, so only the summary and instruction info are checked.
# The expected table lists add/sub (lsl) as 1 uOp with latency 1 and
# adds/subs (lsr/asr) as 2 uOps with latency 2.
# The `lsl #0` on the first input is not echoed in the expected output, which
# shows that instruction as plain `add w0, w1, w2`.
add w0, w1, w2, lsl #0
sub x3, x4, x5, lsl #1
adds x6, x7, x8, lsr #2
subs x9, x10, x11, asr #3
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 400
# CHECK-NEXT: Total Cycles: 156
# CHECK-NEXT: Total uOps: 600
# CHECK: Dispatch Width: 12
# CHECK-NEXT: uOps Per Cycle: 3.85
# CHECK-NEXT: IPC: 2.56
# CHECK-NEXT: Block RThroughput: 1.0
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 1 0.25 add w0, w1, w2
# CHECK-NEXT: 1 1 0.25 sub x3, x4, x5, lsl #1
# CHECK-NEXT: 2 2 0.50 adds x6, x7, x8, lsr #2
# CHECK-NEXT: 2 2 0.50 subs x9, x10, x11, asr #3