
Refresh the generic scheduling model to use the A510 instead of the A55. The main benefits are for the little core and the introduction of SVE scheduling information. The changes were tested on various OoO cores and no performance degradation was seen. Differential Revision: https://reviews.llvm.org/D156799
273 lines
8.8 KiB
LLVM
273 lines
8.8 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512

; The 2048-bit run reuses the VBITS_GE_512 prefix, so it is verified against
; the same expected output as the 512-bit run.

target triple = "aarch64-unknown-linux-gnu"

; Don't use SVE for 64-bit vectors.
; Volatile-loads <4 x i16> from %a, bitcasts it to <4 x half> and
; volatile-stores it to %b. Expected code: one d-register ldr/str pair.
define void @bitcast_v4i16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: bitcast_v4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    str d0, [x1]
; CHECK-NEXT:    ret
  %load = load volatile <4 x i16>, ptr %a
  %cast = bitcast <4 x i16> %load to <4 x half>
  store volatile <4 x half> %cast, ptr %b
  ret void
}

; Don't use SVE for 128-bit vectors.
; Volatile-loads <8 x i16> from %a, bitcasts it to <8 x half> and
; volatile-stores it to %b. Expected code: one q-register ldr/str pair.
define void @bitcast_v8i16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: bitcast_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    str q0, [x1]
; CHECK-NEXT:    ret
  %load = load volatile <8 x i16>, ptr %a
  %cast = bitcast <8 x i16> %load to <8 x half>
  store volatile <8 x half> %cast, ptr %b
  ret void
}

; Volatile-loads <16 x i16> from %a, bitcasts it to <16 x half> and
; volatile-stores it to %b. Expected code: a single predicated ld1h/st1h
; pair governed by a 16-element (vl16) halfword predicate.
define void @bitcast_v16i16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: bitcast_v16i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    st1h { z0.h }, p0, [x1]
; CHECK-NEXT:    ret
  %load = load volatile <16 x i16>, ptr %a
  %cast = bitcast <16 x i16> %load to <16 x half>
  store volatile <16 x half> %cast, ptr %b
  ret void
}

; Volatile-loads <32 x i16> from %a, bitcasts it to <32 x half> and
; volatile-stores it to %b. This function carries no vscale_range attribute,
; so the expected code differs per RUN line:
;  - 256-bit minimum: two vl16 ld1h/st1h pairs, the second half addressed
;    through x8 = 16 elements (scaled by lsl #1).
;  - 512-bit minimum and above: one vl32 ld1h/st1h pair.
define void @bitcast_v32i16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: bitcast_v32i16:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.h, vl16
; VBITS_GE_256-NEXT:    mov x8, #16 // =0x10
; VBITS_GE_256-NEXT:    ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT:    ld1h { z1.h }, p0/z, [x0]
; VBITS_GE_256-NEXT:    st1h { z0.h }, p0, [x1, x8, lsl #1]
; VBITS_GE_256-NEXT:    st1h { z1.h }, p0, [x1]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: bitcast_v32i16:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.h, vl32
; VBITS_GE_512-NEXT:    ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_512-NEXT:    st1h { z0.h }, p0, [x1]
; VBITS_GE_512-NEXT:    ret
  %load = load volatile <32 x i16>, ptr %a
  %cast = bitcast <32 x i16> %load to <32 x half>
  store volatile <32 x half> %cast, ptr %b
  ret void
}

; Volatile-loads <64 x i16> from %a, bitcasts it to <64 x half> and
; volatile-stores it to %b. With vscale_range(8,0) the expected code is a
; single predicated ld1h/st1h pair under a vl64 halfword predicate.
define void @bitcast_v64i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: bitcast_v64i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl64
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    st1h { z0.h }, p0, [x1]
; CHECK-NEXT:    ret
  %load = load volatile <64 x i16>, ptr %a
  %cast = bitcast <64 x i16> %load to <64 x half>
  store volatile <64 x half> %cast, ptr %b
  ret void
}

; Volatile-loads <128 x i16> from %a, bitcasts it to <128 x half> and
; volatile-stores it to %b. With vscale_range(16,0) the expected code is a
; single predicated ld1h/st1h pair under a vl128 halfword predicate.
define void @bitcast_v128i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: bitcast_v128i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl128
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    st1h { z0.h }, p0, [x1]
; CHECK-NEXT:    ret
  %load = load volatile <128 x i16>, ptr %a
  %cast = bitcast <128 x i16> %load to <128 x half>
  store volatile <128 x half> %cast, ptr %b
  ret void
}

; Don't use SVE for 64-bit vectors.
; Volatile-loads <2 x i32> from %a, bitcasts it to <2 x float> and
; volatile-stores it to %b. Expected code: one d-register ldr/str pair.
define void @bitcast_v2i32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: bitcast_v2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    str d0, [x1]
; CHECK-NEXT:    ret
  %load = load volatile <2 x i32>, ptr %a
  %cast = bitcast <2 x i32> %load to <2 x float>
  store volatile <2 x float> %cast, ptr %b
  ret void
}

; Don't use SVE for 128-bit vectors.
; Volatile-loads <4 x i32> from %a, bitcasts it to <4 x float> and
; volatile-stores it to %b. Expected code: one q-register ldr/str pair.
define void @bitcast_v4i32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: bitcast_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    str q0, [x1]
; CHECK-NEXT:    ret
  %load = load volatile <4 x i32>, ptr %a
  %cast = bitcast <4 x i32> %load to <4 x float>
  store volatile <4 x float> %cast, ptr %b
  ret void
}

; Volatile-loads <8 x i32> from %a, bitcasts it to <8 x float> and
; volatile-stores it to %b. Expected code: a single predicated ld1w/st1w
; pair governed by an 8-element (vl8) word predicate.
define void @bitcast_v8i32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: bitcast_v8i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl8
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    st1w { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %load = load volatile <8 x i32>, ptr %a
  %cast = bitcast <8 x i32> %load to <8 x float>
  store volatile <8 x float> %cast, ptr %b
  ret void
}

; Volatile-loads <16 x i32> from %a, bitcasts it to <16 x float> and
; volatile-stores it to %b. This function carries no vscale_range attribute,
; so the expected code differs per RUN line:
;  - 256-bit minimum: two vl8 ld1w/st1w pairs, the second half addressed
;    through x8 = 8 elements (scaled by lsl #2).
;  - 512-bit minimum and above: one vl16 ld1w/st1w pair.
define void @bitcast_v16i32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: bitcast_v16i32:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
; VBITS_GE_256-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x0]
; VBITS_GE_256-NEXT:    st1w { z0.s }, p0, [x1, x8, lsl #2]
; VBITS_GE_256-NEXT:    st1w { z1.s }, p0, [x1]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: bitcast_v16i32:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT:    st1w { z0.s }, p0, [x1]
; VBITS_GE_512-NEXT:    ret
  %load = load volatile <16 x i32>, ptr %a
  %cast = bitcast <16 x i32> %load to <16 x float>
  store volatile <16 x float> %cast, ptr %b
  ret void
}

; Volatile-loads <32 x i32> from %a, bitcasts it to <32 x float> and
; volatile-stores it to %b. With vscale_range(8,0) the expected code is a
; single predicated ld1w/st1w pair under a vl32 word predicate.
define void @bitcast_v32i32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: bitcast_v32i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl32
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    st1w { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %load = load volatile <32 x i32>, ptr %a
  %cast = bitcast <32 x i32> %load to <32 x float>
  store volatile <32 x float> %cast, ptr %b
  ret void
}

; Volatile-loads <64 x i32> from %a, bitcasts it to <64 x float> and
; volatile-stores it to %b. With vscale_range(16,0) the expected code is a
; single predicated ld1w/st1w pair under a vl64 word predicate.
define void @bitcast_v64i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: bitcast_v64i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl64
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    st1w { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %load = load volatile <64 x i32>, ptr %a
  %cast = bitcast <64 x i32> %load to <64 x float>
  store volatile <64 x float> %cast, ptr %b
  ret void
}

; Don't use SVE for 64-bit vectors.
; Volatile-loads <1 x i64> from %a, bitcasts it to <1 x double> and
; volatile-stores it to %b. Expected code: one d-register ldr/str pair.
define void @bitcast_v1i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: bitcast_v1i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    str d0, [x1]
; CHECK-NEXT:    ret
  %load = load volatile <1 x i64>, ptr %a
  %cast = bitcast <1 x i64> %load to <1 x double>
  store volatile <1 x double> %cast, ptr %b
  ret void
}

; Don't use SVE for 128-bit vectors.
; Volatile-loads <2 x i64> from %a, bitcasts it to <2 x double> and
; volatile-stores it to %b. Expected code: one q-register ldr/str pair.
define void @bitcast_v2i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: bitcast_v2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    str q0, [x1]
; CHECK-NEXT:    ret
  %load = load volatile <2 x i64>, ptr %a
  %cast = bitcast <2 x i64> %load to <2 x double>
  store volatile <2 x double> %cast, ptr %b
  ret void
}

; Volatile-loads <4 x i64> from %a, bitcasts it to <4 x double> and
; volatile-stores it to %b. Expected code: a single predicated ld1d/st1d
; pair governed by a 4-element (vl4) doubleword predicate.
define void @bitcast_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: bitcast_v4i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    st1d { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %load = load volatile <4 x i64>, ptr %a
  %cast = bitcast <4 x i64> %load to <4 x double>
  store volatile <4 x double> %cast, ptr %b
  ret void
}

; Volatile-loads <8 x i64> from %a, bitcasts it to <8 x double> and
; volatile-stores it to %b. This function carries no vscale_range attribute,
; so the expected code differs per RUN line:
;  - 256-bit minimum: two vl4 ld1d/st1d pairs, the second half addressed
;    through x8 = 4 elements (scaled by lsl #3).
;  - 512-bit minimum and above: one vl8 ld1d/st1d pair.
define void @bitcast_v8i64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: bitcast_v8i64:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
; VBITS_GE_256-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x0]
; VBITS_GE_256-NEXT:    st1d { z0.d }, p0, [x1, x8, lsl #3]
; VBITS_GE_256-NEXT:    st1d { z1.d }, p0, [x1]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: bitcast_v8i64:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT:    st1d { z0.d }, p0, [x1]
; VBITS_GE_512-NEXT:    ret
  %load = load volatile <8 x i64>, ptr %a
  %cast = bitcast <8 x i64> %load to <8 x double>
  store volatile <8 x double> %cast, ptr %b
  ret void
}

; Volatile-loads <16 x i64> from %a, bitcasts it to <16 x double> and
; volatile-stores it to %b. With vscale_range(8,0) the expected code is a
; single predicated ld1d/st1d pair under a vl16 doubleword predicate.
define void @bitcast_v16i64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: bitcast_v16i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    st1d { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %load = load volatile <16 x i64>, ptr %a
  %cast = bitcast <16 x i64> %load to <16 x double>
  store volatile <16 x double> %cast, ptr %b
  ret void
}

; Volatile-loads <32 x i64> from %a, bitcasts it to <32 x double> and
; volatile-stores it to %b. With vscale_range(16,0) the expected code is a
; single predicated ld1d/st1d pair under a vl32 doubleword predicate.
define void @bitcast_v32i64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: bitcast_v32i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    st1d { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %load = load volatile <32 x i64>, ptr %a
  %cast = bitcast <32 x i64> %load to <32 x double>
  store volatile <32 x double> %cast, ptr %b
  ret void
}

; Attribute group #0, referenced by every function above: enables the SVE
; target feature.
attributes #0 = { "target-features"="+sve" }