
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64-unknown-unknown | FileCheck %s
; External sinks that force an extra use of the intermediate value in the
; *_extrause tests, preventing the constant fold from deleting it.
declare void @use(i32 %arg)
declare void @vec_use(<4 x i32> %arg)
; (x+c1)+c2

; Scalar: (x + 8) + 2 folds to a single add of 10.
define i32 @add_const_add_const(i32 %arg) {
; CHECK-LABEL: add_const_add_const:
; CHECK: // %bb.0:
; CHECK-NEXT: add w0, w0, #10
; CHECK-NEXT: ret
  %t0 = add i32 %arg, 8
  %t1 = add i32 %t0, 2
  ret i32 %t1
}
; Extra use: x+8 must still be materialized for the call, but the final
; result is still folded to a single add of 10.
define i32 @add_const_add_const_extrause(i32 %arg) {
; CHECK-LABEL: add_const_add_const_extrause:
; CHECK: // %bb.0:
; CHECK-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w19, -8
; CHECK-NEXT: .cfi_offset w30, -16
; CHECK-NEXT: mov w19, w0
; CHECK-NEXT: add w0, w0, #8
; CHECK-NEXT: bl use
; CHECK-NEXT: add w0, w19, #10
; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT: ret
  %t0 = add i32 %arg, 8
  call void @use(i32 %t0)
  %t1 = add i32 %t0, 2
  ret i32 %t1
}
; Vector splat: (x + 8) + 2 folds to a single add of splat(10).
define <4 x i32> @vec_add_const_add_const(<4 x i32> %arg) {
; CHECK-LABEL: vec_add_const_add_const:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v1.4s, #10
; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
  %t0 = add <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
  %t1 = add <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
  ret <4 x i32> %t1
}
; Vector extra use: the intermediate x+8 feeds the call; the final result
; still folds to a single add of splat(10).
define <4 x i32> @vec_add_const_add_const_extrause(<4 x i32> %arg) {
; CHECK-LABEL: vec_add_const_add_const_extrause:
; CHECK: // %bb.0:
; CHECK-NEXT: sub sp, sp, #32
; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: .cfi_offset w30, -16
; CHECK-NEXT: movi v1.4s, #8
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
; CHECK-NEXT: bl vec_use
; CHECK-NEXT: movi v0.4s, #10
; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload
; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
; CHECK-NEXT: add v0.4s, v1.4s, v0.4s
; CHECK-NEXT: add sp, sp, #32
; CHECK-NEXT: ret
  %t0 = add <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
  call void @vec_use(<4 x i32> %t0)
  %t1 = add <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
  ret <4 x i32> %t1
}
; Non-splat (with undef lanes): constants are merged into one literal-pool load.
define <4 x i32> @vec_add_const_add_const_nonsplat(<4 x i32> %arg) {
; CHECK-LABEL: vec_add_const_add_const_nonsplat:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI4_0
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI4_0]
; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
  %t0 = add <4 x i32> %arg, <i32 21, i32 undef, i32 8, i32 8>
  %t1 = add <4 x i32> %t0, <i32 2, i32 3, i32 undef, i32 2>
  ret <4 x i32> %t1
}
; (x+c1)-c2

; Scalar: (x + 8) - 2 folds to a single add of 6.
define i32 @add_const_sub_const(i32 %arg) {
; CHECK-LABEL: add_const_sub_const:
; CHECK: // %bb.0:
; CHECK-NEXT: add w0, w0, #6
; CHECK-NEXT: ret
  %t0 = add i32 %arg, 8
  %t1 = sub i32 %t0, 2
  ret i32 %t1
}
; Extra use: x+8 feeds the call; result still folds to x+6.
define i32 @add_const_sub_const_extrause(i32 %arg) {
; CHECK-LABEL: add_const_sub_const_extrause:
; CHECK: // %bb.0:
; CHECK-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w19, -8
; CHECK-NEXT: .cfi_offset w30, -16
; CHECK-NEXT: mov w19, w0
; CHECK-NEXT: add w0, w0, #8
; CHECK-NEXT: bl use
; CHECK-NEXT: add w0, w19, #6
; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT: ret
  %t0 = add i32 %arg, 8
  call void @use(i32 %t0)
  %t1 = sub i32 %t0, 2
  ret i32 %t1
}
; Vector splat: (x + 8) - 2 folds to a single add of splat(6).
define <4 x i32> @vec_add_const_sub_const(<4 x i32> %arg) {
; CHECK-LABEL: vec_add_const_sub_const:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v1.4s, #6
; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
  %t0 = add <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
  %t1 = sub <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
  ret <4 x i32> %t1
}
; Vector extra use: x+8 feeds the call; result still folds to x+splat(6).
define <4 x i32> @vec_add_const_sub_const_extrause(<4 x i32> %arg) {
; CHECK-LABEL: vec_add_const_sub_const_extrause:
; CHECK: // %bb.0:
; CHECK-NEXT: sub sp, sp, #32
; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: .cfi_offset w30, -16
; CHECK-NEXT: movi v1.4s, #8
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
; CHECK-NEXT: bl vec_use
; CHECK-NEXT: movi v0.4s, #6
; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload
; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
; CHECK-NEXT: add v0.4s, v1.4s, v0.4s
; CHECK-NEXT: add sp, sp, #32
; CHECK-NEXT: ret
  %t0 = add <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
  call void @vec_use(<4 x i32> %t0)
  %t1 = sub <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
  ret <4 x i32> %t1
}
; Non-splat: merged constant comes from the literal pool.
define <4 x i32> @vec_add_const_sub_const_nonsplat(<4 x i32> %arg) {
; CHECK-LABEL: vec_add_const_sub_const_nonsplat:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI9_0
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI9_0]
; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
  %t0 = add <4 x i32> %arg, <i32 21, i32 undef, i32 8, i32 8>
  %t1 = sub <4 x i32> %t0, <i32 2, i32 3, i32 undef, i32 2>
  ret <4 x i32> %t1
}
; c2-(x+c1)

; Scalar: 2 - (x + 8) folds to (-6) - x.
define i32 @add_const_const_sub(i32 %arg) {
; CHECK-LABEL: add_const_const_sub:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #-6 // =0xfffffffa
; CHECK-NEXT: sub w0, w8, w0
; CHECK-NEXT: ret
  %t0 = add i32 %arg, 8
  %t1 = sub i32 2, %t0
  ret i32 %t1
}
; Extra use: x+8 feeds the call; result still folds to (-6) - x.
define i32 @add_const_const_sub_extrause(i32 %arg) {
; CHECK-LABEL: add_const_const_sub_extrause:
; CHECK: // %bb.0:
; CHECK-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w19, -8
; CHECK-NEXT: .cfi_offset w30, -16
; CHECK-NEXT: mov w19, w0
; CHECK-NEXT: add w0, w0, #8
; CHECK-NEXT: bl use
; CHECK-NEXT: mov w8, #-6 // =0xfffffffa
; CHECK-NEXT: sub w0, w8, w19
; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT: ret
  %t0 = add i32 %arg, 8
  call void @use(i32 %t0)
  %t1 = sub i32 2, %t0
  ret i32 %t1
}
; Vector splat: 2 - (x + 8) folds to splat(-6) - x (mvni #5 == -6).
define <4 x i32> @vec_add_const_const_sub(<4 x i32> %arg) {
; CHECK-LABEL: vec_add_const_const_sub:
; CHECK: // %bb.0:
; CHECK-NEXT: mvni v1.4s, #5
; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s
; CHECK-NEXT: ret
  %t0 = add <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
  %t1 = sub <4 x i32> <i32 2, i32 2, i32 2, i32 2>, %t0
  ret <4 x i32> %t1
}
; Vector extra use: x+8 feeds the call; result still folds to splat(-6) - x.
define <4 x i32> @vec_add_const_const_sub_extrause(<4 x i32> %arg) {
; CHECK-LABEL: vec_add_const_const_sub_extrause:
; CHECK: // %bb.0:
; CHECK-NEXT: sub sp, sp, #32
; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: .cfi_offset w30, -16
; CHECK-NEXT: movi v1.4s, #8
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
; CHECK-NEXT: bl vec_use
; CHECK-NEXT: mvni v0.4s, #5
; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload
; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
; CHECK-NEXT: add sp, sp, #32
; CHECK-NEXT: ret
  %t0 = add <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
  call void @vec_use(<4 x i32> %t0)
  %t1 = sub <4 x i32> <i32 2, i32 2, i32 2, i32 2>, %t0
  ret <4 x i32> %t1
}
; Non-splat: merged constant from the literal pool, subtracted from.
define <4 x i32> @vec_add_const_const_sub_nonsplat(<4 x i32> %arg) {
; CHECK-LABEL: vec_add_const_const_sub_nonsplat:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI14_0
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI14_0]
; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s
; CHECK-NEXT: ret
  %t0 = add <4 x i32> %arg, <i32 21, i32 undef, i32 8, i32 8>
  %t1 = sub <4 x i32> <i32 2, i32 3, i32 undef, i32 2>, %t0
  ret <4 x i32> %t1
}
; (x-c1)+c2

; Scalar: (x - 8) + 2 folds to a single sub of 6.
define i32 @sub_const_add_const(i32 %arg) {
; CHECK-LABEL: sub_const_add_const:
; CHECK: // %bb.0:
; CHECK-NEXT: sub w0, w0, #6
; CHECK-NEXT: ret
  %t0 = sub i32 %arg, 8
  %t1 = add i32 %t0, 2
  ret i32 %t1
}
; Extra use: x-8 feeds the call; result still folds to x-6.
define i32 @sub_const_add_const_extrause(i32 %arg) {
; CHECK-LABEL: sub_const_add_const_extrause:
; CHECK: // %bb.0:
; CHECK-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w19, -8
; CHECK-NEXT: .cfi_offset w30, -16
; CHECK-NEXT: mov w19, w0
; CHECK-NEXT: sub w0, w0, #8
; CHECK-NEXT: bl use
; CHECK-NEXT: sub w0, w19, #6
; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT: ret
  %t0 = sub i32 %arg, 8
  call void @use(i32 %t0)
  %t1 = add i32 %t0, 2
  ret i32 %t1
}
; Vector splat: (x - 8) + 2 folds to x + splat(-6) (mvni #5 == -6).
define <4 x i32> @vec_sub_const_add_const(<4 x i32> %arg) {
; CHECK-LABEL: vec_sub_const_add_const:
; CHECK: // %bb.0:
; CHECK-NEXT: mvni v1.4s, #5
; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
  %t0 = sub <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
  %t1 = add <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
  ret <4 x i32> %t1
}
; Vector extra use: x-8 feeds the call; result still folds to x + splat(-6).
define <4 x i32> @vec_sub_const_add_const_extrause(<4 x i32> %arg) {
; CHECK-LABEL: vec_sub_const_add_const_extrause:
; CHECK: // %bb.0:
; CHECK-NEXT: sub sp, sp, #32
; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: .cfi_offset w30, -16
; CHECK-NEXT: movi v1.4s, #8
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
; CHECK-NEXT: bl vec_use
; CHECK-NEXT: mvni v0.4s, #5
; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload
; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
; CHECK-NEXT: add v0.4s, v1.4s, v0.4s
; CHECK-NEXT: add sp, sp, #32
; CHECK-NEXT: ret
  %t0 = sub <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
  call void @vec_use(<4 x i32> %t0)
  %t1 = add <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
  ret <4 x i32> %t1
}
; Non-splat: merged constant from the literal pool.
define <4 x i32> @vec_sub_const_add_const_nonsplat(<4 x i32> %arg) {
; CHECK-LABEL: vec_sub_const_add_const_nonsplat:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI19_0
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI19_0]
; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
  %t0 = sub <4 x i32> %arg, <i32 21, i32 undef, i32 8, i32 8>
  %t1 = add <4 x i32> %t0, <i32 2, i32 3, i32 undef, i32 2>
  ret <4 x i32> %t1
}
; (x-c1)-c2

; Scalar: (x - 8) - 2 folds to a single sub of 10.
define i32 @sub_const_sub_const(i32 %arg) {
; CHECK-LABEL: sub_const_sub_const:
; CHECK: // %bb.0:
; CHECK-NEXT: sub w0, w0, #10
; CHECK-NEXT: ret
  %t0 = sub i32 %arg, 8
  %t1 = sub i32 %t0, 2
  ret i32 %t1
}
; Extra use: x-8 feeds the call; result still folds to x-10.
define i32 @sub_const_sub_const_extrause(i32 %arg) {
; CHECK-LABEL: sub_const_sub_const_extrause:
; CHECK: // %bb.0:
; CHECK-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w19, -8
; CHECK-NEXT: .cfi_offset w30, -16
; CHECK-NEXT: mov w19, w0
; CHECK-NEXT: sub w0, w0, #8
; CHECK-NEXT: bl use
; CHECK-NEXT: sub w0, w19, #10
; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT: ret
  %t0 = sub i32 %arg, 8
  call void @use(i32 %t0)
  %t1 = sub i32 %t0, 2
  ret i32 %t1
}
; Vector splat: (x - 8) - 2 folds to a single sub of splat(10).
define <4 x i32> @vec_sub_const_sub_const(<4 x i32> %arg) {
; CHECK-LABEL: vec_sub_const_sub_const:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v1.4s, #10
; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
  %t0 = sub <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
  %t1 = sub <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
  ret <4 x i32> %t1
}
; Vector extra use: x-8 feeds the call; result still folds to x - splat(10).
define <4 x i32> @vec_sub_const_sub_const_extrause(<4 x i32> %arg) {
; CHECK-LABEL: vec_sub_const_sub_const_extrause:
; CHECK: // %bb.0:
; CHECK-NEXT: sub sp, sp, #32
; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: .cfi_offset w30, -16
; CHECK-NEXT: movi v1.4s, #8
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
; CHECK-NEXT: bl vec_use
; CHECK-NEXT: movi v0.4s, #10
; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload
; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s
; CHECK-NEXT: add sp, sp, #32
; CHECK-NEXT: ret
  %t0 = sub <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
  call void @vec_use(<4 x i32> %t0)
  %t1 = sub <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
  ret <4 x i32> %t1
}
; Non-splat: merged constant from the literal pool.
define <4 x i32> @vec_sub_const_sub_const_nonsplat(<4 x i32> %arg) {
; CHECK-LABEL: vec_sub_const_sub_const_nonsplat:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI24_0
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI24_0]
; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
  %t0 = sub <4 x i32> %arg, <i32 21, i32 undef, i32 8, i32 8>
  %t1 = sub <4 x i32> %t0, <i32 2, i32 3, i32 undef, i32 2>
  ret <4 x i32> %t1
}
; c2-(x-c1)

; Scalar: 2 - (x - 8) folds to 10 - x.
define i32 @sub_const_const_sub(i32 %arg) {
; CHECK-LABEL: sub_const_const_sub:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #10 // =0xa
; CHECK-NEXT: sub w0, w8, w0
; CHECK-NEXT: ret
  %t0 = sub i32 %arg, 8
  %t1 = sub i32 2, %t0
  ret i32 %t1
}
; Extra use: x-8 feeds the call; result still folds to 10 - x.
define i32 @sub_const_const_sub_extrause(i32 %arg) {
; CHECK-LABEL: sub_const_const_sub_extrause:
; CHECK: // %bb.0:
; CHECK-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w19, -8
; CHECK-NEXT: .cfi_offset w30, -16
; CHECK-NEXT: mov w19, w0
; CHECK-NEXT: sub w0, w0, #8
; CHECK-NEXT: bl use
; CHECK-NEXT: mov w8, #10 // =0xa
; CHECK-NEXT: sub w0, w8, w19
; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT: ret
  %t0 = sub i32 %arg, 8
  call void @use(i32 %t0)
  %t1 = sub i32 2, %t0
  ret i32 %t1
}
; Vector splat: 2 - (x - 8) folds to splat(10) - x.
define <4 x i32> @vec_sub_const_const_sub(<4 x i32> %arg) {
; CHECK-LABEL: vec_sub_const_const_sub:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v1.4s, #10
; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s
; CHECK-NEXT: ret
  %t0 = sub <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
  %t1 = sub <4 x i32> <i32 2, i32 2, i32 2, i32 2>, %t0
  ret <4 x i32> %t1
}
; Vector extra use: x-8 feeds the call; the second sub is not folded away,
; so splat(2) is rematerialized after the call.
define <4 x i32> @vec_sub_const_const_sub_extrause(<4 x i32> %arg) {
; CHECK-LABEL: vec_sub_const_const_sub_extrause:
; CHECK: // %bb.0:
; CHECK-NEXT: sub sp, sp, #32
; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: .cfi_offset w30, -16
; CHECK-NEXT: movi v1.4s, #8
; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT: bl vec_use
; CHECK-NEXT: movi v0.4s, #2
; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload
; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
; CHECK-NEXT: add sp, sp, #32
; CHECK-NEXT: ret
  %t0 = sub <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
  call void @vec_use(<4 x i32> %t0)
  %t1 = sub <4 x i32> <i32 2, i32 2, i32 2, i32 2>, %t0
  ret <4 x i32> %t1
}
; Non-splat: merged constant from the literal pool, subtracted from.
define <4 x i32> @vec_sub_const_const_sub_nonsplat(<4 x i32> %arg) {
; CHECK-LABEL: vec_sub_const_const_sub_nonsplat:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI29_0
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI29_0]
; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s
; CHECK-NEXT: ret
  %t0 = sub <4 x i32> %arg, <i32 21, i32 undef, i32 8, i32 8>
  %t1 = sub <4 x i32> <i32 2, i32 3, i32 undef, i32 2>, %t0
  ret <4 x i32> %t1
}
; (c1-x)+c2

; Scalar: (8 - x) + 2 folds to 10 - x.
define i32 @const_sub_add_const(i32 %arg) {
; CHECK-LABEL: const_sub_add_const:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #10 // =0xa
; CHECK-NEXT: sub w0, w8, w0
; CHECK-NEXT: ret
  %t0 = sub i32 8, %arg
  %t1 = add i32 %t0, 2
  ret i32 %t1
}
; Extra use: 8-x feeds the call; result still folds to 10 - x.
define i32 @const_sub_add_const_extrause(i32 %arg) {
; CHECK-LABEL: const_sub_add_const_extrause:
; CHECK: // %bb.0:
; CHECK-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w19, -8
; CHECK-NEXT: .cfi_offset w30, -16
; CHECK-NEXT: mov w8, #8 // =0x8
; CHECK-NEXT: mov w19, w0
; CHECK-NEXT: sub w0, w8, w0
; CHECK-NEXT: bl use
; CHECK-NEXT: mov w8, #10 // =0xa
; CHECK-NEXT: sub w0, w8, w19
; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT: ret
  %t0 = sub i32 8, %arg
  call void @use(i32 %t0)
  %t1 = add i32 %t0, 2
  ret i32 %t1
}
; Vector splat: (8 - x) + 2 folds to splat(10) - x.
define <4 x i32> @vec_const_sub_add_const(<4 x i32> %arg) {
; CHECK-LABEL: vec_const_sub_add_const:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v1.4s, #10
; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s
; CHECK-NEXT: ret
  %t0 = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, %arg
  %t1 = add <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
  ret <4 x i32> %t1
}
; Vector extra use: 8-x feeds the call; result still folds to splat(10) - x.
define <4 x i32> @vec_const_sub_add_const_extrause(<4 x i32> %arg) {
; CHECK-LABEL: vec_const_sub_add_const_extrause:
; CHECK: // %bb.0:
; CHECK-NEXT: sub sp, sp, #32
; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: .cfi_offset w30, -16
; CHECK-NEXT: movi v1.4s, #8
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s
; CHECK-NEXT: bl vec_use
; CHECK-NEXT: movi v0.4s, #10
; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload
; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
; CHECK-NEXT: add sp, sp, #32
; CHECK-NEXT: ret
  %t0 = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, %arg
  call void @vec_use(<4 x i32> %t0)
  %t1 = add <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
  ret <4 x i32> %t1
}
; Non-splat: merged constant from the literal pool, subtracted from.
define <4 x i32> @vec_const_sub_add_const_nonsplat(<4 x i32> %arg) {
; CHECK-LABEL: vec_const_sub_add_const_nonsplat:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI34_0
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI34_0]
; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s
; CHECK-NEXT: ret
  %t0 = sub <4 x i32> <i32 21, i32 undef, i32 8, i32 8>, %arg
  %t1 = add <4 x i32> %t0, <i32 2, i32 3, i32 undef, i32 2>
  ret <4 x i32> %t1
}
; (c1-x)-c2

; Scalar: (8 - x) - 2 folds to 6 - x.
define i32 @const_sub_sub_const(i32 %arg) {
; CHECK-LABEL: const_sub_sub_const:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #6 // =0x6
; CHECK-NEXT: sub w0, w8, w0
; CHECK-NEXT: ret
  %t0 = sub i32 8, %arg
  %t1 = sub i32 %t0, 2
  ret i32 %t1
}
; Extra use: 8-x feeds the call; result still folds to 6 - x.
define i32 @const_sub_sub_const_extrause(i32 %arg) {
; CHECK-LABEL: const_sub_sub_const_extrause:
; CHECK: // %bb.0:
; CHECK-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w19, -8
; CHECK-NEXT: .cfi_offset w30, -16
; CHECK-NEXT: mov w8, #8 // =0x8
; CHECK-NEXT: mov w19, w0
; CHECK-NEXT: sub w0, w8, w0
; CHECK-NEXT: bl use
; CHECK-NEXT: mov w8, #6 // =0x6
; CHECK-NEXT: sub w0, w8, w19
; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT: ret
  %t0 = sub i32 8, %arg
  call void @use(i32 %t0)
  %t1 = sub i32 %t0, 2
  ret i32 %t1
}
; Vector splat: (8 - x) - 2 folds to splat(6) - x.
define <4 x i32> @vec_const_sub_sub_const(<4 x i32> %arg) {
; CHECK-LABEL: vec_const_sub_sub_const:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v1.4s, #6
; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s
; CHECK-NEXT: ret
  %t0 = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, %arg
  %t1 = sub <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
  ret <4 x i32> %t1
}
; Vector extra use: 8-x feeds the call; result still folds to splat(6) - x.
define <4 x i32> @vec_const_sub_sub_const_extrause(<4 x i32> %arg) {
; CHECK-LABEL: vec_const_sub_sub_const_extrause:
; CHECK: // %bb.0:
; CHECK-NEXT: sub sp, sp, #32
; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: .cfi_offset w30, -16
; CHECK-NEXT: movi v1.4s, #8
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s
; CHECK-NEXT: bl vec_use
; CHECK-NEXT: movi v0.4s, #6
; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload
; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
; CHECK-NEXT: add sp, sp, #32
; CHECK-NEXT: ret
  %t0 = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, %arg
  call void @vec_use(<4 x i32> %t0)
  %t1 = sub <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
  ret <4 x i32> %t1
}
; Non-splat: merged constant from the literal pool, subtracted from.
define <4 x i32> @vec_const_sub_sub_const_nonsplat(<4 x i32> %arg) {
; CHECK-LABEL: vec_const_sub_sub_const_nonsplat:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI39_0
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI39_0]
; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s
; CHECK-NEXT: ret
  %t0 = sub <4 x i32> <i32 21, i32 undef, i32 8, i32 8>, %arg
  %t1 = sub <4 x i32> %t0, <i32 2, i32 3, i32 undef, i32 2>
  ret <4 x i32> %t1
}
; c2-(c1-x)

; Scalar: 2 - (8 - x) folds to x - 6.
define i32 @const_sub_const_sub(i32 %arg) {
; CHECK-LABEL: const_sub_const_sub:
; CHECK: // %bb.0:
; CHECK-NEXT: sub w0, w0, #6
; CHECK-NEXT: ret
  %t0 = sub i32 8, %arg
  %t1 = sub i32 2, %t0
  ret i32 %t1
}
; Extra use: 8-x feeds the call; the outer sub is recomputed as 2 - t0.
define i32 @const_sub_const_sub_extrause(i32 %arg) {
; CHECK-LABEL: const_sub_const_sub_extrause:
; CHECK: // %bb.0:
; CHECK-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w19, -8
; CHECK-NEXT: .cfi_offset w30, -16
; CHECK-NEXT: mov w8, #8 // =0x8
; CHECK-NEXT: sub w19, w8, w0
; CHECK-NEXT: mov w0, w19
; CHECK-NEXT: bl use
; CHECK-NEXT: mov w8, #2 // =0x2
; CHECK-NEXT: sub w0, w8, w19
; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT: ret
  %t0 = sub i32 8, %arg
  call void @use(i32 %t0)
  %t1 = sub i32 2, %t0
  ret i32 %t1
}
; Vector splat: 2 - (8 - x) folds to x + splat(-6) (mvni #5 == -6).
define <4 x i32> @vec_const_sub_const_sub(<4 x i32> %arg) {
; CHECK-LABEL: vec_const_sub_const_sub:
; CHECK: // %bb.0:
; CHECK-NEXT: mvni v1.4s, #5
; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
  %t0 = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, %arg
  %t1 = sub <4 x i32> <i32 2, i32 2, i32 2, i32 2>, %t0
  ret <4 x i32> %t1
}
; Vector extra use: 8-x feeds the call; the outer sub is recomputed as
; splat(2) - t0 after the call.
define <4 x i32> @vec_const_sub_const_sub_extrause(<4 x i32> %arg) {
; CHECK-LABEL: vec_const_sub_const_sub_extrause:
; CHECK: // %bb.0:
; CHECK-NEXT: sub sp, sp, #32
; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: .cfi_offset w30, -16
; CHECK-NEXT: movi v1.4s, #8
; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT: bl vec_use
; CHECK-NEXT: movi v0.4s, #2
; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload
; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
; CHECK-NEXT: add sp, sp, #32
; CHECK-NEXT: ret
  %t0 = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, %arg
  call void @vec_use(<4 x i32> %t0)
  %t1 = sub <4 x i32> <i32 2, i32 2, i32 2, i32 2>, %t0
  ret <4 x i32> %t1
}
; Non-splat: merged constant from the literal pool, added to x.
define <4 x i32> @vec_const_sub_const_sub_nonsplat(<4 x i32> %arg) {
; CHECK-LABEL: vec_const_sub_const_sub_nonsplat:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI44_0
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI44_0]
; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
  %t0 = sub <4 x i32> <i32 21, i32 undef, i32 8, i32 8>, %arg
  %t1 = sub <4 x i32> <i32 2, i32 3, i32 undef, i32 2>, %t0
  ret <4 x i32> %t1
}