
Clustered nodes used to be managed by adding weak edges between neighbouring cluster nodes, which forms a sort of ordered queue. This is later recorded as `NextClusterPred` or `NextClusterSucc` in `ScheduleDAGMI`. However, instructions may not be picked in the exact order of the queue. For example, suppose we have a queue of cluster nodes A B C. During scheduling, node B might be picked first, and then it is very likely that only B and C are clustered for top-down scheduling (leaving A alone). Another issue is that ``` if (!ReorderWhileClustering && SUa->NodeNum > SUb->NodeNum) std::swap(SUa, SUb); if (!DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) ``` may break the cluster queue. For example, suppose we want to cluster nodes in the order 1 3 2 (as in `MemOpRecords`). Normally 1 (SUa) becomes a pred of 3 (SUb). But for the pair (3, 2), since 3 (SUa) > 2 (SUb), the two nodes are swapped, which makes 2 a pred of 3. Both 1 and 2 are now preds of 3, but there is no edge between 1 and 2, so the cluster chain is broken. To fix both issues, this change introduces an unordered set. This can help improve clustering in some hard cases. One key reason the change causes so many test check changes is that the cluster candidates are no longer ordered, so they may be picked in a different order than before. The most affected targets are AMDGPU, AArch64 and RISCV. For RISCV, most changes appear to be minor instruction reordering, with no obvious regressions. For AArch64, some combining of ldr into ldp is affected, with two cases regressed and two improved. The deeper reason is that the machine scheduler cannot cluster them well either before or after the change, and the later load-combine algorithm is also not smart enough. For AMDGPU, some cases use more v_dual instructions while others regress; this seems less critical. Test `v_vselect_v32bf16` appears to get more buffer_load instructions claused.
770 lines
26 KiB
LLVM
770 lines
26 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc -mtriple=aarch64-none-linux-gnu -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-SD
|
|
; RUN: llc -mtriple=aarch64-none-linux-gnu -o - -global-isel %s | FileCheck %s --check-prefixes=CHECK,CHECK-GI
|
|
|
|
;; Check that the llvm aarch64 backend can handle arrays of
|
|
;; structs and vice versa when passed from IR.
|
|
;; (this layering is something clang would normally simplify)
|
|
;;
|
|
;; Some of these examples are not ABI compliant and they're not
|
|
;; meant to be. For instance according to the ABI an aggregate
|
|
;; with more than 4 members must go in memory. This restriction
|
|
;; is applied earlier in the compilation process so here we do
|
|
;; see 8 member types in registers.
|
|
;;
|
|
;; When we have more than 8 members we simply run out of registers
|
|
;; and that's what produces the 8 limit here.
|
|
|
|
;; Plain arrays
|
|
|
|
;; Zero-length array return: there is no value to materialise, so the expected body is a bare ret.
define [ 0 x double ] @array_0() {
|
|
; CHECK-LABEL: array_0:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ret
|
|
ret [ 0 x double ] zeroinitializer
|
|
}
|
|
|
|
;; Single double element: expected to be zeroed in d0.
define [ 1 x double ] @array_1() {
|
|
; CHECK-LABEL: array_1:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: movi d0, #0000000000000000
|
|
; CHECK-NEXT: ret
|
|
ret [ 1 x double ] zeroinitializer
|
|
}
|
|
|
|
;; Eight double elements: expected to be zeroed in d0-d7, one register per element.
define [ 8 x double ] @array_8() {
|
|
; CHECK-LABEL: array_8:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: movi d0, #0000000000000000
|
|
; CHECK-NEXT: movi d1, #0000000000000000
|
|
; CHECK-NEXT: movi d2, #0000000000000000
|
|
; CHECK-NEXT: movi d3, #0000000000000000
|
|
; CHECK-NEXT: movi d4, #0000000000000000
|
|
; CHECK-NEXT: movi d5, #0000000000000000
|
|
; CHECK-NEXT: movi d6, #0000000000000000
|
|
; CHECK-NEXT: movi d7, #0000000000000000
|
|
; CHECK-NEXT: ret
|
|
ret [ 8 x double ] zeroinitializer
|
|
}
|
|
|
|
;; > 8 items are returned in memory (stored through the pointer in x8)
|
|
|
|
;; Nine double elements: expected to be written through the pointer in x8 instead of
;; being returned in registers. The default selector and -global-isel emit different
;; store sequences, so each run line has its own expectations.
define [ 9 x double ] @array_9() {
|
|
; CHECK-SD-LABEL: array_9:
|
|
; CHECK-SD: // %bb.0:
|
|
; CHECK-SD-NEXT: movi v0.2d, #0000000000000000
|
|
; CHECK-SD-NEXT: str xzr, [x8, #64]
|
|
; CHECK-SD-NEXT: stp q0, q0, [x8]
|
|
; CHECK-SD-NEXT: stp q0, q0, [x8, #32]
|
|
; CHECK-SD-NEXT: ret
|
|
;
|
|
; CHECK-GI-LABEL: array_9:
|
|
; CHECK-GI: // %bb.0:
|
|
; CHECK-GI-NEXT: stp xzr, xzr, [x8]
|
|
; CHECK-GI-NEXT: stp xzr, xzr, [x8, #16]
|
|
; CHECK-GI-NEXT: stp xzr, xzr, [x8, #32]
|
|
; CHECK-GI-NEXT: stp xzr, xzr, [x8, #48]
|
|
; CHECK-GI-NEXT: str xzr, [x8, #64]
|
|
; CHECK-GI-NEXT: ret
|
|
ret [ 9 x double ] zeroinitializer
|
|
}
|
|
|
|
;; Won't use any registers, just checking for assumptions.
|
|
%T_STRUCT_0M = type { }
|
|
|
|
;; Empty struct return: the expected body is a bare ret.
define %T_STRUCT_0M @struct_zero_fields() {
|
|
; CHECK-LABEL: struct_zero_fields:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ret
|
|
ret %T_STRUCT_0M zeroinitializer
|
|
}
|
|
|
|
;; One-element array of the empty struct: still nothing to return, so a bare ret is expected.
define [ 1 x %T_STRUCT_0M ] @array_of_struct_zero_fields() {
|
|
; CHECK-LABEL: array_of_struct_zero_fields:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ret
|
|
ret [ 1 x %T_STRUCT_0M ] zeroinitializer
|
|
}
|
|
|
|
;; Two-element array of the empty struct (despite the name, the return type has no
;; enclosing struct): a bare ret is expected.
define [ 2 x %T_STRUCT_0M ] @array_of_struct_zero_fields_in_struct() {
|
|
; CHECK-LABEL: array_of_struct_zero_fields_in_struct:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ret
|
|
ret [ 2 x %T_STRUCT_0M ] zeroinitializer
|
|
}
|
|
|
|
%T_STRUCT_1M = type { i32 }
|
|
|
|
;; Struct with a single i32 field: expected to be zeroed in w0.
define %T_STRUCT_1M @struct_one_field() {
|
|
; CHECK-LABEL: struct_one_field:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: mov w0, wzr
|
|
; CHECK-NEXT: ret
|
|
ret %T_STRUCT_1M zeroinitializer
|
|
}
|
|
|
|
;; One-element array of { i32 }: same expected output as the bare struct (w0 zeroed).
define [ 1 x %T_STRUCT_1M ] @array_of_struct_one_field() {
|
|
; CHECK-LABEL: array_of_struct_one_field:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: mov w0, wzr
|
|
; CHECK-NEXT: ret
|
|
ret [ 1 x %T_STRUCT_1M ] zeroinitializer
|
|
}
|
|
|
|
;; This one will be a reg block
|
|
;; Two { i32 } elements: expected to be zeroed in w0 and w1.
define [ 2 x %T_STRUCT_1M ] @array_of_struct_one_field_2() {
|
|
; CHECK-LABEL: array_of_struct_one_field_2:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: mov w0, wzr
|
|
; CHECK-NEXT: mov w1, wzr
|
|
; CHECK-NEXT: ret
|
|
ret [ 2 x %T_STRUCT_1M ] zeroinitializer
|
|
}
|
|
|
|
;; Different types for each field, will not be put in a reg block
|
|
%T_STRUCT_DIFFM = type { double, i32 }
|
|
|
|
;; { double, i32 }: the double is expected in d0 and the i32 in w0.
define %T_STRUCT_DIFFM @struct_different_field_types() {
|
|
; CHECK-LABEL: struct_different_field_types:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: movi d0, #0000000000000000
|
|
; CHECK-NEXT: mov w0, wzr
|
|
; CHECK-NEXT: ret
|
|
ret %T_STRUCT_DIFFM zeroinitializer
|
|
}
|
|
|
|
;; One-element array of { double, i32 }: same expected output as the bare struct (d0 and w0).
define [ 1 x %T_STRUCT_DIFFM ] @array_of_struct_different_field_types() {
|
|
; CHECK-LABEL: array_of_struct_different_field_types:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: movi d0, #0000000000000000
|
|
; CHECK-NEXT: mov w0, wzr
|
|
; CHECK-NEXT: ret
|
|
ret [ 1 x %T_STRUCT_DIFFM ] zeroinitializer
|
|
}
|
|
|
|
;; Two { double, i32 } elements: the doubles are expected in d0/d1 and the i32s in w0/w1.
define [ 2 x %T_STRUCT_DIFFM ] @array_of_struct_different_field_types_2() {
|
|
; CHECK-LABEL: array_of_struct_different_field_types_2:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: movi d0, #0000000000000000
|
|
; CHECK-NEXT: movi d1, #0000000000000000
|
|
; CHECK-NEXT: mov w0, wzr
|
|
; CHECK-NEXT: mov w1, wzr
|
|
; CHECK-NEXT: ret
|
|
ret [ 2 x %T_STRUCT_DIFFM ] zeroinitializer
|
|
}
|
|
|
|
;; Each field is the same type, can be put in a reg block
|
|
%T_STRUCT_SAMEM = type { double, double }
|
|
|
|
;; This isn't a block as such; we just allocate two consecutive registers.
|
|
;; { double, double }: expected to be zeroed in d0 and d1.
define %T_STRUCT_SAMEM @struct_same_field_types() {
|
|
; CHECK-LABEL: struct_same_field_types:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: movi d0, #0000000000000000
|
|
; CHECK-NEXT: movi d1, #0000000000000000
|
|
; CHECK-NEXT: ret
|
|
ret %T_STRUCT_SAMEM zeroinitializer
|
|
}
|
|
|
|
;; One-element array of { double, double }: same expected output as the bare struct (d0 and d1).
define [ 1 x %T_STRUCT_SAMEM ] @array_of_struct_same_field_types() {
|
|
; CHECK-LABEL: array_of_struct_same_field_types:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: movi d0, #0000000000000000
|
|
; CHECK-NEXT: movi d1, #0000000000000000
|
|
; CHECK-NEXT: ret
|
|
ret [ 1 x %T_STRUCT_SAMEM ] zeroinitializer
|
|
}
|
|
|
|
;; Two { double, double } elements: four doubles expected in d0-d3.
define [ 2 x %T_STRUCT_SAMEM ] @array_of_struct_same_field_types_2() {
|
|
; CHECK-LABEL: array_of_struct_same_field_types_2:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: movi d0, #0000000000000000
|
|
; CHECK-NEXT: movi d1, #0000000000000000
|
|
; CHECK-NEXT: movi d2, #0000000000000000
|
|
; CHECK-NEXT: movi d3, #0000000000000000
|
|
; CHECK-NEXT: ret
|
|
ret [ 2 x %T_STRUCT_SAMEM ] zeroinitializer
|
|
}
|
|
|
|
;; Same field type but integer this time. Put into x registers instead.
|
|
%T_STRUCT_SAMEM_INT = type { i64, i64 }
|
|
|
|
;; { i64, i64 }: expected to be zeroed in x0 and x1.
define %T_STRUCT_SAMEM_INT @struct_same_field_types_int() {
|
|
; CHECK-LABEL: struct_same_field_types_int:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: mov x0, xzr
|
|
; CHECK-NEXT: mov x1, xzr
|
|
; CHECK-NEXT: ret
|
|
ret %T_STRUCT_SAMEM_INT zeroinitializer
|
|
}
|
|
|
|
;; One-element array of { i64, i64 }: same expected output as the bare struct (x0 and x1).
define [ 1 x %T_STRUCT_SAMEM_INT ] @array_of_struct_same_field_types_int() {
|
|
; CHECK-LABEL: array_of_struct_same_field_types_int:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: mov x0, xzr
|
|
; CHECK-NEXT: mov x1, xzr
|
|
; CHECK-NEXT: ret
|
|
ret [ 1 x %T_STRUCT_SAMEM_INT ] zeroinitializer
|
|
}
|
|
|
|
;; Two { i64, i64 } elements: four i64s expected in x0-x3.
define [ 2 x %T_STRUCT_SAMEM_INT ] @array_of_struct_same_field_types_int_2() {
|
|
; CHECK-LABEL: array_of_struct_same_field_types_int_2:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: mov x0, xzr
|
|
; CHECK-NEXT: mov x1, xzr
|
|
; CHECK-NEXT: mov x2, xzr
|
|
; CHECK-NEXT: mov x3, xzr
|
|
; CHECK-NEXT: ret
|
|
ret [ 2 x %T_STRUCT_SAMEM_INT ] zeroinitializer
|
|
}
|
|
|
|
;; An aggregate of more than 8 items must go in memory.
|
|
;; 4x2 struct fields = 8 items so it goes in a block.
|
|
|
|
;; Four { double, double } elements: eight doubles expected in d0-d7.
define [ 4 x %T_STRUCT_SAMEM ] @array_of_struct_8_fields() {
|
|
; CHECK-LABEL: array_of_struct_8_fields:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: movi d0, #0000000000000000
|
|
; CHECK-NEXT: movi d1, #0000000000000000
|
|
; CHECK-NEXT: movi d2, #0000000000000000
|
|
; CHECK-NEXT: movi d3, #0000000000000000
|
|
; CHECK-NEXT: movi d4, #0000000000000000
|
|
; CHECK-NEXT: movi d5, #0000000000000000
|
|
; CHECK-NEXT: movi d6, #0000000000000000
|
|
; CHECK-NEXT: movi d7, #0000000000000000
|
|
; CHECK-NEXT: ret
|
|
ret [ 4 x %T_STRUCT_SAMEM ] zeroinitializer
|
|
}
|
|
|
|
;; 5x2 fields = 10 so it is returned in memory.
|
|
|
|
;; Five { double, double } elements: expected to be written through the pointer in x8.
;; The default selector and -global-isel emit different store sequences, so each run
;; line has its own expectations.
define [ 5 x %T_STRUCT_SAMEM ] @array_of_struct_in_memory() {
|
|
; CHECK-SD-LABEL: array_of_struct_in_memory:
|
|
; CHECK-SD: // %bb.0:
|
|
; CHECK-SD-NEXT: movi v0.2d, #0000000000000000
|
|
; CHECK-SD-NEXT: stp q0, q0, [x8, #16]
|
|
; CHECK-SD-NEXT: stp q0, q0, [x8, #48]
|
|
; CHECK-SD-NEXT: str q0, [x8]
|
|
; CHECK-SD-NEXT: ret
|
|
;
|
|
; CHECK-GI-LABEL: array_of_struct_in_memory:
|
|
; CHECK-GI: // %bb.0:
|
|
; CHECK-GI-NEXT: stp xzr, xzr, [x8]
|
|
; CHECK-GI-NEXT: stp xzr, xzr, [x8, #16]
|
|
; CHECK-GI-NEXT: stp xzr, xzr, [x8, #32]
|
|
; CHECK-GI-NEXT: stp xzr, xzr, [x8, #48]
|
|
; CHECK-GI-NEXT: stp xzr, xzr, [x8, #64]
|
|
; CHECK-GI-NEXT: ret
|
|
ret [ 5 x %T_STRUCT_SAMEM ] zeroinitializer
|
|
}
|
|
|
|
;; A struct whose field is an array.
|
|
%T_STRUCT_ARRAYM = type { [ 2 x double ]};
|
|
|
|
;; Struct wrapping [ 2 x double ]: the two doubles are expected in d0 and d1.
define %T_STRUCT_ARRAYM @struct_array_field() {
|
|
; CHECK-LABEL: struct_array_field:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: movi d0, #0000000000000000
|
|
; CHECK-NEXT: movi d1, #0000000000000000
|
|
; CHECK-NEXT: ret
|
|
ret %T_STRUCT_ARRAYM zeroinitializer
|
|
}
|
|
|
|
;; One-element array of the array-wrapping struct: same expected output as the bare struct (d0 and d1).
define [ 1 x %T_STRUCT_ARRAYM ] @array_of_struct_array_field() {
|
|
; CHECK-LABEL: array_of_struct_array_field:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: movi d0, #0000000000000000
|
|
; CHECK-NEXT: movi d1, #0000000000000000
|
|
; CHECK-NEXT: ret
|
|
ret [ 1 x %T_STRUCT_ARRAYM ] zeroinitializer
|
|
}
|
|
|
|
;; Two elements of the array-wrapping struct: four doubles expected in d0-d3.
define [ 2 x %T_STRUCT_ARRAYM ] @array_of_struct_array_field_2() {
|
|
; CHECK-LABEL: array_of_struct_array_field_2:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: movi d0, #0000000000000000
|
|
; CHECK-NEXT: movi d1, #0000000000000000
|
|
; CHECK-NEXT: movi d2, #0000000000000000
|
|
; CHECK-NEXT: movi d3, #0000000000000000
|
|
; CHECK-NEXT: ret
|
|
ret [ 2 x %T_STRUCT_ARRAYM ] zeroinitializer
|
|
}
|
|
|
|
;; All non-aggregate fields must have the same type, all through the
|
|
;; overall aggregate. This is false here because of the i32.
|
|
%T_NESTED_STRUCT_DIFFM = type {
|
|
[ 1 x { { double, double } } ],
|
|
[ 1 x { { double, i32 } } ]
|
|
};
|
|
|
|
;; Nested aggregate with three double leaves and one i32 leaf: the doubles are expected
;; in d0-d2 and the i32 in w0.
define %T_NESTED_STRUCT_DIFFM @struct_nested_different_field_types() {
|
|
; CHECK-LABEL: struct_nested_different_field_types:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: movi d0, #0000000000000000
|
|
; CHECK-NEXT: movi d1, #0000000000000000
|
|
; CHECK-NEXT: mov w0, wzr
|
|
; CHECK-NEXT: movi d2, #0000000000000000
|
|
; CHECK-NEXT: ret
|
|
ret %T_NESTED_STRUCT_DIFFM zeroinitializer
|
|
}
|
|
|
|
;; One-element array of the mixed nested struct: same expected output as the bare
;; struct (d0-d2 and w0).
define [ 1 x %T_NESTED_STRUCT_DIFFM ] @array_of_struct_nested_different_field_types() {
|
|
; CHECK-LABEL: array_of_struct_nested_different_field_types:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: movi d0, #0000000000000000
|
|
; CHECK-NEXT: movi d1, #0000000000000000
|
|
; CHECK-NEXT: mov w0, wzr
|
|
; CHECK-NEXT: movi d2, #0000000000000000
|
|
; CHECK-NEXT: ret
|
|
ret [ 1 x %T_NESTED_STRUCT_DIFFM ] zeroinitializer
|
|
}
|
|
|
|
;; Two elements of the mixed nested struct: six doubles expected in d0-d5 and the two
;; i32s in w0/w1.
define [ 2 x %T_NESTED_STRUCT_DIFFM ] @array_of_struct_nested_different_field_types_2() {
|
|
; CHECK-LABEL: array_of_struct_nested_different_field_types_2:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: movi d0, #0000000000000000
|
|
; CHECK-NEXT: movi d1, #0000000000000000
|
|
; CHECK-NEXT: mov w0, wzr
|
|
; CHECK-NEXT: movi d2, #0000000000000000
|
|
; CHECK-NEXT: movi d3, #0000000000000000
|
|
; CHECK-NEXT: mov w1, wzr
|
|
; CHECK-NEXT: movi d4, #0000000000000000
|
|
; CHECK-NEXT: movi d5, #0000000000000000
|
|
; CHECK-NEXT: ret
|
|
ret [ 2 x %T_NESTED_STRUCT_DIFFM ] zeroinitializer
|
|
}
|
|
|
|
;; All fields here are the same type, more nesting to stress the recursive walk.
|
|
%T_NESTED_STRUCT_SAMEM = type {
|
|
{ { double} },
|
|
{ [ 2 x { double, double } ] }
|
|
};
|
|
|
|
;; Five double leaves spread over nested structs and an array: expected in d0-d4.
define %T_NESTED_STRUCT_SAMEM @struct_nested_same_field_types() {
|
|
; CHECK-LABEL: struct_nested_same_field_types:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: movi d0, #0000000000000000
|
|
; CHECK-NEXT: movi d1, #0000000000000000
|
|
; CHECK-NEXT: movi d2, #0000000000000000
|
|
; CHECK-NEXT: movi d3, #0000000000000000
|
|
; CHECK-NEXT: movi d4, #0000000000000000
|
|
; CHECK-NEXT: ret
|
|
ret %T_NESTED_STRUCT_SAMEM zeroinitializer
|
|
}
|
|
|
|
;; One-element array of the all-double nested struct: same expected output as the bare
;; struct (d0-d4).
define [ 1 x %T_NESTED_STRUCT_SAMEM ] @array_of_struct_nested_same_field_types() {
|
|
; CHECK-LABEL: array_of_struct_nested_same_field_types:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: movi d0, #0000000000000000
|
|
; CHECK-NEXT: movi d1, #0000000000000000
|
|
; CHECK-NEXT: movi d2, #0000000000000000
|
|
; CHECK-NEXT: movi d3, #0000000000000000
|
|
; CHECK-NEXT: movi d4, #0000000000000000
|
|
; CHECK-NEXT: ret
|
|
ret [ 1 x %T_NESTED_STRUCT_SAMEM ] zeroinitializer
|
|
}
|
|
|
|
;; 2 x (1 + (2 x 2)) = 10 so this is returned in memory
|
|
;; Two elements of the all-double nested struct: expected to be written through the
;; pointer in x8. The default selector and -global-isel emit different store sequences,
;; so each run line has its own expectations.
define [ 2 x %T_NESTED_STRUCT_SAMEM ] @array_of_struct_nested_same_field_types_2() {
|
|
; CHECK-SD-LABEL: array_of_struct_nested_same_field_types_2:
|
|
; CHECK-SD: // %bb.0:
|
|
; CHECK-SD-NEXT: movi v0.2d, #0000000000000000
|
|
; CHECK-SD-NEXT: stp q0, q0, [x8, #16]
|
|
; CHECK-SD-NEXT: stp q0, q0, [x8, #48]
|
|
; CHECK-SD-NEXT: str q0, [x8]
|
|
; CHECK-SD-NEXT: ret
|
|
;
|
|
; CHECK-GI-LABEL: array_of_struct_nested_same_field_types_2:
|
|
; CHECK-GI: // %bb.0:
|
|
; CHECK-GI-NEXT: stp xzr, xzr, [x8]
|
|
; CHECK-GI-NEXT: stp xzr, xzr, [x8, #16]
|
|
; CHECK-GI-NEXT: stp xzr, xzr, [x8, #32]
|
|
; CHECK-GI-NEXT: stp xzr, xzr, [x8, #48]
|
|
; CHECK-GI-NEXT: stp xzr, xzr, [x8, #64]
|
|
; CHECK-GI-NEXT: ret
|
|
ret [ 2 x %T_NESTED_STRUCT_SAMEM ] zeroinitializer
|
|
}
|
|
|
|
;; Check combinations of call, return and argument passing
|
|
|
|
%T_IN_BLOCK = type [ 2 x { double, { double, double } } ]
|
|
|
|
;; Returns a zeroed %T_IN_BLOCK: its six double leaves are expected in d0-d5.
define %T_IN_BLOCK @return_in_block() {
|
|
; CHECK-LABEL: return_in_block:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: movi d0, #0000000000000000
|
|
; CHECK-NEXT: movi d1, #0000000000000000
|
|
; CHECK-NEXT: movi d2, #0000000000000000
|
|
; CHECK-NEXT: movi d3, #0000000000000000
|
|
; CHECK-NEXT: movi d4, #0000000000000000
|
|
; CHECK-NEXT: movi d5, #0000000000000000
|
|
; CHECK-NEXT: ret
|
|
ret %T_IN_BLOCK zeroinitializer
|
|
}
|
|
|
|
@in_block_store = dso_local global %T_IN_BLOCK zeroinitializer, align 8
|
|
|
|
;; Calls @return_in_block and stores the result to @in_block_store; the result is
;; expected to come back in d0-d5. The default selector and -global-isel outputs are
;; checked separately.
define void @caller_in_block() {
|
|
; CHECK-SD-LABEL: caller_in_block:
|
|
; CHECK-SD: // %bb.0:
|
|
; CHECK-SD-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
|
|
; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
|
|
; CHECK-SD-NEXT: .cfi_offset w30, -16
|
|
; CHECK-SD-NEXT: bl return_in_block
|
|
; CHECK-SD-NEXT: adrp x8, in_block_store
|
|
; CHECK-SD-NEXT: add x8, x8, :lo12:in_block_store
|
|
; CHECK-SD-NEXT: stp d0, d1, [x8]
|
|
; CHECK-SD-NEXT: stp d2, d3, [x8, #16]
|
|
; CHECK-SD-NEXT: stp d4, d5, [x8, #32]
|
|
; CHECK-SD-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
|
|
; CHECK-SD-NEXT: ret
|
|
;
|
|
; CHECK-GI-LABEL: caller_in_block:
|
|
; CHECK-GI: // %bb.0:
|
|
; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
|
|
; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
|
|
; CHECK-GI-NEXT: .cfi_offset w30, -16
|
|
; CHECK-GI-NEXT: bl return_in_block
|
|
; CHECK-GI-NEXT: adrp x8, in_block_store
|
|
; CHECK-GI-NEXT: str d0, [x8, :lo12:in_block_store]
|
|
; CHECK-GI-NEXT: adrp x8, in_block_store
|
|
; CHECK-GI-NEXT: add x8, x8, :lo12:in_block_store
|
|
; CHECK-GI-NEXT: stp d1, d2, [x8, #8]
|
|
; CHECK-GI-NEXT: stp d3, d4, [x8, #24]
|
|
; CHECK-GI-NEXT: str d5, [x8, #40]
|
|
; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
|
|
; CHECK-GI-NEXT: ret
|
|
%1 = call %T_IN_BLOCK @return_in_block()
|
|
store %T_IN_BLOCK %1, ptr @in_block_store
|
|
ret void
|
|
}
|
|
|
|
;; Stores its %T_IN_BLOCK argument to @in_block_store; the argument is expected to
;; arrive in d0-d5. The default selector and -global-isel outputs are checked separately.
define void @callee_in_block(%T_IN_BLOCK %a) {
|
|
; CHECK-SD-LABEL: callee_in_block:
|
|
; CHECK-SD: // %bb.0:
|
|
; CHECK-SD-NEXT: adrp x8, in_block_store
|
|
; CHECK-SD-NEXT: add x8, x8, :lo12:in_block_store
|
|
; CHECK-SD-NEXT: stp d4, d5, [x8, #32]
|
|
; CHECK-SD-NEXT: stp d2, d3, [x8, #16]
|
|
; CHECK-SD-NEXT: stp d0, d1, [x8]
|
|
; CHECK-SD-NEXT: ret
|
|
;
|
|
; CHECK-GI-LABEL: callee_in_block:
|
|
; CHECK-GI: // %bb.0:
|
|
; CHECK-GI-NEXT: adrp x8, in_block_store
|
|
; CHECK-GI-NEXT: str d0, [x8, :lo12:in_block_store]
|
|
; CHECK-GI-NEXT: adrp x8, in_block_store
|
|
; CHECK-GI-NEXT: add x8, x8, :lo12:in_block_store
|
|
; CHECK-GI-NEXT: stp d1, d2, [x8, #8]
|
|
; CHECK-GI-NEXT: stp d3, d4, [x8, #24]
|
|
; CHECK-GI-NEXT: str d5, [x8, #40]
|
|
; CHECK-GI-NEXT: ret
|
|
store %T_IN_BLOCK %a, ptr @in_block_store
|
|
ret void
|
|
}
|
|
|
|
;; Loads @in_block_store and passes it to @callee_in_block; the argument is expected
;; to be loaded into d0-d5 before the call. The default selector and -global-isel
;; outputs are checked separately.
define void @argument_in_block() {
|
|
; CHECK-SD-LABEL: argument_in_block:
|
|
; CHECK-SD: // %bb.0:
|
|
; CHECK-SD-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
|
|
; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
|
|
; CHECK-SD-NEXT: .cfi_offset w30, -16
|
|
; CHECK-SD-NEXT: adrp x8, in_block_store
|
|
; CHECK-SD-NEXT: add x8, x8, :lo12:in_block_store
|
|
; CHECK-SD-NEXT: ldp d4, d5, [x8, #32]
|
|
; CHECK-SD-NEXT: ldp d2, d3, [x8, #16]
|
|
; CHECK-SD-NEXT: ldp d0, d1, [x8]
|
|
; CHECK-SD-NEXT: bl callee_in_block
|
|
; CHECK-SD-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
|
|
; CHECK-SD-NEXT: ret
|
|
;
|
|
; CHECK-GI-LABEL: argument_in_block:
|
|
; CHECK-GI: // %bb.0:
|
|
; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
|
|
; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
|
|
; CHECK-GI-NEXT: .cfi_offset w30, -16
|
|
; CHECK-GI-NEXT: adrp x9, in_block_store
|
|
; CHECK-GI-NEXT: add x9, x9, :lo12:in_block_store
|
|
; CHECK-GI-NEXT: adrp x8, in_block_store
|
|
; CHECK-GI-NEXT: ldp d1, d2, [x9, #8]
|
|
; CHECK-GI-NEXT: ldr d0, [x8, :lo12:in_block_store]
|
|
; CHECK-GI-NEXT: ldp d3, d4, [x9, #24]
|
|
; CHECK-GI-NEXT: ldr d5, [x9, #40]
|
|
; CHECK-GI-NEXT: bl callee_in_block
|
|
; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
|
|
; CHECK-GI-NEXT: ret
|
|
%1 = load %T_IN_BLOCK, ptr @in_block_store
|
|
call void @callee_in_block(%T_IN_BLOCK %1)
|
|
ret void
|
|
}
|
|
|
|
%T_IN_MEMORY = type [ 3 x { double, { double, double } } ]
|
|
|
|
;; Returns a zeroed %T_IN_MEMORY (nine double leaves): expected to be written through
;; the pointer in x8. The default selector and -global-isel outputs are checked separately.
define %T_IN_MEMORY @return_in_memory() {
|
|
; CHECK-SD-LABEL: return_in_memory:
|
|
; CHECK-SD: // %bb.0:
|
|
; CHECK-SD-NEXT: movi v0.2d, #0000000000000000
|
|
; CHECK-SD-NEXT: str xzr, [x8, #64]
|
|
; CHECK-SD-NEXT: stp q0, q0, [x8]
|
|
; CHECK-SD-NEXT: stp q0, q0, [x8, #32]
|
|
; CHECK-SD-NEXT: ret
|
|
;
|
|
; CHECK-GI-LABEL: return_in_memory:
|
|
; CHECK-GI: // %bb.0:
|
|
; CHECK-GI-NEXT: stp xzr, xzr, [x8]
|
|
; CHECK-GI-NEXT: stp xzr, xzr, [x8, #16]
|
|
; CHECK-GI-NEXT: stp xzr, xzr, [x8, #32]
|
|
; CHECK-GI-NEXT: stp xzr, xzr, [x8, #48]
|
|
; CHECK-GI-NEXT: str xzr, [x8, #64]
|
|
; CHECK-GI-NEXT: ret
|
|
ret %T_IN_MEMORY zeroinitializer
|
|
}
|
|
|
|
@in_memory_store = dso_local global %T_IN_MEMORY zeroinitializer, align 8
|
|
|
|
;; Calls @return_in_memory with x8 pointing at a stack buffer, then copies the result
;; from that buffer to @in_memory_store. The default selector and -global-isel outputs
;; are checked separately.
define void @caller_in_memory() {
|
|
; CHECK-SD-LABEL: caller_in_memory:
|
|
; CHECK-SD: // %bb.0:
|
|
; CHECK-SD-NEXT: sub sp, sp, #96
|
|
; CHECK-SD-NEXT: str x30, [sp, #80] // 8-byte Folded Spill
|
|
; CHECK-SD-NEXT: .cfi_def_cfa_offset 96
|
|
; CHECK-SD-NEXT: .cfi_offset w30, -16
|
|
; CHECK-SD-NEXT: add x8, sp, #8
|
|
; CHECK-SD-NEXT: bl return_in_memory
|
|
; CHECK-SD-NEXT: ldur q0, [sp, #24]
|
|
; CHECK-SD-NEXT: ldur q1, [sp, #8]
|
|
; CHECK-SD-NEXT: adrp x8, in_memory_store
|
|
; CHECK-SD-NEXT: add x8, x8, :lo12:in_memory_store
|
|
; CHECK-SD-NEXT: ldr d2, [sp, #72]
|
|
; CHECK-SD-NEXT: ldur q3, [sp, #56]
|
|
; CHECK-SD-NEXT: ldur q4, [sp, #40]
|
|
; CHECK-SD-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
|
|
; CHECK-SD-NEXT: stp q1, q0, [x8]
|
|
; CHECK-SD-NEXT: str d2, [x8, #64]
|
|
; CHECK-SD-NEXT: stp q4, q3, [x8, #32]
|
|
; CHECK-SD-NEXT: add sp, sp, #96
|
|
; CHECK-SD-NEXT: ret
|
|
;
|
|
; CHECK-GI-LABEL: caller_in_memory:
|
|
; CHECK-GI: // %bb.0:
|
|
; CHECK-GI-NEXT: sub sp, sp, #96
|
|
; CHECK-GI-NEXT: str x30, [sp, #80] // 8-byte Folded Spill
|
|
; CHECK-GI-NEXT: .cfi_def_cfa_offset 96
|
|
; CHECK-GI-NEXT: .cfi_offset w30, -16
|
|
; CHECK-GI-NEXT: add x8, sp, #8
|
|
; CHECK-GI-NEXT: bl return_in_memory
|
|
; CHECK-GI-NEXT: ldp x8, x9, [sp, #8]
|
|
; CHECK-GI-NEXT: adrp x10, in_memory_store
|
|
; CHECK-GI-NEXT: ldp x11, x12, [sp, #24]
|
|
; CHECK-GI-NEXT: ldp x13, x14, [sp, #40]
|
|
; CHECK-GI-NEXT: ldp x15, x16, [sp, #56]
|
|
; CHECK-GI-NEXT: ldp x17, x30, [sp, #72] // 8-byte Folded Reload
|
|
; CHECK-GI-NEXT: str x8, [x10, :lo12:in_memory_store]
|
|
; CHECK-GI-NEXT: adrp x8, in_memory_store
|
|
; CHECK-GI-NEXT: add x8, x8, :lo12:in_memory_store
|
|
; CHECK-GI-NEXT: stp x9, x11, [x8, #8]
|
|
; CHECK-GI-NEXT: stp x12, x13, [x8, #24]
|
|
; CHECK-GI-NEXT: stp x14, x15, [x8, #40]
|
|
; CHECK-GI-NEXT: stp x16, x17, [x8, #56]
|
|
; CHECK-GI-NEXT: add sp, sp, #96
|
|
; CHECK-GI-NEXT: ret
|
|
%1 = call %T_IN_MEMORY @return_in_memory()
|
|
store %T_IN_MEMORY %1, ptr @in_memory_store
|
|
ret void
|
|
}
|
|
|
|
;; Stores its %T_IN_MEMORY argument to @in_memory_store; the argument is expected to be
;; read with sp-relative loads. The default selector and -global-isel outputs are
;; checked separately.
define void @callee_in_memory(%T_IN_MEMORY %a) {
|
|
; CHECK-SD-LABEL: callee_in_memory:
|
|
; CHECK-SD: // %bb.0:
|
|
; CHECK-SD-NEXT: ldp q1, q2, [sp, #32]
|
|
; CHECK-SD-NEXT: adrp x8, in_memory_store
|
|
; CHECK-SD-NEXT: add x8, x8, :lo12:in_memory_store
|
|
; CHECK-SD-NEXT: ldr d0, [sp, #64]
|
|
; CHECK-SD-NEXT: str d0, [x8, #64]
|
|
; CHECK-SD-NEXT: str q2, [x8, #48]
|
|
; CHECK-SD-NEXT: ldp q2, q0, [sp]
|
|
; CHECK-SD-NEXT: stp q0, q1, [x8, #16]
|
|
; CHECK-SD-NEXT: str q2, [x8]
|
|
; CHECK-SD-NEXT: ret
|
|
;
|
|
; CHECK-GI-LABEL: callee_in_memory:
|
|
; CHECK-GI: // %bb.0:
|
|
; CHECK-GI-NEXT: ldp x8, x9, [sp]
|
|
; CHECK-GI-NEXT: adrp x10, in_memory_store
|
|
; CHECK-GI-NEXT: ldp x11, x12, [sp, #16]
|
|
; CHECK-GI-NEXT: str x8, [x10, :lo12:in_memory_store]
|
|
; CHECK-GI-NEXT: adrp x8, in_memory_store
|
|
; CHECK-GI-NEXT: add x8, x8, :lo12:in_memory_store
|
|
; CHECK-GI-NEXT: stp x9, x11, [x8, #8]
|
|
; CHECK-GI-NEXT: ldp x9, x10, [sp, #32]
|
|
; CHECK-GI-NEXT: stp x12, x9, [x8, #24]
|
|
; CHECK-GI-NEXT: ldp x9, x11, [sp, #48]
|
|
; CHECK-GI-NEXT: str x10, [x8, #40]
|
|
; CHECK-GI-NEXT: ldr x10, [sp, #64]
|
|
; CHECK-GI-NEXT: stp x9, x11, [x8, #48]
|
|
; CHECK-GI-NEXT: str x10, [x8, #64]
|
|
; CHECK-GI-NEXT: ret
|
|
store %T_IN_MEMORY %a, ptr @in_memory_store
|
|
ret void
|
|
}
|
|
|
|
;; Loads @in_memory_store and passes it to @callee_in_memory; the argument is expected
;; to be copied to sp-relative slots before the call. The default selector and
;; -global-isel outputs are checked separately.
define void @argument_in_memory() {
|
|
; CHECK-SD-LABEL: argument_in_memory:
|
|
; CHECK-SD: // %bb.0:
|
|
; CHECK-SD-NEXT: sub sp, sp, #96
|
|
; CHECK-SD-NEXT: str x30, [sp, #80] // 8-byte Folded Spill
|
|
; CHECK-SD-NEXT: .cfi_def_cfa_offset 96
|
|
; CHECK-SD-NEXT: .cfi_offset w30, -16
|
|
; CHECK-SD-NEXT: adrp x8, in_memory_store
|
|
; CHECK-SD-NEXT: add x8, x8, :lo12:in_memory_store
|
|
; CHECK-SD-NEXT: ldp q0, q1, [x8]
|
|
; CHECK-SD-NEXT: ldr d4, [x8, #64]
|
|
; CHECK-SD-NEXT: ldp q2, q3, [x8, #32]
|
|
; CHECK-SD-NEXT: str d4, [sp, #64]
|
|
; CHECK-SD-NEXT: stp q0, q1, [sp]
|
|
; CHECK-SD-NEXT: stp q2, q3, [sp, #32]
|
|
; CHECK-SD-NEXT: bl callee_in_memory
|
|
; CHECK-SD-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
|
|
; CHECK-SD-NEXT: add sp, sp, #96
|
|
; CHECK-SD-NEXT: ret
|
|
;
|
|
; CHECK-GI-LABEL: argument_in_memory:
|
|
; CHECK-GI: // %bb.0:
|
|
; CHECK-GI-NEXT: sub sp, sp, #96
|
|
; CHECK-GI-NEXT: str x30, [sp, #80] // 8-byte Folded Spill
|
|
; CHECK-GI-NEXT: .cfi_def_cfa_offset 96
|
|
; CHECK-GI-NEXT: .cfi_offset w30, -16
|
|
; CHECK-GI-NEXT: adrp x9, in_memory_store
|
|
; CHECK-GI-NEXT: add x9, x9, :lo12:in_memory_store
|
|
; CHECK-GI-NEXT: adrp x8, in_memory_store
|
|
; CHECK-GI-NEXT: ldp x10, x11, [x9, #8]
|
|
; CHECK-GI-NEXT: ldr x8, [x8, :lo12:in_memory_store]
|
|
; CHECK-GI-NEXT: ldp x12, x13, [x9, #24]
|
|
; CHECK-GI-NEXT: ldp x14, x15, [x9, #40]
|
|
; CHECK-GI-NEXT: ldp x16, x9, [x9, #56]
|
|
; CHECK-GI-NEXT: stp x8, x10, [sp]
|
|
; CHECK-GI-NEXT: stp x11, x12, [sp, #16]
|
|
; CHECK-GI-NEXT: stp x13, x14, [sp, #32]
|
|
; CHECK-GI-NEXT: stp x15, x16, [sp, #48]
|
|
; CHECK-GI-NEXT: str x9, [sp, #64]
|
|
; CHECK-GI-NEXT: bl callee_in_memory
|
|
; CHECK-GI-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
|
|
; CHECK-GI-NEXT: add sp, sp, #96
|
|
; CHECK-GI-NEXT: ret
|
|
%1 = load %T_IN_MEMORY, ptr @in_memory_store
|
|
call void @callee_in_memory(%T_IN_MEMORY %1)
|
|
ret void
|
|
}
|
|
|
|
%T_NO_BLOCK = type [ 2 x { double, { i32 } } ]
|
|
|
|
;; Returns a zeroed %T_NO_BLOCK (double and i32 leaves): the doubles are expected in
;; d0/d1 and the i32s in w0/w1.
define %T_NO_BLOCK @return_no_block() {
|
|
; CHECK-LABEL: return_no_block:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: movi d0, #0000000000000000
|
|
; CHECK-NEXT: movi d1, #0000000000000000
|
|
; CHECK-NEXT: mov w0, wzr
|
|
; CHECK-NEXT: mov w1, wzr
|
|
; CHECK-NEXT: ret
|
|
ret %T_NO_BLOCK zeroinitializer
|
|
}
|
|
|
|
@no_block_store = dso_local global %T_NO_BLOCK zeroinitializer, align 8
|
|
|
|
;; Calls @return_no_block and stores the result (expected in d0, w0, d1, w1) to
;; @no_block_store. The default selector and -global-isel outputs are checked separately.
define void @caller_no_block() {
|
|
; CHECK-SD-LABEL: caller_no_block:
|
|
; CHECK-SD: // %bb.0:
|
|
; CHECK-SD-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
|
|
; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
|
|
; CHECK-SD-NEXT: .cfi_offset w30, -16
|
|
; CHECK-SD-NEXT: bl return_no_block
|
|
; CHECK-SD-NEXT: adrp x8, no_block_store
|
|
; CHECK-SD-NEXT: add x8, x8, :lo12:no_block_store
|
|
; CHECK-SD-NEXT: str d0, [x8]
|
|
; CHECK-SD-NEXT: str w0, [x8, #8]
|
|
; CHECK-SD-NEXT: str d1, [x8, #16]
|
|
; CHECK-SD-NEXT: str w1, [x8, #24]
|
|
; CHECK-SD-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
|
|
; CHECK-SD-NEXT: ret
|
|
;
|
|
; CHECK-GI-LABEL: caller_no_block:
|
|
; CHECK-GI: // %bb.0:
|
|
; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
|
|
; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
|
|
; CHECK-GI-NEXT: .cfi_offset w30, -16
|
|
; CHECK-GI-NEXT: bl return_no_block
|
|
; CHECK-GI-NEXT: adrp x8, no_block_store
|
|
; CHECK-GI-NEXT: str d0, [x8, :lo12:no_block_store]
|
|
; CHECK-GI-NEXT: adrp x8, no_block_store
|
|
; CHECK-GI-NEXT: add x8, x8, :lo12:no_block_store
|
|
; CHECK-GI-NEXT: str w0, [x8, #8]
|
|
; CHECK-GI-NEXT: str d1, [x8, #16]
|
|
; CHECK-GI-NEXT: str w1, [x8, #24]
|
|
; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
|
|
; CHECK-GI-NEXT: ret
|
|
%1 = call %T_NO_BLOCK @return_no_block()
|
|
store %T_NO_BLOCK %1, ptr @no_block_store
|
|
ret void
|
|
}
|
|
|
|
;; Stores its %T_NO_BLOCK argument (expected in d0, w0, d1, w1) to @no_block_store.
;; The default selector and -global-isel outputs are checked separately.
define void @callee_no_block(%T_NO_BLOCK %a) {
|
|
; CHECK-SD-LABEL: callee_no_block:
|
|
; CHECK-SD: // %bb.0:
|
|
; CHECK-SD-NEXT: adrp x8, no_block_store
|
|
; CHECK-SD-NEXT: add x8, x8, :lo12:no_block_store
|
|
; CHECK-SD-NEXT: str w1, [x8, #24]
|
|
; CHECK-SD-NEXT: str d1, [x8, #16]
|
|
; CHECK-SD-NEXT: str w0, [x8, #8]
|
|
; CHECK-SD-NEXT: str d0, [x8]
|
|
; CHECK-SD-NEXT: ret
|
|
;
|
|
; CHECK-GI-LABEL: callee_no_block:
|
|
; CHECK-GI: // %bb.0:
|
|
; CHECK-GI-NEXT: adrp x8, no_block_store
|
|
; CHECK-GI-NEXT: str d0, [x8, :lo12:no_block_store]
|
|
; CHECK-GI-NEXT: adrp x8, no_block_store
|
|
; CHECK-GI-NEXT: add x8, x8, :lo12:no_block_store
|
|
; CHECK-GI-NEXT: str w0, [x8, #8]
|
|
; CHECK-GI-NEXT: str d1, [x8, #16]
|
|
; CHECK-GI-NEXT: str w1, [x8, #24]
|
|
; CHECK-GI-NEXT: ret
|
|
store %T_NO_BLOCK %a, ptr @no_block_store
|
|
ret void
|
|
}
|
|
|
|
;; Loads @no_block_store into d0, w0, d1, w1 and passes it to @callee_no_block.
;; The default selector and -global-isel outputs are checked separately.
define void @argument_no_block() {
|
|
; CHECK-SD-LABEL: argument_no_block:
|
|
; CHECK-SD: // %bb.0:
|
|
; CHECK-SD-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
|
|
; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
|
|
; CHECK-SD-NEXT: .cfi_offset w30, -16
|
|
; CHECK-SD-NEXT: adrp x8, no_block_store
|
|
; CHECK-SD-NEXT: add x8, x8, :lo12:no_block_store
|
|
; CHECK-SD-NEXT: ldr w1, [x8, #24]
|
|
; CHECK-SD-NEXT: ldr d1, [x8, #16]
|
|
; CHECK-SD-NEXT: ldr w0, [x8, #8]
|
|
; CHECK-SD-NEXT: ldr d0, [x8]
|
|
; CHECK-SD-NEXT: bl callee_no_block
|
|
; CHECK-SD-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
|
|
; CHECK-SD-NEXT: ret
|
|
;
|
|
; CHECK-GI-LABEL: argument_no_block:
|
|
; CHECK-GI: // %bb.0:
|
|
; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
|
|
; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
|
|
; CHECK-GI-NEXT: .cfi_offset w30, -16
|
|
; CHECK-GI-NEXT: adrp x8, no_block_store
|
|
; CHECK-GI-NEXT: adrp x9, no_block_store
|
|
; CHECK-GI-NEXT: add x9, x9, :lo12:no_block_store
|
|
; CHECK-GI-NEXT: ldr d0, [x8, :lo12:no_block_store]
|
|
; CHECK-GI-NEXT: ldr w0, [x9, #8]
|
|
; CHECK-GI-NEXT: ldr d1, [x9, #16]
|
|
; CHECK-GI-NEXT: ldr w1, [x9, #24]
|
|
; CHECK-GI-NEXT: bl callee_no_block
|
|
; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
|
|
; CHECK-GI-NEXT: ret
|
|
%1 = load %T_NO_BLOCK, ptr @no_block_store
|
|
call void @callee_no_block(%T_NO_BLOCK %1)
|
|
ret void
|
|
}
|