
Prior to this patch, SelectionDAG generated aligned moves onto the stack for AVX registers when the function was marked as a no-realign-stack function. This led to a mismatch between the actual stack alignment and the alignment assumed by the generated instructions. This patch fixes the issue. There was a similar issue reported for `extractelement`, which was fixed in a6614ec5b7c1dbfc4b847884c5de780cf75e8e9c. Co-authored-by: Manish Kausik H <hmamishkausik@gmail.com>
121 lines
3.9 KiB
LLVM
121 lines
3.9 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx | FileCheck %s

; Variable-index insertelement into <8 x i32> with only the #0 attributes
; (no alignstack). The expected code stores %ymm0 to the stack, overwrites
; the selected 32-bit lane with 42, and reloads the vector; both vector
; moves are expected to be the unaligned form (vmovups).
define <8 x i32> @foo(<8 x i32> %arg1, i32 %n) #0 {
; CHECK-LABEL: foo:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
; CHECK-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: andl $7, %edi
; CHECK-NEXT: movl $42, -40(%rsp,%rdi,4)
; CHECK-NEXT: vmovups -{{[0-9]+}}(%rsp), %ymm0
; CHECK-NEXT: retq
entry:
  %a = insertelement <8 x i32> %arg1, i32 42, i32 %n
  ret <8 x i32> %a
}

; Same insertelement as @foo, but the function carries alignstack(8) in
; addition to #0. The expected vector store and reload of %ymm0 still use
; the unaligned form (vmovups).
define <8 x i32> @foo2(<8 x i32> %arg1, i32 %n) alignstack(8) #0 {
; CHECK-LABEL: foo2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
; CHECK-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: andl $7, %edi
; CHECK-NEXT: movl $42, -32(%rsp,%rdi,4)
; CHECK-NEXT: vmovups -{{[0-9]+}}(%rsp), %ymm0
; CHECK-NEXT: retq
entry:
  %a = insertelement <8 x i32> %arg1, i32 42, i32 %n
  ret <8 x i32> %a
}

; Same insertelement as @foo, with alignstack(16) in addition to #0.
; A 16-byte stack alignment is smaller than the 32-byte %ymm0 value, and
; the expected vector store and reload use the unaligned form (vmovups).
define <8 x i32> @foo3(<8 x i32> %arg1, i32 %n) alignstack(16) #0 {
; CHECK-LABEL: foo3:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
; CHECK-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: andl $7, %edi
; CHECK-NEXT: movl $42, -40(%rsp,%rdi,4)
; CHECK-NEXT: vmovups -{{[0-9]+}}(%rsp), %ymm0
; CHECK-NEXT: retq
entry:
  %a = insertelement <8 x i32> %arg1, i32 42, i32 %n
  ret <8 x i32> %a
}

; Same insertelement as @foo, with alignstack(64) in addition to #0.
; Here the expected vector store and reload of %ymm0 use the aligned form
; (vmovaps), in contrast to the smaller alignstack values tested above.
define <8 x i32> @foo4(<8 x i32> %arg1, i32 %n) alignstack(64) #0 {
; CHECK-LABEL: foo4:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
; CHECK-NEXT: vmovaps %ymm0, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: andl $7, %edi
; CHECK-NEXT: movl $42, -56(%rsp,%rdi,4)
; CHECK-NEXT: vmovaps -{{[0-9]+}}(%rsp), %ymm0
; CHECK-NEXT: retq
entry:
  %a = insertelement <8 x i32> %arg1, i32 42, i32 %n
  ret <8 x i32> %a
}

; Same insertelement as @foo, with alignstack(256) in addition to #0.
; Unlike the other cases, the expected code adjusts %rsp explicitly
; (subq/addq $120) and addresses the slot at positive offsets; the vector
; store and reload of %ymm0 use the aligned form (vmovaps).
define <8 x i32> @foo5(<8 x i32> %arg1, i32 %n) alignstack(256) #0 {
; CHECK-LABEL: foo5:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: subq $120, %rsp
; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
; CHECK-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: andl $7, %edi
; CHECK-NEXT: movl $42, 64(%rsp,%rdi,4)
; CHECK-NEXT: vmovaps {{[0-9]+}}(%rsp), %ymm0
; CHECK-NEXT: addq $120, %rsp
; CHECK-NEXT: retq
entry:
  %a = insertelement <8 x i32> %arg1, i32 42, i32 %n
  ret <8 x i32> %a
}

; Variable-index insertelement into a 128-bit <8 x i16> vector with only
; the #0 attributes. The vector lives in %xmm0, the element store is a
; 16-bit movw with scale 2, and the expected vector store and reload use
; the aligned form (vmovaps).
define <8 x i16> @foo6(<8 x i16> %arg1, i32 %n) #0 {
; CHECK-LABEL: foo6:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
; CHECK-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: andl $7, %edi
; CHECK-NEXT: movw $42, -24(%rsp,%rdi,2)
; CHECK-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm0
; CHECK-NEXT: retq
entry:
  %a = insertelement <8 x i16> %arg1, i16 42, i32 %n
  ret <8 x i16> %a
}

; Variable-index insertelement into <8 x i8> with only the #0 attributes.
; The vector is handled in %xmm0 with aligned moves (vmovaps) and the
; element store is a single byte (movb). The index is masked with 15, not 7.
; NOTE(review): presumably the 8-lane vector is widened to 16 byte lanes
; during legalization, hence the mask of 15 - confirm.
define <8 x i8> @foo7(<8 x i8> %arg1, i32 %n) #0 {
; CHECK-LABEL: foo7:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
; CHECK-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: andl $15, %edi
; CHECK-NEXT: movb $42, -24(%rsp,%rdi)
; CHECK-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm0
; CHECK-NEXT: retq
entry:
  %a = insertelement <8 x i8> %arg1, i8 42, i32 %n
  ret <8 x i8> %a
}

; Variable-index insertelement into a 512-bit <8 x i64> vector with only
; the #0 attributes. The value occupies two registers (%ymm0 and %ymm1);
; both halves are stored and reloaded with unaligned moves (vmovups), and
; the element store is a 64-bit movq with scale 8.
define <8 x i64> @foo8(<8 x i64> %arg1, i32 %n) #0 {
; CHECK-LABEL: foo8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
; CHECK-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: andl $7, %edi
; CHECK-NEXT: movq $42, -72(%rsp,%rdi,8)
; CHECK-NEXT: vmovups -{{[0-9]+}}(%rsp), %ymm0
; CHECK-NEXT: vmovups -{{[0-9]+}}(%rsp), %ymm1
; CHECK-NEXT: retq
entry:
  %a = insertelement <8 x i64> %arg1, i64 42, i32 %n
  ret <8 x i64> %a
}

; Attribute group shared by every function above: the "no-realign-stack"
; string attribute plus nounwind.
attributes #0 = { "no-realign-stack" nounwind }