
The i386 psABI specifies that `__float128` has 16-byte alignment and must be passed on the stack; however, LLVM currently stores it in a stack slot that has an offset of 4. Add a custom lowering to correct this alignment to 16 bytes. The i386 psABI does not specify an `__int128`, but it seems reasonable to keep the same behavior as `__float128`, so this is changed as well. There also isn't a good way to distinguish whether a set of four registers came from an integer or a float. The main test demonstrating this change is `store_perturbed` in `llvm/test/CodeGen/X86/i128-fp128-abi.ll`. Referenced ABI: https://gitlab.com/x86-psABIs/i386-ABI/-/wikis/uploads/14c05f1b1e156e0e46b61bfa7c1df1e2/intel386-psABI-2020-08-07.pdf Fixes: https://github.com/llvm/llvm-project/issues/77401
135 lines
3.8 KiB
LLVM
135 lines
3.8 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc < %s -mtriple=i686-- | FileCheck %s --check-prefix=X86
|
|
; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s --check-prefix=X64
|
|
|
|
; Make sure none of these crash, and that the power-of-two transformations
|
|
; trigger correctly.
|
|
|
|
; test1 - signed i128 division by the positive power of two 2^66
; (73786976294838206464). The expected code for both triples contains no call.
; Instead, a bias derived from the sign bits of %x is added and the sum is
; shifted right arithmetically.
; In the i686 checks the four argument words are read from 24(%ebp) through
; 36(%ebp), after the stack pointer is realigned with andl $-16, and the
; result is stored through the pointer loaded from 8(%ebp).
define i128 @test1(i128 %x) nounwind {
|
|
; X86-LABEL: test1:
|
|
; X86: # %bb.0:
|
|
; X86-NEXT: pushl %ebp
|
|
; X86-NEXT: movl %esp, %ebp
|
|
; X86-NEXT: pushl %edi
|
|
; X86-NEXT: pushl %esi
|
|
; X86-NEXT: andl $-16, %esp
|
|
; X86-NEXT: movl 8(%ebp), %eax
|
|
; X86-NEXT: movl 36(%ebp), %ecx
|
|
; X86-NEXT: movl %ecx, %esi
|
|
; X86-NEXT: sarl $31, %esi
|
|
; X86-NEXT: movl %esi, %edx
|
|
; X86-NEXT: shrl $30, %edx
|
|
; X86-NEXT: movl 24(%ebp), %edi
|
|
; X86-NEXT: addl %esi, %edi
|
|
; X86-NEXT: adcl 28(%ebp), %esi
|
|
; X86-NEXT: adcl 32(%ebp), %edx
|
|
; X86-NEXT: adcl $0, %ecx
|
|
; X86-NEXT: shrdl $2, %ecx, %edx
|
|
; X86-NEXT: movl %ecx, %esi
|
|
; X86-NEXT: sarl $2, %esi
|
|
; X86-NEXT: sarl $31, %ecx
|
|
; X86-NEXT: movl %ecx, 12(%eax)
|
|
; X86-NEXT: movl %ecx, 8(%eax)
|
|
; X86-NEXT: movl %esi, 4(%eax)
|
|
; X86-NEXT: movl %edx, (%eax)
|
|
; X86-NEXT: leal -8(%ebp), %esp
|
|
; X86-NEXT: popl %esi
|
|
; X86-NEXT: popl %edi
|
|
; X86-NEXT: popl %ebp
|
|
; X86-NEXT: retl $4
|
|
;
|
|
; X64-LABEL: test1:
|
|
; X64: # %bb.0:
|
|
; X64-NEXT: movq %rsi, %rax
|
|
; X64-NEXT: sarq $63, %rax
|
|
; X64-NEXT: movq %rax, %rdx
|
|
; X64-NEXT: shrq $62, %rdx
|
|
; X64-NEXT: addq %rdi, %rax
|
|
; X64-NEXT: adcq %rsi, %rdx
|
|
; X64-NEXT: movq %rdx, %rax
|
|
; X64-NEXT: sarq $2, %rax
|
|
; X64-NEXT: sarq $63, %rdx
|
|
; X64-NEXT: retq
|
|
%tmp = sdiv i128 %x, 73786976294838206464
|
|
ret i128 %tmp
|
|
}
|
|
|
|
; test2 - signed i128 division by the negative power of two -2^66
; (-73786976294838206464). The expected code contains no call. It uses the
; add-and-shift sequence for a power-of-two divisor and then negates the
; multi-word quotient (negl/sbbl on i686, negq/sbbq on x86_64).
; In the i686 checks the four argument words are read from 24(%ebp) through
; 36(%ebp) and the result is stored through the pointer loaded from 8(%ebp).
define i128 @test2(i128 %x) nounwind {
|
|
; X86-LABEL: test2:
|
|
; X86: # %bb.0:
|
|
; X86-NEXT: pushl %ebp
|
|
; X86-NEXT: movl %esp, %ebp
|
|
; X86-NEXT: pushl %ebx
|
|
; X86-NEXT: pushl %edi
|
|
; X86-NEXT: pushl %esi
|
|
; X86-NEXT: andl $-16, %esp
|
|
; X86-NEXT: subl $16, %esp
|
|
; X86-NEXT: movl 36(%ebp), %eax
|
|
; X86-NEXT: movl %eax, %edx
|
|
; X86-NEXT: sarl $31, %edx
|
|
; X86-NEXT: movl %edx, %ecx
|
|
; X86-NEXT: shrl $30, %ecx
|
|
; X86-NEXT: movl 24(%ebp), %esi
|
|
; X86-NEXT: addl %edx, %esi
|
|
; X86-NEXT: adcl 28(%ebp), %edx
|
|
; X86-NEXT: adcl 32(%ebp), %ecx
|
|
; X86-NEXT: adcl $0, %eax
|
|
; X86-NEXT: shrdl $2, %eax, %ecx
|
|
; X86-NEXT: movl %eax, %esi
|
|
; X86-NEXT: sarl $31, %esi
|
|
; X86-NEXT: sarl $2, %eax
|
|
; X86-NEXT: xorl %edx, %edx
|
|
; X86-NEXT: negl %ecx
|
|
; X86-NEXT: movl $0, %edi
|
|
; X86-NEXT: sbbl %eax, %edi
|
|
; X86-NEXT: movl $0, %ebx
|
|
; X86-NEXT: sbbl %esi, %ebx
|
|
; X86-NEXT: sbbl %esi, %edx
|
|
; X86-NEXT: movl 8(%ebp), %eax
|
|
; X86-NEXT: movl %ecx, (%eax)
|
|
; X86-NEXT: movl %edi, 4(%eax)
|
|
; X86-NEXT: movl %ebx, 8(%eax)
|
|
; X86-NEXT: movl %edx, 12(%eax)
|
|
; X86-NEXT: leal -12(%ebp), %esp
|
|
; X86-NEXT: popl %esi
|
|
; X86-NEXT: popl %edi
|
|
; X86-NEXT: popl %ebx
|
|
; X86-NEXT: popl %ebp
|
|
; X86-NEXT: retl $4
|
|
;
|
|
; X64-LABEL: test2:
|
|
; X64: # %bb.0:
|
|
; X64-NEXT: movq %rsi, %rcx
|
|
; X64-NEXT: sarq $63, %rcx
|
|
; X64-NEXT: movq %rcx, %rax
|
|
; X64-NEXT: shrq $62, %rax
|
|
; X64-NEXT: addq %rdi, %rcx
|
|
; X64-NEXT: adcq %rsi, %rax
|
|
; X64-NEXT: movq %rax, %rcx
|
|
; X64-NEXT: sarq $63, %rcx
|
|
; X64-NEXT: sarq $2, %rax
|
|
; X64-NEXT: xorl %edx, %edx
|
|
; X64-NEXT: negq %rax
|
|
; X64-NEXT: sbbq %rcx, %rdx
|
|
; X64-NEXT: retq
|
|
%tmp = sdiv i128 %x, -73786976294838206464
|
|
ret i128 %tmp
|
|
}
|
|
|
|
; test3 - signed i128 division by -(2^66 + 3) (-73786976294838206467), which is
; not a power of two, so the shift-based lowering does not apply.
; On x86_64 the checks expect a call to __divti3 with the constant divisor
; materialized as -3 in %rdx and -5 in %rcx, the low and high 64-bit halves of
; the constant. On i686 only the presence of the expanded division loop label
; is checked, not the full instruction sequence.
define i128 @test3(i128 %x) nounwind {
|
|
; X86-LABEL: test3:
|
|
; X86 doesn't have __divti3, so the sdiv is expanded into a loop.
|
|
; X86: udiv-do-while
|
|
;
|
|
; X64-LABEL: test3:
|
|
; X64: # %bb.0:
|
|
; X64-NEXT: pushq %rax
|
|
; X64-NEXT: movq $-3, %rdx
|
|
; X64-NEXT: movq $-5, %rcx
|
|
; X64-NEXT: callq __divti3@PLT
|
|
; X64-NEXT: popq %rcx
|
|
; X64-NEXT: retq
|
|
%tmp = sdiv i128 %x, -73786976294838206467
|
|
ret i128 %tmp
|
|
}
|