llvm-project/llvm/test/CodeGen/X86/widen_load-0.ll
Roman Lebedev 16facf1ca6
[DAGCombiner][TLI] Do not fuse bitcast to <1 x ?> into a load/store of a vector
Single-element vectors are legalized by splitting,
so the memory operations would also get scalarized.
While we do have some support to reconstruct scalarized loads,
we clearly don't catch everything.

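The fix boils down to refusing the fold whenever either side of the bitcast is a single-element fixed vector. Below is a minimal sketch of that guard; placing it in TargetLoweringBase::isLoadBitCastBeneficial (the TLI hook DAGCombiner consults before folding a bitcast into a load, and which isStoreBitCastBeneficial defaults to for the store side) and the exact condition are assumptions inferred from the commit title, with the hook's remaining profitability checks elided:

// Sketch of the guard, assumed to live in
// llvm/include/llvm/CodeGen/TargetLowering.h; surrounding code elided.
virtual bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
                                     const SelectionDAG &DAG,
                                     const MachineMemOperand &MMO) const {
  // Single-element vectors are legalized by splitting, so a load/store
  // given such a type would be scalarized too. Refuse the fold and keep
  // the memory operation on its original type.
  if ((LoadVT.isFixedLengthVector() && LoadVT.getVectorNumElements() == 1) ||
      (BitcastVT.isFixedLengthVector() &&
       BitcastVT.getVectorNumElements() == 1))
    return false;
  // ... remaining target-independent profitability checks ...
  return true;
}
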
The comment for the affected AArch64 store suggests that
having two stores was the desired outcome in the first place.

This was showing up as a source of *many* regressions
with more aggressive ZERO_EXTEND_VECTOR_INREG recognition.
2022-12-31 03:49:43 +03:00

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-linux -mattr=+sse4.2 | FileCheck %s --check-prefix=X86
; RUN: llc < %s -mtriple=x86_64-linux -mattr=+sse4.2 | FileCheck %s --check-prefix=X64
; PR4891
; Both loads should happen before either store.
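; (%b and %c may alias, so hoisting either store above the other load would break the swap.)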
define void @short2_int_swap(ptr nocapture %b, ptr nocapture %c) nounwind {
; X86-LABEL: short2_int_swap:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl (%ecx), %edx
; X86-NEXT:    movl (%eax), %esi
; X86-NEXT:    movl %esi, (%ecx)
; X86-NEXT:    movl %edx, (%eax)
; X86-NEXT:    popl %esi
; X86-NEXT:    retl
;
; X64-LABEL: short2_int_swap:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movl (%rdi), %eax
; X64-NEXT:    movl (%rsi), %ecx
; X64-NEXT:    movl %ecx, (%rdi)
; X64-NEXT:    movl %eax, (%rsi)
; X64-NEXT:    retq
entry:
  %0 = load <2 x i16>, ptr %b, align 2            ; <<2 x i16>> [#uses=1]
  %1 = load i32, ptr %c, align 4                  ; <i32> [#uses=1]
  %tmp1 = bitcast i32 %1 to <2 x i16>             ; <<2 x i16>> [#uses=1]
  store <2 x i16> %tmp1, ptr %b, align 2
  %tmp5 = bitcast <2 x i16> %0 to <1 x i32>       ; <<1 x i32>> [#uses=1]
  %tmp3 = extractelement <1 x i32> %tmp5, i32 0   ; <i32> [#uses=1]
  store i32 %tmp3, ptr %c, align 4
  ret void
}