llvm-project/llvm/test/CodeGen/X86/trunc-vector-width.ll
Simon Pilgrim d1592a966b
[X86] X86FixupVectorConstantsPass - use scheduler model to avoid regressions (#140028)
When attempting to replace a full vector constant load with an instruction that uses a smaller constant, check the scheduler model to ensure the instruction isn't slower.

Throughput must not regress, but allow a small increase in latency based on how much constant data we're saving (I've used a simple estimate of 1 cycle per 128-bits of data saved).

NOTE: this currently ignores hoisted constant loads where the slower instruction might be acceptable.

Fixes #135998
2025-05-28 11:02:37 +01:00

31 lines
1.5 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skylake-avx512 -mattr=prefer-256-bit | FileCheck %s
; Codegen test: load 64 bytes, keep every 4th byte (a stride-4 byte shuffle),
; bitwise-invert the resulting <16 x i8>, then store three individual lanes.
; The RUN line requests -mattr=prefer-256-bit on skylake-avx512, and the
; autogenerated assertions below only reference xmm/ymm registers (no zmm).
;
; %a0 - pointer to the 64 source bytes; the load is only 1-byte aligned.
define void @test(ptr %a0) #0 {
; CHECK-LABEL: test:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqu (%rdi), %ymm0
; CHECK-NEXT: vmovq {{.*#+}} xmm1 = [0,4,0,0]
; CHECK-NEXT: vpblendd {{.*#+}} ymm0 = mem[0],ymm0[1,2,3,4,5,6,7]
; CHECK-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u,16,20,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
; CHECK-NEXT: vpermd %ymm0, %ymm1, %ymm0
; CHECK-NEXT: vpternlogq {{.*#+}} xmm0 = ~xmm0
; CHECK-NEXT: vpextrb $1, %xmm0, (%rax)
; CHECK-NEXT: vpextrb $4, %xmm0, (%rax)
; CHECK-NEXT: vpextrb $8, %xmm0, (%rax)
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
; Unaligned (align 1) load of the full 64-byte vector.
%load = load <64 x i8>, ptr %a0, align 1
; Mask picks source bytes 0, 4, 8, ..., 60: one byte out of every four,
; narrowing <64 x i8> to <16 x i8>. The second shuffle operand is undef and
; is never indexed by the mask (all indices are < 64).
%shuf = shufflevector <64 x i8> %load, <64 x i8> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28, i32 32, i32 36, i32 40, i32 44, i32 48, i32 52, i32 56, i32 60>
; xor with all-ones (-1 in every lane) is a bitwise NOT of each byte.
%xor = xor <16 x i8> %shuf, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
; Only lanes 1, 4 and 8 of the inverted vector are actually used.
%i1 = extractelement <16 x i8> %xor, i32 1
%i2 = extractelement <16 x i8> %xor, i32 4
%i3 = extractelement <16 x i8> %xor, i32 8
; The stores target an undef pointer: the destination address is irrelevant
; to the test (the expected output stores through an uninitialized %rax);
; they exist to keep the extracted lanes live.
store i8 %i1, ptr undef, align 1
store i8 %i2, ptr undef, align 1
store i8 %i3, ptr undef, align 1
ret void
}
; Attribute group for @test: pins the target CPU per-function and sets
; "min-legal-vector-width" to "0".
; NOTE(review): presumably "0" means no 512-bit requirement is imposed, so the
; prefer-256-bit setting from the RUN line takes effect - confirm against the
; X86 backend before relying on this.
attributes #0 = { "min-legal-vector-width"="0" "target-cpu"="skylake-avx512" }