diff --git a/llvm/test/CodeGen/X86/ptest.ll b/llvm/test/CodeGen/X86/ptest.ll
index d3da7524eaf1..6e43b897caef 100644
--- a/llvm/test/CodeGen/X86/ptest.ll
+++ b/llvm/test/CodeGen/X86/ptest.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2,-avx | FileCheck %s --check-prefix=SSE2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1,-avx | FileCheck %s --check-prefix=SSE41
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,-avx2 | FileCheck %s --check-prefixes=AVX,AVX1
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512dq,+avx512bw | FileCheck %s --check-prefixes=AVX,AVX512
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2,-avx | FileCheck %s --check-prefixes=CHECK,SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1,-avx | FileCheck %s --check-prefixes=CHECK,SSE41
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,-avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512dq,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX,AVX512
 
 define i32 @veccond128(<4 x i32> %input) {
 ; SSE2-LABEL: veccond128:
@@ -388,3 +388,163 @@ define i32 @vecsel512(<16 x i32> %input, i32 %a, i32 %b) {
   %t2 = select i1 %t1, i32 %a, i32 %b
   ret i32 %t2
 }
+
+define i1 @vecmp_load64x2(ptr %p0) { ; returns true iff the two i64 values at %p0 and %p0+8 are both zero
+; CHECK-LABEL: vecmp_load64x2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movq (%rdi), %rax
+; CHECK-NEXT: orq 8(%rdi), %rax
+; CHECK-NEXT: sete %al
+; CHECK-NEXT: retq
+  %p1 = getelementptr i8, ptr %p0, i64 8 ; i8-typed GEP, so the index is a byte offset
+  %i0 = load i64, ptr %p0, align 1 ; align 1 - no alignment is assumed for the loads
+  %i1 = load i64, ptr %p1, align 1
+  %or = or i64 %i0, %i1 ; nonzero iff any bit of either loaded value is set
+  %ne = icmp ne i64 %or, 0
+  %zx = zext i1 %ne to i32
+  %eq = icmp eq i32 %zx, 0 ; inverts %ne through the zext - true iff %or is zero
+  ret i1 %eq
+}
+
+define i1 @vecmp_load64x4(ptr %p0) { ; returns true iff the four i64 values at byte offsets 0, 8, 16 and 24 from %p0 are all zero
+; CHECK-LABEL: vecmp_load64x4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movq (%rdi), %rax
+; CHECK-NEXT: movq 8(%rdi), %rcx
+; CHECK-NEXT: orq 16(%rdi), %rax
+; CHECK-NEXT: orq 24(%rdi), %rcx
+; CHECK-NEXT: orq %rax, %rcx
+; CHECK-NEXT: sete %al
+; CHECK-NEXT: retq
+  %p1 = getelementptr i8, ptr %p0, i64 8 ; byte offsets of the second, third and fourth i64
+  %p2 = getelementptr i8, ptr %p0, i64 16
+  %p3 = getelementptr i8, ptr %p0, i64 24
+  %i0 = load i64, ptr %p0, align 1
+  %i1 = load i64, ptr %p1, align 1
+  %i2 = load i64, ptr %p2, align 1
+  %i3 = load i64, ptr %p3, align 1
+  %or02 = or i64 %i0, %i2 ; values are paired as (0,2) and (1,3) before the final or
+  %or13 = or i64 %i1, %i3
+  %or = or i64 %or02, %or13 ; nonzero iff any bit of any loaded value is set
+  %ne = icmp ne i64 %or, 0
+  %zx = zext i1 %ne to i32
+  %eq = icmp eq i32 %zx, 0 ; inverts %ne through the zext - true iff %or is zero
+  ret i1 %eq
+}
+
+define i1 @vecmp_load128x2(ptr %p0) { ; returns true iff the two i128 values at %p0 and %p0+16 are both zero
+; CHECK-LABEL: vecmp_load128x2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movq (%rdi), %rax
+; CHECK-NEXT: movq 8(%rdi), %rcx
+; CHECK-NEXT: orq 24(%rdi), %rcx
+; CHECK-NEXT: orq 16(%rdi), %rax
+; CHECK-NEXT: orq %rcx, %rax
+; CHECK-NEXT: sete %al
+; CHECK-NEXT: retq
+  %p1 = getelementptr i8, ptr %p0, i64 16 ; byte offset of the second i128
+  %i0 = load i128, ptr %p0, align 1
+  %i1 = load i128, ptr %p1, align 1
+  %or = or i128 %i0, %i1 ; nonzero iff any bit of either loaded value is set
+  %ne = icmp ne i128 %or, 0
+  %zx = zext i1 %ne to i32
+  %eq = icmp eq i32 %zx, 0 ; inverts %ne through the zext - true iff %or is zero
+  ret i1 %eq
+}
+
+define i1 @vecmp_load128x4(ptr %p0) { ; returns true iff the four i128 values at byte offsets 0, 16, 32 and 48 from %p0 are all zero
+; CHECK-LABEL: vecmp_load128x4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movq (%rdi), %rax
+; CHECK-NEXT: movq 8(%rdi), %rcx
+; CHECK-NEXT: movq 24(%rdi), %rdx
+; CHECK-NEXT: movq 16(%rdi), %rsi
+; CHECK-NEXT: orq 32(%rdi), %rax
+; CHECK-NEXT: orq 40(%rdi), %rcx
+; CHECK-NEXT: orq 48(%rdi), %rsi
+; CHECK-NEXT: orq %rax, %rsi
+; CHECK-NEXT: orq 56(%rdi), %rdx
+; CHECK-NEXT: orq %rcx, %rdx
+; CHECK-NEXT: orq %rsi, %rdx
+; CHECK-NEXT: sete %al
+; CHECK-NEXT: retq
+  %p1 = getelementptr i8, ptr %p0, i64 16 ; byte offsets of the second, third and fourth i128
+  %p2 = getelementptr i8, ptr %p0, i64 32
+  %p3 = getelementptr i8, ptr %p0, i64 48
+  %i0 = load i128, ptr %p0, align 1
+  %i1 = load i128, ptr %p1, align 1
+  %i2 = load i128, ptr %p2, align 1
+  %i3 = load i128, ptr %p3, align 1
+  %or02 = or i128 %i0, %i2 ; values are paired as (0,2) and (1,3) before the final or
+  %or13 = or i128 %i1, %i3
+  %or = or i128 %or02, %or13 ; nonzero iff any bit of any loaded value is set
+  %ne = icmp ne i128 %or, 0
+  %zx = zext i1 %ne to i32
+  %eq = icmp eq i32 %zx, 0 ; inverts %ne through the zext - true iff %or is zero
+  ret i1 %eq
+}
+
+; PR144861
+define i1 @vecmp_load256x2(ptr %p0) { ; returns true iff the two i256 values at %p0 and %p0+32 are both zero
+; CHECK-LABEL: vecmp_load256x2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movq 24(%rdi), %rax
+; CHECK-NEXT: movq (%rdi), %rcx
+; CHECK-NEXT: movq 8(%rdi), %rdx
+; CHECK-NEXT: movq 16(%rdi), %rsi
+; CHECK-NEXT: orq 48(%rdi), %rsi
+; CHECK-NEXT: orq 32(%rdi), %rcx
+; CHECK-NEXT: orq %rsi, %rcx
+; CHECK-NEXT: orq 56(%rdi), %rax
+; CHECK-NEXT: orq 40(%rdi), %rdx
+; CHECK-NEXT: orq %rax, %rdx
+; CHECK-NEXT: orq %rcx, %rdx
+; CHECK-NEXT: sete %al
+; CHECK-NEXT: retq
+  %p1 = getelementptr i8, ptr %p0, i64 32 ; byte offset of the second i256
+  %i0 = load i256, ptr %p0, align 1
+  %i1 = load i256, ptr %p1, align 1
+  %or = or i256 %i0, %i1 ; nonzero iff any bit of either loaded value is set
+  %ne = icmp ne i256 %or, 0
+  %zx = zext i1 %ne to i32
+  %eq = icmp eq i32 %zx, 0 ; inverts %ne through the zext - true iff %or is zero
+  ret i1 %eq
+}
+
+define i1 @vecmp_load512x2(ptr %p0) { ; returns true iff the two i512 values at %p0 and %p0+64 are both zero
+; CHECK-LABEL: vecmp_load512x2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movq 24(%rdi), %rax
+; CHECK-NEXT: movq 56(%rdi), %rdx
+; CHECK-NEXT: movq 40(%rdi), %rsi
+; CHECK-NEXT: movq 16(%rdi), %rcx
+; CHECK-NEXT: movq 48(%rdi), %r8
+; CHECK-NEXT: movq (%rdi), %r9
+; CHECK-NEXT: movq 8(%rdi), %r10
+; CHECK-NEXT: movq 32(%rdi), %r11
+; CHECK-NEXT: orq 96(%rdi), %r11
+; CHECK-NEXT: orq 64(%rdi), %r9
+; CHECK-NEXT: orq %r11, %r9
+; CHECK-NEXT: orq 112(%rdi), %r8
+; CHECK-NEXT: orq 80(%rdi), %rcx
+; CHECK-NEXT: orq %r8, %rcx
+; CHECK-NEXT: orq %r9, %rcx
+; CHECK-NEXT: orq 104(%rdi), %rsi
+; CHECK-NEXT: orq 72(%rdi), %r10
+; CHECK-NEXT: orq %rsi, %r10
+; CHECK-NEXT: orq 120(%rdi), %rdx
+; CHECK-NEXT: orq 88(%rdi), %rax
+; CHECK-NEXT: orq %rdx, %rax
+; CHECK-NEXT: orq %r10, %rax
+; CHECK-NEXT: orq %rcx, %rax
+; CHECK-NEXT: sete %al
+; CHECK-NEXT: retq
+  %p1 = getelementptr i8, ptr %p0, i64 64 ; byte offset of the second i512
+  %i0 = load i512, ptr %p0, align 1
+  %i1 = load i512, ptr %p1, align 1
+  %or = or i512 %i0, %i1 ; nonzero iff any bit of either loaded value is set
+  %ne = icmp ne i512 %or, 0
+  %zx = zext i1 %ne to i32
+  %eq = icmp eq i32 %zx, 0 ; inverts %ne through the zext - true iff %or is zero
+  ret i1 %eq
+}