From 2bfca35614661bb60ef379f4866a32c4c19481e4 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 4 Jul 2022 08:33:21 -0700 Subject: [PATCH] [X86] Disable combineVectorSizedSetCCEquality for soft float. The vector types aren't legal with soft float. Also disable under NoImplicitFloat for good measure. Fixes PR56351. Differential Revision: https://reviews.llvm.org/D129060 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 10 +++++++--- llvm/test/CodeGen/X86/pr56351.ll | 18 ++++++++++++++++++ 2 files changed, 25 insertions(+), 3 deletions(-) create mode 100644 llvm/test/CodeGen/X86/pr56351.ll diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 61c1fd25031d..379b4ed931a1 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -51654,9 +51654,13 @@ static SDValue combineVectorSizedSetCCEquality(SDNode *SetCC, SelectionDAG &DAG, // Use XOR (plus OR) and PTEST after SSE4.1 for 128/256-bit operands. // Use PCMPNEQ (plus OR) and KORTEST for 512-bit operands. // Otherwise use PCMPEQ (plus AND) and mask testing. - if ((OpSize == 128 && Subtarget.hasSSE2()) || - (OpSize == 256 && Subtarget.hasAVX()) || - (OpSize == 512 && Subtarget.useAVX512Regs())) { + bool NoImplicitFloatOps = + DAG.getMachineFunction().getFunction().hasFnAttribute( + Attribute::NoImplicitFloat); + if (!Subtarget.useSoftFloat() && !NoImplicitFloatOps && + ((OpSize == 128 && Subtarget.hasSSE2()) || + (OpSize == 256 && Subtarget.hasAVX()) || + (OpSize == 512 && Subtarget.useAVX512Regs()))) { bool HasPT = Subtarget.hasSSE41(); // PTEST and MOVMSK are slow on Knights Landing and Knights Mill and widened diff --git a/llvm/test/CodeGen/X86/pr56351.ll b/llvm/test/CodeGen/X86/pr56351.ll new file mode 100644 index 000000000000..d9473b268008 --- /dev/null +++ b/llvm/test/CodeGen/X86/pr56351.ll @@ -0,0 +1,18 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+soft-float | FileCheck %s + +define i1 @foo(i128* %x, i128* %y) { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: +; CHECK-NEXT: movq (%rdi), %rax +; CHECK-NEXT: movq 8(%rdi), %rcx +; CHECK-NEXT: xorq 8(%rsi), %rcx +; CHECK-NEXT: xorq (%rsi), %rax +; CHECK-NEXT: orq %rcx, %rax +; CHECK-NEXT: sete %al +; CHECK-NEXT: retq + %a = load i128, i128* %x, align 16 + %b = load i128, i128* %y, align 16 + %c = icmp eq i128 %a, %b + ret i1 %c +}