Alexey Bataev a0086add2e [SLP]Improve gathering of scalar elements.
1. Better sorting of scalars to be gathered. Try to insert
   constants, arguments, and out-of-loop instructions first, and only then
   the instructions that are inside the loop. This improves hoisting of
   invariant insertelement instructions.
2. Better detection of shuffle candidates in gathering function.
3. The cost of insertelement for constants is 0.

Part of D57059.

Differential Revision: https://reviews.llvm.org/D103458
2021-06-09 05:23:21 -07:00

94 lines
4.3 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -basic-aa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 | FileCheck %s
; The command above runs the SLP vectorizer followed by DCE on this file for an
; x86-64 macOS triple with -mcpu=corei7 and matches the textual output with
; FileCheck.
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"
; Struct type that appears only in the signature of @Rf_gpptr and in the
; constant-expression branch conditions of the functions shown below.
%struct.GPar.0.16.26 = type { [0 x double], double }
; External double that each test function loads once in its entry block.
@d = external global double, align 8
; Varargs declaration. The functions shown below never call it; they only
; compare its address inside a constant expression.
declare %struct.GPar.0.16.26* @Rf_gpptr(...)
; Two parallel scalar chains (fsub from -0.0, then fsub, then fdiv) feed one
; fcmp. One chain starts from undef, the other from the value loaded from @d.
; The assertions below expect both chains to be merged into <2 x double>
; operations, with only the loaded value inserted (lane 1 of a poison vector)
; and the results extracted again for the scalar fcmp.
define void @Rf_GReset() {
; CHECK-LABEL: @Rf_GReset(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load double, double* @d, align 8
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> poison, double [[TMP0]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, [[TMP1]]
; CHECK-NEXT: br i1 icmp eq (%struct.GPar.0.16.26* (...)* inttoptr (i64 115 to %struct.GPar.0.16.26* (...)*), %struct.GPar.0.16.26* (...)* @Rf_gpptr), label [[IF_THEN:%.*]], label [[IF_END7:%.*]]
; CHECK: if.then:
; CHECK-NEXT: [[TMP3:%.*]] = fsub <2 x double> [[TMP2]], poison
; CHECK-NEXT: [[TMP4:%.*]] = fdiv <2 x double> [[TMP3]], poison
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[TMP4]], i32 0
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[TMP4]], i32 1
; CHECK-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP5]], [[TMP6]]
; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN6:%.*]], label [[IF_END7]]
; CHECK: if.then6:
; CHECK-NEXT: br label [[IF_END7]]
; CHECK: if.end7:
; CHECK-NEXT: ret void
;
entry:
; First link of each chain, written as the binary form (-0.0 - x). %sub takes
; an undef operand, %sub1 takes the value loaded from @d.
%sub = fsub double -0.000000e+00, undef
%0 = load double, double* @d, align 8
%sub1 = fsub double -0.000000e+00, %0
; The branch condition is a constant expression comparing the address of
; @Rf_gpptr against inttoptr(115).
br i1 icmp eq (%struct.GPar.0.16.26* (...)* inttoptr (i64 115 to %struct.GPar.0.16.26* (...)*), %struct.GPar.0.16.26* (...)* @Rf_gpptr), label %if.then, label %if.end7
if.then: ; preds = %entry
; Remaining links of both chains; every second operand here is undef.
%sub2 = fsub double %sub, undef
%div.i = fdiv double %sub2, undef
%sub4 = fsub double %sub1, undef
%div.i16 = fdiv double %sub4, undef
; The comparison stays scalar, so the expected output extracts both lanes.
%cmp = fcmp ogt double %div.i, %div.i16
br i1 %cmp, label %if.then6, label %if.end7
if.then6: ; preds = %if.then
br label %if.end7
if.end7: ; preds = %if.then6, %if.then, %entry
; %g.0 has no users in this function, and the expected output contains no phi
; in this block (presumably removed by -dce).
%g.0 = phi double [ 0.000000e+00, %if.then6 ], [ %sub, %if.then ], [ %sub, %entry ]
ret void
}
; Same shape as the binary-fsub test, but the first link of each chain is a
; unary fneg instead of (fsub -0.0, x). The assertions below expect a single
; <2 x double> fneg of a vector whose lane 1 holds the value loaded from @d,
; followed by the vector fsub and fdiv and two extracts for the scalar fcmp.
define void @Rf_GReset_unary_fneg() {
; CHECK-LABEL: @Rf_GReset_unary_fneg(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load double, double* @d, align 8
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> poison, double [[TMP0]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = fneg <2 x double> [[TMP1]]
; CHECK-NEXT: br i1 icmp eq (%struct.GPar.0.16.26* (...)* inttoptr (i64 115 to %struct.GPar.0.16.26* (...)*), %struct.GPar.0.16.26* (...)* @Rf_gpptr), label [[IF_THEN:%.*]], label [[IF_END7:%.*]]
; CHECK: if.then:
; CHECK-NEXT: [[TMP3:%.*]] = fsub <2 x double> [[TMP2]], poison
; CHECK-NEXT: [[TMP4:%.*]] = fdiv <2 x double> [[TMP3]], poison
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[TMP4]], i32 0
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[TMP4]], i32 1
; CHECK-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP5]], [[TMP6]]
; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN6:%.*]], label [[IF_END7]]
; CHECK: if.then6:
; CHECK-NEXT: br label [[IF_END7]]
; CHECK: if.end7:
; CHECK-NEXT: ret void
;
entry:
; First link of each chain as a unary fneg. %sub negates undef, %sub1 negates
; the value loaded from @d.
%sub = fneg double undef
%0 = load double, double* @d, align 8
%sub1 = fneg double %0
; The branch condition is a constant expression comparing the address of
; @Rf_gpptr against inttoptr(115).
br i1 icmp eq (%struct.GPar.0.16.26* (...)* inttoptr (i64 115 to %struct.GPar.0.16.26* (...)*), %struct.GPar.0.16.26* (...)* @Rf_gpptr), label %if.then, label %if.end7
if.then: ; preds = %entry
; Remaining links of both chains; every second operand here is undef.
%sub2 = fsub double %sub, undef
%div.i = fdiv double %sub2, undef
%sub4 = fsub double %sub1, undef
%div.i16 = fdiv double %sub4, undef
; The comparison stays scalar, so the expected output extracts both lanes.
%cmp = fcmp ogt double %div.i, %div.i16
br i1 %cmp, label %if.then6, label %if.end7
if.then6: ; preds = %if.then
br label %if.end7
if.end7: ; preds = %if.then6, %if.then, %entry
; %g.0 has no users in this function, and the expected output contains no phi
; in this block (presumably removed by -dce).
%g.0 = phi double [ 0.000000e+00, %if.then6 ], [ %sub, %if.then ], [ %sub, %entry ]
ret void
}