
This is a follow up to 924907bc6, and is mostly motivated by consistency but does include one additional optimization. In general, we prefer 0.0 over -0.0 as the identity value for an fadd. We use that value in several places, but don't in others. So, let's be consistent and use the same identity (when nsz allows) everywhere. This creates a bunch of test churn, but due to 924907bc6, most of that churn doesn't actually indicate a change in codegen. The exception is that this change enables the use of 0.0 for nsz, but *not* reasoc, fadd reductions. Or said differently, it allows the neutral value of an ordered fadd reduction to be 0.0.
99 lines
5.7 KiB
LLVM
99 lines
5.7 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
|
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s -passes=loop-vectorize,dce,instcombine -S | FileCheck -check-prefix=GFX9 %s
|
|
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s -passes=loop-vectorize,dce,instcombine -S | FileCheck -check-prefix=VI %s
|
|
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii < %s -passes=loop-vectorize,dce,instcombine -S | FileCheck -check-prefix=CI %s
|
|
|
|
define half @vectorize_v2f16_loop(ptr addrspace(1) noalias %s) {
|
|
; GFX9-LABEL: @vectorize_v2f16_loop(
|
|
; GFX9-NEXT: entry:
|
|
; GFX9-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
|
|
; GFX9: vector.ph:
|
|
; GFX9-NEXT: br label [[VECTOR_BODY:%.*]]
|
|
; GFX9: vector.body:
|
|
; GFX9-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
|
; GFX9-NEXT: [[VEC_PHI:%.*]] = phi <2 x half> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ]
|
|
; GFX9-NEXT: [[VEC_PHI1:%.*]] = phi <2 x half> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
|
|
; GFX9-NEXT: [[TMP0:%.*]] = getelementptr inbounds half, ptr addrspace(1) [[S:%.*]], i64 [[INDEX]]
|
|
; GFX9-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP0]], i64 4
|
|
; GFX9-NEXT: [[WIDE_LOAD:%.*]] = load <2 x half>, ptr addrspace(1) [[TMP0]], align 2
|
|
; GFX9-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x half>, ptr addrspace(1) [[TMP1]], align 2
|
|
; GFX9-NEXT: [[TMP2]] = fadd fast <2 x half> [[VEC_PHI]], [[WIDE_LOAD]]
|
|
; GFX9-NEXT: [[TMP3]] = fadd fast <2 x half> [[VEC_PHI1]], [[WIDE_LOAD2]]
|
|
; GFX9-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
|
|
; GFX9-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
|
|
; GFX9-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
|
|
; GFX9: middle.block:
|
|
; GFX9-NEXT: [[BIN_RDX:%.*]] = fadd fast <2 x half> [[TMP3]], [[TMP2]]
|
|
; GFX9-NEXT: [[TMP5:%.*]] = call fast half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> [[BIN_RDX]])
|
|
; GFX9-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
|
|
; GFX9: scalar.ph:
|
|
; GFX9-NEXT: br label [[FOR_BODY:%.*]]
|
|
; GFX9: for.body:
|
|
; GFX9-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
|
|
; GFX9: for.end:
|
|
; GFX9-NEXT: [[ADD_LCSSA:%.*]] = phi half [ poison, [[FOR_BODY]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ]
|
|
; GFX9-NEXT: ret half [[ADD_LCSSA]]
|
|
;
|
|
; VI-LABEL: @vectorize_v2f16_loop(
|
|
; VI-NEXT: entry:
|
|
; VI-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
|
|
; VI: vector.ph:
|
|
; VI-NEXT: br label [[VECTOR_BODY:%.*]]
|
|
; VI: vector.body:
|
|
; VI-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
|
; VI-NEXT: [[VEC_PHI:%.*]] = phi <2 x half> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ]
|
|
; VI-NEXT: [[VEC_PHI1:%.*]] = phi <2 x half> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
|
|
; VI-NEXT: [[TMP0:%.*]] = getelementptr inbounds half, ptr addrspace(1) [[S:%.*]], i64 [[INDEX]]
|
|
; VI-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP0]], i64 4
|
|
; VI-NEXT: [[WIDE_LOAD:%.*]] = load <2 x half>, ptr addrspace(1) [[TMP0]], align 2
|
|
; VI-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x half>, ptr addrspace(1) [[TMP1]], align 2
|
|
; VI-NEXT: [[TMP2]] = fadd fast <2 x half> [[VEC_PHI]], [[WIDE_LOAD]]
|
|
; VI-NEXT: [[TMP3]] = fadd fast <2 x half> [[VEC_PHI1]], [[WIDE_LOAD2]]
|
|
; VI-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
|
|
; VI-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
|
|
; VI-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
|
|
; VI: middle.block:
|
|
; VI-NEXT: [[BIN_RDX:%.*]] = fadd fast <2 x half> [[TMP3]], [[TMP2]]
|
|
; VI-NEXT: [[TMP5:%.*]] = call fast half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> [[BIN_RDX]])
|
|
; VI-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
|
|
; VI: scalar.ph:
|
|
; VI-NEXT: br label [[FOR_BODY:%.*]]
|
|
; VI: for.body:
|
|
; VI-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
|
|
; VI: for.end:
|
|
; VI-NEXT: [[ADD_LCSSA:%.*]] = phi half [ poison, [[FOR_BODY]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ]
|
|
; VI-NEXT: ret half [[ADD_LCSSA]]
|
|
;
|
|
; CI-LABEL: @vectorize_v2f16_loop(
|
|
; CI-NEXT: entry:
|
|
; CI-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CI: for.body:
|
|
; CI-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
|
|
; CI-NEXT: [[Q_04:%.*]] = phi half [ 0xH0000, [[ENTRY]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
|
|
; CI-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds half, ptr addrspace(1) [[S:%.*]], i64 [[INDVARS_IV]]
|
|
; CI-NEXT: [[TMP0:%.*]] = load half, ptr addrspace(1) [[ARRAYIDX]], align 2
|
|
; CI-NEXT: [[ADD]] = fadd fast half [[Q_04]], [[TMP0]]
|
|
; CI-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
|
|
; CI-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 256
|
|
; CI-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
|
|
; CI: for.end:
|
|
; CI-NEXT: ret half [[ADD]]
|
|
;
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body:
|
|
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
|
%q.04 = phi half [ 0.0, %entry ], [ %add, %for.body ]
|
|
%arrayidx = getelementptr inbounds half, ptr addrspace(1) %s, i64 %indvars.iv
|
|
%0 = load half, ptr addrspace(1) %arrayidx, align 2
|
|
%add = fadd fast half %q.04, %0
|
|
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
|
%exitcond = icmp eq i64 %indvars.iv.next, 256
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end:
|
|
%add.lcssa = phi half [ %add, %for.body ]
|
|
ret half %add.lcssa
|
|
}
|