Nikita Popov eecb99c5f6 [Tests] Add disjoint flag to some tests (NFC)
These tests rely on SCEV recognizing an "or" with no common
bits as an "add". Add the disjoint flag to relevant or instructions
in preparation for switching SCEV to use the flag instead of the
ValueTracking query. The IR with disjoint flag matches what
InstCombine would produce.
2023-12-05 14:09:36 +01:00

53 lines
2.3 KiB
LLVM

; Regression test for the load-store-vectorizer pass on the amdgcn-amd-amdhsa
; target. The pass is run twice: once with the default pipeline, and once
; nested in an explicit function pass manager with basic-aa as the only alias
; analysis. Both runs are verified against the same FileCheck directives.
; RUN: opt -mtriple=amdgcn-amd-amdhsa -passes=load-store-vectorizer -S -o - %s | FileCheck %s
; RUN: opt -mtriple=amdgcn-amd-amdhsa -aa-pipeline=basic-aa -passes='function(load-store-vectorizer)' -S -o - %s | FileCheck %s
; Data layout used by the test; note that address space 1 pointers (the
; address space of the kernel argument below) are 64 bits wide (p1:64:64).
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
; External functions called by the kernel. The first two are presumably the
; mangled OpenCL get_local_id / get_group_id work-item builtins (inferred from
; the names only); to the vectorizer they are opaque i64-returning calls.
declare i64 @_Z12get_local_idj(i32)
declare i64 @_Z12get_group_idj(i32)
declare double @llvm.fmuladd.f64(double, double, double)
; The kernel performs two scalar float load/compute/store sequences on %c, at
; element index %add10 and at element index (%add10 | 1). The directives below
; expect the vectorizer to merge the two loads into one <2 x float> load and
; the two stores into one <2 x float> store, which requires proving that the
; two addresses are adjacent despite the index being built from a mix of
; shifts, masks, "or" and "add".
; CHECK-LABEL: @factorizedVsNonfactorizedAccess(
; CHECK: load <2 x float>
; CHECK: store <2 x float>
define amdgpu_kernel void @factorizedVsNonfactorizedAccess(ptr addrspace(1) nocapture %c) {
entry:
; Opaque i64 inputs from which the element index is derived.
%call = tail call i64 @_Z12get_local_idj(i32 0)
%call1 = tail call i64 @_Z12get_group_idj(i32 0)
%div = lshr i64 %call, 4
%div2 = lshr i64 %call1, 3
; %add = ((%call1 >> 3) << 7) | ((%call << 3) & 120).
; %mul has its low 7 bits clear and %mul3 only has bits 3..6 set, so the two
; operands of the "or" share no bits and the low 3 bits of %add are zero.
%mul = shl i64 %div2, 7
%rem = shl i64 %call, 3
%mul3 = and i64 %rem, 120
%add = or i64 %mul, %mul3
; %add7 = ((%call1 << 7) & 896) + ((%call >> 4) << 3), then scaled by 1024.
; %mul9 therefore has its low 10 bits clear.
%rem4 = shl i64 %call1, 7
%mul5 = and i64 %rem4, 896
%mul6 = shl nuw nsw i64 %div, 3
%add7 = add nuw i64 %mul5, %mul6
%mul9 = shl i64 %add7, 10
; Final element index of the first access; its low 3 bits are zero because
; both addends have their low 3 bits clear.
%add10 = add i64 %mul9, %add
%arrayidx = getelementptr inbounds float, ptr addrspace(1) %c, i64 %add10
; First access: load, widen to double, scale, round-trip through float, feed
; through fmuladd (constant * 0.0 + value), narrow and store back in place.
%load1 = load float, ptr addrspace(1) %arrayidx, align 4
%conv = fpext float %load1 to double
%mul11 = fmul double %conv, 0x3FEAB481D8F35506
%conv12 = fptrunc double %mul11 to float
%conv18 = fpext float %conv12 to double
%storeval1 = tail call double @llvm.fmuladd.f64(double 0x3FF4FFAFBBEC946A, double 0.000000e+00, double %conv18)
%cstoreval1 = fptrunc double %storeval1 to float
store float %cstoreval1, ptr addrspace(1) %arrayidx, align 4
; Index of the second access. Bit 0 of %add10 is zero, so "or 1" equals
; "add 1"; the disjoint flag states that the operands share no set bits,
; letting the analysis treat this as %add10 + 1 (the next float element).
%add23 = or disjoint i64 %add10, 1
%arrayidx24 = getelementptr inbounds float, ptr addrspace(1) %c, i64 %add23
; Second access: same computation as the first, applied to the next element.
%load2 = load float, ptr addrspace(1) %arrayidx24, align 4
%conv25 = fpext float %load2 to double
%mul26 = fmul double %conv25, 0x3FEAB481D8F35506
%conv27 = fptrunc double %mul26 to float
%conv34 = fpext float %conv27 to double
%storeval2 = tail call double @llvm.fmuladd.f64(double 0x3FF4FFAFBBEC946A, double 0.000000e+00, double %conv34)
%cstoreval2 = fptrunc double %storeval2 to float
store float %cstoreval2, ptr addrspace(1) %arrayidx24, align 4
ret void
}