Sander de Smalen da4a5a46b3 [InstCombine] Promote expression tree with @llvm.vscale when zero-extending result.
The LoopVectorizer emits the (scaled) element count as i32, which for
scalable VFs results in calls to @llvm.vscale.i32(). This value is scaled
and further zero-extended to i64.

The zero-extend can be folded away by executing the whole expression in i64
type using @llvm.vscale.i64(). Any logical `and` that would be needed to mask
the result can be further folded away by KnownBits analysis when
vscale_range is set.

Reviewed By: nikic

Differential Revision: https://reviews.llvm.org/D143016
2023-02-02 11:18:16 +00:00

46 lines
1.5 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes='instcombine' -S < %s | FileCheck %s
; The datalayout declares 32 and 64 as the native integer widths (n32:64).
; NOTE(review): the i32 -> i64 promotions exercised below presumably depend on
; i64 being a native width; confirm before changing this string.
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
; zext of a bare i32 vscale call. The expected output calls the i64 overload
; of the intrinsic instead and keeps the zero-extension semantics by masking
; with 4294967295 (0xFFFFFFFF). This function carries no vscale_range
; attribute, so the mask is expected to remain.
define i64 @promote_vscale_i32_to_i64() {
; CHECK-LABEL: @promote_vscale_i32_to_i64(
; CHECK-NEXT: [[VSCALE:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[EXT:%.*]] = and i64 [[VSCALE]], 4294967295
; CHECK-NEXT: ret i64 [[EXT]]
;
%vscale = call i32 @llvm.vscale.i32()
%ext = zext i32 %vscale to i64
ret i64 %ext
}
; zext of (i32 vscale << 3). The expected output performs the call and the
; shift in i64 and then masks with 4294967288 (0xFFFFFFF8): the low three bits
; are already zero after the shift, so only bits 3..31 need to be kept.
; NOTE(review): "pomote" looks like a typo of "promote"; the name is left
; as-is here because the label assertion and a later comment refer to it.
define i64 @pomote_zext_shl_vscale_i32_to_i64() {
; CHECK-LABEL: @pomote_zext_shl_vscale_i32_to_i64(
; CHECK-NEXT: [[VSCALE:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[SHL:%.*]] = shl i64 [[VSCALE]], 3
; CHECK-NEXT: [[EXT:%.*]] = and i64 [[SHL]], 4294967288
; CHECK-NEXT: ret i64 [[EXT]]
;
%vscale = call i32 @llvm.vscale.i32()
%shl = shl i32 %vscale, 3
%ext = zext i32 %shl to i64
ret i64 %ext
}
; Same input IR as @pomote_zext_shl_vscale_i32_to_i64, but the function uses
; attribute group #0 (vscale_range) so that the 'and' is folded away: the
; expected output is just the i64 vscale call followed by a shift that is
; additionally marked nuw nsw, with no mask instruction.
define i64 @free_zext_vscale_shl_i32_to_i64() #0 {
; CHECK-LABEL: @free_zext_vscale_shl_i32_to_i64(
; CHECK-NEXT: [[VSCALE:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[SHL:%.*]] = shl nuw nsw i64 [[VSCALE]], 3
; CHECK-NEXT: ret i64 [[SHL]]
;
%vscale = call i32 @llvm.vscale.i32()
%shl = shl i32 %vscale, 3
%ext = zext i32 %shl to i64
ret i64 %ext
}
; i32 overload of the vscale intrinsic, called by every test input above.
declare i32 @llvm.vscale.i32()
; Attribute group #0 bounds vscale to the range [1, 16]; in this file it is
; applied only to @free_zext_vscale_shl_i32_to_i64.
attributes #0 = { vscale_range(1,16) }