Peter Collingbourne e3c72e1075
LowerTypeTests: Shrink check size by 1 instruction on x86.
We currently generate code like this on x86 for a jump table with 5 elements,
assuming the call target is in rbx:

lea global_addr(%rip), %rax # initialize temporary rax with base address
mov %rbx, %rcx              # initialize another temporary rcx for index (rbx will be used for the call, so it is still live)
sub %rax, %rcx              # compute `address - base`
ror $0x3, %rcx              # compute `(address - base) ror 3` i.e. index
cmp $0x4, %rcx              # check index <= 4
ja .Ltrap
[...]
.Ltrap:
ud1
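
For reference, here is a minimal C sketch (not part of the patch; rotr64
and old_check are illustrative names) of what this sequence computes:

#include <stdint.h>

// Rotate right. For a misaligned address the low bits end up in the high
// bits, so the unsigned compare below fails.
static inline uint64_t rotr64(uint64_t v, unsigned n) {
  return (v >> n) | (v << ((64 - n) & 63));
}

// Old check: rotr(address - base, 3) is the table index for an aligned,
// in-range address; anything else becomes a large value and fails.
int old_check(uint64_t addr, uint64_t base) {
  return rotr64(addr - base, 3) <= 4; // 5 entries: indices 0..4
}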

A more efficient instruction sequence, which needs only one temporary
register and one fewer instruction, is possible by subtracting the
address we are testing from the fixed address instead of vice versa:

lea (global_addr + 4*8)(%rip), %rax # initialize temporary rax with address of last element
sub %rbx, %rax                      # compute `last element - address`
ror $0x3, %rax                      # compute `(last element - address) ror 3` i.e. 4 - index
cmp $0x4, %rax                      # check 4 - index <= 4 (same as above)
ja .Ltrap
[...]
.Ltrap:
ud1
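
Continuing the same sketch (new_check is again an illustrative name and
reuses the rotr64 helper above), the new sequence computes the
complementary quantity:

// New check: subtract the tested address from the address of the last
// element. For an aligned address at index i this yields 4 - i, which is
// <= 4 (unsigned) exactly when 0 <= i <= 4, so the same set of addresses
// passes as before.
int new_check(uint64_t addr, uint64_t base) {
  uint64_t last = base + 4 * 8; // address of element 4, loaded by the lea
  return rotr64(last - addr, 3) <= 4;
}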

Change LowerTypeTests to generate that sequence. As a consequence, the
order of bits in the bitsets is reversed. Because it does not matter
which way we do the subtraction on other architectures (to the best of
my knowledge), make the change unconditionally.

Reviewers: fmayer, vitalybuka

Reviewed By: fmayer

Pull Request: https://github.com/llvm/llvm-project/pull/142887
2025-06-06 12:43:24 -07:00

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 2
;; Check the cases involving internal CFI instrumented functions where we do not expect functions to be merged.
; RUN: opt -S -passes=mergefunc < %s | FileCheck %s
; RUN: opt -S -passes=mergefunc,lowertypetests < %s | FileCheck --check-prefix=LOWERTYPETESTS %s
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux"
@0 = private unnamed_addr constant { i16, i16, [12 x i8] } { i16 -1, i16 0, [12 x i8] c"'int (int)'\00" }
; Function Attrs: noinline nounwind optnone
define dso_local i32 @f(i32 noundef %arg) #0 !type !3 !type !4 {
entry:
  %arg.addr = alloca i32, align 4
  %a = alloca i32, align 4
  %b = alloca i32, align 4
  store i32 %arg, ptr %arg.addr, align 4
  store i32 0, ptr %b, align 4
  %0 = load i32, ptr %arg.addr, align 4
  %cmp = icmp sgt i32 %0, 0
  br i1 %cmp, label %if.then, label %if.end

if.then: ; preds = %entry
  store i32 1, ptr %a, align 4
  br label %if.end

if.end: ; preds = %if.then, %entry
  %1 = load i32, ptr %a, align 4
  %2 = load i32, ptr %b, align 4
  %add = add nsw i32 %1, %2
  ret i32 %add
}
; Function Attrs: noinline nounwind optnone
define dso_local i32 @f_thunk(i32 noundef %arg) #0 !type !3 !type !4 {
entry:
  %arg.addr = alloca i32, align 4
  %a = alloca i32, align 4
  %b = alloca i32, align 4
  store i32 %arg, ptr %arg.addr, align 4
  store i32 0, ptr %b, align 4
  %0 = load i32, ptr %arg.addr, align 4
  %cmp = icmp sgt i32 %0, 0
  br i1 %cmp, label %if.then, label %if.end

if.then: ; preds = %entry
  store i32 1, ptr %a, align 4
  br label %if.end

if.end: ; preds = %if.then, %entry
  %1 = load i32, ptr %a, align 4
  %2 = load i32, ptr %b, align 4
  %add = add nsw i32 %1, %2
  ret i32 %add
}
; Function Attrs: noinline nounwind optnone
define dso_local i32 @g(i32 noundef %b) #0 !type !3 !type !4 {
entry:
  %b.addr = alloca i32, align 4
  %fp = alloca ptr, align 8
  store i32 %b, ptr %b.addr, align 4
  %0 = load i32, ptr %b.addr, align 4
  %tobool = icmp ne i32 %0, 0
  %1 = zext i1 %tobool to i64
  %cond = select i1 %tobool, ptr @f, ptr @f_thunk
  store ptr %cond, ptr %fp, align 8
  %2 = load ptr, ptr %fp, align 8
  %3 = call i1 @llvm.type.test(ptr %2, metadata !"_ZTSFiiE"), !nosanitize !5
  br i1 %3, label %cont, label %trap, !nosanitize !5

trap: ; preds = %entry
  call void @llvm.ubsantrap(i8 2) #3, !nosanitize !5
  unreachable, !nosanitize !5

cont: ; preds = %entry
  %4 = load i32, ptr %b.addr, align 4
  %call = call i32 %2(i32 noundef %4)
  ret i32 %call
}
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare i1 @llvm.type.test(ptr, metadata) #1
; Function Attrs: cold noreturn nounwind
declare void @llvm.ubsantrap(i8 immarg) #2
attributes #0 = { noinline nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" }
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
attributes #2 = { cold noreturn nounwind }
attributes #3 = { noreturn nounwind }
!llvm.module.flags = !{!0, !1}
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 4, !"CFI Canonical Jump Tables", i32 0}
!3 = !{i64 0, !"_ZTSFiiE"}
!4 = !{i64 0, !"_ZTSFiiE.generalized"}
!5 = !{}
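;; After lowertypetests, the type test in @g subtracts the tested pointer from
;; the address of the last entry of the 2-entry, 8-byte-aligned jump table
;; (@.cfi.jumptable + 8), rotates right by 3 and compares the result ule 1.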
; CHECK-LABEL: define dso_local i32 @f
; CHECK-SAME: (i32 noundef [[ARG:%.*]]) #[[ATTR0:[0-9]+]] !type [[META2:![0-9]+]] !type [[META3:![0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ARG_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4
; CHECK-NEXT: store i32 [[ARG]], ptr [[ARG_ADDR]], align 4
; CHECK-NEXT: store i32 0, ptr [[B]], align 4
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARG_ADDR]], align 4
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP0]], 0
; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
; CHECK: if.then:
; CHECK-NEXT: store i32 1, ptr [[A]], align 4
; CHECK-NEXT: br label [[IF_END]]
; CHECK: if.end:
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[B]], align 4
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP2]]
; CHECK-NEXT: ret i32 [[ADD]]
;
;
; CHECK-LABEL: define dso_local i32 @g
; CHECK-SAME: (i32 noundef [[B:%.*]]) #[[ATTR0]] !type [[META2]] !type [[META3]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[FP:%.*]] = alloca ptr, align 8
; CHECK-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[B_ADDR]], align 4
; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
; CHECK-NEXT: [[TMP1:%.*]] = zext i1 [[TOBOOL]] to i64
; CHECK-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], ptr @f, ptr @f_thunk
; CHECK-NEXT: store ptr [[COND]], ptr [[FP]], align 8
; CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[FP]], align 8
; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.type.test(ptr [[TMP2]], metadata !"_ZTSFiiE"), !nosanitize [[META4:![0-9]+]]
; CHECK-NEXT: br i1 [[TMP3]], label [[CONT:%.*]], label [[TRAP:%.*]], !nosanitize [[META4]]
; CHECK: trap:
; CHECK-NEXT: call void @llvm.ubsantrap(i8 2) #[[ATTR3:[0-9]+]], !nosanitize [[META4]]
; CHECK-NEXT: unreachable, !nosanitize [[META4]]
; CHECK: cont:
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4
; CHECK-NEXT: [[CALL:%.*]] = call i32 [[TMP2]](i32 noundef [[TMP4]])
; CHECK-NEXT: ret i32 [[CALL]]
;
;
; CHECK-LABEL: define dso_local i32 @f_thunk
; CHECK-SAME: (i32 noundef [[TMP0:%.*]]) #[[ATTR0]] !type [[META2]] !type [[META3]] {
; CHECK-NEXT: [[TMP2:%.*]] = tail call i32 @f(i32 noundef [[TMP0]]) #[[ATTR0]]
; CHECK-NEXT: ret i32 [[TMP2]]
;
;
; LOWERTYPETESTS-LABEL: define dso_local i32 @f
; LOWERTYPETESTS-SAME: (i32 noundef [[ARG:%.*]]) #[[ATTR0:[0-9]+]] !type [[META2:![0-9]+]] !type [[META3:![0-9]+]] {
; LOWERTYPETESTS-NEXT: entry:
; LOWERTYPETESTS-NEXT: [[ARG_ADDR:%.*]] = alloca i32, align 4
; LOWERTYPETESTS-NEXT: [[A:%.*]] = alloca i32, align 4
; LOWERTYPETESTS-NEXT: [[B:%.*]] = alloca i32, align 4
; LOWERTYPETESTS-NEXT: store i32 [[ARG]], ptr [[ARG_ADDR]], align 4
; LOWERTYPETESTS-NEXT: store i32 0, ptr [[B]], align 4
; LOWERTYPETESTS-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARG_ADDR]], align 4
; LOWERTYPETESTS-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP0]], 0
; LOWERTYPETESTS-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
; LOWERTYPETESTS: if.then:
; LOWERTYPETESTS-NEXT: store i32 1, ptr [[A]], align 4
; LOWERTYPETESTS-NEXT: br label [[IF_END]]
; LOWERTYPETESTS: if.end:
; LOWERTYPETESTS-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 4
; LOWERTYPETESTS-NEXT: [[TMP2:%.*]] = load i32, ptr [[B]], align 4
; LOWERTYPETESTS-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP2]]
; LOWERTYPETESTS-NEXT: ret i32 [[ADD]]
;
;
; LOWERTYPETESTS-LABEL: define dso_local i32 @g
; LOWERTYPETESTS-SAME: (i32 noundef [[B:%.*]]) #[[ATTR0]] !type [[META2]] !type [[META3]] {
; LOWERTYPETESTS-NEXT: entry:
; LOWERTYPETESTS-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4
; LOWERTYPETESTS-NEXT: [[FP:%.*]] = alloca ptr, align 8
; LOWERTYPETESTS-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4
; LOWERTYPETESTS-NEXT: [[TMP0:%.*]] = load i32, ptr [[B_ADDR]], align 4
; LOWERTYPETESTS-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
; LOWERTYPETESTS-NEXT: [[TMP1:%.*]] = zext i1 [[TOBOOL]] to i64
; LOWERTYPETESTS-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], ptr @.cfi.jumptable, ptr getelementptr inbounds ([2 x [8 x i8]], ptr @.cfi.jumptable, i64 0, i64 1)
; LOWERTYPETESTS-NEXT: store ptr [[COND]], ptr [[FP]], align 8
; LOWERTYPETESTS-NEXT: [[TMP2:%.*]] = load ptr, ptr [[FP]], align 8
; LOWERTYPETESTS-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[TMP2]] to i64
; LOWERTYPETESTS-NEXT: [[TMP4:%.*]] = sub i64 ptrtoint (ptr getelementptr (i8, ptr @.cfi.jumptable, i64 8) to i64), [[TMP3]]
; LOWERTYPETESTS-NEXT: [[TMP5:%.*]] = call i64 @llvm.fshr.i64(i64 [[TMP4]], i64 [[TMP4]], i64 3)
; LOWERTYPETESTS-NEXT: [[TMP6:%.*]] = icmp ule i64 [[TMP5]], 1
; LOWERTYPETESTS-NEXT: br i1 [[TMP6]], label [[CONT:%.*]], label [[TRAP:%.*]], !nosanitize [[META4:![0-9]+]]
; LOWERTYPETESTS: trap:
; LOWERTYPETESTS-NEXT: call void @llvm.ubsantrap(i8 2) #[[ATTR4:[0-9]+]], !nosanitize [[META4]]
; LOWERTYPETESTS-NEXT: unreachable, !nosanitize [[META4]]
; LOWERTYPETESTS: cont:
; LOWERTYPETESTS-NEXT: [[TMP7:%.*]] = load i32, ptr [[B_ADDR]], align 4
; LOWERTYPETESTS-NEXT: [[CALL:%.*]] = call i32 [[TMP2]](i32 noundef [[TMP7]])
; LOWERTYPETESTS-NEXT: ret i32 [[CALL]]
;
;
; LOWERTYPETESTS-LABEL: define dso_local i32 @f_thunk
; LOWERTYPETESTS-SAME: (i32 noundef [[TMP0:%.*]]) #[[ATTR0]] !type [[META2]] !type [[META3]] {
; LOWERTYPETESTS-NEXT: [[TMP2:%.*]] = tail call i32 @f(i32 noundef [[TMP0]]) #[[ATTR0]]
; LOWERTYPETESTS-NEXT: ret i32 [[TMP2]]
;
;
; LOWERTYPETESTS-LABEL: define private void @.cfi.jumptable
; LOWERTYPETESTS-SAME: () #[[ATTR3:[0-9]+]] align 8 {
; LOWERTYPETESTS-NEXT: entry:
; LOWERTYPETESTS-NEXT: call void asm sideeffect "jmp ${0:c}@plt\0Aint3\0Aint3\0Aint3\0A", "s"(ptr @f)
; LOWERTYPETESTS-NEXT: call void asm sideeffect "jmp ${0:c}@plt\0Aint3\0Aint3\0Aint3\0A", "s"(ptr @f_thunk)
; LOWERTYPETESTS-NEXT: unreachable
;