llvm-project/llvm/test/Transforms/IROutliner/outlining-multiple-exits-diff-outputs.ll
Andrew Litteken c172f1ad39 [IROutliner] Adding supports for multiple exits
When we start outlining across branches, there is the possibility that we will have two different blocks with different output locations, or a single branch that goes to two blocks outside of the region that is being outlined. While the CodeExtractor provides most of the mechanisms by using the return value of the extracted function as the input to a switch statement to correctly branch to the correct location, we need special handling for different output schemas to each location.

This is done by repeating the existing storing scheme for each different exit block. We have a map from the return values used, to the basic block that is used to store the outputs for that particular exit block within the outlined function. Then if needed, we create a switch statement for each return block to branch to the correct set of stored outputs.

Reviewers: paquette

Differential Revision: https://reviews.llvm.org/D106993
2021-09-08 08:58:07 -07:00

230 lines
9.6 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs
; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
; Here we have multiple exits, and each exit needs different outputs depending
; on its source. This checks that each exit is handled by a separate switch
; statement.
; First input function. The only path from entry is
; entry -> block_2 -> block_5 -> block_7; block_1 and the chain
; block_3 -> block_4 -> block_6 have no predecessors in this function.
; The two returning blocks consume different values: block_6 reads
; %aval/%bval (defined in block_3), block_7 reads %a2val/%b2val (defined in
; block_2).
define void @outline_outputs1() #0 {
entry:
  ; Stack slots. %output2 and %result2 are never referenced again in this
  ; function.
  %output = alloca i32, align 4
  %result = alloca i32, align 4
  %output2 = alloca i32, align 4
  %result2 = alloca i32, align 4
  %a = alloca i32, align 4
  %b = alloca i32, align 4
  br label %block_2

block_1:
  ; No branch targets this block; %a2 and %b2 are unused.
  %a2 = alloca i32, align 4
  %b2 = alloca i32, align 4
  br label %block_2

block_2:
  ; Load %a and %b, then compute 2 + a and 2 * b.
  %a2val = load i32, i32* %a
  %b2val = load i32, i32* %b
  %add2 = add i32 2, %a2val
  %mul2 = mul i32 2, %b2val
  br label %block_5

block_3:
  ; Same load/add/mul sequence as block_2, under different value names.
  %aval = load i32, i32* %a
  %bval = load i32, i32* %b
  %add = add i32 2, %aval
  %mul = mul i32 2, %bval
  br label %block_4

block_4:
  ; Store block_3's results, then leave through block_6.
  store i32 %add, i32* %output, align 4
  store i32 %mul, i32* %result, align 4
  br label %block_6

block_5:
  ; Store block_2's results, then leave through block_7.
  store i32 %add2, i32* %output, align 4
  store i32 %mul2, i32* %result, align 4
  br label %block_7

block_6:
  ; Uses the values loaded in block_3.
  %div = udiv i32 %aval, %bval
  ret void

block_7:
  ; Uses the values loaded in block_2.
  %sub = sub i32 %a2val, %b2val
  ret void
}
; Second input function. Blocks entry through block_5 carry the same
; instructions as in @outline_outputs1, but the exits are swapped:
; block_4 leaves through block_7 and block_5 leaves through block_6.
; block_7 reads the computed %add/%mul (defined in block_3) rather than the
; loaded values, and block_6 reads %a2val/%b2val (defined in block_2).
define void @outline_outputs2() #0 {
entry:
  ; Stack slots. %output2 and %result2 are never referenced again in this
  ; function.
  %output = alloca i32, align 4
  %result = alloca i32, align 4
  %output2 = alloca i32, align 4
  %result2 = alloca i32, align 4
  %a = alloca i32, align 4
  %b = alloca i32, align 4
  br label %block_2

block_1:
  ; No branch targets this block; %a2 and %b2 are unused.
  %a2 = alloca i32, align 4
  %b2 = alloca i32, align 4
  br label %block_2

block_2:
  ; Load %a and %b, then compute 2 + a and 2 * b.
  %a2val = load i32, i32* %a
  %b2val = load i32, i32* %b
  %add2 = add i32 2, %a2val
  %mul2 = mul i32 2, %b2val
  br label %block_5

block_3:
  ; Same load/add/mul sequence as block_2, under different value names.
  %aval = load i32, i32* %a
  %bval = load i32, i32* %b
  %add = add i32 2, %aval
  %mul = mul i32 2, %bval
  br label %block_4

block_4:
  ; Store block_3's results, then leave through block_7.
  store i32 %add, i32* %output, align 4
  store i32 %mul, i32* %result, align 4
  br label %block_7

block_5:
  ; Store block_2's results, then leave through block_6.
  store i32 %add2, i32* %output, align 4
  store i32 %mul2, i32* %result, align 4
  br label %block_6

block_6:
  ; Uses the values loaded in block_2.
  %diff = sub i32 %a2val, %b2val
  ret void

block_7:
  ; Uses the add/mul results computed in block_3.
  %quot = udiv i32 %add, %mul
  ret void
}
; CHECK-LABEL: @outline_outputs1(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[BVAL_LOC:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[AVAL_LOC:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[B2VAL_LOC:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[A2VAL_LOC:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[OUTPUT:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[RESULT:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[OUTPUT2:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[RESULT2:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4
; CHECK-NEXT: br label [[BLOCK_2:%.*]]
; CHECK: block_1:
; CHECK-NEXT: [[A2:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[B2:%.*]] = alloca i32, align 4
; CHECK-NEXT: br label [[BLOCK_2]]
; CHECK: block_2:
; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[A2VAL_LOC]] to i8*
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]])
; CHECK-NEXT: [[LT_CAST1:%.*]] = bitcast i32* [[B2VAL_LOC]] to i8*
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST1]])
; CHECK-NEXT: [[LT_CAST2:%.*]] = bitcast i32* [[AVAL_LOC]] to i8*
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST2]])
; CHECK-NEXT: [[LT_CAST3:%.*]] = bitcast i32* [[BVAL_LOC]] to i8*
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST3]])
; CHECK-NEXT: [[TMP0:%.*]] = call i1 @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[OUTPUT]], i32* [[RESULT]], i32* [[A2VAL_LOC]], i32* [[B2VAL_LOC]], i32* [[AVAL_LOC]], i32* [[BVAL_LOC]], i32 0)
; CHECK-NEXT: [[A2VAL_RELOAD:%.*]] = load i32, i32* [[A2VAL_LOC]], align 4
; CHECK-NEXT: [[B2VAL_RELOAD:%.*]] = load i32, i32* [[B2VAL_LOC]], align 4
; CHECK-NEXT: [[AVAL_RELOAD:%.*]] = load i32, i32* [[AVAL_LOC]], align 4
; CHECK-NEXT: [[BVAL_RELOAD:%.*]] = load i32, i32* [[BVAL_LOC]], align 4
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST1]])
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST2]])
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST3]])
; CHECK-NEXT: br i1 [[TMP0]], label [[BLOCK_6:%.*]], label [[BLOCK_7:%.*]]
; CHECK: block_6:
; CHECK-NEXT: [[DIV:%.*]] = udiv i32 [[AVAL_RELOAD]], [[BVAL_RELOAD]]
; CHECK-NEXT: ret void
; CHECK: block_7:
; CHECK-NEXT: [[SUB:%.*]] = sub i32 [[A2VAL_RELOAD]], [[B2VAL_RELOAD]]
; CHECK-NEXT: ret void
;
;
; CHECK-LABEL: @outline_outputs2(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[MUL_LOC:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[ADD_LOC:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[B2VAL_LOC:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[A2VAL_LOC:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[OUTPUT:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[RESULT:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[OUTPUT2:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[RESULT2:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4
; CHECK-NEXT: br label [[BLOCK_2:%.*]]
; CHECK: block_1:
; CHECK-NEXT: [[A2:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[B2:%.*]] = alloca i32, align 4
; CHECK-NEXT: br label [[BLOCK_2]]
; CHECK: block_2:
; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[A2VAL_LOC]] to i8*
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]])
; CHECK-NEXT: [[LT_CAST1:%.*]] = bitcast i32* [[B2VAL_LOC]] to i8*
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST1]])
; CHECK-NEXT: [[LT_CAST2:%.*]] = bitcast i32* [[ADD_LOC]] to i8*
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST2]])
; CHECK-NEXT: [[LT_CAST3:%.*]] = bitcast i32* [[MUL_LOC]] to i8*
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST3]])
; CHECK-NEXT: [[TMP0:%.*]] = call i1 @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[OUTPUT]], i32* [[RESULT]], i32* [[A2VAL_LOC]], i32* [[B2VAL_LOC]], i32* [[ADD_LOC]], i32* [[MUL_LOC]], i32 1)
; CHECK-NEXT: [[A2VAL_RELOAD:%.*]] = load i32, i32* [[A2VAL_LOC]], align 4
; CHECK-NEXT: [[B2VAL_RELOAD:%.*]] = load i32, i32* [[B2VAL_LOC]], align 4
; CHECK-NEXT: [[ADD_RELOAD:%.*]] = load i32, i32* [[ADD_LOC]], align 4
; CHECK-NEXT: [[MUL_RELOAD:%.*]] = load i32, i32* [[MUL_LOC]], align 4
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST1]])
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST2]])
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST3]])
; CHECK-NEXT: br i1 [[TMP0]], label [[BLOCK_7:%.*]], label [[BLOCK_6:%.*]]
; CHECK: block_6:
; CHECK-NEXT: [[DIFF:%.*]] = sub i32 [[A2VAL_RELOAD]], [[B2VAL_RELOAD]]
; CHECK-NEXT: ret void
; CHECK: block_7:
; CHECK-NEXT: [[QUOT:%.*]] = udiv i32 [[ADD_RELOAD]], [[MUL_RELOAD]]
; CHECK-NEXT: ret void
;
;
; CHECK: define internal i1 @outlined_ir_func_0(
; CHECK-NEXT: newFuncRoot:
; CHECK-NEXT: br label [[BLOCK_2_TO_OUTLINE:%.*]]
; CHECK: block_2_to_outline:
; CHECK-NEXT: [[A2VAL:%.*]] = load i32, i32* [[TMP0:%.*]], align 4
; CHECK-NEXT: [[B2VAL:%.*]] = load i32, i32* [[TMP1:%.*]], align 4
; CHECK-NEXT: [[ADD2:%.*]] = add i32 2, [[A2VAL]]
; CHECK-NEXT: [[MUL2:%.*]] = mul i32 2, [[B2VAL]]
; CHECK-NEXT: br label [[BLOCK_5:%.*]]
; CHECK: block_3:
; CHECK-NEXT: [[AVAL:%.*]] = load i32, i32* [[TMP0]], align 4
; CHECK-NEXT: [[BVAL:%.*]] = load i32, i32* [[TMP1]], align 4
; CHECK-NEXT: [[ADD:%.*]] = add i32 2, [[AVAL]]
; CHECK-NEXT: [[MUL:%.*]] = mul i32 2, [[BVAL]]
; CHECK-NEXT: br label [[BLOCK_4:%.*]]
; CHECK: block_4:
; CHECK-NEXT: store i32 [[ADD]], i32* [[TMP2:%.*]], align 4
; CHECK-NEXT: store i32 [[MUL]], i32* [[TMP3:%.*]], align 4
; CHECK-NEXT: br label [[BLOCK_6_EXITSTUB:%.*]]
; CHECK: block_5:
; CHECK-NEXT: store i32 [[ADD2]], i32* [[TMP2]], align 4
; CHECK-NEXT: store i32 [[MUL2]], i32* [[TMP3]], align 4
; CHECK-NEXT: br label [[BLOCK_7_EXITSTUB:%.*]]
; CHECK: block_6.exitStub:
; CHECK-NEXT: switch i32 [[TMP8:%.*]], label [[FINAL_BLOCK_1:%.*]] [
; CHECK-NEXT: i32 0, label [[OUTPUT_BLOCK_0_1:%.*]]
; CHECK-NEXT: i32 1, label [[OUTPUT_BLOCK_1_1:%.*]]
; CHECK-NEXT: ]
; CHECK: block_7.exitStub:
; CHECK-NEXT: switch i32 [[TMP8]], label [[FINAL_BLOCK_0:%.*]] [
; CHECK-NEXT: i32 0, label [[OUTPUT_BLOCK_0_0:%.*]]
; CHECK-NEXT: i32 1, label [[OUTPUT_BLOCK_1_0:%.*]]
; CHECK-NEXT: ]
; CHECK: output_block_0_0:
; CHECK-NEXT: store i32 [[A2VAL]], i32* [[TMP4:%.*]], align 4
; CHECK-NEXT: store i32 [[B2VAL]], i32* [[TMP5:%.*]], align 4
; CHECK-NEXT: br label [[FINAL_BLOCK_0]]
; CHECK: output_block_0_1:
; CHECK-NEXT: store i32 [[AVAL]], i32* [[TMP6:%.*]], align 4
; CHECK-NEXT: store i32 [[BVAL]], i32* [[TMP7:%.*]], align 4
; CHECK-NEXT: br label [[FINAL_BLOCK_1]]
; CHECK: output_block_1_0:
; CHECK-NEXT: store i32 [[A2VAL]], i32* [[TMP4]], align 4
; CHECK-NEXT: store i32 [[B2VAL]], i32* [[TMP5]], align 4
; CHECK-NEXT: br label [[FINAL_BLOCK_0]]
; CHECK: output_block_1_1:
; CHECK-NEXT: store i32 [[ADD]], i32* [[TMP6]], align 4
; CHECK-NEXT: store i32 [[MUL]], i32* [[TMP7]], align 4
; CHECK-NEXT: br label [[FINAL_BLOCK_1]]
; CHECK: final_block_0:
; CHECK-NEXT: ret i1 false
; CHECK: final_block_1:
; CHECK-NEXT: ret i1 true
;