When we start outlining across branches, there is the possibility that we will have two different blocks with different output locations, or a single branch that goes to two blocks outside of the region that is being outlined. While the CodeExtractor provides most of the mechanisms, by using the return value of the extracted function as the input to a switch statement that branches to the correct location, we need special handling for the different output schemes needed at each location. This is done by repeating the existing storing scheme for each different exit block. We keep a map from each return value used to the basic block that stores the outputs for that particular exit block within the outlined function. Then, if needed, we create a switch statement for each return block to branch to the correct set of stored outputs. Reviewers: paquette Differential Revision: https://reviews.llvm.org/D106993
230 lines
9.6 KiB
LLVM
230 lines
9.6 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs
|
|
; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
|
|
|
|
; Here we have multiple exits, and the two source functions need different
|
|
; outputs at those exits. This checks that separate switch statements handle them.
|
|
|
|
; Test input 1 of 2. Contains two straight-line chains with the same shape
; (load, load, add, mul, then two stores) that leave through different blocks:
;   block_2 -> block_5 -> block_7
;   block_3 -> block_4 -> block_6
; The expected output later in this file replaces the block_2/block_5 chain
; with a call to @outlined_ir_func_0 whose final argument is i32 0.
define void @outline_outputs1() #0 {
|
|
entry:
|
|
; Six i32 stack slots. Only %a, %b, %output and %result are referenced by the
; blocks below; %output2 and %result2 are never used in this function.
%output = alloca i32, align 4
|
|
%result = alloca i32, align 4
|
|
%output2 = alloca i32, align 4
|
|
%result2 = alloca i32, align 4
|
|
%a = alloca i32, align 4
|
|
%b = alloca i32, align 4
|
|
br label %block_2
|
|
; No branch in this function targets block_1, and %a2 / %b2 are never used.
block_1:
|
|
%a2 = alloca i32, align 4
|
|
%b2 = alloca i32, align 4
|
|
br label %block_2
|
|
; First chain: load %a and %b, then compute 2 + %a2val and 2 * %b2val.
block_2:
|
|
%a2val = load i32, i32* %a
|
|
%b2val = load i32, i32* %b
|
|
%add2 = add i32 2, %a2val
|
|
%mul2 = mul i32 2, %b2val
|
|
br label %block_5
|
|
; Second chain: the same instruction sequence as block_2 under different value
; names. No branch in this function targets block_3.
block_3:
|
|
%aval = load i32, i32* %a
|
|
%bval = load i32, i32* %b
|
|
%add = add i32 2, %aval
|
|
%mul = mul i32 2, %bval
|
|
br label %block_4
|
|
; Stores the second chain's results to %output / %result, then leaves via block_6.
block_4:
|
|
store i32 %add, i32* %output, align 4
|
|
store i32 %mul, i32* %result, align 4
|
|
br label %block_6
|
|
; Stores the first chain's results to %output / %result, then leaves via block_7.
block_5:
|
|
store i32 %add2, i32* %output, align 4
|
|
store i32 %mul2, i32* %result, align 4
|
|
br label %block_7
|
|
; Exit for the second chain: uses the values loaded in block_3 (%aval, %bval).
; %div itself is not used afterwards.
block_6:
|
|
%div = udiv i32 %aval, %bval
|
|
ret void
|
|
; Exit for the first chain: uses the values loaded in block_2 (%a2val, %b2val).
; %sub itself is not used afterwards.
block_7:
|
|
%sub = sub i32 %a2val, %b2val
|
|
ret void
|
|
}
|
|
|
|
; Test input 2 of 2. Blocks entry through block_5 contain the same instructions
; as in @outline_outputs1, but the exits are swapped and one exit consumes
; different values:
;   block_2 -> block_5 -> block_6   (block_6 uses %a2val, %b2val)
;   block_3 -> block_4 -> block_7   (block_7 uses %add, %mul)
; The expected output later in this file replaces the block_2/block_5 chain
; with a call to @outlined_ir_func_0 whose final argument is i32 1.
define void @outline_outputs2() #0 {
|
|
entry:
|
|
; Six i32 stack slots. Only %a, %b, %output and %result are referenced by the
; blocks below; %output2 and %result2 are never used in this function.
%output = alloca i32, align 4
|
|
%result = alloca i32, align 4
|
|
%output2 = alloca i32, align 4
|
|
%result2 = alloca i32, align 4
|
|
%a = alloca i32, align 4
|
|
%b = alloca i32, align 4
|
|
br label %block_2
|
|
; No branch in this function targets block_1, and %a2 / %b2 are never used.
block_1:
|
|
%a2 = alloca i32, align 4
|
|
%b2 = alloca i32, align 4
|
|
br label %block_2
|
|
; First chain: load %a and %b, then compute 2 + %a2val and 2 * %b2val.
block_2:
|
|
%a2val = load i32, i32* %a
|
|
%b2val = load i32, i32* %b
|
|
%add2 = add i32 2, %a2val
|
|
%mul2 = mul i32 2, %b2val
|
|
br label %block_5
|
|
; Second chain: the same instruction sequence as block_2 under different value
; names. No branch in this function targets block_3.
block_3:
|
|
%aval = load i32, i32* %a
|
|
%bval = load i32, i32* %b
|
|
%add = add i32 2, %aval
|
|
%mul = mul i32 2, %bval
|
|
br label %block_4
|
|
; Stores the second chain's results, then leaves via block_7 (block_6 in
; @outline_outputs1).
block_4:
|
|
store i32 %add, i32* %output, align 4
|
|
store i32 %mul, i32* %result, align 4
|
|
br label %block_7
|
|
; Stores the first chain's results, then leaves via block_6 (block_7 in
; @outline_outputs1).
block_5:
|
|
store i32 %add2, i32* %output, align 4
|
|
store i32 %mul2, i32* %result, align 4
|
|
br label %block_6
|
|
; Exit for the first chain: uses the values loaded in block_2 (%a2val, %b2val).
; %diff itself is not used afterwards.
block_6:
|
|
%diff = sub i32 %a2val, %b2val
|
|
ret void
|
|
; Exit for the second chain: uses the arithmetic results from block_3
; (%add, %mul) rather than the loaded values. %quot itself is not used afterwards.
block_7:
|
|
%quot = udiv i32 %add, %mul
|
|
ret void
|
|
}
|
|
; CHECK-LABEL: @outline_outputs1(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[BVAL_LOC:%.*]] = alloca i32, align 4
|
|
; CHECK-NEXT: [[AVAL_LOC:%.*]] = alloca i32, align 4
|
|
; CHECK-NEXT: [[B2VAL_LOC:%.*]] = alloca i32, align 4
|
|
; CHECK-NEXT: [[A2VAL_LOC:%.*]] = alloca i32, align 4
|
|
; CHECK-NEXT: [[OUTPUT:%.*]] = alloca i32, align 4
|
|
; CHECK-NEXT: [[RESULT:%.*]] = alloca i32, align 4
|
|
; CHECK-NEXT: [[OUTPUT2:%.*]] = alloca i32, align 4
|
|
; CHECK-NEXT: [[RESULT2:%.*]] = alloca i32, align 4
|
|
; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
|
|
; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4
|
|
; CHECK-NEXT: br label [[BLOCK_2:%.*]]
|
|
; CHECK: block_1:
|
|
; CHECK-NEXT: [[A2:%.*]] = alloca i32, align 4
|
|
; CHECK-NEXT: [[B2:%.*]] = alloca i32, align 4
|
|
; CHECK-NEXT: br label [[BLOCK_2]]
|
|
; CHECK: block_2:
|
|
; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[A2VAL_LOC]] to i8*
|
|
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]])
|
|
; CHECK-NEXT: [[LT_CAST1:%.*]] = bitcast i32* [[B2VAL_LOC]] to i8*
|
|
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST1]])
|
|
; CHECK-NEXT: [[LT_CAST2:%.*]] = bitcast i32* [[AVAL_LOC]] to i8*
|
|
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST2]])
|
|
; CHECK-NEXT: [[LT_CAST3:%.*]] = bitcast i32* [[BVAL_LOC]] to i8*
|
|
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST3]])
|
|
; CHECK-NEXT: [[TMP0:%.*]] = call i1 @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[OUTPUT]], i32* [[RESULT]], i32* [[A2VAL_LOC]], i32* [[B2VAL_LOC]], i32* [[AVAL_LOC]], i32* [[BVAL_LOC]], i32 0)
|
|
; CHECK-NEXT: [[A2VAL_RELOAD:%.*]] = load i32, i32* [[A2VAL_LOC]], align 4
|
|
; CHECK-NEXT: [[B2VAL_RELOAD:%.*]] = load i32, i32* [[B2VAL_LOC]], align 4
|
|
; CHECK-NEXT: [[AVAL_RELOAD:%.*]] = load i32, i32* [[AVAL_LOC]], align 4
|
|
; CHECK-NEXT: [[BVAL_RELOAD:%.*]] = load i32, i32* [[BVAL_LOC]], align 4
|
|
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
|
|
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST1]])
|
|
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST2]])
|
|
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST3]])
|
|
; CHECK-NEXT: br i1 [[TMP0]], label [[BLOCK_6:%.*]], label [[BLOCK_7:%.*]]
|
|
; CHECK: block_6:
|
|
; CHECK-NEXT: [[DIV:%.*]] = udiv i32 [[AVAL_RELOAD]], [[BVAL_RELOAD]]
|
|
; CHECK-NEXT: ret void
|
|
; CHECK: block_7:
|
|
; CHECK-NEXT: [[SUB:%.*]] = sub i32 [[A2VAL_RELOAD]], [[B2VAL_RELOAD]]
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
;
|
|
; CHECK-LABEL: @outline_outputs2(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[MUL_LOC:%.*]] = alloca i32, align 4
|
|
; CHECK-NEXT: [[ADD_LOC:%.*]] = alloca i32, align 4
|
|
; CHECK-NEXT: [[B2VAL_LOC:%.*]] = alloca i32, align 4
|
|
; CHECK-NEXT: [[A2VAL_LOC:%.*]] = alloca i32, align 4
|
|
; CHECK-NEXT: [[OUTPUT:%.*]] = alloca i32, align 4
|
|
; CHECK-NEXT: [[RESULT:%.*]] = alloca i32, align 4
|
|
; CHECK-NEXT: [[OUTPUT2:%.*]] = alloca i32, align 4
|
|
; CHECK-NEXT: [[RESULT2:%.*]] = alloca i32, align 4
|
|
; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
|
|
; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4
|
|
; CHECK-NEXT: br label [[BLOCK_2:%.*]]
|
|
; CHECK: block_1:
|
|
; CHECK-NEXT: [[A2:%.*]] = alloca i32, align 4
|
|
; CHECK-NEXT: [[B2:%.*]] = alloca i32, align 4
|
|
; CHECK-NEXT: br label [[BLOCK_2]]
|
|
; CHECK: block_2:
|
|
; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[A2VAL_LOC]] to i8*
|
|
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]])
|
|
; CHECK-NEXT: [[LT_CAST1:%.*]] = bitcast i32* [[B2VAL_LOC]] to i8*
|
|
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST1]])
|
|
; CHECK-NEXT: [[LT_CAST2:%.*]] = bitcast i32* [[ADD_LOC]] to i8*
|
|
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST2]])
|
|
; CHECK-NEXT: [[LT_CAST3:%.*]] = bitcast i32* [[MUL_LOC]] to i8*
|
|
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST3]])
|
|
; CHECK-NEXT: [[TMP0:%.*]] = call i1 @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[OUTPUT]], i32* [[RESULT]], i32* [[A2VAL_LOC]], i32* [[B2VAL_LOC]], i32* [[ADD_LOC]], i32* [[MUL_LOC]], i32 1)
|
|
; CHECK-NEXT: [[A2VAL_RELOAD:%.*]] = load i32, i32* [[A2VAL_LOC]], align 4
|
|
; CHECK-NEXT: [[B2VAL_RELOAD:%.*]] = load i32, i32* [[B2VAL_LOC]], align 4
|
|
; CHECK-NEXT: [[ADD_RELOAD:%.*]] = load i32, i32* [[ADD_LOC]], align 4
|
|
; CHECK-NEXT: [[MUL_RELOAD:%.*]] = load i32, i32* [[MUL_LOC]], align 4
|
|
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
|
|
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST1]])
|
|
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST2]])
|
|
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST3]])
|
|
; CHECK-NEXT: br i1 [[TMP0]], label [[BLOCK_7:%.*]], label [[BLOCK_6:%.*]]
|
|
; CHECK: block_6:
|
|
; CHECK-NEXT: [[DIFF:%.*]] = sub i32 [[A2VAL_RELOAD]], [[B2VAL_RELOAD]]
|
|
; CHECK-NEXT: ret void
|
|
; CHECK: block_7:
|
|
; CHECK-NEXT: [[QUOT:%.*]] = udiv i32 [[ADD_RELOAD]], [[MUL_RELOAD]]
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
;
|
|
; CHECK: define internal i1 @outlined_ir_func_0(
|
|
; CHECK-NEXT: newFuncRoot:
|
|
; CHECK-NEXT: br label [[BLOCK_2_TO_OUTLINE:%.*]]
|
|
; CHECK: block_2_to_outline:
|
|
; CHECK-NEXT: [[A2VAL:%.*]] = load i32, i32* [[TMP0:%.*]], align 4
|
|
; CHECK-NEXT: [[B2VAL:%.*]] = load i32, i32* [[TMP1:%.*]], align 4
|
|
; CHECK-NEXT: [[ADD2:%.*]] = add i32 2, [[A2VAL]]
|
|
; CHECK-NEXT: [[MUL2:%.*]] = mul i32 2, [[B2VAL]]
|
|
; CHECK-NEXT: br label [[BLOCK_5:%.*]]
|
|
; CHECK: block_3:
|
|
; CHECK-NEXT: [[AVAL:%.*]] = load i32, i32* [[TMP0]], align 4
|
|
; CHECK-NEXT: [[BVAL:%.*]] = load i32, i32* [[TMP1]], align 4
|
|
; CHECK-NEXT: [[ADD:%.*]] = add i32 2, [[AVAL]]
|
|
; CHECK-NEXT: [[MUL:%.*]] = mul i32 2, [[BVAL]]
|
|
; CHECK-NEXT: br label [[BLOCK_4:%.*]]
|
|
; CHECK: block_4:
|
|
; CHECK-NEXT: store i32 [[ADD]], i32* [[TMP2:%.*]], align 4
|
|
; CHECK-NEXT: store i32 [[MUL]], i32* [[TMP3:%.*]], align 4
|
|
; CHECK-NEXT: br label [[BLOCK_6_EXITSTUB:%.*]]
|
|
; CHECK: block_5:
|
|
; CHECK-NEXT: store i32 [[ADD2]], i32* [[TMP2]], align 4
|
|
; CHECK-NEXT: store i32 [[MUL2]], i32* [[TMP3]], align 4
|
|
; CHECK-NEXT: br label [[BLOCK_7_EXITSTUB:%.*]]
|
|
; CHECK: block_6.exitStub:
|
|
; CHECK-NEXT: switch i32 [[TMP8:%.*]], label [[FINAL_BLOCK_1:%.*]] [
|
|
; CHECK-NEXT: i32 0, label [[OUTPUT_BLOCK_0_1:%.*]]
|
|
; CHECK-NEXT: i32 1, label [[OUTPUT_BLOCK_1_1:%.*]]
|
|
; CHECK-NEXT: ]
|
|
; CHECK: block_7.exitStub:
|
|
; CHECK-NEXT: switch i32 [[TMP8]], label [[FINAL_BLOCK_0:%.*]] [
|
|
; CHECK-NEXT: i32 0, label [[OUTPUT_BLOCK_0_0:%.*]]
|
|
; CHECK-NEXT: i32 1, label [[OUTPUT_BLOCK_1_0:%.*]]
|
|
; CHECK-NEXT: ]
|
|
; CHECK: output_block_0_0:
|
|
; CHECK-NEXT: store i32 [[A2VAL]], i32* [[TMP4:%.*]], align 4
|
|
; CHECK-NEXT: store i32 [[B2VAL]], i32* [[TMP5:%.*]], align 4
|
|
; CHECK-NEXT: br label [[FINAL_BLOCK_0]]
|
|
; CHECK: output_block_0_1:
|
|
; CHECK-NEXT: store i32 [[AVAL]], i32* [[TMP6:%.*]], align 4
|
|
; CHECK-NEXT: store i32 [[BVAL]], i32* [[TMP7:%.*]], align 4
|
|
; CHECK-NEXT: br label [[FINAL_BLOCK_1]]
|
|
; CHECK: output_block_1_0:
|
|
; CHECK-NEXT: store i32 [[A2VAL]], i32* [[TMP4]], align 4
|
|
; CHECK-NEXT: store i32 [[B2VAL]], i32* [[TMP5]], align 4
|
|
; CHECK-NEXT: br label [[FINAL_BLOCK_0]]
|
|
; CHECK: output_block_1_1:
|
|
; CHECK-NEXT: store i32 [[ADD]], i32* [[TMP6]], align 4
|
|
; CHECK-NEXT: store i32 [[MUL]], i32* [[TMP7]], align 4
|
|
; CHECK-NEXT: br label [[FINAL_BLOCK_1]]
|
|
; CHECK: final_block_0:
|
|
; CHECK-NEXT: ret i1 false
|
|
; CHECK: final_block_1:
|
|
; CHECK-NEXT: ret i1 true
|
|
;
|