
Before this patch, when a reduction exists in the loop, the legality check of LoopInterchange only verified if there exists a non-reassociative floating-point instruction in the reduction calculation. However, it is insufficient, because reordering integer reductions can also lead to incorrect transformations. Consider the following example: ```c int A[2][2] = { { INT_MAX, INT_MAX }, { INT_MIN, INT_MIN }, }; int sum = 0; for (int i = 0; i < 2; i++) for (int j = 0; j < 2; j++) sum += A[j][i]; ``` To make this exchange legal, we must drop nuw/nsw flags from the instructions involved in the reduction operations. This patch extends the legality check to correctly handle such cases. In particular, for integer addition and multiplication, it verifies that the nsw and nuw flags are set on involved instructions, and drop them when the transformation actually performed. This patch also introduces explicit checks for the kind of reduction and permits only those that are known to be safe for interchange. Consequently, some "unknown" reductions (at the moment, `FindFirst*` and `FindLast*`) are rejected. Fix #148228
145 lines
5.8 KiB
LLVM
145 lines
5.8 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
|
|
; RUN: opt -passes=loop-interchange -cache-line-size=64 -S < %s | FileCheck %s
|
|
|
|
; Check that nsw/nuw flags are dropped when interchanging loops.
|
|
;
|
|
; int sum = 0;
|
|
; for (int i = 0; i < 2; i++)
|
|
; for (int j = 0; j < 2; j++)
|
|
; sum += A[j][i];
|
|
;
|
|
define void @reduction_add(ptr %A) {
|
|
; CHECK-LABEL: define void @reduction_add(
|
|
; CHECK-SAME: ptr [[A:%.*]]) {
|
|
; CHECK-NEXT: [[ENTRY:.*:]]
|
|
; CHECK-NEXT: br label %[[FOR_J_PREHEADER:.*]]
|
|
; CHECK: [[FOR_I_HEADER_PREHEADER:.*]]:
|
|
; CHECK-NEXT: br label %[[FOR_I_HEADER:.*]]
|
|
; CHECK: [[FOR_I_HEADER]]:
|
|
; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_INC:%.*]], %[[FOR_I_LATCH:.*]] ], [ 0, %[[FOR_I_HEADER_PREHEADER]] ]
|
|
; CHECK-NEXT: [[SUM_J:%.*]] = phi i32 [ [[SUM_J_NEXT:%.*]], %[[FOR_I_LATCH]] ], [ [[SUM_I:%.*]], %[[FOR_I_HEADER_PREHEADER]] ]
|
|
; CHECK-NEXT: br label %[[FOR_J_SPLIT1:.*]]
|
|
; CHECK: [[FOR_J_PREHEADER]]:
|
|
; CHECK-NEXT: br label %[[FOR_J:.*]]
|
|
; CHECK: [[FOR_J]]:
|
|
; CHECK-NEXT: [[J:%.*]] = phi i32 [ [[TMP0:%.*]], %[[FOR_J_SPLIT:.*]] ], [ 0, %[[FOR_J_PREHEADER]] ]
|
|
; CHECK-NEXT: [[SUM_I]] = phi i32 [ [[SUM_I_LCSSA:%.*]], %[[FOR_J_SPLIT]] ], [ 0, %[[FOR_J_PREHEADER]] ]
|
|
; CHECK-NEXT: br label %[[FOR_I_HEADER_PREHEADER]]
|
|
; CHECK: [[FOR_J_SPLIT1]]:
|
|
; CHECK-NEXT: [[IDX:%.*]] = getelementptr inbounds [2 x [2 x i32]], ptr [[A]], i32 0, i32 [[J]], i32 [[I]]
|
|
; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[IDX]], align 4
|
|
; CHECK-NEXT: [[SUM_J_NEXT]] = add i32 [[SUM_J]], [[A]]
|
|
; CHECK-NEXT: [[J_INC:%.*]] = add i32 [[J]], 1
|
|
; CHECK-NEXT: [[CMP_J:%.*]] = icmp slt i32 [[J_INC]], 2
|
|
; CHECK-NEXT: br label %[[FOR_I_LATCH]]
|
|
; CHECK: [[FOR_J_SPLIT]]:
|
|
; CHECK-NEXT: [[SUM_I_LCSSA]] = phi i32 [ [[SUM_J_NEXT]], %[[FOR_I_LATCH]] ]
|
|
; CHECK-NEXT: [[TMP0]] = add i32 [[J]], 1
|
|
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[TMP0]], 2
|
|
; CHECK-NEXT: br i1 [[TMP1]], label %[[FOR_J]], label %[[EXIT:.*]]
|
|
; CHECK: [[FOR_I_LATCH]]:
|
|
; CHECK-NEXT: [[I_INC]] = add i32 [[I]], 1
|
|
; CHECK-NEXT: [[CMP_I:%.*]] = icmp slt i32 [[I_INC]], 2
|
|
; CHECK-NEXT: br i1 [[CMP_I]], label %[[FOR_I_HEADER]], label %[[FOR_J_SPLIT]]
|
|
; CHECK: [[EXIT]]:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
entry:
|
|
br label %for.i.header
|
|
|
|
for.i.header:
|
|
%i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
|
|
%sum.i = phi i32 [ 0, %entry ], [ %sum.i.lcssa, %for.i.latch ]
|
|
br label %for.j
|
|
|
|
for.j:
|
|
%j = phi i32 [ 0, %for.i.header ], [ %j.inc, %for.j ]
|
|
%sum.j = phi i32 [ %sum.i, %for.i.header ], [ %sum.j.next, %for.j ]
|
|
%idx = getelementptr inbounds [2 x [2 x i32]], ptr %A, i32 0, i32 %j, i32 %i
|
|
%a = load i32, ptr %idx, align 4
|
|
%sum.j.next = add nuw nsw i32 %sum.j, %a
|
|
%j.inc = add i32 %j, 1
|
|
%cmp.j = icmp slt i32 %j.inc, 2
|
|
br i1 %cmp.j, label %for.j, label %for.i.latch
|
|
|
|
for.i.latch:
|
|
%sum.i.lcssa = phi i32 [ %sum.j.next, %for.j ]
|
|
%i.inc = add i32 %i, 1
|
|
%cmp.i = icmp slt i32 %i.inc, 2
|
|
br i1 %cmp.i, label %for.i.header, label %exit
|
|
|
|
exit:
|
|
ret void
|
|
}
|
|
|
|
; Check that nsw/nuw flags are dropped when interchanging loops.
|
|
;
|
|
; int prod = 1;
|
|
; for (int i = 0; i < 2; i++)
|
|
; for (int j = 0; j < 2; j++)
|
|
; prod *= A[j][i];
|
|
;
|
|
define void @reduction_mul(ptr %A) {
|
|
; CHECK-LABEL: define void @reduction_mul(
|
|
; CHECK-SAME: ptr [[A:%.*]]) {
|
|
; CHECK-NEXT: [[ENTRY:.*:]]
|
|
; CHECK-NEXT: br label %[[FOR_J_PREHEADER:.*]]
|
|
; CHECK: [[FOR_I_HEADER_PREHEADER:.*]]:
|
|
; CHECK-NEXT: br label %[[FOR_I_HEADER:.*]]
|
|
; CHECK: [[FOR_I_HEADER]]:
|
|
; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_INC:%.*]], %[[FOR_I_LATCH:.*]] ], [ 0, %[[FOR_I_HEADER_PREHEADER]] ]
|
|
; CHECK-NEXT: [[PROD_J:%.*]] = phi i32 [ [[PROD_J_NEXT:%.*]], %[[FOR_I_LATCH]] ], [ [[PROD_I:%.*]], %[[FOR_I_HEADER_PREHEADER]] ]
|
|
; CHECK-NEXT: br label %[[FOR_J_SPLIT1:.*]]
|
|
; CHECK: [[FOR_J_PREHEADER]]:
|
|
; CHECK-NEXT: br label %[[FOR_J:.*]]
|
|
; CHECK: [[FOR_J]]:
|
|
; CHECK-NEXT: [[J:%.*]] = phi i32 [ [[TMP0:%.*]], %[[FOR_J_SPLIT:.*]] ], [ 0, %[[FOR_J_PREHEADER]] ]
|
|
; CHECK-NEXT: [[PROD_I]] = phi i32 [ [[PROD_I_LCSSA:%.*]], %[[FOR_J_SPLIT]] ], [ 1, %[[FOR_J_PREHEADER]] ]
|
|
; CHECK-NEXT: br label %[[FOR_I_HEADER_PREHEADER]]
|
|
; CHECK: [[FOR_J_SPLIT1]]:
|
|
; CHECK-NEXT: [[IDX:%.*]] = getelementptr inbounds [2 x [2 x i32]], ptr [[A]], i32 0, i32 [[J]], i32 [[I]]
|
|
; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[IDX]], align 4
|
|
; CHECK-NEXT: [[PROD_J_NEXT]] = mul i32 [[PROD_J]], [[A]]
|
|
; CHECK-NEXT: [[J_INC:%.*]] = add i32 [[J]], 1
|
|
; CHECK-NEXT: [[CMP_J:%.*]] = icmp slt i32 [[J_INC]], 2
|
|
; CHECK-NEXT: br label %[[FOR_I_LATCH]]
|
|
; CHECK: [[FOR_J_SPLIT]]:
|
|
; CHECK-NEXT: [[PROD_I_LCSSA]] = phi i32 [ [[PROD_J_NEXT]], %[[FOR_I_LATCH]] ]
|
|
; CHECK-NEXT: [[TMP0]] = add i32 [[J]], 1
|
|
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[TMP0]], 2
|
|
; CHECK-NEXT: br i1 [[TMP1]], label %[[FOR_J]], label %[[EXIT:.*]]
|
|
; CHECK: [[FOR_I_LATCH]]:
|
|
; CHECK-NEXT: [[I_INC]] = add i32 [[I]], 1
|
|
; CHECK-NEXT: [[CMP_I:%.*]] = icmp slt i32 [[I_INC]], 2
|
|
; CHECK-NEXT: br i1 [[CMP_I]], label %[[FOR_I_HEADER]], label %[[FOR_J_SPLIT]]
|
|
; CHECK: [[EXIT]]:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
entry:
|
|
br label %for.i.header
|
|
|
|
for.i.header:
|
|
%i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
|
|
%prod.i = phi i32 [ 1, %entry ], [ %prod.i.lcssa, %for.i.latch ]
|
|
br label %for.j
|
|
|
|
for.j:
|
|
%j = phi i32 [ 0, %for.i.header ], [ %j.inc, %for.j ]
|
|
%prod.j = phi i32 [ %prod.i, %for.i.header ], [ %prod.j.next, %for.j ]
|
|
%idx = getelementptr inbounds [2 x [2 x i32]], ptr %A, i32 0, i32 %j, i32 %i
|
|
%a = load i32, ptr %idx, align 4
|
|
%prod.j.next = mul nsw nuw i32 %prod.j, %a
|
|
%j.inc = add i32 %j, 1
|
|
%cmp.j = icmp slt i32 %j.inc, 2
|
|
br i1 %cmp.j, label %for.j, label %for.i.latch
|
|
|
|
for.i.latch:
|
|
%prod.i.lcssa = phi i32 [ %prod.j.next, %for.j ]
|
|
%i.inc = add i32 %i, 1
|
|
%cmp.i = icmp slt i32 %i.inc, 2
|
|
br i1 %cmp.i, label %for.i.header, label %exit
|
|
|
|
exit:
|
|
ret void
|
|
}
|