llvm-project/llvm/test/Transforms/LoopInterchange/reductions-with-nowraps.ll
Ryotaro Kasuga b3c293c5b9
[LoopInterchange] Drop nuw/nsw flags from reduction ops when interchanging (#148612)
Before this patch, when a reduction exists in the loop, the legality
check of LoopInterchange only verified if there exists a
non-reassociative floating-point instruction in the reduction
calculation. However, it is insufficient, because reordering integer
reductions can also lead to incorrect transformations. Consider the
following example:

```c
int A[2][2] = {
  { INT_MAX, INT_MAX },
  { INT_MIN, INT_MIN },
};

int sum = 0;
for (int i = 0; i < 2; i++)
  for (int j = 0; j < 2; j++)
    sum += A[j][i];
```

To make this exchange legal, we must drop nuw/nsw flags from the
instructions involved in the reduction operations.

This patch extends the legality check to correctly handle such cases. In
particular, for integer addition and multiplication, it verifies that
the nsw and nuw flags are set on involved instructions, and drop them
when the transformation actually performed. This patch also introduces
explicit checks for the kind of reduction and permits only those that
are known to be safe for interchange. Consequently, some "unknown"
reductions (at the moment, `FindFirst*` and `FindLast*`) are rejected.

Fix #148228
2025-07-15 22:04:16 +09:00

145 lines
5.8 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -passes=loop-interchange -cache-line-size=64 -S < %s | FileCheck %s
; Check that nsw/nuw flags are dropped when interchanging loops.
;
; int sum = 0;
; for (int i = 0; i < 2; i++)
; for (int j = 0; j < 2; j++)
; sum += A[j][i];
;
define void @reduction_add(ptr %A) {
; CHECK-LABEL: define void @reduction_add(
; CHECK-SAME: ptr [[A:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[FOR_J_PREHEADER:.*]]
; CHECK: [[FOR_I_HEADER_PREHEADER:.*]]:
; CHECK-NEXT: br label %[[FOR_I_HEADER:.*]]
; CHECK: [[FOR_I_HEADER]]:
; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_INC:%.*]], %[[FOR_I_LATCH:.*]] ], [ 0, %[[FOR_I_HEADER_PREHEADER]] ]
; CHECK-NEXT: [[SUM_J:%.*]] = phi i32 [ [[SUM_J_NEXT:%.*]], %[[FOR_I_LATCH]] ], [ [[SUM_I:%.*]], %[[FOR_I_HEADER_PREHEADER]] ]
; CHECK-NEXT: br label %[[FOR_J_SPLIT1:.*]]
; CHECK: [[FOR_J_PREHEADER]]:
; CHECK-NEXT: br label %[[FOR_J:.*]]
; CHECK: [[FOR_J]]:
; CHECK-NEXT: [[J:%.*]] = phi i32 [ [[TMP0:%.*]], %[[FOR_J_SPLIT:.*]] ], [ 0, %[[FOR_J_PREHEADER]] ]
; CHECK-NEXT: [[SUM_I]] = phi i32 [ [[SUM_I_LCSSA:%.*]], %[[FOR_J_SPLIT]] ], [ 0, %[[FOR_J_PREHEADER]] ]
; CHECK-NEXT: br label %[[FOR_I_HEADER_PREHEADER]]
; CHECK: [[FOR_J_SPLIT1]]:
; CHECK-NEXT: [[IDX:%.*]] = getelementptr inbounds [2 x [2 x i32]], ptr [[A]], i32 0, i32 [[J]], i32 [[I]]
; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[IDX]], align 4
; CHECK-NEXT: [[SUM_J_NEXT]] = add i32 [[SUM_J]], [[A]]
; CHECK-NEXT: [[J_INC:%.*]] = add i32 [[J]], 1
; CHECK-NEXT: [[CMP_J:%.*]] = icmp slt i32 [[J_INC]], 2
; CHECK-NEXT: br label %[[FOR_I_LATCH]]
; CHECK: [[FOR_J_SPLIT]]:
; CHECK-NEXT: [[SUM_I_LCSSA]] = phi i32 [ [[SUM_J_NEXT]], %[[FOR_I_LATCH]] ]
; CHECK-NEXT: [[TMP0]] = add i32 [[J]], 1
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[TMP0]], 2
; CHECK-NEXT: br i1 [[TMP1]], label %[[FOR_J]], label %[[EXIT:.*]]
; CHECK: [[FOR_I_LATCH]]:
; CHECK-NEXT: [[I_INC]] = add i32 [[I]], 1
; CHECK-NEXT: [[CMP_I:%.*]] = icmp slt i32 [[I_INC]], 2
; CHECK-NEXT: br i1 [[CMP_I]], label %[[FOR_I_HEADER]], label %[[FOR_J_SPLIT]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
entry:
br label %for.i.header
for.i.header:
%i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
%sum.i = phi i32 [ 0, %entry ], [ %sum.i.lcssa, %for.i.latch ]
br label %for.j
for.j:
%j = phi i32 [ 0, %for.i.header ], [ %j.inc, %for.j ]
%sum.j = phi i32 [ %sum.i, %for.i.header ], [ %sum.j.next, %for.j ]
%idx = getelementptr inbounds [2 x [2 x i32]], ptr %A, i32 0, i32 %j, i32 %i
%a = load i32, ptr %idx, align 4
%sum.j.next = add nuw nsw i32 %sum.j, %a
%j.inc = add i32 %j, 1
%cmp.j = icmp slt i32 %j.inc, 2
br i1 %cmp.j, label %for.j, label %for.i.latch
for.i.latch:
%sum.i.lcssa = phi i32 [ %sum.j.next, %for.j ]
%i.inc = add i32 %i, 1
%cmp.i = icmp slt i32 %i.inc, 2
br i1 %cmp.i, label %for.i.header, label %exit
exit:
ret void
}
; Check that nsw/nuw flags are dropped when interchanging loops.
;
; int prod = 1;
; for (int i = 0; i < 2; i++)
; for (int j = 0; j < 2; j++)
; prod *= A[j][i];
;
define void @reduction_mul(ptr %A) {
; CHECK-LABEL: define void @reduction_mul(
; CHECK-SAME: ptr [[A:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[FOR_J_PREHEADER:.*]]
; CHECK: [[FOR_I_HEADER_PREHEADER:.*]]:
; CHECK-NEXT: br label %[[FOR_I_HEADER:.*]]
; CHECK: [[FOR_I_HEADER]]:
; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_INC:%.*]], %[[FOR_I_LATCH:.*]] ], [ 0, %[[FOR_I_HEADER_PREHEADER]] ]
; CHECK-NEXT: [[PROD_J:%.*]] = phi i32 [ [[PROD_J_NEXT:%.*]], %[[FOR_I_LATCH]] ], [ [[PROD_I:%.*]], %[[FOR_I_HEADER_PREHEADER]] ]
; CHECK-NEXT: br label %[[FOR_J_SPLIT1:.*]]
; CHECK: [[FOR_J_PREHEADER]]:
; CHECK-NEXT: br label %[[FOR_J:.*]]
; CHECK: [[FOR_J]]:
; CHECK-NEXT: [[J:%.*]] = phi i32 [ [[TMP0:%.*]], %[[FOR_J_SPLIT:.*]] ], [ 0, %[[FOR_J_PREHEADER]] ]
; CHECK-NEXT: [[PROD_I]] = phi i32 [ [[PROD_I_LCSSA:%.*]], %[[FOR_J_SPLIT]] ], [ 1, %[[FOR_J_PREHEADER]] ]
; CHECK-NEXT: br label %[[FOR_I_HEADER_PREHEADER]]
; CHECK: [[FOR_J_SPLIT1]]:
; CHECK-NEXT: [[IDX:%.*]] = getelementptr inbounds [2 x [2 x i32]], ptr [[A]], i32 0, i32 [[J]], i32 [[I]]
; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[IDX]], align 4
; CHECK-NEXT: [[PROD_J_NEXT]] = mul i32 [[PROD_J]], [[A]]
; CHECK-NEXT: [[J_INC:%.*]] = add i32 [[J]], 1
; CHECK-NEXT: [[CMP_J:%.*]] = icmp slt i32 [[J_INC]], 2
; CHECK-NEXT: br label %[[FOR_I_LATCH]]
; CHECK: [[FOR_J_SPLIT]]:
; CHECK-NEXT: [[PROD_I_LCSSA]] = phi i32 [ [[PROD_J_NEXT]], %[[FOR_I_LATCH]] ]
; CHECK-NEXT: [[TMP0]] = add i32 [[J]], 1
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[TMP0]], 2
; CHECK-NEXT: br i1 [[TMP1]], label %[[FOR_J]], label %[[EXIT:.*]]
; CHECK: [[FOR_I_LATCH]]:
; CHECK-NEXT: [[I_INC]] = add i32 [[I]], 1
; CHECK-NEXT: [[CMP_I:%.*]] = icmp slt i32 [[I_INC]], 2
; CHECK-NEXT: br i1 [[CMP_I]], label %[[FOR_I_HEADER]], label %[[FOR_J_SPLIT]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
entry:
br label %for.i.header
for.i.header:
%i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
%prod.i = phi i32 [ 1, %entry ], [ %prod.i.lcssa, %for.i.latch ]
br label %for.j
for.j:
%j = phi i32 [ 0, %for.i.header ], [ %j.inc, %for.j ]
%prod.j = phi i32 [ %prod.i, %for.i.header ], [ %prod.j.next, %for.j ]
%idx = getelementptr inbounds [2 x [2 x i32]], ptr %A, i32 0, i32 %j, i32 %i
%a = load i32, ptr %idx, align 4
%prod.j.next = mul nsw nuw i32 %prod.j, %a
%j.inc = add i32 %j, 1
%cmp.j = icmp slt i32 %j.inc, 2
br i1 %cmp.j, label %for.j, label %for.i.latch
for.i.latch:
%prod.i.lcssa = phi i32 [ %prod.j.next, %for.j ]
%i.inc = add i32 %i, 1
%cmp.i = icmp slt i32 %i.inc, 2
br i1 %cmp.i, label %for.i.header, label %exit
exit:
ret void
}