
Mirror the `getLoopID()` implementation of `LoopInfo` in `MachineLoopInfo`. `getLoopID` used `findLoopControlBlock` to detect the special case where there is a single latch. However, `findLoopControlBlock` returns the exiting block if the latch is not an exiting block, whereas the middle end places the `LoopID` metadata on the latch regardless of whether it is an exiting block. I raised this issue in the PR that introduced the `getLoopID()` helper (https://github.com/llvm/llvm-project/pull/71026#discussion_r2000595214), and @FreddyLeaf confirmed that this is a bug and asked me to help implement a refinement. I've mirrored the implementation of `LoopInfo`, instead of simply replacing the `findLoopControlBlock()` call, to keep the two implementations consistent. The only difference between the two is that `MachineLoopInfo::getLoopID` starts out with a `MachineBasicBlock` and attempts to retrieve the corresponding `BasicBlock` (if it weren't for this difference, I would have moved it to `genericLoopInfo`). I've also updated the test associated with https://github.com/llvm/llvm-project/pull/71026 (`test5`), which checks the alignment for a loop with a single latch that is not the exiting block. This test fails with the current implementation. I'm not sure whether we want to include this test upstream (it might look out of place after we remove the 'single-latch-specialization' from `getLoopID()`). Let me know if you have any comments, @FreddyLeaf!
229 lines
6.9 KiB
LLVM
229 lines
6.9 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc < %s -mtriple=x86_64-pc-linux-gnu | FileCheck %s -check-prefixes=CHECK,ALIGN
|
|
; RUN: llc < %s -mtriple=x86_64-pc-linux-gnu -align-loops=32 | FileCheck %s -check-prefixes=CHECK,ALIGN32
|
|
; RUN: llc < %s -mtriple=x86_64-pc-linux-gnu -align-loops=256 | FileCheck %s -check-prefixes=CHECK,ALIGN256
|
|
|
|
; This test is to check if .p2align can be correctly generated by considering
|
|
; 1. -align-loops=N from llc option
|
|
; 2. loop metadata node !{!"llvm.loop.align", i32 64}
|
|
; The test IR is generated from below simple C file:
|
|
; $ clang -S -emit-llvm loop.c
|
|
; $ cat loop.c
|
|
; void bar(void);
|
|
; void var(void);
|
|
; void foo(int a) {
|
|
; for (int i = 0; i < a; ++i)
|
|
; bar();
|
|
; for (int i = 0; i < a; ++i)
|
|
; var();
|
|
; }
|
|
; The difference between test1 and test2 is that test2 only sets a loop metadata node on the second loop.
|
|
|
|
; CHECK-LABEL: test1:
|
|
; ALIGN: .p2align 6
|
|
; ALIGN-NEXT: .LBB0_2: # %for.body
|
|
; ALIGN: .p2align 9
|
|
; ALIGN-NEXT: .LBB0_3: # %for.body
|
|
|
|
; ALIGN32: .p2align 6
|
|
; ALIGN32-NEXT: .LBB0_2: # %for.body
|
|
; ALIGN32: .p2align 9
|
|
; ALIGN32-NEXT: .LBB0_3: # %for.body
|
|
|
|
; ALIGN256: .p2align 8
|
|
; ALIGN256-NEXT: .LBB0_2: # %for.body
|
|
; ALIGN256: .p2align 9
|
|
; ALIGN256-NEXT: .LBB0_3: # %for.body
|
|
|
|
; test1: two consecutive single-block loops. Each loop block is both the latch
; and the exiting block of its loop. The first loop (%for.body) carries
; !llvm.loop !0 and the second (%for.body5) carries !llvm.loop !2, so both
; loops have an explicit llvm.loop.align request. The expectations above are
; consistent with using the larger of the metadata value and -align-loops.
define void @test1(i32 %a) nounwind {
|
|
entry:
|
|
%cmp12 = icmp sgt i32 %a, 0
|
|
; Skip both loops and return immediately when %a is not positive.
br i1 %cmp12, label %for.body, label %for.cond.cleanup4
|
|
|
|
for.body: ; preds = %entry, %for.body
|
|
%i.013 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
|
|
tail call void @bar()
|
|
%inc = add nuw nsw i32 %i.013, 1
|
|
%exitcond.not = icmp eq i32 %inc, %a
|
|
; Back-edge of the first loop; falls through into the second loop on exit.
; The loop metadata (!0) is attached to this latch terminator.
br i1 %exitcond.not, label %for.body5, label %for.body, !llvm.loop !0
|
|
|
|
for.cond.cleanup4: ; preds = %for.body5, %entry
|
|
ret void
|
|
|
|
for.body5: ; preds = %for.body, %for.body5
|
|
%i1.015 = phi i32 [ %inc7, %for.body5 ], [ 0, %for.body ]
|
|
tail call void @var()
|
|
%inc7 = add nuw nsw i32 %i1.015, 1
|
|
%exitcond16.not = icmp eq i32 %inc7, %a
|
|
; Back-edge of the second loop, annotated with !2.
br i1 %exitcond16.not, label %for.cond.cleanup4, label %for.body5, !llvm.loop !2
|
|
}
|
|
|
|
; CHECK-LABEL: test2:
|
|
; ALIGN: .p2align 4
|
|
; ALIGN-NEXT: .LBB1_2: # %for.body
|
|
; ALIGN: .p2align 9
|
|
; ALIGN-NEXT: .LBB1_3: # %for.body
|
|
|
|
; ALIGN32: .p2align 5
|
|
; ALIGN32-NEXT: .LBB1_2: # %for.body
|
|
; ALIGN32: .p2align 9
|
|
; ALIGN32-NEXT: .LBB1_3: # %for.body
|
|
|
|
; ALIGN256: .p2align 8
|
|
; ALIGN256-NEXT: .LBB1_2: # %for.body
|
|
; ALIGN256: .p2align 9
|
|
; ALIGN256-NEXT: .LBB1_3: # %for.body
|
|
; test2: same control flow as test1, but the first loop (%for.body) has no
; !llvm.loop attachment; only the second loop (%for.body5) carries !llvm.loop !2.
; The expectations above for the first loop therefore vary with the
; -align-loops option, while the second loop is expected at `.p2align 9` in
; every configuration.
define void @test2(i32 %a) nounwind {
|
|
entry:
|
|
%cmp12 = icmp sgt i32 %a, 0
|
|
; Skip both loops and return immediately when %a is not positive.
br i1 %cmp12, label %for.body, label %for.cond.cleanup4
|
|
|
|
for.body: ; preds = %entry, %for.body
|
|
%i.013 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
|
|
tail call void @bar()
|
|
%inc = add nuw nsw i32 %i.013, 1
|
|
%exitcond.not = icmp eq i32 %inc, %a
|
|
; Back-edge of the first loop; deliberately has no loop metadata.
br i1 %exitcond.not, label %for.body5, label %for.body
|
|
|
|
for.cond.cleanup4: ; preds = %for.body5, %entry
|
|
ret void
|
|
|
|
for.body5: ; preds = %for.body, %for.body5
|
|
%i1.015 = phi i32 [ %inc7, %for.body5 ], [ 0, %for.body ]
|
|
tail call void @var()
|
|
%inc7 = add nuw nsw i32 %i1.015, 1
|
|
%exitcond16.not = icmp eq i32 %inc7, %a
|
|
; Back-edge of the second loop, annotated with !2.
br i1 %exitcond16.not, label %for.cond.cleanup4, label %for.body5, !llvm.loop !2
|
|
}
|
|
|
|
; test3 and test4 check that .p2align is correctly set on loops with
|
|
; multiple latches. The IR is generated from the simple C file below:
|
|
; $ clang -O0 -S -emit-llvm loop.c
|
|
; $ cat loop.c
|
|
; int test3() {
|
|
; int i = 0;
|
|
; [[clang::code_align(32)]]
|
|
; while (i < 10) {
|
|
; if (i % 2) {
|
|
; continue;
|
|
; }
|
|
; i++;
|
|
; }
|
|
; }
|
|
; CHECK-LABEL: test3_multilatch:
|
|
; ALIGN: .p2align 6
|
|
; ALIGN-NEXT: .LBB2_1: # %while.cond
|
|
; test3_multilatch: one loop headed by %while.cond with two latches, %if.then
; and %if.end. Both latches branch unconditionally back to the header and both
; carry the same !llvm.loop !0 attachment. The header is the only exiting block.
; NOTE(review): attribute group #0 is not defined in the visible part of this
; file - confirm that this is intentional.
define dso_local i32 @test3_multilatch() #0 {
|
|
entry:
|
|
%retval = alloca i32, align 4
|
|
%i = alloca i32, align 4
|
|
store i32 0, ptr %retval, align 4
|
|
store i32 0, ptr %i, align 4
|
|
br label %while.cond
|
|
|
|
while.cond: ; preds = %if.end, %if.then, %entry
|
|
%0 = load i32, ptr %i, align 4
|
|
%cmp = icmp slt i32 %0, 10
|
|
; Loop exit test: leave the loop once %i reaches 10.
br i1 %cmp, label %while.body, label %while.end
|
|
|
|
while.body: ; preds = %while.cond
|
|
%1 = load i32, ptr %i, align 4
|
|
%rem = srem i32 %1, 2
|
|
%tobool = icmp ne i32 %rem, 0
|
|
br i1 %tobool, label %if.then, label %if.end
|
|
|
|
if.then: ; preds = %while.body
|
|
; First latch (the `continue` path): returns to the header without
; incrementing %i.
br label %while.cond, !llvm.loop !0
|
|
|
|
if.end: ; preds = %while.body
|
|
%2 = load i32, ptr %i, align 4
|
|
%inc = add nsw i32 %2, 1
|
|
store i32 %inc, ptr %i, align 4
|
|
; Second latch: same loop metadata node as the first latch.
br label %while.cond, !llvm.loop !0
|
|
|
|
while.end: ; preds = %while.cond
|
|
%3 = load i32, ptr %retval, align 4
|
|
ret i32 %3
|
|
}
|
|
|
|
; CHECK-LABEL: test4_multilatch:
|
|
; ALIGN: .p2align 6
|
|
; ALIGN-NEXT: .LBB3_4: # %bb4
|
|
; test4_multilatch: one loop headed by %bb1 with two latches, %bb2 and %bb4.
; Both latches are also exiting blocks (each may branch to %exit), and %bb3 is
; an additional exiting block that is not a latch. Only the terminator of %bb2
; carries !llvm.loop !0; the back-edge from %bb4 has no loop metadata.
; NOTE(review): the expectation above still requires `.p2align 6` even though
; the two latches disagree on metadata - confirm that this matches the intended
; getLoopID() behaviour for latches with differing attachments.
define void @test4_multilatch(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
|
|
entry:
|
|
br label %bb1
|
|
|
|
bb1: ; preds = %bb2, %bb4, %entry
|
|
call void @bar()
|
|
%cmp3 = icmp sgt i32 %c, 10
|
|
br i1 %cmp3, label %bb3, label %bb4
|
|
|
|
bb2: ; preds = %bb3
|
|
call void @bar()
|
|
%cmp1 = icmp sgt i32 %a, 11
|
|
; Latch with loop metadata attached.
br i1 %cmp1, label %bb1, label %exit, !llvm.loop !0
|
|
|
|
bb3: ; preds = %bb1
|
|
call void @bar()
|
|
%cmp2 = icmp sgt i32 %b, 12
|
|
br i1 %cmp2, label %bb2, label %exit
|
|
|
|
bb4: ; preds = %bb1
|
|
call void @bar()
|
|
%cmp4 = icmp sgt i32 %d, 14
|
|
; Latch without any loop metadata.
br i1 %cmp4, label %bb1, label %exit
|
|
|
|
exit: ; preds = %bb2, %bb3, %bb4
|
|
ret void
|
|
}
|
|
|
|
; test5 is to check if .p2align can be correctly set on loops with a single
|
|
; latch that's not the exiting block.
|
|
; The test IR is generated from below simple C file:
|
|
; $ clang -O0 -S -emit-llvm loop.c
|
|
; $ cat loop.c
|
|
; int test5(int n) {
|
|
; int i = 0;
|
|
; [[clang::code_align(64)]]
|
|
; while (i < n) {
|
|
; i++;
|
|
; }
|
|
; }
|
|
; CHECK-LABEL: test5:
|
|
; ALIGN: .p2align 6
|
|
; ALIGN-NEXT: .LBB4_1: # %while.cond
|
|
; test5: one loop headed by %while.cond whose single latch, %while.body, is not
; an exiting block: it branches unconditionally back to the header. The header
; is the only exiting block, and the !llvm.loop !0 attachment sits on the latch
; terminator rather than on the exiting branch.
; NOTE(review): attribute group #0 is not defined in the visible part of this
; file, and %retval is loaded in %while.end without ever being stored to, so
; the returned value is unspecified - presumably irrelevant to the alignment
; check; confirm.
define i32 @test5(i32 %n) #0 {
|
|
entry:
|
|
%retval = alloca i32, align 4
|
|
%n.addr = alloca i32, align 4
|
|
%i = alloca i32, align 4
|
|
store i32 %n, ptr %n.addr, align 4
|
|
store i32 0, ptr %i, align 4
|
|
br label %while.cond
|
|
|
|
while.cond: ; preds = %while.body, %entry
|
|
%i.val = load i32, ptr %i, align 4
|
|
%n.val = load i32, ptr %n.addr, align 4
|
|
%cmp = icmp slt i32 %i.val, %n.val
|
|
; Exiting branch of the loop; note that it carries no loop metadata.
br i1 %cmp, label %while.body, label %while.end
|
|
|
|
while.body: ; preds = %while.cond
|
|
%tmp = load i32, ptr %i, align 4
|
|
%inc = add nsw i32 %tmp, 1
|
|
store i32 %inc, ptr %i, align 4
|
|
; Sole latch: non-exiting, and the only place the loop metadata is attached.
br label %while.cond, !llvm.loop !0
|
|
|
|
while.end: ; preds = %while.cond
|
|
%val = load i32, ptr %retval, align 4
|
|
ret i32 %val
|
|
}
|
|
|
|
|
|
; External functions called from the loop bodies of the tests above; they are
; declared here but not defined in this file.
declare void @bar()
|
|
declare void @var()
|
|
|
|
; Loop metadata referenced by the !llvm.loop attachments above.
; !0 is a self-referential loop ID whose only property is !1.
!0 = distinct !{!0, !1}
|
|
; Requests llvm.loop.align 64; the tests using !0 expect `.p2align 6` (2^6 = 64)
; unless a larger -align-loops value is given.
!1 = !{!"llvm.loop.align", i32 64}
|
|
; !2 is a second loop ID whose only property is !3.
!2 = distinct !{!2, !3}
|
|
; Requests llvm.loop.align 512; the tests using !2 expect `.p2align 9` (2^9 = 512).
!3 = !{!"llvm.loop.align", i32 512}
|