llvm-project/llvm/test/CodeGen/X86/code-align-loops.ll
Vito Kortbeek e03b1b895e
[CodeGen][MachineLoop] Fix getLoopID (#137820)
Mirror the `getLoopID()` implementation of `LoopInfo` in
`MachineLoopInfo`.

`getLoopID` used `findLoopControlBlock` to detect the special case where
there is a single latch. However, `findLoopControlBlock` returns the
exiting block if the latch is not an exiting block. The middle end
places the `LoopID` metadata on the
latch regardless of if it's an exiting block or not.

I raised this issue in the PR that introduced the `getLoopID()` helper
(https://github.com/llvm/llvm-project/pull/71026#discussion_r2000595214)
and @FreddyLeaf confirmed this is a bug and asked me to help implement a
refinement.

I've mirrored the implementation of `LoopInfo` instead of simply
changing `findLoopControlBlock()` to `getLoopLatch()` to keep
the two implementations consistent. The only difference between the two
is that `MachineLoopInfo::getLoopID` initially starts out with a
`MachineBasicBlock` and attempts to retrieve the `BasicBlock` (if it
wasn't for this difference, I would have moved it to `genericLoopInfo`).

I've also updated the test associated with
https://github.com/llvm/llvm-project/pull/71026 (`test5`) that checks the
alignment for a loop with a single latch that's not the exit. This test
will fail for the current implementation. I'm not sure if we want to
include this test upstream (it might look out of place after we remove
the 'single-latch-specialization' from `getLoopID()`).

Let me know if you have any comments, @FreddyLeaf !
2025-05-24 08:52:51 +02:00

229 lines
6.9 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-pc-linux-gnu | FileCheck %s -check-prefixes=CHECK,ALIGN
; RUN: llc < %s -mtriple=x86_64-pc-linux-gnu -align-loops=32 | FileCheck %s -check-prefixes=CHECK,ALIGN32
; RUN: llc < %s -mtriple=x86_64-pc-linux-gnu -align-loops=256 | FileCheck %s -check-prefixes=CHECK,ALIGN256
; This test is to check if .p2align can be correctly generated by considering
; 1. -align-loops=N from llc option
; 2. loop metadata node !{!"llvm.loop.align", i32 64}
; The test IR is generated from below simple C file:
; $ clang -S -emit-llvm loop.c
; $ cat loop.c
; void bar(void);
; void var(void);
; void foo(int a) {
; for (int i = 0; i < a; ++i)
; bar();
; for (int i = 0; i < a; ++i)
; var();
; }
; The difference between test1 and test2 is that test2 only sets a loop metadata node on the second loop.
; CHECK-LABEL: test1:
; ALIGN: .p2align 6
; ALIGN-NEXT: .LBB0_2: # %for.body
; ALIGN: .p2align 9
; ALIGN-NEXT: .LBB0_3: # %for.body
; ALIGN32: .p2align 6
; ALIGN32-NEXT: .LBB0_2: # %for.body
; ALIGN32: .p2align 9
; ALIGN32-NEXT: .LBB0_3: # %for.body
; ALIGN256: .p2align 8
; ALIGN256-NEXT: .LBB0_2: # %for.body
; ALIGN256: .p2align 9
; ALIGN256-NEXT: .LBB0_3: # %for.body
; Two consecutive counted loops, each with a single self-looping block.
; The back edge of the first loop (%for.body) is tagged with !0 and the back
; edge of the second loop (%for.body5) with !2, so each loop carries its own
; llvm.loop.align request.
define void @test1(i32 %a) nounwind {
entry:
%cmp12 = icmp sgt i32 %a, 0
br i1 %cmp12, label %for.body, label %for.cond.cleanup4
; First loop: calls @bar once per iteration until %inc equals %a.
for.body: ; preds = %entry, %for.body
%i.013 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
tail call void @bar()
%inc = add nuw nsw i32 %i.013, 1
%exitcond.not = icmp eq i32 %inc, %a
br i1 %exitcond.not, label %for.body5, label %for.body, !llvm.loop !0
for.cond.cleanup4: ; preds = %for.body5, %entry
ret void
; Second loop: calls @var once per iteration until %inc7 equals %a.
for.body5: ; preds = %for.body, %for.body5
%i1.015 = phi i32 [ %inc7, %for.body5 ], [ 0, %for.body ]
tail call void @var()
%inc7 = add nuw nsw i32 %i1.015, 1
%exitcond16.not = icmp eq i32 %inc7, %a
br i1 %exitcond16.not, label %for.cond.cleanup4, label %for.body5, !llvm.loop !2
}
; CHECK-LABEL: test2:
; ALIGN: .p2align 4
; ALIGN-NEXT: .LBB1_2: # %for.body
; ALIGN: .p2align 9
; ALIGN-NEXT: .LBB1_3: # %for.body
; ALIGN32: .p2align 5
; ALIGN32-NEXT: .LBB1_2: # %for.body
; ALIGN32: .p2align 9
; ALIGN32-NEXT: .LBB1_3: # %for.body
; ALIGN256: .p2align 8
; ALIGN256-NEXT: .LBB1_2: # %for.body
; ALIGN256: .p2align 9
; ALIGN256-NEXT: .LBB1_3: # %for.body
; Same control flow as test1, but only the second loop is annotated: the back
; edge of %for.body has no !llvm.loop attachment, while the back edge of
; %for.body5 is tagged with !2.
define void @test2(i32 %a) nounwind {
entry:
%cmp12 = icmp sgt i32 %a, 0
br i1 %cmp12, label %for.body, label %for.cond.cleanup4
; First loop: no loop metadata on its back edge.
for.body: ; preds = %entry, %for.body
%i.013 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
tail call void @bar()
%inc = add nuw nsw i32 %i.013, 1
%exitcond.not = icmp eq i32 %inc, %a
br i1 %exitcond.not, label %for.body5, label %for.body
for.cond.cleanup4: ; preds = %for.body5, %entry
ret void
; Second loop: back edge carries !2.
for.body5: ; preds = %for.body, %for.body5
%i1.015 = phi i32 [ %inc7, %for.body5 ], [ 0, %for.body ]
tail call void @var()
%inc7 = add nuw nsw i32 %i1.015, 1
%exitcond16.not = icmp eq i32 %inc7, %a
br i1 %exitcond16.not, label %for.cond.cleanup4, label %for.body5, !llvm.loop !2
}
; test3 and test4 check that .p2align is correctly set on loops with
; multiple latches. The IR is generated from the simple C file below:
; $ clang -O0 -S -emit-llvm loop.c
; $ cat loop.c
; int test3() {
; int i = 0;
; [[clang::code_align(64)]]
; while (i < 10) {
; if (i % 2) {
; continue;
; }
; i++;
; }
; }
; CHECK-LABEL: test3_multilatch:
; ALIGN: .p2align 6
; ALIGN-NEXT: .LBB2_1: # %while.cond
; Unoptimized while-loop containing a `continue`. %while.cond is the loop
; header; both %if.then and %if.end branch back to it, so the loop has two
; latches. Both back-edge branches carry the same !llvm.loop !0 attachment.
define dso_local i32 @test3_multilatch() #0 {
entry:
%retval = alloca i32, align 4
%i = alloca i32, align 4
store i32 0, ptr %retval, align 4
store i32 0, ptr %i, align 4
br label %while.cond
; Loop header: leaves the loop once the value stored in %i is no longer < 10.
while.cond: ; preds = %if.end, %if.then, %entry
%0 = load i32, ptr %i, align 4
%cmp = icmp slt i32 %0, 10
br i1 %cmp, label %while.body, label %while.end
while.body: ; preds = %while.cond
%1 = load i32, ptr %i, align 4
%rem = srem i32 %1, 2
%tobool = icmp ne i32 %rem, 0
br i1 %tobool, label %if.then, label %if.end
; First latch: the `continue` path, jumps straight back to the header.
if.then: ; preds = %while.body
br label %while.cond, !llvm.loop !0
; Second latch: increments the value stored in %i, then jumps back to the header.
if.end: ; preds = %while.body
%2 = load i32, ptr %i, align 4
%inc = add nsw i32 %2, 1
store i32 %inc, ptr %i, align 4
br label %while.cond, !llvm.loop !0
while.end: ; preds = %while.cond
%3 = load i32, ptr %retval, align 4
ret i32 %3
}
; CHECK-LABEL: test4_multilatch:
; ALIGN: .p2align 6
; ALIGN-NEXT: .LBB3_4: # %bb4
; Loop headed by %bb1 with two latches, %bb2 and %bb4, each of which is also
; an exiting block. Both back-edge branches must carry the same !llvm.loop
; node: for a loop with several latches the loop ID is only recognized when
; every latch terminator agrees on it, so leaving one latch untagged would
; drop the 64-byte alignment request from !0 and the expected `.p2align 6`
; would not be emitted.
define void @test4_multilatch(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
entry:
  br label %bb1

; Loop header: selects between the %bb3 -> %bb2 path and the %bb4 path.
bb1:                                              ; preds = %bb2, %bb4, %entry
  call void @bar()
  %cmp3 = icmp sgt i32 %c, 10
  br i1 %cmp3, label %bb3, label %bb4

; First latch: back edge to %bb1, tagged with !0.
bb2:                                              ; preds = %bb3
  call void @bar()
  %cmp1 = icmp sgt i32 %a, 11
  br i1 %cmp1, label %bb1, label %exit, !llvm.loop !0

; Exiting block inside the loop; not a latch, so it carries no loop metadata.
bb3:                                              ; preds = %bb1
  call void @bar()
  %cmp2 = icmp sgt i32 %b, 12
  br i1 %cmp2, label %bb2, label %exit

; Second latch: back edge to %bb1, tagged with the same !0 as %bb2.
bb4:                                              ; preds = %bb1
  call void @bar()
  %cmp4 = icmp sgt i32 %d, 14
  br i1 %cmp4, label %bb1, label %exit, !llvm.loop !0

exit:                                             ; preds = %bb2, %bb3, %bb4
  ret void
}
; test5 is to check if .p2align can be correctly set on loops with a single
; latch that's not the exiting block.
; The test IR is generated from below simple C file:
; $ clang -O0 -S -emit-llvm loop.c
; $ cat loop.c
; int test5(int n) {
; int i = 0;
; [[clang::code_align(64)]]
; while (i < n) {
; i++;
; }
; }
; CHECK-LABEL: test5:
; ALIGN: .p2align 6
; ALIGN-NEXT: .LBB4_1: # %while.cond
; Unoptimized while-loop whose single latch is not an exiting block:
; %while.cond is the header and the only block that branches out of the loop
; (to %while.end), while %while.body is the only latch and ends in an
; unconditional branch carrying !llvm.loop !0.
define i32 @test5(i32 %n) #0 {
entry:
%retval = alloca i32, align 4
%n.addr = alloca i32, align 4
%i = alloca i32, align 4
store i32 %n, ptr %n.addr, align 4
store i32 0, ptr %i, align 4
br label %while.cond
; Header and sole exiting block: continues while the stored i is < the stored n.
while.cond: ; preds = %while.body, %entry
%i.val = load i32, ptr %i, align 4
%n.val = load i32, ptr %n.addr, align 4
%cmp = icmp slt i32 %i.val, %n.val
br i1 %cmp, label %while.body, label %while.end
; Sole latch: increments the stored i; the loop metadata sits on this
; unconditional back edge rather than on the exiting branch above.
while.body: ; preds = %while.cond
%tmp = load i32, ptr %i, align 4
%inc = add nsw i32 %tmp, 1
store i32 %inc, ptr %i, align 4
br label %while.cond, !llvm.loop !0
; NOTE(review): %retval is never stored to in this function, so the loaded
; return value is unspecified; presumably irrelevant to the alignment check.
while.end: ; preds = %while.cond
%val = load i32, ptr %retval, align 4
ret i32 %val
}
; External functions called from the loop bodies; declared only, not defined
; in this file.
declare void @bar()
declare void @var()
; Loop ID !0: requests a loop alignment of 64 (matched as `.p2align 6`).
!0 = distinct !{!0, !1}
!1 = !{!"llvm.loop.align", i32 64}
; Loop ID !2: requests a loop alignment of 512 (matched as `.p2align 9`).
!2 = distinct !{!2, !3}
!3 = !{!"llvm.loop.align", i32 512}