
As of rev ea222be0d, LLVM's assembler actually tries to honour the "fill value" part of p2align directives. X86 printed these with a fill value of 0x90, which isn't what we actually want: we want multi-byte nops for .text padding. Since ea222be0d, compiling via a textual assembly file produces single-byte nop padding, whereas the built-in assembler produces multi-byte nops. This divergent behaviour is undesirable. To fix it, don't set the byte padding field for x86, which allows the assembler to pick multi-byte nops. Test that we get the same multi-byte padding whether compiling via textual assembly or directly to an object file: add same-align-bytes-with-llasm-llobj.ll to that effect, and update numerous other tests so that they no longer contain check-lines for the explicit padding.
187 lines
5.7 KiB
LLVM
187 lines
5.7 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc < %s -mtriple=x86_64-pc-linux-gnu | FileCheck %s -check-prefixes=CHECK,ALIGN
|
|
; RUN: llc < %s -mtriple=x86_64-pc-linux-gnu -align-loops=32 | FileCheck %s -check-prefixes=CHECK,ALIGN32
|
|
; RUN: llc < %s -mtriple=x86_64-pc-linux-gnu -align-loops=256 | FileCheck %s -check-prefixes=CHECK,ALIGN256
|
|
|
|
; This test checks that .p2align is correctly generated by considering
|
|
; 1. -align-loops=N from llc option
|
|
; 2. loop metadata node !{!"llvm.loop.align", i32 64}
|
|
; The test IR is generated from below simple C file:
|
|
; $ clang -S -emit-llvm loop.c
|
|
; $ cat loop.c
|
|
; void bar(void);
|
|
; void var(void);
|
|
; void foo(int a) {
|
|
; for (int i = 0; i < a; ++i)
|
|
; bar();
|
|
; for (int i = 0; i < a; ++i)
|
|
; var();
|
|
; }
|
|
; The difference between test1 and test2 is that test2 only sets a loop metadata node on the second loop.
|
|
|
|
; CHECK-LABEL: test1:
|
|
; ALIGN: .p2align 6
|
|
; ALIGN-NEXT: .LBB0_2: # %for.body
|
|
; ALIGN: .p2align 9
|
|
; ALIGN-NEXT: .LBB0_3: # %for.body
|
|
|
|
; ALIGN32: .p2align 6
|
|
; ALIGN32-NEXT: .LBB0_2: # %for.body
|
|
; ALIGN32: .p2align 9
|
|
; ALIGN32-NEXT: .LBB0_3: # %for.body
|
|
|
|
; ALIGN256: .p2align 8
|
|
; ALIGN256-NEXT: .LBB0_2: # %for.body
|
|
; ALIGN256: .p2align 9
|
|
; ALIGN256-NEXT: .LBB0_3: # %for.body
|
|
|
|
; test1: two consecutive counted loops over %a. The first loop calls @bar once
; per iteration and the second calls @var. Each loop's latch branch carries
; !llvm.loop metadata: !0 on the first loop, !2 on the second.
; NOTE(review): the lone `|` lines throughout look like extraction residue
; rather than LLVM IR - confirm and strip them before feeding this to llc.
define void @test1(i32 %a) nounwind {
|
|
entry:
|
|
%cmp12 = icmp sgt i32 %a, 0
|
|
; When %a <= 0, neither loop is entered and control goes straight to the return.
br i1 %cmp12, label %for.body, label %for.cond.cleanup4
|
|
|
|
; First loop: body and latch in a single block.
for.body: ; preds = %entry, %for.body
|
|
%i.013 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
|
|
tail call void @bar()
|
|
%inc = add nuw nsw i32 %i.013, 1
|
|
%exitcond.not = icmp eq i32 %inc, %a
|
|
; Back-edge of the first loop, tagged with !0; exiting falls into the second loop.
br i1 %exitcond.not, label %for.body5, label %for.body, !llvm.loop !0
|
|
|
|
for.cond.cleanup4: ; preds = %for.body5, %entry
|
|
ret void
|
|
|
|
; Second loop: same shape as the first, but calls @var.
for.body5: ; preds = %for.body, %for.body5
|
|
%i1.015 = phi i32 [ %inc7, %for.body5 ], [ 0, %for.body ]
|
|
tail call void @var()
|
|
%inc7 = add nuw nsw i32 %i1.015, 1
|
|
%exitcond16.not = icmp eq i32 %inc7, %a
|
|
; Back-edge of the second loop, tagged with !2.
br i1 %exitcond16.not, label %for.cond.cleanup4, label %for.body5, !llvm.loop !2
|
|
}
|
|
|
|
; CHECK-LABEL: test2:
|
|
; ALIGN: .p2align 4
|
|
; ALIGN-NEXT: .LBB1_2: # %for.body
|
|
; ALIGN: .p2align 9
|
|
; ALIGN-NEXT: .LBB1_3: # %for.body
|
|
|
|
; ALIGN32: .p2align 5
|
|
; ALIGN32-NEXT: .LBB1_2: # %for.body
|
|
; ALIGN32: .p2align 9
|
|
; ALIGN32-NEXT: .LBB1_3: # %for.body
|
|
|
|
; ALIGN256: .p2align 8
|
|
; ALIGN256-NEXT: .LBB1_2: # %for.body
|
|
; ALIGN256: .p2align 9
|
|
; ALIGN256-NEXT: .LBB1_3: # %for.body
|
|
; test2: same two-loop shape as @test1, except that the first loop's latch
; branch has no !llvm.loop metadata; only the second loop is tagged (with !2).
define void @test2(i32 %a) nounwind {
|
|
entry:
|
|
%cmp12 = icmp sgt i32 %a, 0
|
|
; When %a <= 0, neither loop is entered and control goes straight to the return.
br i1 %cmp12, label %for.body, label %for.cond.cleanup4
|
|
|
|
; First loop: calls @bar once per iteration.
for.body: ; preds = %entry, %for.body
|
|
%i.013 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
|
|
tail call void @bar()
|
|
%inc = add nuw nsw i32 %i.013, 1
|
|
%exitcond.not = icmp eq i32 %inc, %a
|
|
; Back-edge of the first loop: deliberately carries no loop metadata here.
br i1 %exitcond.not, label %for.body5, label %for.body
|
|
|
|
for.cond.cleanup4: ; preds = %for.body5, %entry
|
|
ret void
|
|
|
|
; Second loop: calls @var once per iteration.
for.body5: ; preds = %for.body, %for.body5
|
|
%i1.015 = phi i32 [ %inc7, %for.body5 ], [ 0, %for.body ]
|
|
tail call void @var()
|
|
%inc7 = add nuw nsw i32 %i1.015, 1
|
|
%exitcond16.not = icmp eq i32 %inc7, %a
|
|
; Back-edge of the second loop, tagged with !2.
br i1 %exitcond16.not, label %for.cond.cleanup4, label %for.body5, !llvm.loop !2
|
|
}
|
|
|
|
; test3 and test4 check that .p2align is correctly set on loops with
|
|
; multiple latches. The IR is generated from the simple C file below:
|
|
; $ clang -O0 -S -emit-llvm loop.c
|
|
; $ cat loop.c
|
|
; int test3() {
|
|
; int i = 0;
|
|
; [[clang::code_align(64)]]
|
|
; while (i < 10) {
|
|
; if (i % 2) {
|
|
; continue;
|
|
; }
|
|
; i++;
|
|
; }
|
|
; }
|
|
; CHECK-LABEL: test3_multilatch:
|
|
; ALIGN: .p2align 6
|
|
; ALIGN-NEXT: .LBB2_1: # %while.cond
|
|
; test3_multilatch: a while-style loop, kept in unoptimized form with its
; counter in a stack slot, whose header %while.cond is reached by two
; back-edges (from %if.then and from %if.end). Both back-edges carry
; !llvm.loop !0.
; NOTE(review): attribute group #0 is not defined in the visible part of this
; file - confirm it is intentional.
define dso_local i32 @test3_multilatch() #0 {
|
|
entry:
|
|
%retval = alloca i32, align 4
|
|
%i = alloca i32, align 4
|
|
store i32 0, ptr %retval, align 4
|
|
store i32 0, ptr %i, align 4
|
|
br label %while.cond
|
|
|
|
; Loop header: continue while the stored counter is < 10.
while.cond: ; preds = %if.end, %if.then, %entry
|
|
%0 = load i32, ptr %i, align 4
|
|
%cmp = icmp slt i32 %0, 10
|
|
br i1 %cmp, label %while.body, label %while.end
|
|
|
|
; Branch on whether the counter is odd (srem by 2 is non-zero).
while.body: ; preds = %while.cond
|
|
%1 = load i32, ptr %i, align 4
|
|
%rem = srem i32 %1, 2
|
|
%tobool = icmp ne i32 %rem, 0
|
|
br i1 %tobool, label %if.then, label %if.end
|
|
|
|
; First latch: jumps back to the header without touching the counter.
if.then: ; preds = %while.body
|
|
br label %while.cond, !llvm.loop !0
|
|
|
|
; Second latch: increments the counter, then jumps back to the header.
if.end: ; preds = %while.body
|
|
%2 = load i32, ptr %i, align 4
|
|
%inc = add nsw i32 %2, 1
|
|
store i32 %inc, ptr %i, align 4
|
|
br label %while.cond, !llvm.loop !0
|
|
|
|
; Loop exit: return the value held in %retval (stored as 0 in the entry block).
while.end: ; preds = %while.cond
|
|
%3 = load i32, ptr %retval, align 4
|
|
ret i32 %3
|
|
}
|
|
|
|
; CHECK-LABEL: test4_multilatch:
|
|
; ALIGN: .p2align 6
|
|
; ALIGN-NEXT: .LBB3_4: # %bb4
|
|
; test4_multilatch: a loop headed by %bb1 with two back-edges, one from %bb2
; and one from %bb4. Only the %bb2 back-edge carries !llvm.loop !0; the %bb4
; back-edge has no loop metadata. Every block calls @bar, and all branch
; conditions depend only on the function arguments.
define void @test4_multilatch(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
|
|
entry:
|
|
br label %bb1
|
|
|
|
; Loop header: dispatch on %c to either %bb3 or %bb4.
bb1: ; preds = %bb2, %bb4, %entry
|
|
call void @bar()
|
|
%cmp3 = icmp sgt i32 %c, 10
|
|
br i1 %cmp3, label %bb3, label %bb4
|
|
|
|
; Latch reached via %bb3: the only back-edge tagged with !0.
bb2: ; preds = %bb3
|
|
call void @bar()
|
|
%cmp1 = icmp sgt i32 %a, 11
|
|
br i1 %cmp1, label %bb1, label %exit, !llvm.loop !0
|
|
|
|
; Intermediate block: continues to the %bb2 latch or leaves the loop.
bb3: ; preds = %bb1
|
|
call void @bar()
|
|
%cmp2 = icmp sgt i32 %b, 12
|
|
br i1 %cmp2, label %bb2, label %exit
|
|
|
|
; Second latch: back-edge to %bb1 without any loop metadata.
bb4: ; preds = %bb1
|
|
call void @bar()
|
|
%cmp4 = icmp sgt i32 %d, 14
|
|
br i1 %cmp4, label %bb1, label %exit
|
|
|
|
exit: ; preds = %bb2, %bb3, %bb4
|
|
ret void
|
|
}
|
|
|
|
; External functions called from the loop bodies; only declared in this file.
declare void @bar()
|
|
declare void @var()
|
|
|
|
; Loop metadata used by the tests in this file.
; !0 is a distinct, self-referencing loop node whose only property is !1.
!0 = distinct !{!0, !1}
|
|
; !1 sets "llvm.loop.align" to 64.
!1 = !{!"llvm.loop.align", i32 64}
|
|
; !2 is a second distinct, self-referencing loop node whose only property is !3.
!2 = distinct !{!2, !3}
|
|
; !3 sets "llvm.loop.align" to 512.
!3 = !{!"llvm.loop.align", i32 512}
|