
As of rev ea222be0d, LLVM's assembler actually tries to honour the "fill value" part of p2align directives. X86 printed these with a fill value of 0x90, which isn't actually what it wanted: we want multi-byte nops for .text padding. Since ea222be0d, compiling via a textual assembly file produces single-byte nop padding, whereas the built-in assembler produces multi-byte nops. This divergent behaviour is undesirable. To fix: don't set the byte padding field for x86, which allows the assembler to pick multi-byte nops. Test that we get the same multi-byte padding when compiling via textual assembly or directly to an object file. Added same-align-bytes-with-llasm-llobj.ll to that effect, and updated numerous other tests so that they no longer contain check-lines for the explicit padding.
66 lines
2.2 KiB
LLVM
66 lines
2.2 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -relocation-model=static < %s | FileCheck %s --check-prefix=CHECK
; RUN: llc -mcpu=atom -mtriple=x86_64-unknown-linux-gnu -relocation-model=static < %s | FileCheck %s --check-prefix=ATOM

; Externally defined, zero-length array of doubles; @foo indexes into it.
@A = external dso_local global [0 x double]

; Multiplies the first %n doubles of @A by 2.3 in place; when %n is not
; positive the loop is skipped and the function just returns.
;
; The autogenerated assertions pin the x86-64 code expected for the two llc
; invocations at the top of the file (both use the static relocation model):
;   * the loop body stores through the absolute address of A with a scaled
;     index, A(,%rax,8);
;   * the loop header is aligned with a bare ".p2align 4", i.e. no explicit
;     fill byte is printed;
;   * the -mcpu=atom variant expects "leaq 1(%rax), %rax" in place of "incq"
;     and four "nop" instructions ahead of the final "retq" (presumably
;     Atom-specific tuning; the reason is not visible in this file).
define void @foo(i64 %n) nounwind {
; CHECK-LABEL: foo:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: testq %rdi, %rdi
; CHECK-NEXT: jle .LBB0_3
; CHECK-NEXT: # %bb.1: # %for.body.preheader
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: movsd {{.*#+}} xmm0 = [2.2999999999999998E+0,0.0E+0]
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB0_2: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT: mulsd %xmm0, %xmm1
; CHECK-NEXT: movsd %xmm1, A(,%rax,8)
; CHECK-NEXT: incq %rax
; CHECK-NEXT: cmpq %rax, %rdi
; CHECK-NEXT: jne .LBB0_2
; CHECK-NEXT: .LBB0_3: # %for.end
; CHECK-NEXT: retq
;
; ATOM-LABEL: foo:
; ATOM: # %bb.0: # %entry
; ATOM-NEXT: testq %rdi, %rdi
; ATOM-NEXT: jle .LBB0_3
; ATOM-NEXT: # %bb.1: # %for.body.preheader
; ATOM-NEXT: xorl %eax, %eax
; ATOM-NEXT: movsd {{.*#+}} xmm0 = [2.2999999999999998E+0,0.0E+0]
; ATOM-NEXT: .p2align 4
; ATOM-NEXT: .LBB0_2: # %for.body
; ATOM-NEXT: # =>This Inner Loop Header: Depth=1
; ATOM-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; ATOM-NEXT: mulsd %xmm0, %xmm1
; ATOM-NEXT: movsd %xmm1, A(,%rax,8)
; ATOM-NEXT: leaq 1(%rax), %rax
; ATOM-NEXT: cmpq %rax, %rdi
; ATOM-NEXT: jne .LBB0_2
; ATOM-NEXT: .LBB0_3: # %for.end
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: retq
entry:
  ; Guard: only enter the loop when %n > 0 (signed compare).
  %cmp5 = icmp sgt i64 %n, 0
  br i1 %cmp5, label %for.body, label %for.end

for.body:
  ; %i.06 is the induction variable: 0 on entry, %inc on the back edge.
  %i.06 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr [0 x double], ptr @A, i64 0, i64 %i.06
  ; A[i] = A[i] * 2.3
  %tmp3 = load double, ptr %arrayidx, align 8
  %mul = fmul double %tmp3, 2.300000e+00
  store double %mul, ptr %arrayidx, align 8
  %inc = add nsw i64 %i.06, 1
  ; Leave the loop once the incremented index equals %n.
  %exitcond = icmp eq i64 %inc, %n
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}