Simon Pilgrim a2a0089ac3
[X86] movsd/movss/movd/movq - add support for constant comments (#78601)
If we're loading a constant value, print the constant (and the zero upper elements) instead of just the shuffle mask.

This did require me to move the shuffle mask handling into addConstantComments as we can't handle this in the MC layer.
2024-01-19 14:21:26 +00:00

363 lines
11 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-linux | FileCheck %s --check-prefix=CMOV
; RUN: llc < %s -mtriple=i686-unknown-linux | FileCheck %s --check-prefix=NOCMOV
; The same IR is compiled twice: the x86_64 invocation is verified against the
; assertion lines tagged with the first prefix above, and the i686 invocation
; against the lines tagged with the second prefix.
; NOTE(review): the layout string below requests Mach-O style mangling (m:o)
; although both triples above are Linux - confirm this is intentional.
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
; Test 2xCMOV patterns exposed after legalization.
; One way to do that is with (select (fcmp une/oeq)), which gets
; legalized to setp/setne.
; Selects one of two i32 arguments on an ordered-equal (oeq) float compare.
; The x86-64 assertions expect a single ucomiss followed by two conditional
; moves (cmovne, then cmovp) that both install %esi. The i686 assertions
; expect a jne/jp branch pair that decides which stack-slot address ends up in
; %eax before the final load through it.
define dso_local i32 @test_select_fcmp_oeq_i32(float %a, float %b, i32 %c, i32 %d) nounwind {
; CMOV-LABEL: test_select_fcmp_oeq_i32:
; CMOV: # %bb.0: # %entry
; CMOV-NEXT: movl %edi, %eax
; CMOV-NEXT: ucomiss %xmm1, %xmm0
; CMOV-NEXT: cmovnel %esi, %eax
; CMOV-NEXT: cmovpl %esi, %eax
; CMOV-NEXT: retq
;
; NOCMOV-LABEL: test_select_fcmp_oeq_i32:
; NOCMOV: # %bb.0: # %entry
; NOCMOV-NEXT: flds {{[0-9]+}}(%esp)
; NOCMOV-NEXT: flds {{[0-9]+}}(%esp)
; NOCMOV-NEXT: fucompp
; NOCMOV-NEXT: fnstsw %ax
; NOCMOV-NEXT: # kill: def $ah killed $ah killed $ax
; NOCMOV-NEXT: sahf
; NOCMOV-NEXT: leal {{[0-9]+}}(%esp), %eax
; NOCMOV-NEXT: jne .LBB0_3
; NOCMOV-NEXT: # %bb.1: # %entry
; NOCMOV-NEXT: jp .LBB0_3
; NOCMOV-NEXT: # %bb.2: # %entry
; NOCMOV-NEXT: leal {{[0-9]+}}(%esp), %eax
; NOCMOV-NEXT: .LBB0_3: # %entry
; NOCMOV-NEXT: movl (%eax), %eax
; NOCMOV-NEXT: retl
entry:
; A single fcmp feeds a single select; the paired not-equal/parity tests in
; the assertions above both come from this one compare.
%cmp = fcmp oeq float %a, %b
%r = select i1 %cmp, i32 %c, i32 %d
ret i32 %r
}
; i64 variant of the ordered-equal select. The x86-64 assertions expect the
; same two-conditional-move shape using 64-bit registers (cmovneq, cmovpq).
; The i686 assertions expect a jne/jp branch pair choosing an address in %ecx,
; followed by two 32-bit loads (offsets 0 and 4) into %eax and %edx.
define i64 @test_select_fcmp_oeq_i64(float %a, float %b, i64 %c, i64 %d) nounwind {
; CMOV-LABEL: test_select_fcmp_oeq_i64:
; CMOV: # %bb.0: # %entry
; CMOV-NEXT: movq %rdi, %rax
; CMOV-NEXT: ucomiss %xmm1, %xmm0
; CMOV-NEXT: cmovneq %rsi, %rax
; CMOV-NEXT: cmovpq %rsi, %rax
; CMOV-NEXT: retq
;
; NOCMOV-LABEL: test_select_fcmp_oeq_i64:
; NOCMOV: # %bb.0: # %entry
; NOCMOV-NEXT: flds {{[0-9]+}}(%esp)
; NOCMOV-NEXT: flds {{[0-9]+}}(%esp)
; NOCMOV-NEXT: fucompp
; NOCMOV-NEXT: fnstsw %ax
; NOCMOV-NEXT: # kill: def $ah killed $ah killed $ax
; NOCMOV-NEXT: sahf
; NOCMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx
; NOCMOV-NEXT: jne .LBB1_3
; NOCMOV-NEXT: # %bb.1: # %entry
; NOCMOV-NEXT: jp .LBB1_3
; NOCMOV-NEXT: # %bb.2: # %entry
; NOCMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx
; NOCMOV-NEXT: .LBB1_3: # %entry
; NOCMOV-NEXT: movl (%ecx), %eax
; NOCMOV-NEXT: movl 4(%ecx), %edx
; NOCMOV-NEXT: retl
entry:
; Same compare as the i32 test; only the selected type is widened to i64.
%cmp = fcmp oeq float %a, %b
%r = select i1 %cmp, i64 %c, i64 %d
ret i64 %r
}
; i64 select on an unordered-or-not-equal (une) float compare. Compared with
; the oeq i64 test, the x86-64 assertions swap the register roles: %rsi is
; copied into %rax first and %rdi is installed by cmovneq/cmovpq. The i686
; assertions keep the jne/jp branch pair followed by two 32-bit loads.
define i64 @test_select_fcmp_une_i64(float %a, float %b, i64 %c, i64 %d) nounwind {
; CMOV-LABEL: test_select_fcmp_une_i64:
; CMOV: # %bb.0: # %entry
; CMOV-NEXT: movq %rsi, %rax
; CMOV-NEXT: ucomiss %xmm1, %xmm0
; CMOV-NEXT: cmovneq %rdi, %rax
; CMOV-NEXT: cmovpq %rdi, %rax
; CMOV-NEXT: retq
;
; NOCMOV-LABEL: test_select_fcmp_une_i64:
; NOCMOV: # %bb.0: # %entry
; NOCMOV-NEXT: flds {{[0-9]+}}(%esp)
; NOCMOV-NEXT: flds {{[0-9]+}}(%esp)
; NOCMOV-NEXT: fucompp
; NOCMOV-NEXT: fnstsw %ax
; NOCMOV-NEXT: # kill: def $ah killed $ah killed $ax
; NOCMOV-NEXT: sahf
; NOCMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx
; NOCMOV-NEXT: jne .LBB2_3
; NOCMOV-NEXT: # %bb.1: # %entry
; NOCMOV-NEXT: jp .LBB2_3
; NOCMOV-NEXT: # %bb.2: # %entry
; NOCMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx
; NOCMOV-NEXT: .LBB2_3: # %entry
; NOCMOV-NEXT: movl (%ecx), %eax
; NOCMOV-NEXT: movl 4(%ecx), %edx
; NOCMOV-NEXT: retl
entry:
; une is the other predicate named in the file header comment; it uses the
; same operands as the oeq tests.
%cmp = fcmp une float %a, %b
%r = select i1 %cmp, i64 %c, i64 %d
ret i64 %r
}
; Selects between two double arguments on an ordered-equal float compare.
; Here the x86-64 assertions expect branches (jne/jp) around a movaps rather
; than conditional moves. The i686 assertions expect the jne/jp branch pair
; choosing an address in %eax, then an fldl through it.
define dso_local double @test_select_fcmp_oeq_f64(float %a, float %b, double %c, double %d) nounwind {
; CMOV-LABEL: test_select_fcmp_oeq_f64:
; CMOV: # %bb.0: # %entry
; CMOV-NEXT: ucomiss %xmm1, %xmm0
; CMOV-NEXT: jne .LBB3_3
; CMOV-NEXT: # %bb.1: # %entry
; CMOV-NEXT: jp .LBB3_3
; CMOV-NEXT: # %bb.2: # %entry
; CMOV-NEXT: movaps %xmm2, %xmm3
; CMOV-NEXT: .LBB3_3: # %entry
; CMOV-NEXT: movaps %xmm3, %xmm0
; CMOV-NEXT: retq
;
; NOCMOV-LABEL: test_select_fcmp_oeq_f64:
; NOCMOV: # %bb.0: # %entry
; NOCMOV-NEXT: flds {{[0-9]+}}(%esp)
; NOCMOV-NEXT: flds {{[0-9]+}}(%esp)
; NOCMOV-NEXT: fucompp
; NOCMOV-NEXT: fnstsw %ax
; NOCMOV-NEXT: # kill: def $ah killed $ah killed $ax
; NOCMOV-NEXT: sahf
; NOCMOV-NEXT: leal {{[0-9]+}}(%esp), %eax
; NOCMOV-NEXT: jne .LBB3_3
; NOCMOV-NEXT: # %bb.1: # %entry
; NOCMOV-NEXT: jp .LBB3_3
; NOCMOV-NEXT: # %bb.2: # %entry
; NOCMOV-NEXT: leal {{[0-9]+}}(%esp), %eax
; NOCMOV-NEXT: .LBB3_3: # %entry
; NOCMOV-NEXT: fldl (%eax)
; NOCMOV-NEXT: retl
entry:
; The compared values are float while the selected values are double.
%cmp = fcmp oeq float %a, %b
%r = select i1 %cmp, double %c, double %d
ret double %r
}
; Selects between two <4 x i32> arguments on an ordered-equal float compare.
; The x86-64 assertions expect the same branch-around-movaps shape as the f64
; test. The i686 assertions expect four jne/jp sequences (labels .LBB4_3,
; .LBB4_6, .LBB4_9 and .LBB4_12), each choosing an address whose 32-bit
; contents are then stored at offsets 0, 4, 8 and 12 from the pointer held in
; %eax, and the function returns with retl $4.
define <4 x i32> @test_select_fcmp_oeq_v4i32(float %a, float %b, <4 x i32> %c, <4 x i32> %d) nounwind {
; CMOV-LABEL: test_select_fcmp_oeq_v4i32:
; CMOV: # %bb.0: # %entry
; CMOV-NEXT: ucomiss %xmm1, %xmm0
; CMOV-NEXT: jne .LBB4_3
; CMOV-NEXT: # %bb.1: # %entry
; CMOV-NEXT: jp .LBB4_3
; CMOV-NEXT: # %bb.2: # %entry
; CMOV-NEXT: movaps %xmm2, %xmm3
; CMOV-NEXT: .LBB4_3: # %entry
; CMOV-NEXT: movaps %xmm3, %xmm0
; CMOV-NEXT: retq
;
; NOCMOV-LABEL: test_select_fcmp_oeq_v4i32:
; NOCMOV: # %bb.0: # %entry
; NOCMOV-NEXT: pushl %edi
; NOCMOV-NEXT: pushl %esi
; NOCMOV-NEXT: flds {{[0-9]+}}(%esp)
; NOCMOV-NEXT: flds {{[0-9]+}}(%esp)
; NOCMOV-NEXT: fucompp
; NOCMOV-NEXT: fnstsw %ax
; NOCMOV-NEXT: # kill: def $ah killed $ah killed $ax
; NOCMOV-NEXT: sahf
; NOCMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx
; NOCMOV-NEXT: jne .LBB4_3
; NOCMOV-NEXT: # %bb.1: # %entry
; NOCMOV-NEXT: jp .LBB4_3
; NOCMOV-NEXT: # %bb.2: # %entry
; NOCMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx
; NOCMOV-NEXT: .LBB4_3: # %entry
; NOCMOV-NEXT: leal {{[0-9]+}}(%esp), %edx
; NOCMOV-NEXT: jne .LBB4_6
; NOCMOV-NEXT: # %bb.4: # %entry
; NOCMOV-NEXT: jp .LBB4_6
; NOCMOV-NEXT: # %bb.5: # %entry
; NOCMOV-NEXT: leal {{[0-9]+}}(%esp), %edx
; NOCMOV-NEXT: .LBB4_6: # %entry
; NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
; NOCMOV-NEXT: leal {{[0-9]+}}(%esp), %esi
; NOCMOV-NEXT: jne .LBB4_9
; NOCMOV-NEXT: # %bb.7: # %entry
; NOCMOV-NEXT: jp .LBB4_9
; NOCMOV-NEXT: # %bb.8: # %entry
; NOCMOV-NEXT: leal {{[0-9]+}}(%esp), %esi
; NOCMOV-NEXT: .LBB4_9: # %entry
; NOCMOV-NEXT: movl (%ecx), %ecx
; NOCMOV-NEXT: movl (%edx), %edx
; NOCMOV-NEXT: movl (%esi), %esi
; NOCMOV-NEXT: leal {{[0-9]+}}(%esp), %edi
; NOCMOV-NEXT: jne .LBB4_12
; NOCMOV-NEXT: # %bb.10: # %entry
; NOCMOV-NEXT: jp .LBB4_12
; NOCMOV-NEXT: # %bb.11: # %entry
; NOCMOV-NEXT: leal {{[0-9]+}}(%esp), %edi
; NOCMOV-NEXT: .LBB4_12: # %entry
; NOCMOV-NEXT: movl (%edi), %edi
; NOCMOV-NEXT: movl %edi, 12(%eax)
; NOCMOV-NEXT: movl %esi, 8(%eax)
; NOCMOV-NEXT: movl %edx, 4(%eax)
; NOCMOV-NEXT: movl %ecx, (%eax)
; NOCMOV-NEXT: popl %esi
; NOCMOV-NEXT: popl %edi
; NOCMOV-NEXT: retl $4
entry:
; One scalar condition selects the whole vector (the condition is an i1, not a
; vector of i1).
%cmp = fcmp oeq float %a, %b
%r = select i1 %cmp, <4 x i32> %c, <4 x i32> %d
ret <4 x i32> %r
}
; Also make sure we catch the original code-sequence of interest:
; Converts the result of an une float compare to a float via zext + sitofp.
; The x86-64 assertions expect no branch or conditional move at all: a
; cmpneqss result is ANDed (andps) with a constant whose low element is 1.0.
; The i686 assertions expect an x87 sequence that pushes 1.0 and 0.0 and uses
; jne/jp branches to decide which value is kept.
define dso_local float @test_zext_fcmp_une(float %a, float %b) nounwind {
; CMOV-LABEL: test_zext_fcmp_une:
; CMOV: # %bb.0: # %entry
; CMOV-NEXT: cmpneqss %xmm1, %xmm0
; CMOV-NEXT: movss {{.*#+}} xmm1 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; CMOV-NEXT: andps %xmm1, %xmm0
; CMOV-NEXT: retq
;
; NOCMOV-LABEL: test_zext_fcmp_une:
; NOCMOV: # %bb.0: # %entry
; NOCMOV-NEXT: flds {{[0-9]+}}(%esp)
; NOCMOV-NEXT: flds {{[0-9]+}}(%esp)
; NOCMOV-NEXT: fucompp
; NOCMOV-NEXT: fnstsw %ax
; NOCMOV-NEXT: # kill: def $ah killed $ah killed $ax
; NOCMOV-NEXT: sahf
; NOCMOV-NEXT: fld1
; NOCMOV-NEXT: fldz
; NOCMOV-NEXT: jne .LBB5_1
; NOCMOV-NEXT: # %bb.2: # %entry
; NOCMOV-NEXT: jp .LBB5_5
; NOCMOV-NEXT: # %bb.3: # %entry
; NOCMOV-NEXT: fstp %st(1)
; NOCMOV-NEXT: jmp .LBB5_4
; NOCMOV-NEXT: .LBB5_1:
; NOCMOV-NEXT: fstp %st(0)
; NOCMOV-NEXT: .LBB5_4: # %entry
; NOCMOV-NEXT: fldz
; NOCMOV-NEXT: .LBB5_5: # %entry
; NOCMOV-NEXT: fstp %st(0)
; NOCMOV-NEXT: retl
entry:
%cmp = fcmp une float %a, %b
; zext of an i1 yields 0 or 1, so the signed int-to-float conversion below can
; only produce 0.0 or 1.0.
%conv1 = zext i1 %cmp to i32
%conv2 = sitofp i32 %conv1 to float
ret float %conv2
}
; oeq counterpart of the zext/sitofp test. The x86-64 assertions expect
; cmpeqss followed by an andps with a constant whose low element is 1.0. The
; i686 assertions expect the x87 branch sequence with the two constant pushes
; in the opposite order (fldz, then fld1) compared with the une variant.
define dso_local float @test_zext_fcmp_oeq(float %a, float %b) nounwind {
; CMOV-LABEL: test_zext_fcmp_oeq:
; CMOV: # %bb.0: # %entry
; CMOV-NEXT: cmpeqss %xmm1, %xmm0
; CMOV-NEXT: movss {{.*#+}} xmm1 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; CMOV-NEXT: andps %xmm1, %xmm0
; CMOV-NEXT: retq
;
; NOCMOV-LABEL: test_zext_fcmp_oeq:
; NOCMOV: # %bb.0: # %entry
; NOCMOV-NEXT: flds {{[0-9]+}}(%esp)
; NOCMOV-NEXT: flds {{[0-9]+}}(%esp)
; NOCMOV-NEXT: fucompp
; NOCMOV-NEXT: fnstsw %ax
; NOCMOV-NEXT: # kill: def $ah killed $ah killed $ax
; NOCMOV-NEXT: sahf
; NOCMOV-NEXT: fldz
; NOCMOV-NEXT: fld1
; NOCMOV-NEXT: jne .LBB6_1
; NOCMOV-NEXT: # %bb.2: # %entry
; NOCMOV-NEXT: jp .LBB6_5
; NOCMOV-NEXT: # %bb.3: # %entry
; NOCMOV-NEXT: fstp %st(1)
; NOCMOV-NEXT: jmp .LBB6_4
; NOCMOV-NEXT: .LBB6_1:
; NOCMOV-NEXT: fstp %st(0)
; NOCMOV-NEXT: .LBB6_4: # %entry
; NOCMOV-NEXT: fldz
; NOCMOV-NEXT: .LBB6_5: # %entry
; NOCMOV-NEXT: fstp %st(0)
; NOCMOV-NEXT: retl
entry:
%cmp = fcmp oeq float %a, %b
; zext of an i1 yields 0 or 1, so the signed int-to-float conversion below can
; only produce 0.0 or 1.0.
%conv1 = zext i1 %cmp to i32
%conv2 = sitofp i32 %conv1 to float
ret float %conv2
}
; Attribute group #0 is defined here, but the functions in the visible source
; spell out nounwind inline instead of referencing #0.
attributes #0 = { nounwind }
; Byte-sized global that receives the volatile store in @no_cascade_opt.
@g8 = dso_local global i8 0
; The following test failed because llvm had a bug where a structure like:
;
; %12 = CMOV_GR8 %7, %11 ... (lt)
; %13 = CMOV_GR8 %12, %11 ... (gt)
;
; was lowered incorrectly.
;
; The first two cmovs got expanded to:
; %bb.0:
; JCC_1 %bb.9, 12
; %bb.7:
; JCC_1 %bb.9, 15
; %bb.8:
; %bb.9:
; %12 = phi(%7, %bb.8, %11, %bb.0, %12, %bb.7)
; %13 = COPY %12
; Which was invalid as %12 is not the same value as %13
; Regression test with three i8 selects: two cascaded selects on slt/sgt of
; the same operands, plus a third select that reads both earlier results.
; The x86-64 assertions expect three separate conditional moves (cmovl,
; cmovle, cmovne) before the byte store to g8. The i686 assertions expect a
; branch-based sequence that stores %al to g8 on both of its exit paths.
define dso_local void @no_cascade_opt(i32 %v0, i32 %v1, i32 %v2, i32 %v3) nounwind {
; CMOV-LABEL: no_cascade_opt:
; CMOV: # %bb.0: # %entry
; CMOV-NEXT: cmpl %edx, %esi
; CMOV-NEXT: movl $20, %eax
; CMOV-NEXT: cmovll %eax, %ecx
; CMOV-NEXT: cmovlel %ecx, %eax
; CMOV-NEXT: testl %edi, %edi
; CMOV-NEXT: cmovnel %ecx, %eax
; CMOV-NEXT: movb %al, g8(%rip)
; CMOV-NEXT: retq
;
; NOCMOV-LABEL: no_cascade_opt:
; NOCMOV: # %bb.0: # %entry
; NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
; NOCMOV-NEXT: cmpl {{[0-9]+}}(%esp), %eax
; NOCMOV-NEXT: movb $20, %al
; NOCMOV-NEXT: movb $20, %cl
; NOCMOV-NEXT: jge .LBB7_1
; NOCMOV-NEXT: # %bb.2: # %entry
; NOCMOV-NEXT: jle .LBB7_3
; NOCMOV-NEXT: .LBB7_4: # %entry
; NOCMOV-NEXT: cmpl $0, {{[0-9]+}}(%esp)
; NOCMOV-NEXT: jne .LBB7_5
; NOCMOV-NEXT: .LBB7_6: # %entry
; NOCMOV-NEXT: movb %al, g8
; NOCMOV-NEXT: retl
; NOCMOV-NEXT: .LBB7_1: # %entry
; NOCMOV-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; NOCMOV-NEXT: jg .LBB7_4
; NOCMOV-NEXT: .LBB7_3: # %entry
; NOCMOV-NEXT: movl %ecx, %eax
; NOCMOV-NEXT: cmpl $0, {{[0-9]+}}(%esp)
; NOCMOV-NEXT: je .LBB7_6
; NOCMOV-NEXT: .LBB7_5: # %entry
; NOCMOV-NEXT: movl %ecx, %eax
; NOCMOV-NEXT: movb %al, g8
; NOCMOV-NEXT: retl
entry:
; %c1 and %c2 compare the same pair (%v1, %v2) with different predicates.
%c0 = icmp eq i32 %v0, 0
%c1 = icmp slt i32 %v1, %v2
%c2 = icmp sgt i32 %v1, %v2
%trunc = trunc i32 %v3 to i8
; %sel1 takes %sel0 as its false operand, so the two selects form a cascade.
%sel0 = select i1 %c1, i8 20, i8 %trunc
%sel1 = select i1 %c2, i8 20, i8 %sel0
; %sel2 reads both %sel1 and %sel0, so the two must stay distinct values.
%sel2 = select i1 %c0, i8 %sel1, i8 %sel0
; The store is volatile, and it is the function's only store.
store volatile i8 %sel2, ptr @g8
ret void
}