
Intel docs have been updated to be similar to AMD and now describe BSF/BSR as not changing the destination register if the input value was zero. This allows us to support CTTZ/CTLZ zero-input cases by presetting the destination register to a value that yields a NumBits result (BSR is a bit messy, as the preset value has to be XOR'd to create a CTLZ result). VIA/Zhaoxin x86_64 CPUs have also been confirmed to match this behaviour. This patch adjusts the X86ISD::BSF/BSR nodes to take a "pass through" argument for zero-input cases; by default this is set to UNDEF to match existing behaviour, but it can be set to a suitable value if supported. There are still some limits to this: it's only supported for x86_64-capable processors (and I've only enabled it for x86_64 codegen), and Intel CPUs sometimes zero the upper 32 bits of a pass-through register when used for BSR32/BSF32 with a zero source value (i.e. the whole 64 bits may not get passed through). Fixes #122004
157 lines
4.6 KiB
LLVM
157 lines
4.6 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=-bmi2,-lzcnt | FileCheck %s --check-prefix=NOBMI
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi2,+lzcnt | FileCheck %s --check-prefix=BMI

; Check the assembly sequence generated for std::bit_ceil.

; std::bit_ceil<uint32_t>(x)
;
; IR shape: select(x >u 1, 1 << (32 - ctlz(x - 1)), 1). The ctlz call passes
; i1 false, so its zero-input case (x == 1) is defined rather than poison.
; Without LZCNT the expected code preloads the BSR destination with 63; this
; relies on BSR leaving its destination unchanged for a zero source, so the
; following xor with 31 yields 32 (63 ^ 31 == 32). With LZCNT the count is
; produced directly.
define i32 @bit_ceil_i32(i32 %x) {
; NOBMI-LABEL: bit_ceil_i32:
; NOBMI: # %bb.0:
; NOBMI-NEXT: # kill: def $edi killed $edi def $rdi
; NOBMI-NEXT: leal -1(%rdi), %eax
; NOBMI-NEXT: movl $63, %ecx
; NOBMI-NEXT: bsrl %eax, %ecx
; NOBMI-NEXT: xorl $31, %ecx
; NOBMI-NEXT: negb %cl
; NOBMI-NEXT: movl $1, %edx
; NOBMI-NEXT: movl $1, %eax
; NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; NOBMI-NEXT: shll %cl, %eax
; NOBMI-NEXT: cmpl $2, %edi
; NOBMI-NEXT: cmovbl %edx, %eax
; NOBMI-NEXT: retq
;
; BMI-LABEL: bit_ceil_i32:
; BMI: # %bb.0:
; BMI-NEXT: # kill: def $edi killed $edi def $rdi
; BMI-NEXT: leal -1(%rdi), %eax
; BMI-NEXT: lzcntl %eax, %eax
; BMI-NEXT: negb %al
; BMI-NEXT: movl $1, %ecx
; BMI-NEXT: shlxl %eax, %ecx, %eax
; BMI-NEXT: cmpl $2, %edi
; BMI-NEXT: cmovbl %ecx, %eax
; BMI-NEXT: retq
  %dec = add i32 %x, -1
  %lz = tail call i32 @llvm.ctlz.i32(i32 %dec, i1 false)
  %cnt = sub i32 32, %lz
  %res = shl i32 1, %cnt
  %ugt = icmp ugt i32 %x, 1
  %sel = select i1 %ugt, i32 %res, i32 1
  ret i32 %sel
}

; std::bit_ceil<uint32_t>(x + 1)
;
; With y = x + 1 the decrement inside bit_ceil cancels, so ctlz is applied to
; x itself. The guard "y >u 1" is expressed as (x - 1) <u -2, which is false
; only for x == 0 and x == UINT32_MAX (where x + 1 wraps to 0); those inputs
; select the constant 1.
; Without LZCNT the expected code preloads the BSR destination with 63; this
; relies on BSR leaving its destination unchanged for a zero source, so the
; following xor with 31 yields 32 (63 ^ 31 == 32).
define i32 @bit_ceil_i32_plus1(i32 noundef %x) {
; NOBMI-LABEL: bit_ceil_i32_plus1:
; NOBMI: # %bb.0: # %entry
; NOBMI-NEXT: movl $63, %ecx
; NOBMI-NEXT: bsrl %edi, %ecx
; NOBMI-NEXT: xorl $31, %ecx
; NOBMI-NEXT: negb %cl
; NOBMI-NEXT: movl $1, %edx
; NOBMI-NEXT: movl $1, %eax
; NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; NOBMI-NEXT: shll %cl, %eax
; NOBMI-NEXT: decl %edi
; NOBMI-NEXT: cmpl $-2, %edi
; NOBMI-NEXT: cmovael %edx, %eax
; NOBMI-NEXT: retq
;
; BMI-LABEL: bit_ceil_i32_plus1:
; BMI: # %bb.0: # %entry
; BMI-NEXT: lzcntl %edi, %eax
; BMI-NEXT: negb %al
; BMI-NEXT: movl $1, %ecx
; BMI-NEXT: shlxl %eax, %ecx, %eax
; BMI-NEXT: decl %edi
; BMI-NEXT: cmpl $-2, %edi
; BMI-NEXT: cmovael %ecx, %eax
; BMI-NEXT: retq
entry:
  %ctlz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 false)
  %cnt = sub i32 32, %ctlz
  %shl = shl i32 1, %cnt
  %dec = add i32 %x, -1
  %ult = icmp ult i32 %dec, -2
  %sel = select i1 %ult, i32 %shl, i32 1
  ret i32 %sel
}

; std::bit_ceil<uint64_t>(x)
;
; IR shape: select(x >u 1, 1 << (64 - ctlz(x - 1)), 1). The ctlz call passes
; i1 false, so its zero-input case (x == 1) is defined rather than poison.
; Without LZCNT the expected code preloads the BSR destination with 127; this
; relies on BSR leaving its destination unchanged for a zero source, so the
; following xor with 63 yields 64 (127 ^ 63 == 64). With LZCNT the count is
; produced directly.
define i64 @bit_ceil_i64(i64 %x) {
; NOBMI-LABEL: bit_ceil_i64:
; NOBMI: # %bb.0:
; NOBMI-NEXT: leaq -1(%rdi), %rax
; NOBMI-NEXT: movl $127, %ecx
; NOBMI-NEXT: bsrq %rax, %rcx
; NOBMI-NEXT: xorl $63, %ecx
; NOBMI-NEXT: negb %cl
; NOBMI-NEXT: movl $1, %edx
; NOBMI-NEXT: movl $1, %eax
; NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx
; NOBMI-NEXT: shlq %cl, %rax
; NOBMI-NEXT: cmpq $2, %rdi
; NOBMI-NEXT: cmovbq %rdx, %rax
; NOBMI-NEXT: retq
;
; BMI-LABEL: bit_ceil_i64:
; BMI: # %bb.0:
; BMI-NEXT: leaq -1(%rdi), %rax
; BMI-NEXT: lzcntq %rax, %rax
; BMI-NEXT: negb %al
; BMI-NEXT: movl $1, %ecx
; BMI-NEXT: shlxq %rax, %rcx, %rax
; BMI-NEXT: cmpq $2, %rdi
; BMI-NEXT: cmovbq %rcx, %rax
; BMI-NEXT: retq
  %dec = add i64 %x, -1
  %lz = tail call i64 @llvm.ctlz.i64(i64 %dec, i1 false)
  %cnt = sub i64 64, %lz
  %res = shl i64 1, %cnt
  %ugt = icmp ugt i64 %x, 1
  %sel = select i1 %ugt, i64 %res, i64 1
  ret i64 %sel
}

; std::bit_ceil<uint64_t>(x + 1)
;
; With y = x + 1 the decrement inside bit_ceil cancels, so ctlz is applied to
; x itself. The guard "y >u 1" is expressed as (x - 1) <u -2, which is false
; only for x == 0 and x == UINT64_MAX (where x + 1 wraps to 0); those inputs
; select the constant 1.
; Without LZCNT the expected code preloads the BSR destination with 127; this
; relies on BSR leaving its destination unchanged for a zero source, so the
; following xor with 63 yields 64 (127 ^ 63 == 64).
define i64 @bit_ceil_i64_plus1(i64 noundef %x) {
; NOBMI-LABEL: bit_ceil_i64_plus1:
; NOBMI: # %bb.0: # %entry
; NOBMI-NEXT: movl $127, %ecx
; NOBMI-NEXT: bsrq %rdi, %rcx
; NOBMI-NEXT: xorl $63, %ecx
; NOBMI-NEXT: negb %cl
; NOBMI-NEXT: movl $1, %edx
; NOBMI-NEXT: movl $1, %eax
; NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx
; NOBMI-NEXT: shlq %cl, %rax
; NOBMI-NEXT: decq %rdi
; NOBMI-NEXT: cmpq $-2, %rdi
; NOBMI-NEXT: cmovaeq %rdx, %rax
; NOBMI-NEXT: retq
;
; BMI-LABEL: bit_ceil_i64_plus1:
; BMI: # %bb.0: # %entry
; BMI-NEXT: lzcntq %rdi, %rax
; BMI-NEXT: negb %al
; BMI-NEXT: movl $1, %ecx
; BMI-NEXT: shlxq %rax, %rcx, %rax
; BMI-NEXT: decq %rdi
; BMI-NEXT: cmpq $-2, %rdi
; BMI-NEXT: cmovaeq %rcx, %rax
; BMI-NEXT: retq
entry:
  %ctlz = tail call i64 @llvm.ctlz.i64(i64 %x, i1 false)
  %cnt = sub i64 64, %ctlz
  %shl = shl i64 1, %cnt
  %dec = add i64 %x, -1
  %ult = icmp ult i64 %dec, -2
  %sel = select i1 %ult, i64 %shl, i64 1
  ret i64 %sel
}

; Count-leading-zeros intrinsics used by the tests in this file. The i1
; immediate is the is_zero_poison flag; each call shown here passes false, so
; a zero input must produce the bit width instead of poison.
declare i32 @llvm.ctlz.i32(i32, i1 immarg)
declare i64 @llvm.ctlz.i64(i64, i1 immarg)