
This reverts commit 9c319d5bb40785c969d2af76535ca62448dfafa7. Some issues were discovered with the bootstrap builds that appear to have been caused by this commit. I'm reverting to investigate.
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
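; Reference for the bitfield-move forms checked below:
;   bfi   Rd, Rn, #lsb, #width  : insert the low 'width' bits of Rn into
;                                 Rd[lsb .. lsb+width-1]; other Rd bits are kept.
;   bfxil Rd, Rn, #lsb, #width  : copy Rn[lsb .. lsb+width-1] into the low
;                                 'width' bits of Rd; other Rd bits are kept.
;   ubfiz Rd, Rn, #lsb, #width  : clear Rd, then insert the low 'width' bits
;                                 of Rn at bit 'lsb'.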
; First, a simple example from Clang. The registers could plausibly be
; different, but probably won't be.
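; A plausible C equivalent (reconstructed for illustration; the exact source is
; not recorded here) is a bitfield setter such as:
;
;   struct foo {
;     unsigned char a : 3;
;     unsigned char b : 4;
;     unsigned char c : 1;
;     unsigned char d[2];
;     unsigned char e;
;   };
;
;   struct foo from_clang(struct foo f, int n) {
;     f.b = n;   /* keep bits 0-2 and 7 (0x87), insert n at bits 3-6 */
;     return f;
;   }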
%struct.foo = type { i8, [2 x i8], i8 }

define [1 x i64] @from_clang([1 x i64] %f.coerce, i32 %n) nounwind readnone {
; CHECK-LABEL: from_clang:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov w8, #135 // =0x87
; CHECK-NEXT:    and x9, x0, #0xffffff00
; CHECK-NEXT:    and w8, w0, w8
; CHECK-NEXT:    bfi w8, w1, #3, #4
; CHECK-NEXT:    orr x0, x8, x9
; CHECK-NEXT:    ret
entry:
  %f.coerce.fca.0.extract = extractvalue [1 x i64] %f.coerce, 0
  %tmp.sroa.0.0.extract.trunc = trunc i64 %f.coerce.fca.0.extract to i32
  %bf.value = shl i32 %n, 3
  %0 = and i32 %bf.value, 120
  %f.sroa.0.0.insert.ext.masked = and i32 %tmp.sroa.0.0.extract.trunc, 135
  %1 = or i32 %f.sroa.0.0.insert.ext.masked, %0
  %f.sroa.0.0.extract.trunc = zext i32 %1 to i64
  %tmp1.sroa.1.1.insert.insert = and i64 %f.coerce.fca.0.extract, 4294967040
  %tmp1.sroa.0.0.insert.insert = or i64 %f.sroa.0.0.extract.trunc, %tmp1.sroa.1.1.insert.insert
  %.fca.0.insert = insertvalue [1 x i64] undef, i64 %tmp1.sroa.0.0.insert.insert, 0
  ret [1 x i64] %.fca.0.insert
}
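; In @test_whole32 below the two masks describe the bitfield directly:
; 0x7c000000 is five contiguous ones at bits [26,31) and 0x83ffffff is its
; complement, so the or of the two masked values becomes a single
; "bfi w8, w9, #26, #5". The 64-bit tests that follow work the same way.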
define void @test_whole32(ptr %existing, ptr %new) {
; CHECK-LABEL: test_whole32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr w8, [x0]
; CHECK-NEXT:    ldr w9, [x1]
; CHECK-NEXT:    bfi w8, w9, #26, #5
; CHECK-NEXT:    str w8, [x0]
; CHECK-NEXT:    ret
  %oldval = load volatile i32, ptr %existing
  %oldval_keep = and i32 %oldval, 2214592511 ; =0x83ffffff

  %newval = load volatile i32, ptr %new
  %newval_shifted = shl i32 %newval, 26
  %newval_masked = and i32 %newval_shifted, 2080374784 ; = 0x7c000000

  %combined = or i32 %oldval_keep, %newval_masked
  store volatile i32 %combined, ptr %existing

  ret void
}

define void @test_whole64(ptr %existing, ptr %new) {
; CHECK-LABEL: test_whole64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr x8, [x0]
; CHECK-NEXT:    ldr x9, [x1]
; CHECK-NEXT:    bfi x8, x9, #26, #14
; CHECK-NEXT:    str x8, [x0]
; CHECK-NEXT:    ret
  %oldval = load volatile i64, ptr %existing
  %oldval_keep = and i64 %oldval, 18446742974265032703 ; = 0xffffff0003ffffffL

  %newval = load volatile i64, ptr %new
  %newval_shifted = shl i64 %newval, 26
  %newval_masked = and i64 %newval_shifted, 1099444518912 ; = 0xfffc000000

  %combined = or i64 %oldval_keep, %newval_masked
  store volatile i64 %combined, ptr %existing

  ret void
}
define void @test_whole32_from64(ptr %existing, ptr %new) {
; CHECK-LABEL: test_whole32_from64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr x8, [x0]
; CHECK-NEXT:    ldr x9, [x1]
; CHECK-NEXT:    and x8, x8, #0xffff0000
; CHECK-NEXT:    bfxil x8, x9, #0, #16
; CHECK-NEXT:    str x8, [x0]
; CHECK-NEXT:    ret
  %oldval = load volatile i64, ptr %existing
  %oldval_keep = and i64 %oldval, 4294901760 ; = 0xffff0000

  %newval = load volatile i64, ptr %new
  %newval_masked = and i64 %newval, 65535 ; = 0xffff

  %combined = or i64 %oldval_keep, %newval_masked
  store volatile i64 %combined, ptr %existing

  ret void
}

define void @test_32bit_masked(ptr %existing, ptr %new) {
; CHECK-LABEL: test_32bit_masked:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr w9, [x0]
; CHECK-NEXT:    mov w8, #135 // =0x87
; CHECK-NEXT:    ldr w10, [x1]
; CHECK-NEXT:    and w8, w9, w8
; CHECK-NEXT:    bfi w8, w10, #3, #4
; CHECK-NEXT:    str w8, [x0]
; CHECK-NEXT:    ret
  %oldval = load volatile i32, ptr %existing
  %oldval_keep = and i32 %oldval, 135 ; = 0x87

  %newval = load volatile i32, ptr %new
  %newval_shifted = shl i32 %newval, 3
  %newval_masked = and i32 %newval_shifted, 120 ; = 0x78

  %combined = or i32 %oldval_keep, %newval_masked
  store volatile i32 %combined, ptr %existing

  ret void
}
define void @test_64bit_masked(ptr %existing, ptr %new) {
; CHECK-LABEL: test_64bit_masked:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr x8, [x0]
; CHECK-NEXT:    ldr x9, [x1]
; CHECK-NEXT:    and x8, x8, #0xff00000000
; CHECK-NEXT:    bfi x8, x9, #40, #8
; CHECK-NEXT:    str x8, [x0]
; CHECK-NEXT:    ret
  %oldval = load volatile i64, ptr %existing
  %oldval_keep = and i64 %oldval, 1095216660480 ; = 0xff_0000_0000

  %newval = load volatile i64, ptr %new
  %newval_shifted = shl i64 %newval, 40
  %newval_masked = and i64 %newval_shifted, 280375465082880 ; = 0xff00_0000_0000

  %combined = or i64 %newval_masked, %oldval_keep
  store volatile i64 %combined, ptr %existing

  ret void
}

; Mask is too complicated for literal ANDwwi, make sure other avenues are tried.
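; 0x287 sets bits 0, 1, 2, 7 and 9, which is not a single (rotated) run of
; ones, so it cannot be encoded as a logical immediate and is materialized
; with a mov instead. The inserted field mask 0x78 is still contiguous, so the
; bfi itself is unaffected.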
define void @test_32bit_complexmask(ptr %existing, ptr %new) {
; CHECK-LABEL: test_32bit_complexmask:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr w9, [x0]
; CHECK-NEXT:    mov w8, #647 // =0x287
; CHECK-NEXT:    ldr w10, [x1]
; CHECK-NEXT:    and w8, w9, w8
; CHECK-NEXT:    bfi w8, w10, #3, #4
; CHECK-NEXT:    str w8, [x0]
; CHECK-NEXT:    ret
  %oldval = load volatile i32, ptr %existing
  %oldval_keep = and i32 %oldval, 647 ; = 0x287

  %newval = load volatile i32, ptr %new
  %newval_shifted = shl i32 %newval, 3
  %newval_masked = and i32 %newval_shifted, 120 ; = 0x78

  %combined = or i32 %oldval_keep, %newval_masked
  store volatile i32 %combined, ptr %existing

  ret void
}
; Neither mask is a contiguous set of 1s. BFI can't be used
define void @test_32bit_badmask(ptr %existing, ptr %new) {
; CHECK-LABEL: test_32bit_badmask:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr w8, [x0]
; CHECK-NEXT:    ldr w9, [x1]
; CHECK-NEXT:    mov w10, #632 // =0x278
; CHECK-NEXT:    mov w11, #135 // =0x87
; CHECK-NEXT:    and w9, w10, w9, lsl #3
; CHECK-NEXT:    and w8, w8, w11
; CHECK-NEXT:    orr w8, w8, w9
; CHECK-NEXT:    str w8, [x0]
; CHECK-NEXT:    ret
  %oldval = load volatile i32, ptr %existing
  %oldval_keep = and i32 %oldval, 135 ; = 0x87

  %newval = load volatile i32, ptr %new
  %newval_shifted = shl i32 %newval, 3
  %newval_masked = and i32 %newval_shifted, 632 ; = 0x278

  %combined = or i32 %oldval_keep, %newval_masked
  store volatile i32 %combined, ptr %existing

  ret void
}
; Ditto
define void @test_64bit_badmask(ptr %existing, ptr %new) {
; CHECK-LABEL: test_64bit_badmask:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr x8, [x0]
; CHECK-NEXT:    ldr x9, [x1]
; CHECK-NEXT:    mov w10, #664 // =0x298
; CHECK-NEXT:    mov w11, #135 // =0x87
; CHECK-NEXT:    and x9, x10, x9, lsl #3
; CHECK-NEXT:    and x8, x8, x11
; CHECK-NEXT:    orr x8, x8, x9
; CHECK-NEXT:    str x8, [x0]
; CHECK-NEXT:    ret
  %oldval = load volatile i64, ptr %existing
  %oldval_keep = and i64 %oldval, 135 ; = 0x87

  %newval = load volatile i64, ptr %new
  %newval_shifted = shl i64 %newval, 3
  %newval_masked = and i64 %newval_shifted, 664 ; = 0x298

  %combined = or i64 %oldval_keep, %newval_masked
  store volatile i64 %combined, ptr %existing

  ret void
}
; Bitfield insert where there's a left-over shr needed at the beginning
; (e.g. result of str.bf1 = str.bf2)
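; (%new << 12) & 0x7c000000 keeps bits [26,31), which come from bits [14,19)
; of the unshifted value, hence the "lsr w9, w9, #14" feeding
; "bfi w8, w9, #26, #5".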
define void @test_32bit_with_shr(ptr %existing, ptr %new) {
; CHECK-LABEL: test_32bit_with_shr:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr w8, [x0]
; CHECK-NEXT:    ldr w9, [x1]
; CHECK-NEXT:    lsr w9, w9, #14
; CHECK-NEXT:    bfi w8, w9, #26, #5
; CHECK-NEXT:    str w8, [x0]
; CHECK-NEXT:    ret
  %oldval = load volatile i32, ptr %existing
  %oldval_keep = and i32 %oldval, 2214592511 ; =0x83ffffff

  %newval = load i32, ptr %new
  %newval_shifted = shl i32 %newval, 12
  %newval_masked = and i32 %newval_shifted, 2080374784 ; = 0x7c000000

  %combined = or i32 %oldval_keep, %newval_masked
  store volatile i32 %combined, ptr %existing

  ret void
}
; Bitfield insert where the second or operand is a better match to be folded into the BFM
define void @test_32bit_opnd1_better(ptr %existing, ptr %new) {
; CHECK-LABEL: test_32bit_opnd1_better:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr w8, [x0]
; CHECK-NEXT:    ldr w9, [x1]
; CHECK-NEXT:    and w8, w8, #0xffff
; CHECK-NEXT:    bfi w8, w9, #16, #8
; CHECK-NEXT:    str w8, [x0]
; CHECK-NEXT:    ret
  %oldval = load volatile i32, ptr %existing
  %oldval_keep = and i32 %oldval, 65535 ; 0x0000ffff

  %newval = load i32, ptr %new
  %newval_shifted = shl i32 %newval, 16
  %newval_masked = and i32 %newval_shifted, 16711680 ; 0x00ff0000

  %combined = or i32 %oldval_keep, %newval_masked
  store volatile i32 %combined, ptr %existing

  ret void
}
; Tests when all the bits from one operand are not useful
define i32 @test_nouseful_bits(i8 %a, i32 %b) {
; CHECK-LABEL: test_nouseful_bits:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and w8, w0, #0xff
; CHECK-NEXT:    lsl w8, w8, #8
; CHECK-NEXT:    mov w9, w8
; CHECK-NEXT:    bfxil w9, w0, #0, #8
; CHECK-NEXT:    orr w0, w8, w9, lsl #16
; CHECK-NEXT:    ret
  %conv = zext i8 %a to i32     ;  0  0  0  A
  %shl = shl i32 %b, 8          ; B2 B1 B0  0
  %or = or i32 %conv, %shl      ; B2 B1 B0  A
  %shl.1 = shl i32 %or, 8       ; B1 B0  A  0
  %or.1 = or i32 %conv, %shl.1  ; B1 B0  A  A
  %shl.2 = shl i32 %or.1, 8     ; B0  A  A  0
  %or.2 = or i32 %conv, %shl.2  ; B0  A  A  A
  %shl.3 = shl i32 %or.2, 8     ;  A  A  A  0
  %or.3 = or i32 %conv, %shl.3  ;  A  A  A  A
  %shl.4 = shl i32 %or.3, 8     ;  A  A  A  0
  ret i32 %shl.4
}
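; In the next four tests only the low byte/halfword of the combined value is
; stored, so bits above the stored width are never observed and the or of the
; two masked values can be emitted as a single narrow bfxil from %x before the
; byte/halfword store.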
define void @test_nouseful_strb(ptr %ptr32, ptr %ptr8, i32 %x) {
; CHECK-LABEL: test_nouseful_strb:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldr w8, [x0]
; CHECK-NEXT:    bfxil w8, w2, #16, #3
; CHECK-NEXT:    strb w8, [x1]
; CHECK-NEXT:    ret
entry:
  %0 = load i32, ptr %ptr32, align 8
  %and = and i32 %0, -8
  %shr = lshr i32 %x, 16
  %and1 = and i32 %shr, 7
  %or = or i32 %and, %and1
  %trunc = trunc i32 %or to i8
  store i8 %trunc, ptr %ptr8
  ret void
}

define void @test_nouseful_strh(ptr %ptr32, ptr %ptr16, i32 %x) {
; CHECK-LABEL: test_nouseful_strh:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldr w8, [x0]
; CHECK-NEXT:    bfxil w8, w2, #16, #4
; CHECK-NEXT:    strh w8, [x1]
; CHECK-NEXT:    ret
entry:
  %0 = load i32, ptr %ptr32, align 8
  %and = and i32 %0, -16
  %shr = lshr i32 %x, 16
  %and1 = and i32 %shr, 15
  %or = or i32 %and, %and1
  %trunc = trunc i32 %or to i16
  store i16 %trunc, ptr %ptr16
  ret void
}
define void @test_nouseful_sturb(ptr %ptr32, ptr %ptr8, i32 %x) {
; CHECK-LABEL: test_nouseful_sturb:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldr w8, [x0]
; CHECK-NEXT:    bfxil w8, w2, #16, #3
; CHECK-NEXT:    sturb w8, [x1, #-1]
; CHECK-NEXT:    ret
entry:
  %0 = load i32, ptr %ptr32, align 8
  %and = and i32 %0, -8
  %shr = lshr i32 %x, 16
  %and1 = and i32 %shr, 7
  %or = or i32 %and, %and1
  %trunc = trunc i32 %or to i8
  %gep = getelementptr i8, ptr %ptr8, i64 -1
  store i8 %trunc, ptr %gep
  ret void
}

define void @test_nouseful_sturh(ptr %ptr32, ptr %ptr16, i32 %x) {
; CHECK-LABEL: test_nouseful_sturh:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldr w8, [x0]
; CHECK-NEXT:    bfxil w8, w2, #16, #4
; CHECK-NEXT:    sturh w8, [x1, #-2]
; CHECK-NEXT:    ret
entry:
  %0 = load i32, ptr %ptr32, align 8
  %and = and i32 %0, -16
  %shr = lshr i32 %x, 16
  %and1 = and i32 %shr, 15
  %or = or i32 %and, %and1
  %trunc = trunc i32 %or to i16
  %gep = getelementptr i16, ptr %ptr16, i64 -1
  store i16 %trunc, ptr %gep
  ret void
}
; The next set of tests generate a BFXIL from 'or (and X, Mask0Imm),
; (and Y, Mask1Imm)' iff Mask0Imm and ~Mask1Imm are equivalent and one of the
; MaskImms is a shifted mask (e.g., 0x000ffff0).
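; For example, in @test_or_and_and1 below Mask0Imm = 0xffff000f and
; Mask1Imm = 0x0000fff0: Mask0Imm == ~Mask1Imm and Mask1Imm is a contiguous
; run of ones at bits [4,16), so the or of the two 'and's becomes a single
; bitfield move (materialized here as lsr #4 plus bfi #4, #12).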
define i32 @test_or_and_and1(i32 %a, i32 %b) {
; CHECK-LABEL: test_or_and_and1:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    lsr w8, w1, #4
; CHECK-NEXT:    bfi w0, w8, #4, #12
; CHECK-NEXT:    ret
entry:
  %and = and i32 %a, -65521 ; 0xffff000f
  %and1 = and i32 %b, 65520 ; 0x0000fff0
  %or = or i32 %and1, %and
  ret i32 %or
}

define i32 @test_or_and_and2(i32 %a, i32 %b) {
; CHECK-LABEL: test_or_and_and2:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    lsr w8, w0, #4
; CHECK-NEXT:    mov w0, w1
; CHECK-NEXT:    bfi w0, w8, #4, #12
; CHECK-NEXT:    ret
entry:
  %and = and i32 %a, 65520 ; 0x0000fff0
  %and1 = and i32 %b, -65521 ; 0xffff000f
  %or = or i32 %and1, %and
  ret i32 %or
}

define i64 @test_or_and_and3(i64 %a, i64 %b) {
; CHECK-LABEL: test_or_and_and3:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    lsr x8, x1, #16
; CHECK-NEXT:    bfi x0, x8, #16, #32
; CHECK-NEXT:    ret
entry:
  %and = and i64 %a, -281474976645121 ; 0xffff00000000ffff
  %and1 = and i64 %b, 281474976645120 ; 0x0000ffffffff0000
  %or = or i64 %and1, %and
  ret i64 %or
}
; Don't convert 'and' with multiple uses.
define i32 @test_or_and_and4(i32 %a, i32 %b, ptr %ptr) {
; CHECK-LABEL: test_or_and_and4:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    and w8, w0, #0xffff000f
; CHECK-NEXT:    and w9, w1, #0xfff0
; CHECK-NEXT:    orr w0, w9, w8
; CHECK-NEXT:    str w8, [x2]
; CHECK-NEXT:    ret
entry:
  %and = and i32 %a, -65521
  store i32 %and, ptr %ptr, align 4
  %and2 = and i32 %b, 65520
  %or = or i32 %and2, %and
  ret i32 %or
}

; Don't convert 'and' with multiple uses.
define i32 @test_or_and_and5(i32 %a, i32 %b, ptr %ptr) {
; CHECK-LABEL: test_or_and_and5:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    and w8, w1, #0xfff0
; CHECK-NEXT:    and w9, w0, #0xffff000f
; CHECK-NEXT:    orr w0, w8, w9
; CHECK-NEXT:    str w8, [x2]
; CHECK-NEXT:    ret
entry:
  %and = and i32 %b, 65520
  store i32 %and, ptr %ptr, align 4
  %and1 = and i32 %a, -65521
  %or = or i32 %and, %and1
  ret i32 %or
}
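; The tests below (@test1 through @test8) exercise forming BFI/BFXIL from
; 'or (and X, MaskImm), OrImm', where the inserted value is a constant that
; may first have to be materialized into a register.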
define i32 @test1(i32 %a) {
; CHECK-LABEL: test1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #5 // =0x5
; CHECK-NEXT:    bfxil w0, w8, #0, #4
; CHECK-NEXT:    ret
  %1 = and i32 %a, -16 ; 0xfffffff0
  %2 = or i32 %1, 5 ; 0x00000005
  ret i32 %2
}

define i32 @test2(i32 %a) {
; CHECK-LABEL: test2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #10 // =0xa
; CHECK-NEXT:    bfi w0, w8, #22, #4
; CHECK-NEXT:    ret
  %1 = and i32 %a, -62914561 ; 0xfc3fffff
  %2 = or i32 %1, 41943040 ; 0x02800000
  ret i32 %2
}
define i64 @test3(i64 %a) {
; CHECK-LABEL: test3:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov x8, #5 // =0x5
; CHECK-NEXT:    bfxil x0, x8, #0, #3
; CHECK-NEXT:    ret
  %1 = and i64 %a, -8 ; 0xfffffffffffffff8
  %2 = or i64 %1, 5 ; 0x0000000000000005
  ret i64 %2
}

define i64 @test4(i64 %a) {
; CHECK-LABEL: test4:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov x8, #9 // =0x9
; CHECK-NEXT:    bfi x0, x8, #1, #7
; CHECK-NEXT:    ret
  %1 = and i64 %a, -255 ; 0xffffffffffffff01
  %2 = or i64 %1, 18 ; 0x0000000000000012
  ret i64 %2
}

; Don't generate BFI/BFXIL if the immediate can be encoded in the ORR.
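; Here 6 (0b110) is a valid logical immediate, so 'and' plus 'orr #0x6' is
; already two instructions, and a bfxil would still need a mov to materialize
; the constant. Contrast with @test1, where 5 (0b101) is not a valid logical
; immediate and a mov is needed either way.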
define i32 @test5(i32 %a) {
; CHECK-LABEL: test5:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and w8, w0, #0xfffffff0
; CHECK-NEXT:    orr w0, w8, #0x6
; CHECK-NEXT:    ret
  %1 = and i32 %a, 4294967280 ; 0xfffffff0
  %2 = or i32 %1, 6 ; 0x00000006
  ret i32 %2
}

; BFXIL will use the same constant as the ORR, so we don't care how the constant
; is materialized (it's an equal cost either way).
define i32 @test6(i32 %a) {
; CHECK-LABEL: test6:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #23250 // =0x5ad2
; CHECK-NEXT:    movk w8, #11, lsl #16
; CHECK-NEXT:    bfxil w0, w8, #0, #20
; CHECK-NEXT:    ret
  %1 = and i32 %a, 4293918720 ; 0xfff00000
  %2 = or i32 %1, 744146 ; 0x000b5ad2
  ret i32 %2
}
; BFIs that require the same number of instructions to materialize the constant
; as the original ORR are okay.
define i32 @test7(i32 %a) {
; CHECK-LABEL: test7:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #44393 // =0xad69
; CHECK-NEXT:    movk w8, #5, lsl #16
; CHECK-NEXT:    bfi w0, w8, #1, #19
; CHECK-NEXT:    ret
  %1 = and i32 %a, 4293918721 ; 0xfff00001
  %2 = or i32 %1, 744146 ; 0x000b5ad2
  ret i32 %2
}
; BFIs that require more instructions to materialize the constant than the
; original ORR are not okay. In this case we would be replacing the 'and' with
; a 'movk', which would decrease ILP while using the same number of
; instructions.
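; Concretely: a bfi for @test8 would need the 40-bit value 0x8f56795300 in a
; register, a three-instruction movz/movk chain that the bfi then depends on.
; The emitted and-immediate / two-instruction constant / orr sequence has the
; same length, but the 'and' can execute in parallel with the constant
; materialization.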
define i64 @test8(i64 %a) {
; CHECK-LABEL: test8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov x8, #2035482624 // =0x79530000
; CHECK-NEXT:    and x9, x0, #0xff000000000000ff
; CHECK-NEXT:    movk x8, #36694, lsl #32
; CHECK-NEXT:    orr x0, x9, x8
; CHECK-NEXT:    ret
  %1 = and i64 %a, -72057594037927681 ; 0xff000000000000ff
  %2 = or i64 %1, 157601565442048 ; 0x00008f5679530000
  ret i64 %2
}

; This test exposed an issue with an overly aggressive assert. The bit of code
; that is expected to catch this case is unable to deal with the trunc, which
; results in a failing check due to a mismatch between the BFI opcode and
; the expected value type of the OR.
define i32 @test9(i64 %b, i32 %e) {
; CHECK-LABEL: test9:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsr x0, x0, #12
; CHECK-NEXT:    lsr w8, w1, #23
; CHECK-NEXT:    bfi w0, w8, #23, #9
; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT:    ret
  %c = lshr i64 %b, 12
  %d = trunc i64 %c to i32
  %f = and i32 %d, 8388607
  %g = and i32 %e, -8388608
  %h = or i32 %g, %f
  ret i32 %h
}
define <2 x i32> @test_complex_type(ptr %addr, i64 %in, ptr %bf) {
; CHECK-LABEL: test_complex_type:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0], #8
; CHECK-NEXT:    orr x8, x0, x1, lsl #32
; CHECK-NEXT:    str x8, [x2]
; CHECK-NEXT:    ret
  %vec = load <2 x i32>, ptr %addr

  %vec.next = getelementptr <2 x i32>, ptr %addr, i32 1
  %lo = ptrtoint ptr %vec.next to i64

  %hi = shl i64 %in, 32
  %both = or i64 %lo, %hi
  store i64 %both, ptr %bf

  ret <2 x i32> %vec
}

define i64 @test_truncated_shift(i64 %x, i64 %y) {
; CHECK-LABEL: test_truncated_shift:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    bfi x0, x1, #25, #5
; CHECK-NEXT:    ret
entry:
  %and = and i64 %x, -1040187393
  %shl4 = shl i64 %y, 25
  %and5 = and i64 %shl4, 1040187392
  %or = or i64 %and5, %and
  ret i64 %or
}
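; 0x7f80 is eight contiguous ones at bits [7,15), so the shl+and pair is
; exactly an unsigned bitfield insert-in-zero: ubfiz x0, x0, #7, #8.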
define i64 @test_and_extended_shift_with_imm(i64 %0) {
; CHECK-LABEL: test_and_extended_shift_with_imm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ubfiz x0, x0, #7, #8
; CHECK-NEXT:    ret
  %2 = shl i64 %0, 7
  %3 = and i64 %2, 32640 ; #0x7f80
  ret i64 %3
}
; orr with left-shifted operand is better than bfi, since it improves data
; dependency, and orr has a smaller latency and higher throughput than bfm on
; some AArch64 processors (for the rest, orr is at least as good as bfm)
;
;   ubfx x8, x0, #8, #7
;   and x9, x0, #0x7f
;   orr x0, x9, x8, lsl #7
define i64 @test_orr_not_bfxil_i64(i64 %0) {
; CHECK-LABEL: test_orr_not_bfxil_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ubfx x8, x0, #8, #7
; CHECK-NEXT:    and x9, x0, #0x7f
; CHECK-NEXT:    orr x0, x9, x8, lsl #7
; CHECK-NEXT:    ret
  %2 = and i64 %0, 127
  %3 = lshr i64 %0, 1
  %4 = and i64 %3, 16256 ; 0x3f80
  %5 = or i64 %4, %2
  ret i64 %5
}
; The 32-bit test for `test_orr_not_bfxil_i64`.
define i32 @test_orr_not_bfxil_i32(i32 %0) {
; CHECK-LABEL: test_orr_not_bfxil_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ubfx w8, w0, #8, #7
; CHECK-NEXT:    and w9, w0, #0x7f
; CHECK-NEXT:    orr w0, w9, w8, lsl #7
; CHECK-NEXT:    ret
  %2 = and i32 %0, 127
  %3 = lshr i32 %0, 1
  %4 = and i32 %3, 16256 ; 0x3f80
  %5 = or i32 %4, %2
  ret i32 %5
}
; For the 'or', one operand is a left shift of the other operand, so an orr
; with a left-shifted operand is generated (not a bfi).
define i64 @test_orr_not_bfi_i64(i64 %0) {
; CHECK-LABEL: test_orr_not_bfi_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and x8, x0, #0xff
; CHECK-NEXT:    orr x0, x8, x8, lsl #8
; CHECK-NEXT:    ret
  %2 = and i64 %0, 255
  %3 = shl i64 %2, 8
  %4 = or i64 %2, %3
  ret i64 %4
}
; bfi is better than orr, since it would simplify away two instructions
; (%mask and %bit-field-pos-op).
define i32 @test_bfi_not_orr_i32(i32 %0, i32 %1) {
; CHECK-LABEL: test_bfi_not_orr_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and w8, w1, #0xff
; CHECK-NEXT:    bfi w8, w0, #8, #8
; CHECK-NEXT:    mov w0, w8
; CHECK-NEXT:    ret
  %bfi_dst = and i32 %1, 255
  %mask = and i32 %0, 255
  %bit-field-pos-op = shl i32 %mask, 8
  %or_res = or i32 %bit-field-pos-op, %bfi_dst
  ret i32 %or_res
}
; orr is generated (not bfi), since both simplify away one instruction (%3)
; while orr has shorter latency and higher throughput.
define i32 @test_orr_not_bfi_i32(i32 %0) {
; CHECK-LABEL: test_orr_not_bfi_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and w8, w0, #0xff
; CHECK-NEXT:    orr w0, w8, w8, lsl #8
; CHECK-NEXT:    ret
  %2 = and i32 %0, 255
  %3 = shl i32 %2, 8
  %4 = or i32 %2, %3
  ret i32 %4
}
; bfxil is better than orr, since it would simplify away two instructions
; (%shifted-mask and %bit-field-extract-op).
define i64 @test_bfxil_not_orr_i64(i64 %0, i64 %1) {
; CHECK-LABEL: test_bfxil_not_orr_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and x0, x0, #0xff000
; CHECK-NEXT:    bfxil x0, x1, #12, #8
; CHECK-NEXT:    ret
  %shifted-mask = and i64 %1, 1044480
  %bfi-dst = and i64 %0, 1044480
  %bit-field-extract-op = lshr i64 %shifted-mask, 12
  %or_res = or i64 %bit-field-extract-op, %bfi-dst
  ret i64 %or_res
}
; orr is generated (not bfxil), since one operand is the right shift of another
; operand.
define i64 @orr_not_bfxil_test2_i64(i64 %0) {
; CHECK-LABEL: orr_not_bfxil_test2_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and x8, x0, #0xff000
; CHECK-NEXT:    orr x0, x8, x8, lsr #12
; CHECK-NEXT:    ret
  %2 = and i64 %0, 1044480 ; 0xff000
  %3 = lshr i64 %2, 12
  %4 = or i64 %2, %3
  ret i64 %4
}
; bfxil simplifies away two instructions (those that compute %shifted-mask and
; %bit-field-extract-op), so it's better than orr (which simplifies away at
; most one shift).
define i32 @test_bfxil_not_orr_i32(i32 %0, i32 %1) {
; CHECK-LABEL: test_bfxil_not_orr_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and w0, w0, #0xff000
; CHECK-NEXT:    bfxil w0, w1, #12, #8
; CHECK-NEXT:    ret
  %shifted-mask = and i32 %1, 1044480
  %bfxil-dst = and i32 %0, 1044480
  %bit-field-extract-op = lshr i32 %shifted-mask, 12
  %or_res = or i32 %bit-field-extract-op, %bfxil-dst
  ret i32 %or_res
}
; one operand is the shift of another operand, so orr is generated (not bfxil).
define i32 @orr_not_bfxil_test2_i32(i32 %0) {
; CHECK-LABEL: orr_not_bfxil_test2_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and w8, w0, #0xff000
; CHECK-NEXT:    orr w0, w8, w8, lsr #12
; CHECK-NEXT:    ret
  %2 = and i32 %0, 1044480 ; 0xff000
  %3 = lshr i32 %2, 12
  %4 = or i32 %2, %3
  ret i32 %4
}
define i16 @implicit_trunc_of_imm(ptr %p, i16 %a, i16 %b) {
; CHECK-LABEL: implicit_trunc_of_imm:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    and w8, w1, #0xffffe000
; CHECK-NEXT:    mov x9, x0
; CHECK-NEXT:    mov w10, w8
; CHECK-NEXT:    mov w0, w8
; CHECK-NEXT:    bfxil w10, w2, #0, #1
; CHECK-NEXT:    strh w10, [x9]
; CHECK-NEXT:    ret
entry:
  %and1 = and i16 %a, -8192
  %and2 = and i16 %b, 1
  %or = or i16 %and2, %and1
  store i16 %or, ptr %p
  ret i16 %and1
}