
This reverts commit 9c319d5bb40785c969d2af76535ca62448dfafa7. Some issues were discovered with the bootstrap builds, which seem like they were caused by this commit. I'm reverting to investigate.
3372 lines
130 KiB
LLVM
3372 lines
130 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE

; Three llc configurations are exercised above: SVE in streaming-compatible
; mode and SME in streaming mode share the default check prefix, while
; streaming-compatible mode without any vector feature uses its own prefix.

target triple = "aarch64-unknown-linux-gnu"
|
|
|
|
;
; CLZ
;
|
|
|
|
; Count leading zeros of a <4 x i8> passed and returned by value.
; With SVE/SME the expected code works in 16-bit lanes: each lane is masked to
; its low byte, counted, and then reduced by the 8 extra zero bits.
; Without NEON or SVE the expected code goes through the stack one element at a
; time, using a 32-bit clz and subtracting 24.
define <4 x i8> @ctlz_v4i8(<4 x i8> %op) {
; CHECK-LABEL: ctlz_v4i8:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: and z0.h, z0.h, #0xff
; CHECK-NEXT: clz z0.h, p0/m, z0.h
; CHECK-NEXT: sub z0.h, z0.h, #8 // =0x8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: ctlz_v4i8:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: str d0, [sp, #-16]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #6]
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #4]
; NONEON-NOSVE-NEXT: ldrb w10, [sp, #2]
; NONEON-NOSVE-NEXT: ldrb w11, [sp]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: clz w9, w9
; NONEON-NOSVE-NEXT: clz w10, w10
; NONEON-NOSVE-NEXT: sub w8, w8, #24
; NONEON-NOSVE-NEXT: sub w9, w9, #24
; NONEON-NOSVE-NEXT: sub w10, w10, #24
; NONEON-NOSVE-NEXT: strh w8, [sp, #14]
; NONEON-NOSVE-NEXT: clz w8, w11
; NONEON-NOSVE-NEXT: sub w8, w8, #24
; NONEON-NOSVE-NEXT: strh w9, [sp, #12]
; NONEON-NOSVE-NEXT: strh w10, [sp, #10]
; NONEON-NOSVE-NEXT: strh w8, [sp, #8]
; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
  %res = call <4 x i8> @llvm.ctlz.v4i8(<4 x i8> %op)
  ret <4 x i8> %res
}
|
|
|
|
; Count leading zeros of a <8 x i8> passed and returned by value.
; With SVE/SME a single predicated byte-wide clz over 8 lanes is expected.
; Without NEON or SVE the expected code spills the vector and, for each byte,
; loads it, applies a 32-bit clz, subtracts 24 and stores the result back.
define <8 x i8> @ctlz_v8i8(<8 x i8> %op) {
; CHECK-LABEL: ctlz_v8i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl8
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: clz z0.b, p0/m, z0.b
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: ctlz_v8i8:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: str d0, [sp, #-16]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #7]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: sub w8, w8, #24
; NONEON-NOSVE-NEXT: strb w8, [sp, #15]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #6]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: sub w8, w8, #24
; NONEON-NOSVE-NEXT: strb w8, [sp, #14]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #5]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: sub w8, w8, #24
; NONEON-NOSVE-NEXT: strb w8, [sp, #13]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #4]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: sub w8, w8, #24
; NONEON-NOSVE-NEXT: strb w8, [sp, #12]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #3]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: sub w8, w8, #24
; NONEON-NOSVE-NEXT: strb w8, [sp, #11]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #2]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: sub w8, w8, #24
; NONEON-NOSVE-NEXT: strb w8, [sp, #10]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #1]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: sub w8, w8, #24
; NONEON-NOSVE-NEXT: strb w8, [sp, #9]
; NONEON-NOSVE-NEXT: ldrb w8, [sp]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: sub w8, w8, #24
; NONEON-NOSVE-NEXT: strb w8, [sp, #8]
; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
  %res = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %op)
  ret <8 x i8> %res
}
|
|
|
|
; Count leading zeros of a <16 x i8> passed and returned by value.
; With SVE/SME a single predicated byte-wide clz over 16 lanes is expected.
; Without NEON or SVE the expected code spills the 128-bit vector and handles
; the 16 bytes one at a time (32-bit clz, minus 24) before reloading the result.
define <16 x i8> @ctlz_v16i8(<16 x i8> %op) {
; CHECK-LABEL: ctlz_v16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl16
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: clz z0.b, p0/m, z0.b
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: ctlz_v16i8:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: str q0, [sp, #-32]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #15]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: sub w8, w8, #24
; NONEON-NOSVE-NEXT: strb w8, [sp, #31]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #14]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: sub w8, w8, #24
; NONEON-NOSVE-NEXT: strb w8, [sp, #30]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #13]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: sub w8, w8, #24
; NONEON-NOSVE-NEXT: strb w8, [sp, #29]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #12]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: sub w8, w8, #24
; NONEON-NOSVE-NEXT: strb w8, [sp, #28]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #11]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: sub w8, w8, #24
; NONEON-NOSVE-NEXT: strb w8, [sp, #27]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #10]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: sub w8, w8, #24
; NONEON-NOSVE-NEXT: strb w8, [sp, #26]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #9]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: sub w8, w8, #24
; NONEON-NOSVE-NEXT: strb w8, [sp, #25]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #8]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: sub w8, w8, #24
; NONEON-NOSVE-NEXT: strb w8, [sp, #24]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #7]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: sub w8, w8, #24
; NONEON-NOSVE-NEXT: strb w8, [sp, #23]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #6]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: sub w8, w8, #24
; NONEON-NOSVE-NEXT: strb w8, [sp, #22]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #5]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: sub w8, w8, #24
; NONEON-NOSVE-NEXT: strb w8, [sp, #21]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #4]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: sub w8, w8, #24
; NONEON-NOSVE-NEXT: strb w8, [sp, #20]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #3]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: sub w8, w8, #24
; NONEON-NOSVE-NEXT: strb w8, [sp, #19]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #2]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: sub w8, w8, #24
; NONEON-NOSVE-NEXT: strb w8, [sp, #18]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #1]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: sub w8, w8, #24
; NONEON-NOSVE-NEXT: strb w8, [sp, #17]
; NONEON-NOSVE-NEXT: ldrb w8, [sp]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: sub w8, w8, #24
; NONEON-NOSVE-NEXT: strb w8, [sp, #16]
; NONEON-NOSVE-NEXT: ldr q0, [sp, #16]
; NONEON-NOSVE-NEXT: add sp, sp, #32
; NONEON-NOSVE-NEXT: ret
  %res = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %op)
  ret <16 x i8> %res
}
|
|
|
|
; Load a <32 x i8> from %a, count leading zeros per element, and store the
; result back to %a.
; With SVE/SME the expected code processes the two 16-byte halves with one
; vl16 predicate. Without NEON or SVE the expected code copies both halves to
; the stack and handles the 32 bytes one at a time (32-bit clz, minus 24).
define void @ctlz_v32i8(ptr %a) {
; CHECK-LABEL: ctlz_v32i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: ptrue p0.b, vl16
; CHECK-NEXT: clz z0.b, p0/m, z0.b
; CHECK-NEXT: clz z1.b, p0/m, z1.b
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: ctlz_v32i8:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #31]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: sub w8, w8, #24
; NONEON-NOSVE-NEXT: strb w8, [sp, #63]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #30]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: sub w8, w8, #24
; NONEON-NOSVE-NEXT: strb w8, [sp, #62]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #29]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: sub w8, w8, #24
; NONEON-NOSVE-NEXT: strb w8, [sp, #61]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #28]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: sub w8, w8, #24
; NONEON-NOSVE-NEXT: strb w8, [sp, #60]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #27]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: sub w8, w8, #24
; NONEON-NOSVE-NEXT: strb w8, [sp, #59]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #26]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: sub w8, w8, #24
; NONEON-NOSVE-NEXT: strb w8, [sp, #58]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #25]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: sub w8, w8, #24
; NONEON-NOSVE-NEXT: strb w8, [sp, #57]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #24]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: sub w8, w8, #24
; NONEON-NOSVE-NEXT: strb w8, [sp, #56]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #23]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: sub w8, w8, #24
; NONEON-NOSVE-NEXT: strb w8, [sp, #55]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: sub w8, w8, #24
; NONEON-NOSVE-NEXT: strb w8, [sp, #54]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: sub w8, w8, #24
; NONEON-NOSVE-NEXT: strb w8, [sp, #53]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: sub w8, w8, #24
; NONEON-NOSVE-NEXT: strb w8, [sp, #52]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #19]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: sub w8, w8, #24
; NONEON-NOSVE-NEXT: strb w8, [sp, #51]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #18]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: sub w8, w8, #24
; NONEON-NOSVE-NEXT: strb w8, [sp, #50]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: sub w8, w8, #24
; NONEON-NOSVE-NEXT: strb w8, [sp, #49]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: sub w8, w8, #24
; NONEON-NOSVE-NEXT: strb w8, [sp, #48]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #15]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: sub w8, w8, #24
; NONEON-NOSVE-NEXT: strb w8, [sp, #47]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #14]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: sub w8, w8, #24
; NONEON-NOSVE-NEXT: strb w8, [sp, #46]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #13]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: sub w8, w8, #24
; NONEON-NOSVE-NEXT: strb w8, [sp, #45]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #12]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: sub w8, w8, #24
; NONEON-NOSVE-NEXT: strb w8, [sp, #44]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #11]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: sub w8, w8, #24
; NONEON-NOSVE-NEXT: strb w8, [sp, #43]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #10]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: sub w8, w8, #24
; NONEON-NOSVE-NEXT: strb w8, [sp, #42]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #9]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: sub w8, w8, #24
; NONEON-NOSVE-NEXT: strb w8, [sp, #41]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #8]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: sub w8, w8, #24
; NONEON-NOSVE-NEXT: strb w8, [sp, #40]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #7]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: sub w8, w8, #24
; NONEON-NOSVE-NEXT: strb w8, [sp, #39]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #6]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: sub w8, w8, #24
; NONEON-NOSVE-NEXT: strb w8, [sp, #38]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #5]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: sub w8, w8, #24
; NONEON-NOSVE-NEXT: strb w8, [sp, #37]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #4]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: sub w8, w8, #24
; NONEON-NOSVE-NEXT: strb w8, [sp, #36]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #3]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: sub w8, w8, #24
; NONEON-NOSVE-NEXT: strb w8, [sp, #35]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #2]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: sub w8, w8, #24
; NONEON-NOSVE-NEXT: strb w8, [sp, #34]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #1]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: sub w8, w8, #24
; NONEON-NOSVE-NEXT: strb w8, [sp, #33]
; NONEON-NOSVE-NEXT: ldrb w8, [sp]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: sub w8, w8, #24
; NONEON-NOSVE-NEXT: strb w8, [sp, #32]
; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32]
; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
; NONEON-NOSVE-NEXT: add sp, sp, #64
; NONEON-NOSVE-NEXT: ret
  %op = load <32 x i8>, ptr %a
  %res = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %op)
  store <32 x i8> %res, ptr %a
  ret void
}
|
|
|
|
; Count leading zeros of a <2 x i16> passed and returned by value.
; With SVE/SME the expected code works in 32-bit lanes: each lane is masked to
; its low 16 bits, counted, and then reduced by the 16 extra zero bits.
; Without NEON or SVE the expected code reloads each halfword from the stack,
; applies a 32-bit clz and subtracts 16.
define <2 x i16> @ctlz_v2i16(<2 x i16> %op) {
; CHECK-LABEL: ctlz_v2i16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ptrue p0.s, vl2
; CHECK-NEXT: and z0.s, z0.s, #0xffff
; CHECK-NEXT: clz z0.s, p0/m, z0.s
; CHECK-NEXT: sub z0.s, z0.s, #16 // =0x10
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: ctlz_v2i16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: str d0, [sp, #-16]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #4]
; NONEON-NOSVE-NEXT: ldrh w9, [sp]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: clz w9, w9
; NONEON-NOSVE-NEXT: sub w8, w8, #16
; NONEON-NOSVE-NEXT: sub w9, w9, #16
; NONEON-NOSVE-NEXT: stp w9, w8, [sp, #8]
; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
  %res = call <2 x i16> @llvm.ctlz.v2i16(<2 x i16> %op)
  ret <2 x i16> %res
}
|
|
|
|
; Count leading zeros of a <4 x i16> passed and returned by value.
; With SVE/SME a single predicated halfword clz over 4 lanes is expected.
; Without NEON or SVE the expected code spills the vector and, for each
; halfword, loads it, applies a 32-bit clz, subtracts 16 and stores it back.
define <4 x i16> @ctlz_v4i16(<4 x i16> %op) {
; CHECK-LABEL: ctlz_v4i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: clz z0.h, p0/m, z0.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: ctlz_v4i16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: str d0, [sp, #-16]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #6]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: sub w8, w8, #16
; NONEON-NOSVE-NEXT: strh w8, [sp, #14]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #4]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: sub w8, w8, #16
; NONEON-NOSVE-NEXT: strh w8, [sp, #12]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #2]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: sub w8, w8, #16
; NONEON-NOSVE-NEXT: strh w8, [sp, #10]
; NONEON-NOSVE-NEXT: ldrh w8, [sp]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: sub w8, w8, #16
; NONEON-NOSVE-NEXT: strh w8, [sp, #8]
; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
  %res = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %op)
  ret <4 x i16> %res
}
|
|
|
|
; Count leading zeros of a <8 x i16> passed and returned by value.
; With SVE/SME a single predicated halfword clz over 8 lanes is expected.
; Without NEON or SVE the expected code spills the 128-bit vector and handles
; the 8 halfwords one at a time (32-bit clz, minus 16) before reloading.
define <8 x i16> @ctlz_v8i16(<8 x i16> %op) {
; CHECK-LABEL: ctlz_v8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl8
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: clz z0.h, p0/m, z0.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: ctlz_v8i16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: str q0, [sp, #-32]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #14]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: sub w8, w8, #16
; NONEON-NOSVE-NEXT: strh w8, [sp, #30]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #12]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: sub w8, w8, #16
; NONEON-NOSVE-NEXT: strh w8, [sp, #28]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #10]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: sub w8, w8, #16
; NONEON-NOSVE-NEXT: strh w8, [sp, #26]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #8]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: sub w8, w8, #16
; NONEON-NOSVE-NEXT: strh w8, [sp, #24]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #6]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: sub w8, w8, #16
; NONEON-NOSVE-NEXT: strh w8, [sp, #22]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #4]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: sub w8, w8, #16
; NONEON-NOSVE-NEXT: strh w8, [sp, #20]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #2]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: sub w8, w8, #16
; NONEON-NOSVE-NEXT: strh w8, [sp, #18]
; NONEON-NOSVE-NEXT: ldrh w8, [sp]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: sub w8, w8, #16
; NONEON-NOSVE-NEXT: strh w8, [sp, #16]
; NONEON-NOSVE-NEXT: ldr q0, [sp, #16]
; NONEON-NOSVE-NEXT: add sp, sp, #32
; NONEON-NOSVE-NEXT: ret
  %res = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %op)
  ret <8 x i16> %res
}
|
|
|
|
; Load a <16 x i16> from %a, count leading zeros per element, and store the
; result back to %a.
; With SVE/SME the expected code processes the two 16-byte halves with one vl8
; halfword predicate. Without NEON or SVE the expected code copies both halves
; to the stack and handles the 16 halfwords one at a time (32-bit clz, minus 16).
define void @ctlz_v16i16(ptr %a) {
; CHECK-LABEL: ctlz_v16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: ptrue p0.h, vl8
; CHECK-NEXT: clz z0.h, p0/m, z0.h
; CHECK-NEXT: clz z1.h, p0/m, z1.h
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: ctlz_v16i16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #30]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: sub w8, w8, #16
; NONEON-NOSVE-NEXT: strh w8, [sp, #62]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #28]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: sub w8, w8, #16
; NONEON-NOSVE-NEXT: strh w8, [sp, #60]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #26]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: sub w8, w8, #16
; NONEON-NOSVE-NEXT: strh w8, [sp, #58]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #24]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: sub w8, w8, #16
; NONEON-NOSVE-NEXT: strh w8, [sp, #56]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #22]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: sub w8, w8, #16
; NONEON-NOSVE-NEXT: strh w8, [sp, #54]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: sub w8, w8, #16
; NONEON-NOSVE-NEXT: strh w8, [sp, #52]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: sub w8, w8, #16
; NONEON-NOSVE-NEXT: strh w8, [sp, #50]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: sub w8, w8, #16
; NONEON-NOSVE-NEXT: strh w8, [sp, #48]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #14]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: sub w8, w8, #16
; NONEON-NOSVE-NEXT: strh w8, [sp, #46]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #12]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: sub w8, w8, #16
; NONEON-NOSVE-NEXT: strh w8, [sp, #44]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #10]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: sub w8, w8, #16
; NONEON-NOSVE-NEXT: strh w8, [sp, #42]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #8]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: sub w8, w8, #16
; NONEON-NOSVE-NEXT: strh w8, [sp, #40]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #6]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: sub w8, w8, #16
; NONEON-NOSVE-NEXT: strh w8, [sp, #38]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #4]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: sub w8, w8, #16
; NONEON-NOSVE-NEXT: strh w8, [sp, #36]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #2]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: sub w8, w8, #16
; NONEON-NOSVE-NEXT: strh w8, [sp, #34]
; NONEON-NOSVE-NEXT: ldrh w8, [sp]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: sub w8, w8, #16
; NONEON-NOSVE-NEXT: strh w8, [sp, #32]
; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32]
; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
; NONEON-NOSVE-NEXT: add sp, sp, #64
; NONEON-NOSVE-NEXT: ret
  %op = load <16 x i16>, ptr %a
  %res = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %op)
  store <16 x i16> %res, ptr %a
  ret void
}
|
|
|
|
; Count leading zeros of a <2 x i32> passed and returned by value.
; With SVE/SME a single predicated word clz over 2 lanes is expected.
; Without NEON or SVE the expected code spills the vector, applies a 32-bit clz
; to each word, and stores the pair back before reloading the result.
define <2 x i32> @ctlz_v2i32(<2 x i32> %op) {
; CHECK-LABEL: ctlz_v2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl2
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: clz z0.s, p0/m, z0.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: ctlz_v2i32:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: str d0, [sp, #-16]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT: ldr w8, [sp, #4]
; NONEON-NOSVE-NEXT: clz w9, w8
; NONEON-NOSVE-NEXT: ldr w8, [sp]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #8]
; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
  %res = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %op)
  ret <2 x i32> %res
}
|
|
|
|
; Count leading zeros of a <4 x i32> passed and returned by value.
; With SVE/SME a single predicated word clz over 4 lanes is expected.
; Without NEON or SVE the expected code spills the 128-bit vector and handles
; the words in pairs: two scalar clz results are stored together with one stp.
define <4 x i32> @ctlz_v4i32(<4 x i32> %op) {
; CHECK-LABEL: ctlz_v4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl4
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: clz z0.s, p0/m, z0.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: ctlz_v4i32:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: str q0, [sp, #-32]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT: ldr w8, [sp, #12]
; NONEON-NOSVE-NEXT: clz w9, w8
; NONEON-NOSVE-NEXT: ldr w8, [sp, #8]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #4]
; NONEON-NOSVE-NEXT: clz w9, w8
; NONEON-NOSVE-NEXT: ldr w8, [sp]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #16]
; NONEON-NOSVE-NEXT: ldr q0, [sp, #16]
; NONEON-NOSVE-NEXT: add sp, sp, #32
; NONEON-NOSVE-NEXT: ret
  %res = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %op)
  ret <4 x i32> %res
}
|
|
|
|
; Load a <8 x i32> from %a, count leading zeros per element, and store the
; result back to %a.
; With SVE/SME the expected code processes the two 16-byte halves with one vl4
; word predicate. Without NEON or SVE the expected code copies both halves to
; the stack and handles the 8 words in pairs with scalar clz and stp.
define void @ctlz_v8i32(ptr %a) {
; CHECK-LABEL: ctlz_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: ptrue p0.s, vl4
; CHECK-NEXT: clz z0.s, p0/m, z0.s
; CHECK-NEXT: clz z1.s, p0/m, z1.s
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: ctlz_v8i32:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64
; NONEON-NOSVE-NEXT: ldr w8, [sp, #28]
; NONEON-NOSVE-NEXT: clz w9, w8
; NONEON-NOSVE-NEXT: ldr w8, [sp, #24]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #56]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #20]
; NONEON-NOSVE-NEXT: clz w9, w8
; NONEON-NOSVE-NEXT: ldr w8, [sp, #16]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #48]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #12]
; NONEON-NOSVE-NEXT: clz w9, w8
; NONEON-NOSVE-NEXT: ldr w8, [sp, #8]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #40]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #4]
; NONEON-NOSVE-NEXT: clz w9, w8
; NONEON-NOSVE-NEXT: ldr w8, [sp]
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #32]
; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32]
; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
; NONEON-NOSVE-NEXT: add sp, sp, #64
; NONEON-NOSVE-NEXT: ret
  %op = load <8 x i32>, ptr %a
  %res = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %op)
  store <8 x i32> %res, ptr %a
  ret void
}
|
|
|
|
; Count leading zeros of a <1 x i64> passed and returned by value.
; With SVE/SME a single predicated doubleword clz over 1 lane is expected.
; Without NEON or SVE the expected code moves the value to a general register
; with fmov, applies a 64-bit clz, and returns it through a stack slot.
define <1 x i64> @ctlz_v1i64(<1 x i64> %op) {
; CHECK-LABEL: ctlz_v1i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: clz z0.d, p0/m, z0.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: ctlz_v1i64:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #16
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT: fmov x8, d0
; NONEON-NOSVE-NEXT: clz x8, x8
; NONEON-NOSVE-NEXT: str x8, [sp, #8]
; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
  %res = call <1 x i64> @llvm.ctlz.v1i64(<1 x i64> %op)
  ret <1 x i64> %res
}
|
|
|
|
; Count leading zeros of a <2 x i64> passed and returned by value.
; With SVE/SME a single predicated doubleword clz over 2 lanes is expected.
; Without NEON or SVE the expected code spills the vector, applies a 64-bit clz
; to each doubleword, and stores the pair back before reloading the result.
define <2 x i64> @ctlz_v2i64(<2 x i64> %op) {
; CHECK-LABEL: ctlz_v2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: clz z0.d, p0/m, z0.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: ctlz_v2i64:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: str q0, [sp, #-32]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT: ldr x8, [sp, #8]
; NONEON-NOSVE-NEXT: clz x9, x8
; NONEON-NOSVE-NEXT: ldr x8, [sp]
; NONEON-NOSVE-NEXT: clz x8, x8
; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #16]
; NONEON-NOSVE-NEXT: ldr q0, [sp, #16]
; NONEON-NOSVE-NEXT: add sp, sp, #32
; NONEON-NOSVE-NEXT: ret
  %res = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %op)
  ret <2 x i64> %res
}
|
|
|
|
; Load a <4 x i64> from %a, count leading zeros per element, and store the
; result back to %a.
; With SVE/SME the expected code processes the two 16-byte halves with one vl2
; doubleword predicate. Without NEON or SVE the expected code copies both
; halves to the stack and handles the 4 doublewords in pairs with 64-bit clz.
define void @ctlz_v4i64(ptr %a) {
; CHECK-LABEL: ctlz_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: clz z0.d, p0/m, z0.d
; CHECK-NEXT: clz z1.d, p0/m, z1.d
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: ctlz_v4i64:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64
; NONEON-NOSVE-NEXT: ldr x8, [sp, #24]
; NONEON-NOSVE-NEXT: clz x9, x8
; NONEON-NOSVE-NEXT: ldr x8, [sp, #16]
; NONEON-NOSVE-NEXT: clz x8, x8
; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #48]
; NONEON-NOSVE-NEXT: ldr x8, [sp, #8]
; NONEON-NOSVE-NEXT: clz x9, x8
; NONEON-NOSVE-NEXT: ldr x8, [sp]
; NONEON-NOSVE-NEXT: clz x8, x8
; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #32]
; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32]
; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
; NONEON-NOSVE-NEXT: add sp, sp, #64
; NONEON-NOSVE-NEXT: ret
  %op = load <4 x i64>, ptr %a
  %res = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %op)
  store <4 x i64> %res, ptr %a
  ret void
}
|
|
|
|
;
; CNT
;
|
|
|
|
; Population count of a <4 x i8> passed and returned by value.
; With SVE/SME the expected code works in 16-bit lanes: each lane is masked to
; its low byte and counted with a predicated cnt.
; Without NEON or SVE the expected code reloads the four elements from the
; stack and computes each count with scalar shift/mask/add steps (masks
; 0x55555555, 0x33333333, 0x0f0f0f0f), a multiply by 0x01010101 and a final
; shift right by 24.
define <4 x i8> @ctpop_v4i8(<4 x i8> %op) {
; CHECK-LABEL: ctpop_v4i8:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: and z0.h, z0.h, #0xff
; CHECK-NEXT: cnt z0.h, p0/m, z0.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: ctpop_v4i8:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: str d0, [sp, #-16]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #6]
; NONEON-NOSVE-NEXT: ldrb w11, [sp, #4]
; NONEON-NOSVE-NEXT: mov w8, #16843009 // =0x1010101
; NONEON-NOSVE-NEXT: ldrb w10, [sp, #2]
; NONEON-NOSVE-NEXT: ldrb w12, [sp]
; NONEON-NOSVE-NEXT: lsr w13, w9, #1
; NONEON-NOSVE-NEXT: lsr w14, w11, #1
; NONEON-NOSVE-NEXT: lsr w15, w10, #1
; NONEON-NOSVE-NEXT: lsr w16, w12, #1
; NONEON-NOSVE-NEXT: and w13, w13, #0x55555555
; NONEON-NOSVE-NEXT: sub w9, w9, w13
; NONEON-NOSVE-NEXT: and w13, w14, #0x55555555
; NONEON-NOSVE-NEXT: and w14, w15, #0x55555555
; NONEON-NOSVE-NEXT: sub w11, w11, w13
; NONEON-NOSVE-NEXT: lsr w13, w9, #2
; NONEON-NOSVE-NEXT: and w15, w16, #0x55555555
; NONEON-NOSVE-NEXT: sub w10, w10, w14
; NONEON-NOSVE-NEXT: sub w12, w12, w15
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
; NONEON-NOSVE-NEXT: and w13, w13, #0x33333333
; NONEON-NOSVE-NEXT: lsr w14, w11, #2
; NONEON-NOSVE-NEXT: lsr w15, w10, #2
; NONEON-NOSVE-NEXT: add w9, w9, w13
; NONEON-NOSVE-NEXT: lsr w13, w12, #2
; NONEON-NOSVE-NEXT: and w11, w11, #0x33333333
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
; NONEON-NOSVE-NEXT: and w14, w14, #0x33333333
; NONEON-NOSVE-NEXT: and w15, w15, #0x33333333
; NONEON-NOSVE-NEXT: and w12, w12, #0x33333333
; NONEON-NOSVE-NEXT: and w13, w13, #0x33333333
; NONEON-NOSVE-NEXT: add w11, w11, w14
; NONEON-NOSVE-NEXT: add w10, w10, w15
; NONEON-NOSVE-NEXT: add w12, w12, w13
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
; NONEON-NOSVE-NEXT: add w11, w11, w11, lsr #4
; NONEON-NOSVE-NEXT: add w10, w10, w10, lsr #4
; NONEON-NOSVE-NEXT: add w12, w12, w12, lsr #4
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
; NONEON-NOSVE-NEXT: and w11, w11, #0xf0f0f0f
; NONEON-NOSVE-NEXT: and w10, w10, #0xf0f0f0f
; NONEON-NOSVE-NEXT: and w12, w12, #0xf0f0f0f
; NONEON-NOSVE-NEXT: mul w9, w9, w8
; NONEON-NOSVE-NEXT: mul w11, w11, w8
; NONEON-NOSVE-NEXT: mul w10, w10, w8
; NONEON-NOSVE-NEXT: mul w8, w12, w8
; NONEON-NOSVE-NEXT: lsr w9, w9, #24
; NONEON-NOSVE-NEXT: lsr w11, w11, #24
; NONEON-NOSVE-NEXT: lsr w10, w10, #24
; NONEON-NOSVE-NEXT: strh w9, [sp, #14]
; NONEON-NOSVE-NEXT: lsr w8, w8, #24
; NONEON-NOSVE-NEXT: strh w11, [sp, #12]
; NONEON-NOSVE-NEXT: strh w10, [sp, #10]
; NONEON-NOSVE-NEXT: strh w8, [sp, #8]
; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
  %res = call <4 x i8> @llvm.ctpop.v4i8(<4 x i8> %op)
  ret <4 x i8> %res
}
|
|
|
|
; ctpop_v8i8: per-lane population count of an <8 x i8> argument through
; @llvm.ctpop.v8i8, returned by value.
; Expected code for the SVE/SME runs: one predicated `cnt` on byte lanes under
; a `ptrue p0.b, vl8` predicate.
; Expected code for the run that enables neither SVE nor SME: the vector is
; stored to a 16-byte stack slot, each of the 8 bytes is loaded, counted with a
; scalar shift/mask/add/multiply sequence and written to the upper half of the
; slot, and the result is reloaded into d0.
; The assertion lines are autogenerated (see the note at the top of the file);
; regenerate them with the script instead of editing them by hand.
define <8 x i8> @ctpop_v8i8(<8 x i8> %op) {
|
|
; CHECK-LABEL: ctpop_v8i8:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ptrue p0.b, vl8
|
|
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
|
|
; CHECK-NEXT: cnt z0.b, p0/m, z0.b
|
|
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
|
|
; CHECK-NEXT: ret
|
|
;
|
|
; NONEON-NOSVE-LABEL: ctpop_v8i8:
|
|
; NONEON-NOSVE: // %bb.0:
|
|
; NONEON-NOSVE-NEXT: str d0, [sp, #-16]!
|
|
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
|
|
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #7]
|
|
; NONEON-NOSVE-NEXT: mov w8, #16843009 // =0x1010101
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
|
|
; NONEON-NOSVE-NEXT: sub w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
|
|
; NONEON-NOSVE-NEXT: mul w9, w9, w8
|
|
; NONEON-NOSVE-NEXT: lsr w9, w9, #24
|
|
; NONEON-NOSVE-NEXT: strb w9, [sp, #15]
|
|
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #6]
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
|
|
; NONEON-NOSVE-NEXT: sub w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
|
|
; NONEON-NOSVE-NEXT: mul w9, w9, w8
|
|
; NONEON-NOSVE-NEXT: lsr w9, w9, #24
|
|
; NONEON-NOSVE-NEXT: strb w9, [sp, #14]
|
|
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #5]
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
|
|
; NONEON-NOSVE-NEXT: sub w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
|
|
; NONEON-NOSVE-NEXT: mul w9, w9, w8
|
|
; NONEON-NOSVE-NEXT: lsr w9, w9, #24
|
|
; NONEON-NOSVE-NEXT: strb w9, [sp, #13]
|
|
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #4]
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
|
|
; NONEON-NOSVE-NEXT: sub w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
|
|
; NONEON-NOSVE-NEXT: mul w9, w9, w8
|
|
; NONEON-NOSVE-NEXT: lsr w9, w9, #24
|
|
; NONEON-NOSVE-NEXT: strb w9, [sp, #12]
|
|
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #3]
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
|
|
; NONEON-NOSVE-NEXT: sub w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
|
|
; NONEON-NOSVE-NEXT: mul w9, w9, w8
|
|
; NONEON-NOSVE-NEXT: lsr w9, w9, #24
|
|
; NONEON-NOSVE-NEXT: strb w9, [sp, #11]
|
|
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #2]
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
|
|
; NONEON-NOSVE-NEXT: sub w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
|
|
; NONEON-NOSVE-NEXT: mul w9, w9, w8
|
|
; NONEON-NOSVE-NEXT: lsr w9, w9, #24
|
|
; NONEON-NOSVE-NEXT: strb w9, [sp, #10]
|
|
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #1]
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
|
|
; NONEON-NOSVE-NEXT: sub w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
|
|
; NONEON-NOSVE-NEXT: mul w9, w9, w8
|
|
; NONEON-NOSVE-NEXT: lsr w9, w9, #24
|
|
; NONEON-NOSVE-NEXT: strb w9, [sp, #9]
|
|
; NONEON-NOSVE-NEXT: ldrb w9, [sp]
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
|
|
; NONEON-NOSVE-NEXT: sub w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
|
|
; NONEON-NOSVE-NEXT: mul w8, w9, w8
|
|
; NONEON-NOSVE-NEXT: lsr w8, w8, #24
|
|
; NONEON-NOSVE-NEXT: strb w8, [sp, #8]
|
|
; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
|
|
; NONEON-NOSVE-NEXT: add sp, sp, #16
|
|
; NONEON-NOSVE-NEXT: ret
|
|
%res = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %op)
|
|
ret <8 x i8> %res
|
|
}
|
|
|
|
; ctpop_v16i8: per-lane population count of a <16 x i8> argument through
; @llvm.ctpop.v16i8, returned by value.
; Expected code for the SVE/SME runs: one predicated `cnt` on byte lanes under
; a `ptrue p0.b, vl16` predicate.
; Expected code for the run that enables neither SVE nor SME: the vector is
; stored to a 32-byte stack slot, each of the 16 bytes is loaded, counted with
; a scalar shift/mask/add/multiply sequence and written to the upper half of
; the slot, and the result is reloaded into q0.
; The assertion lines are autogenerated (see the note at the top of the file);
; regenerate them with the script instead of editing them by hand.
define <16 x i8> @ctpop_v16i8(<16 x i8> %op) {
|
|
; CHECK-LABEL: ctpop_v16i8:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ptrue p0.b, vl16
|
|
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
|
|
; CHECK-NEXT: cnt z0.b, p0/m, z0.b
|
|
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
|
|
; CHECK-NEXT: ret
|
|
;
|
|
; NONEON-NOSVE-LABEL: ctpop_v16i8:
|
|
; NONEON-NOSVE: // %bb.0:
|
|
; NONEON-NOSVE-NEXT: str q0, [sp, #-32]!
|
|
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
|
|
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #15]
|
|
; NONEON-NOSVE-NEXT: mov w8, #16843009 // =0x1010101
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
|
|
; NONEON-NOSVE-NEXT: sub w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
|
|
; NONEON-NOSVE-NEXT: mul w9, w9, w8
|
|
; NONEON-NOSVE-NEXT: lsr w9, w9, #24
|
|
; NONEON-NOSVE-NEXT: strb w9, [sp, #31]
|
|
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #14]
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
|
|
; NONEON-NOSVE-NEXT: sub w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
|
|
; NONEON-NOSVE-NEXT: mul w9, w9, w8
|
|
; NONEON-NOSVE-NEXT: lsr w9, w9, #24
|
|
; NONEON-NOSVE-NEXT: strb w9, [sp, #30]
|
|
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #13]
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
|
|
; NONEON-NOSVE-NEXT: sub w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
|
|
; NONEON-NOSVE-NEXT: mul w9, w9, w8
|
|
; NONEON-NOSVE-NEXT: lsr w9, w9, #24
|
|
; NONEON-NOSVE-NEXT: strb w9, [sp, #29]
|
|
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #12]
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
|
|
; NONEON-NOSVE-NEXT: sub w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
|
|
; NONEON-NOSVE-NEXT: mul w9, w9, w8
|
|
; NONEON-NOSVE-NEXT: lsr w9, w9, #24
|
|
; NONEON-NOSVE-NEXT: strb w9, [sp, #28]
|
|
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #11]
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
|
|
; NONEON-NOSVE-NEXT: sub w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
|
|
; NONEON-NOSVE-NEXT: mul w9, w9, w8
|
|
; NONEON-NOSVE-NEXT: lsr w9, w9, #24
|
|
; NONEON-NOSVE-NEXT: strb w9, [sp, #27]
|
|
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #10]
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
|
|
; NONEON-NOSVE-NEXT: sub w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
|
|
; NONEON-NOSVE-NEXT: mul w9, w9, w8
|
|
; NONEON-NOSVE-NEXT: lsr w9, w9, #24
|
|
; NONEON-NOSVE-NEXT: strb w9, [sp, #26]
|
|
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #9]
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
|
|
; NONEON-NOSVE-NEXT: sub w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
|
|
; NONEON-NOSVE-NEXT: mul w9, w9, w8
|
|
; NONEON-NOSVE-NEXT: lsr w9, w9, #24
|
|
; NONEON-NOSVE-NEXT: strb w9, [sp, #25]
|
|
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #8]
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
|
|
; NONEON-NOSVE-NEXT: sub w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
|
|
; NONEON-NOSVE-NEXT: mul w9, w9, w8
|
|
; NONEON-NOSVE-NEXT: lsr w9, w9, #24
|
|
; NONEON-NOSVE-NEXT: strb w9, [sp, #24]
|
|
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #7]
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
|
|
; NONEON-NOSVE-NEXT: sub w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
|
|
; NONEON-NOSVE-NEXT: mul w9, w9, w8
|
|
; NONEON-NOSVE-NEXT: lsr w9, w9, #24
|
|
; NONEON-NOSVE-NEXT: strb w9, [sp, #23]
|
|
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #6]
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
|
|
; NONEON-NOSVE-NEXT: sub w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
|
|
; NONEON-NOSVE-NEXT: mul w9, w9, w8
|
|
; NONEON-NOSVE-NEXT: lsr w9, w9, #24
|
|
; NONEON-NOSVE-NEXT: strb w9, [sp, #22]
|
|
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #5]
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
|
|
; NONEON-NOSVE-NEXT: sub w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
|
|
; NONEON-NOSVE-NEXT: mul w9, w9, w8
|
|
; NONEON-NOSVE-NEXT: lsr w9, w9, #24
|
|
; NONEON-NOSVE-NEXT: strb w9, [sp, #21]
|
|
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #4]
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
|
|
; NONEON-NOSVE-NEXT: sub w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
|
|
; NONEON-NOSVE-NEXT: mul w9, w9, w8
|
|
; NONEON-NOSVE-NEXT: lsr w9, w9, #24
|
|
; NONEON-NOSVE-NEXT: strb w9, [sp, #20]
|
|
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #3]
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
|
|
; NONEON-NOSVE-NEXT: sub w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
|
|
; NONEON-NOSVE-NEXT: mul w9, w9, w8
|
|
; NONEON-NOSVE-NEXT: lsr w9, w9, #24
|
|
; NONEON-NOSVE-NEXT: strb w9, [sp, #19]
|
|
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #2]
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
|
|
; NONEON-NOSVE-NEXT: sub w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
|
|
; NONEON-NOSVE-NEXT: mul w9, w9, w8
|
|
; NONEON-NOSVE-NEXT: lsr w9, w9, #24
|
|
; NONEON-NOSVE-NEXT: strb w9, [sp, #18]
|
|
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #1]
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
|
|
; NONEON-NOSVE-NEXT: sub w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
|
|
; NONEON-NOSVE-NEXT: mul w9, w9, w8
|
|
; NONEON-NOSVE-NEXT: lsr w9, w9, #24
|
|
; NONEON-NOSVE-NEXT: strb w9, [sp, #17]
|
|
; NONEON-NOSVE-NEXT: ldrb w9, [sp]
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
|
|
; NONEON-NOSVE-NEXT: sub w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
|
|
; NONEON-NOSVE-NEXT: mul w8, w9, w8
|
|
; NONEON-NOSVE-NEXT: lsr w8, w8, #24
|
|
; NONEON-NOSVE-NEXT: strb w8, [sp, #16]
|
|
; NONEON-NOSVE-NEXT: ldr q0, [sp, #16]
|
|
; NONEON-NOSVE-NEXT: add sp, sp, #32
|
|
; NONEON-NOSVE-NEXT: ret
|
|
%res = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %op)
|
|
ret <16 x i8> %res
|
|
}
|
|
|
|
; ctpop_v32i8: loads a <32 x i8> vector from %a, applies @llvm.ctpop.v32i8 and
; stores the result back to %a (the function itself returns void).
; Expected code for the SVE/SME runs: the value is handled as two q registers
; (ldp/stp on [x0]) with one predicated byte-lane `cnt` per register under a
; shared `ptrue p0.b, vl16` predicate.
; Expected code for the run that enables neither SVE nor SME: both halves are
; copied to a 64-byte stack slot, each of the 32 bytes is loaded, counted with
; a scalar shift/mask/add/multiply sequence and written to the upper 32 bytes
; of the slot, and the result is reloaded and stored to [x0].
; The assertion lines are autogenerated (see the note at the top of the file);
; regenerate them with the script instead of editing them by hand.
define void @ctpop_v32i8(ptr %a) {
|
|
; CHECK-LABEL: ctpop_v32i8:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ldp q0, q1, [x0]
|
|
; CHECK-NEXT: ptrue p0.b, vl16
|
|
; CHECK-NEXT: cnt z0.b, p0/m, z0.b
|
|
; CHECK-NEXT: cnt z1.b, p0/m, z1.b
|
|
; CHECK-NEXT: stp q0, q1, [x0]
|
|
; CHECK-NEXT: ret
|
|
;
|
|
; NONEON-NOSVE-LABEL: ctpop_v32i8:
|
|
; NONEON-NOSVE: // %bb.0:
|
|
; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
|
|
; NONEON-NOSVE-NEXT: mov w8, #16843009 // =0x1010101
|
|
; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]!
|
|
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64
|
|
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #31]
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
|
|
; NONEON-NOSVE-NEXT: sub w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
|
|
; NONEON-NOSVE-NEXT: mul w9, w9, w8
|
|
; NONEON-NOSVE-NEXT: lsr w9, w9, #24
|
|
; NONEON-NOSVE-NEXT: strb w9, [sp, #63]
|
|
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #30]
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
|
|
; NONEON-NOSVE-NEXT: sub w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
|
|
; NONEON-NOSVE-NEXT: mul w9, w9, w8
|
|
; NONEON-NOSVE-NEXT: lsr w9, w9, #24
|
|
; NONEON-NOSVE-NEXT: strb w9, [sp, #62]
|
|
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #29]
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
|
|
; NONEON-NOSVE-NEXT: sub w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
|
|
; NONEON-NOSVE-NEXT: mul w9, w9, w8
|
|
; NONEON-NOSVE-NEXT: lsr w9, w9, #24
|
|
; NONEON-NOSVE-NEXT: strb w9, [sp, #61]
|
|
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #28]
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
|
|
; NONEON-NOSVE-NEXT: sub w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
|
|
; NONEON-NOSVE-NEXT: mul w9, w9, w8
|
|
; NONEON-NOSVE-NEXT: lsr w9, w9, #24
|
|
; NONEON-NOSVE-NEXT: strb w9, [sp, #60]
|
|
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #27]
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
|
|
; NONEON-NOSVE-NEXT: sub w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
|
|
; NONEON-NOSVE-NEXT: mul w9, w9, w8
|
|
; NONEON-NOSVE-NEXT: lsr w9, w9, #24
|
|
; NONEON-NOSVE-NEXT: strb w9, [sp, #59]
|
|
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #26]
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
|
|
; NONEON-NOSVE-NEXT: sub w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
|
|
; NONEON-NOSVE-NEXT: mul w9, w9, w8
|
|
; NONEON-NOSVE-NEXT: lsr w9, w9, #24
|
|
; NONEON-NOSVE-NEXT: strb w9, [sp, #58]
|
|
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #25]
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
|
|
; NONEON-NOSVE-NEXT: sub w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
|
|
; NONEON-NOSVE-NEXT: mul w9, w9, w8
|
|
; NONEON-NOSVE-NEXT: lsr w9, w9, #24
|
|
; NONEON-NOSVE-NEXT: strb w9, [sp, #57]
|
|
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #24]
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
|
|
; NONEON-NOSVE-NEXT: sub w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
|
|
; NONEON-NOSVE-NEXT: mul w9, w9, w8
|
|
; NONEON-NOSVE-NEXT: lsr w9, w9, #24
|
|
; NONEON-NOSVE-NEXT: strb w9, [sp, #56]
|
|
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #23]
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
|
|
; NONEON-NOSVE-NEXT: sub w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
|
|
; NONEON-NOSVE-NEXT: mul w9, w9, w8
|
|
; NONEON-NOSVE-NEXT: lsr w9, w9, #24
|
|
; NONEON-NOSVE-NEXT: strb w9, [sp, #55]
|
|
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #22]
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
|
|
; NONEON-NOSVE-NEXT: sub w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
|
|
; NONEON-NOSVE-NEXT: mul w9, w9, w8
|
|
; NONEON-NOSVE-NEXT: lsr w9, w9, #24
|
|
; NONEON-NOSVE-NEXT: strb w9, [sp, #54]
|
|
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #21]
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
|
|
; NONEON-NOSVE-NEXT: sub w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
|
|
; NONEON-NOSVE-NEXT: mul w9, w9, w8
|
|
; NONEON-NOSVE-NEXT: lsr w9, w9, #24
|
|
; NONEON-NOSVE-NEXT: strb w9, [sp, #53]
|
|
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #20]
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
|
|
; NONEON-NOSVE-NEXT: sub w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
|
|
; NONEON-NOSVE-NEXT: mul w9, w9, w8
|
|
; NONEON-NOSVE-NEXT: lsr w9, w9, #24
|
|
; NONEON-NOSVE-NEXT: strb w9, [sp, #52]
|
|
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #19]
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
|
|
; NONEON-NOSVE-NEXT: sub w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
|
|
; NONEON-NOSVE-NEXT: mul w9, w9, w8
|
|
; NONEON-NOSVE-NEXT: lsr w9, w9, #24
|
|
; NONEON-NOSVE-NEXT: strb w9, [sp, #51]
|
|
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #18]
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
|
|
; NONEON-NOSVE-NEXT: sub w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
|
|
; NONEON-NOSVE-NEXT: mul w9, w9, w8
|
|
; NONEON-NOSVE-NEXT: lsr w9, w9, #24
|
|
; NONEON-NOSVE-NEXT: strb w9, [sp, #50]
|
|
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #17]
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
|
|
; NONEON-NOSVE-NEXT: sub w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
|
|
; NONEON-NOSVE-NEXT: mul w9, w9, w8
|
|
; NONEON-NOSVE-NEXT: lsr w9, w9, #24
|
|
; NONEON-NOSVE-NEXT: strb w9, [sp, #49]
|
|
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #16]
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
|
|
; NONEON-NOSVE-NEXT: sub w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
|
|
; NONEON-NOSVE-NEXT: mul w9, w9, w8
|
|
; NONEON-NOSVE-NEXT: lsr w9, w9, #24
|
|
; NONEON-NOSVE-NEXT: strb w9, [sp, #48]
|
|
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #15]
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
|
|
; NONEON-NOSVE-NEXT: sub w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
|
|
; NONEON-NOSVE-NEXT: mul w9, w9, w8
|
|
; NONEON-NOSVE-NEXT: lsr w9, w9, #24
|
|
; NONEON-NOSVE-NEXT: strb w9, [sp, #47]
|
|
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #14]
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
|
|
; NONEON-NOSVE-NEXT: sub w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
|
|
; NONEON-NOSVE-NEXT: mul w9, w9, w8
|
|
; NONEON-NOSVE-NEXT: lsr w9, w9, #24
|
|
; NONEON-NOSVE-NEXT: strb w9, [sp, #46]
|
|
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #13]
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
|
|
; NONEON-NOSVE-NEXT: sub w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
|
|
; NONEON-NOSVE-NEXT: mul w9, w9, w8
|
|
; NONEON-NOSVE-NEXT: lsr w9, w9, #24
|
|
; NONEON-NOSVE-NEXT: strb w9, [sp, #45]
|
|
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #12]
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
|
|
; NONEON-NOSVE-NEXT: sub w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
|
|
; NONEON-NOSVE-NEXT: mul w9, w9, w8
|
|
; NONEON-NOSVE-NEXT: lsr w9, w9, #24
|
|
; NONEON-NOSVE-NEXT: strb w9, [sp, #44]
|
|
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #11]
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
|
|
; NONEON-NOSVE-NEXT: sub w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
|
|
; NONEON-NOSVE-NEXT: mul w9, w9, w8
|
|
; NONEON-NOSVE-NEXT: lsr w9, w9, #24
|
|
; NONEON-NOSVE-NEXT: strb w9, [sp, #43]
|
|
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #10]
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
|
|
; NONEON-NOSVE-NEXT: sub w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
|
|
; NONEON-NOSVE-NEXT: mul w9, w9, w8
|
|
; NONEON-NOSVE-NEXT: lsr w9, w9, #24
|
|
; NONEON-NOSVE-NEXT: strb w9, [sp, #42]
|
|
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #9]
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
|
|
; NONEON-NOSVE-NEXT: sub w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
|
|
; NONEON-NOSVE-NEXT: mul w9, w9, w8
|
|
; NONEON-NOSVE-NEXT: lsr w9, w9, #24
|
|
; NONEON-NOSVE-NEXT: strb w9, [sp, #41]
|
|
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #8]
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
|
|
; NONEON-NOSVE-NEXT: sub w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
|
|
; NONEON-NOSVE-NEXT: mul w9, w9, w8
|
|
; NONEON-NOSVE-NEXT: lsr w9, w9, #24
|
|
; NONEON-NOSVE-NEXT: strb w9, [sp, #40]
|
|
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #7]
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
|
|
; NONEON-NOSVE-NEXT: sub w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
|
|
; NONEON-NOSVE-NEXT: mul w9, w9, w8
|
|
; NONEON-NOSVE-NEXT: lsr w9, w9, #24
|
|
; NONEON-NOSVE-NEXT: strb w9, [sp, #39]
|
|
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #6]
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
|
|
; NONEON-NOSVE-NEXT: sub w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
|
|
; NONEON-NOSVE-NEXT: mul w9, w9, w8
|
|
; NONEON-NOSVE-NEXT: lsr w9, w9, #24
|
|
; NONEON-NOSVE-NEXT: strb w9, [sp, #38]
|
|
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #5]
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
|
|
; NONEON-NOSVE-NEXT: sub w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
|
|
; NONEON-NOSVE-NEXT: mul w9, w9, w8
|
|
; NONEON-NOSVE-NEXT: lsr w9, w9, #24
|
|
; NONEON-NOSVE-NEXT: strb w9, [sp, #37]
|
|
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #4]
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
|
|
; NONEON-NOSVE-NEXT: sub w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
|
|
; NONEON-NOSVE-NEXT: mul w9, w9, w8
|
|
; NONEON-NOSVE-NEXT: lsr w9, w9, #24
|
|
; NONEON-NOSVE-NEXT: strb w9, [sp, #36]
|
|
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #3]
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
|
|
; NONEON-NOSVE-NEXT: sub w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
|
|
; NONEON-NOSVE-NEXT: mul w9, w9, w8
|
|
; NONEON-NOSVE-NEXT: lsr w9, w9, #24
|
|
; NONEON-NOSVE-NEXT: strb w9, [sp, #35]
|
|
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #2]
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
|
|
; NONEON-NOSVE-NEXT: sub w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
|
|
; NONEON-NOSVE-NEXT: mul w9, w9, w8
|
|
; NONEON-NOSVE-NEXT: lsr w9, w9, #24
|
|
; NONEON-NOSVE-NEXT: strb w9, [sp, #34]
|
|
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #1]
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
|
|
; NONEON-NOSVE-NEXT: sub w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
|
|
; NONEON-NOSVE-NEXT: mul w9, w9, w8
|
|
; NONEON-NOSVE-NEXT: lsr w9, w9, #24
|
|
; NONEON-NOSVE-NEXT: strb w9, [sp, #33]
|
|
; NONEON-NOSVE-NEXT: ldrb w9, [sp]
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
|
|
; NONEON-NOSVE-NEXT: sub w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
|
|
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w10
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
|
|
; NONEON-NOSVE-NEXT: mul w8, w9, w8
|
|
; NONEON-NOSVE-NEXT: lsr w8, w8, #24
|
|
; NONEON-NOSVE-NEXT: strb w8, [sp, #32]
|
|
; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32]
|
|
; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
|
|
; NONEON-NOSVE-NEXT: add sp, sp, #64
|
|
; NONEON-NOSVE-NEXT: ret
|
|
%op = load <32 x i8>, ptr %a
|
|
%res = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %op)
|
|
store <32 x i8> %res, ptr %a
|
|
ret void
|
|
}
|
|
|
|
; Population count of a <2 x i16> vector passed and returned by value.
; SVE/SME runs: the lanes are handled as 32-bit elements, masked to 16 bits
; and counted with a predicated CNT (vl2).
; Run without SVE/SME: the vector is spilled to the stack and each lane is
; counted in a general-purpose register with the shift/mask/multiply
; sequence (0x55555555, 0x33333333, 0x0f0f0f0f, * 0x01010101, >> 24) before
; the result is reloaded into d0.
define <2 x i16> @ctpop_v2i16(<2 x i16> %op) {
; CHECK-LABEL: ctpop_v2i16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ptrue p0.s, vl2
; CHECK-NEXT: and z0.s, z0.s, #0xffff
; CHECK-NEXT: cnt z0.s, p0/m, z0.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: ctpop_v2i16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: str d0, [sp, #-16]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #4]
; NONEON-NOSVE-NEXT: ldrh w10, [sp]
; NONEON-NOSVE-NEXT: mov w8, #16843009 // =0x1010101
; NONEON-NOSVE-NEXT: lsr w11, w9, #1
; NONEON-NOSVE-NEXT: lsr w12, w10, #1
; NONEON-NOSVE-NEXT: and w11, w11, #0x55555555
; NONEON-NOSVE-NEXT: and w12, w12, #0x55555555
; NONEON-NOSVE-NEXT: sub w9, w9, w11
; NONEON-NOSVE-NEXT: sub w10, w10, w12
; NONEON-NOSVE-NEXT: lsr w11, w9, #2
; NONEON-NOSVE-NEXT: lsr w12, w10, #2
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
; NONEON-NOSVE-NEXT: and w11, w11, #0x33333333
; NONEON-NOSVE-NEXT: add w9, w9, w11
; NONEON-NOSVE-NEXT: and w11, w12, #0x33333333
; NONEON-NOSVE-NEXT: add w10, w10, w11
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
; NONEON-NOSVE-NEXT: add w10, w10, w10, lsr #4
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
; NONEON-NOSVE-NEXT: and w10, w10, #0xf0f0f0f
; NONEON-NOSVE-NEXT: mul w9, w9, w8
; NONEON-NOSVE-NEXT: mul w8, w10, w8
; NONEON-NOSVE-NEXT: lsr w9, w9, #24
; NONEON-NOSVE-NEXT: lsr w8, w8, #24
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #8]
; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
  %res = call <2 x i16> @llvm.ctpop.v2i16(<2 x i16> %op)
  ret <2 x i16> %res
}
|
|
|
|
; Population count of a <4 x i16> vector passed and returned by value.
; SVE/SME runs: a single predicated CNT on 16-bit elements (vl4).
; Run without SVE/SME: the vector is spilled to the stack and the four lanes
; are counted one at a time in w9 (highest offset first), each with the
; shift/mask/multiply sequence, then the result is reloaded into d0.
define <4 x i16> @ctpop_v4i16(<4 x i16> %op) {
; CHECK-LABEL: ctpop_v4i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: cnt z0.h, p0/m, z0.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: ctpop_v4i16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: str d0, [sp, #-16]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #6]
; NONEON-NOSVE-NEXT: mov w8, #16843009 // =0x1010101
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
; NONEON-NOSVE-NEXT: sub w9, w9, w10
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
; NONEON-NOSVE-NEXT: add w9, w9, w10
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
; NONEON-NOSVE-NEXT: mul w9, w9, w8
; NONEON-NOSVE-NEXT: lsr w9, w9, #24
; NONEON-NOSVE-NEXT: strh w9, [sp, #14]
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #4]
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
; NONEON-NOSVE-NEXT: sub w9, w9, w10
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
; NONEON-NOSVE-NEXT: add w9, w9, w10
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
; NONEON-NOSVE-NEXT: mul w9, w9, w8
; NONEON-NOSVE-NEXT: lsr w9, w9, #24
; NONEON-NOSVE-NEXT: strh w9, [sp, #12]
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #2]
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
; NONEON-NOSVE-NEXT: sub w9, w9, w10
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
; NONEON-NOSVE-NEXT: add w9, w9, w10
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
; NONEON-NOSVE-NEXT: mul w9, w9, w8
; NONEON-NOSVE-NEXT: lsr w9, w9, #24
; NONEON-NOSVE-NEXT: strh w9, [sp, #10]
; NONEON-NOSVE-NEXT: ldrh w9, [sp]
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
; NONEON-NOSVE-NEXT: sub w9, w9, w10
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
; NONEON-NOSVE-NEXT: add w9, w9, w10
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
; NONEON-NOSVE-NEXT: mul w8, w9, w8
; NONEON-NOSVE-NEXT: lsr w8, w8, #24
; NONEON-NOSVE-NEXT: strh w8, [sp, #8]
; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
  %res = call <4 x i16> @llvm.ctpop.v4i16(<4 x i16> %op)
  ret <4 x i16> %res
}
|
|
|
|
; Population count of an <8 x i16> vector passed and returned by value.
; SVE/SME runs: a single predicated CNT on 16-bit elements (vl8).
; Run without SVE/SME: q0 is spilled to the stack and the eight lanes are
; counted one at a time in w9 (highest offset first), each with the
; shift/mask/multiply sequence, then the result is reloaded into q0.
define <8 x i16> @ctpop_v8i16(<8 x i16> %op) {
; CHECK-LABEL: ctpop_v8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl8
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: cnt z0.h, p0/m, z0.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: ctpop_v8i16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: str q0, [sp, #-32]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #14]
; NONEON-NOSVE-NEXT: mov w8, #16843009 // =0x1010101
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
; NONEON-NOSVE-NEXT: sub w9, w9, w10
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
; NONEON-NOSVE-NEXT: add w9, w9, w10
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
; NONEON-NOSVE-NEXT: mul w9, w9, w8
; NONEON-NOSVE-NEXT: lsr w9, w9, #24
; NONEON-NOSVE-NEXT: strh w9, [sp, #30]
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #12]
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
; NONEON-NOSVE-NEXT: sub w9, w9, w10
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
; NONEON-NOSVE-NEXT: add w9, w9, w10
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
; NONEON-NOSVE-NEXT: mul w9, w9, w8
; NONEON-NOSVE-NEXT: lsr w9, w9, #24
; NONEON-NOSVE-NEXT: strh w9, [sp, #28]
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #10]
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
; NONEON-NOSVE-NEXT: sub w9, w9, w10
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
; NONEON-NOSVE-NEXT: add w9, w9, w10
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
; NONEON-NOSVE-NEXT: mul w9, w9, w8
; NONEON-NOSVE-NEXT: lsr w9, w9, #24
; NONEON-NOSVE-NEXT: strh w9, [sp, #26]
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #8]
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
; NONEON-NOSVE-NEXT: sub w9, w9, w10
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
; NONEON-NOSVE-NEXT: add w9, w9, w10
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
; NONEON-NOSVE-NEXT: mul w9, w9, w8
; NONEON-NOSVE-NEXT: lsr w9, w9, #24
; NONEON-NOSVE-NEXT: strh w9, [sp, #24]
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #6]
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
; NONEON-NOSVE-NEXT: sub w9, w9, w10
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
; NONEON-NOSVE-NEXT: add w9, w9, w10
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
; NONEON-NOSVE-NEXT: mul w9, w9, w8
; NONEON-NOSVE-NEXT: lsr w9, w9, #24
; NONEON-NOSVE-NEXT: strh w9, [sp, #22]
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #4]
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
; NONEON-NOSVE-NEXT: sub w9, w9, w10
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
; NONEON-NOSVE-NEXT: add w9, w9, w10
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
; NONEON-NOSVE-NEXT: mul w9, w9, w8
; NONEON-NOSVE-NEXT: lsr w9, w9, #24
; NONEON-NOSVE-NEXT: strh w9, [sp, #20]
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #2]
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
; NONEON-NOSVE-NEXT: sub w9, w9, w10
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
; NONEON-NOSVE-NEXT: add w9, w9, w10
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
; NONEON-NOSVE-NEXT: mul w9, w9, w8
; NONEON-NOSVE-NEXT: lsr w9, w9, #24
; NONEON-NOSVE-NEXT: strh w9, [sp, #18]
; NONEON-NOSVE-NEXT: ldrh w9, [sp]
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
; NONEON-NOSVE-NEXT: sub w9, w9, w10
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
; NONEON-NOSVE-NEXT: add w9, w9, w10
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
; NONEON-NOSVE-NEXT: mul w8, w9, w8
; NONEON-NOSVE-NEXT: lsr w8, w8, #24
; NONEON-NOSVE-NEXT: strh w8, [sp, #16]
; NONEON-NOSVE-NEXT: ldr q0, [sp, #16]
; NONEON-NOSVE-NEXT: add sp, sp, #32
; NONEON-NOSVE-NEXT: ret
  %res = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %op)
  ret <8 x i16> %res
}
|
|
|
|
; Population count of a <16 x i16> vector loaded from and stored back to %a.
; SVE/SME runs: the 256-bit value is processed as two q-register halves,
; each with a predicated CNT on 16-bit elements (vl8).
; Run without SVE/SME: both halves are copied to the stack and the sixteen
; lanes are counted one at a time in w9 (highest offset first), each with the
; shift/mask/multiply sequence, then the result pair is reloaded and stored
; through x0.
define void @ctpop_v16i16(ptr %a) {
; CHECK-LABEL: ctpop_v16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: ptrue p0.h, vl8
; CHECK-NEXT: cnt z0.h, p0/m, z0.h
; CHECK-NEXT: cnt z1.h, p0/m, z1.h
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: ctpop_v16i16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
; NONEON-NOSVE-NEXT: mov w8, #16843009 // =0x1010101
; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #30]
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
; NONEON-NOSVE-NEXT: sub w9, w9, w10
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
; NONEON-NOSVE-NEXT: add w9, w9, w10
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
; NONEON-NOSVE-NEXT: mul w9, w9, w8
; NONEON-NOSVE-NEXT: lsr w9, w9, #24
; NONEON-NOSVE-NEXT: strh w9, [sp, #62]
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #28]
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
; NONEON-NOSVE-NEXT: sub w9, w9, w10
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
; NONEON-NOSVE-NEXT: add w9, w9, w10
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
; NONEON-NOSVE-NEXT: mul w9, w9, w8
; NONEON-NOSVE-NEXT: lsr w9, w9, #24
; NONEON-NOSVE-NEXT: strh w9, [sp, #60]
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #26]
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
; NONEON-NOSVE-NEXT: sub w9, w9, w10
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
; NONEON-NOSVE-NEXT: add w9, w9, w10
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
; NONEON-NOSVE-NEXT: mul w9, w9, w8
; NONEON-NOSVE-NEXT: lsr w9, w9, #24
; NONEON-NOSVE-NEXT: strh w9, [sp, #58]
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #24]
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
; NONEON-NOSVE-NEXT: sub w9, w9, w10
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
; NONEON-NOSVE-NEXT: add w9, w9, w10
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
; NONEON-NOSVE-NEXT: mul w9, w9, w8
; NONEON-NOSVE-NEXT: lsr w9, w9, #24
; NONEON-NOSVE-NEXT: strh w9, [sp, #56]
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #22]
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
; NONEON-NOSVE-NEXT: sub w9, w9, w10
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
; NONEON-NOSVE-NEXT: add w9, w9, w10
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
; NONEON-NOSVE-NEXT: mul w9, w9, w8
; NONEON-NOSVE-NEXT: lsr w9, w9, #24
; NONEON-NOSVE-NEXT: strh w9, [sp, #54]
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #20]
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
; NONEON-NOSVE-NEXT: sub w9, w9, w10
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
; NONEON-NOSVE-NEXT: add w9, w9, w10
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
; NONEON-NOSVE-NEXT: mul w9, w9, w8
; NONEON-NOSVE-NEXT: lsr w9, w9, #24
; NONEON-NOSVE-NEXT: strh w9, [sp, #52]
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #18]
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
; NONEON-NOSVE-NEXT: sub w9, w9, w10
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
; NONEON-NOSVE-NEXT: add w9, w9, w10
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
; NONEON-NOSVE-NEXT: mul w9, w9, w8
; NONEON-NOSVE-NEXT: lsr w9, w9, #24
; NONEON-NOSVE-NEXT: strh w9, [sp, #50]
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #16]
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
; NONEON-NOSVE-NEXT: sub w9, w9, w10
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
; NONEON-NOSVE-NEXT: add w9, w9, w10
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
; NONEON-NOSVE-NEXT: mul w9, w9, w8
; NONEON-NOSVE-NEXT: lsr w9, w9, #24
; NONEON-NOSVE-NEXT: strh w9, [sp, #48]
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #14]
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
; NONEON-NOSVE-NEXT: sub w9, w9, w10
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
; NONEON-NOSVE-NEXT: add w9, w9, w10
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
; NONEON-NOSVE-NEXT: mul w9, w9, w8
; NONEON-NOSVE-NEXT: lsr w9, w9, #24
; NONEON-NOSVE-NEXT: strh w9, [sp, #46]
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #12]
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
; NONEON-NOSVE-NEXT: sub w9, w9, w10
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
; NONEON-NOSVE-NEXT: add w9, w9, w10
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
; NONEON-NOSVE-NEXT: mul w9, w9, w8
; NONEON-NOSVE-NEXT: lsr w9, w9, #24
; NONEON-NOSVE-NEXT: strh w9, [sp, #44]
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #10]
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
; NONEON-NOSVE-NEXT: sub w9, w9, w10
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
; NONEON-NOSVE-NEXT: add w9, w9, w10
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
; NONEON-NOSVE-NEXT: mul w9, w9, w8
; NONEON-NOSVE-NEXT: lsr w9, w9, #24
; NONEON-NOSVE-NEXT: strh w9, [sp, #42]
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #8]
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
; NONEON-NOSVE-NEXT: sub w9, w9, w10
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
; NONEON-NOSVE-NEXT: add w9, w9, w10
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
; NONEON-NOSVE-NEXT: mul w9, w9, w8
; NONEON-NOSVE-NEXT: lsr w9, w9, #24
; NONEON-NOSVE-NEXT: strh w9, [sp, #40]
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #6]
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
; NONEON-NOSVE-NEXT: sub w9, w9, w10
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
; NONEON-NOSVE-NEXT: add w9, w9, w10
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
; NONEON-NOSVE-NEXT: mul w9, w9, w8
; NONEON-NOSVE-NEXT: lsr w9, w9, #24
; NONEON-NOSVE-NEXT: strh w9, [sp, #38]
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #4]
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
; NONEON-NOSVE-NEXT: sub w9, w9, w10
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
; NONEON-NOSVE-NEXT: add w9, w9, w10
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
; NONEON-NOSVE-NEXT: mul w9, w9, w8
; NONEON-NOSVE-NEXT: lsr w9, w9, #24
; NONEON-NOSVE-NEXT: strh w9, [sp, #36]
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #2]
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
; NONEON-NOSVE-NEXT: sub w9, w9, w10
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
; NONEON-NOSVE-NEXT: add w9, w9, w10
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
; NONEON-NOSVE-NEXT: mul w9, w9, w8
; NONEON-NOSVE-NEXT: lsr w9, w9, #24
; NONEON-NOSVE-NEXT: strh w9, [sp, #34]
; NONEON-NOSVE-NEXT: ldrh w9, [sp]
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
; NONEON-NOSVE-NEXT: sub w9, w9, w10
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
; NONEON-NOSVE-NEXT: add w9, w9, w10
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
; NONEON-NOSVE-NEXT: mul w8, w9, w8
; NONEON-NOSVE-NEXT: lsr w8, w8, #24
; NONEON-NOSVE-NEXT: strh w8, [sp, #32]
; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32]
; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
; NONEON-NOSVE-NEXT: add sp, sp, #64
; NONEON-NOSVE-NEXT: ret
  %op = load <16 x i16>, ptr %a
  %res = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %op)
  store <16 x i16> %res, ptr %a
  ret void
}
|
|
|
|
; Population count of a <2 x i32> vector passed and returned by value.
; SVE/SME runs: a single predicated CNT on 32-bit elements (vl2).
; Run without SVE/SME: d0 is spilled to the stack, the two lanes are counted
; in general-purpose registers with the shift/mask/multiply sequence, and the
; pair of results is written with one stp before being reloaded into d0.
define <2 x i32> @ctpop_v2i32(<2 x i32> %op) {
; CHECK-LABEL: ctpop_v2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl2
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: cnt z0.s, p0/m, z0.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: ctpop_v2i32:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: str d0, [sp, #-16]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT: ldr w9, [sp, #4]
; NONEON-NOSVE-NEXT: mov w8, #16843009 // =0x1010101
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
; NONEON-NOSVE-NEXT: sub w9, w9, w10
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
; NONEON-NOSVE-NEXT: add w9, w9, w10
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
; NONEON-NOSVE-NEXT: mul w9, w9, w8
; NONEON-NOSVE-NEXT: lsr w11, w9, #24
; NONEON-NOSVE-NEXT: ldr w9, [sp]
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
; NONEON-NOSVE-NEXT: sub w9, w9, w10
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
; NONEON-NOSVE-NEXT: add w9, w9, w10
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
; NONEON-NOSVE-NEXT: mul w8, w9, w8
; NONEON-NOSVE-NEXT: lsr w8, w8, #24
; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #8]
; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
  %res = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %op)
  ret <2 x i32> %res
}
|
|
|
|
; Population count of a <4 x i32> vector passed and returned by value.
; SVE/SME runs: a single predicated CNT on 32-bit elements (vl4).
; Run without SVE/SME: q0 is spilled to the stack, the four lanes are counted
; in general-purpose registers with the shift/mask/multiply sequence, and
; results are written back two lanes at a time with stp before being reloaded
; into q0.
define <4 x i32> @ctpop_v4i32(<4 x i32> %op) {
; CHECK-LABEL: ctpop_v4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl4
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: cnt z0.s, p0/m, z0.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: ctpop_v4i32:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: str q0, [sp, #-32]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT: ldr w9, [sp, #12]
; NONEON-NOSVE-NEXT: mov w8, #16843009 // =0x1010101
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
; NONEON-NOSVE-NEXT: sub w9, w9, w10
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
; NONEON-NOSVE-NEXT: add w9, w9, w10
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
; NONEON-NOSVE-NEXT: mul w9, w9, w8
; NONEON-NOSVE-NEXT: lsr w11, w9, #24
; NONEON-NOSVE-NEXT: ldr w9, [sp, #8]
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
; NONEON-NOSVE-NEXT: sub w9, w9, w10
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
; NONEON-NOSVE-NEXT: add w9, w9, w10
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
; NONEON-NOSVE-NEXT: mul w9, w9, w8
; NONEON-NOSVE-NEXT: lsr w9, w9, #24
; NONEON-NOSVE-NEXT: stp w9, w11, [sp, #24]
; NONEON-NOSVE-NEXT: ldr w9, [sp, #4]
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
; NONEON-NOSVE-NEXT: sub w9, w9, w10
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
; NONEON-NOSVE-NEXT: add w9, w9, w10
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
; NONEON-NOSVE-NEXT: mul w9, w9, w8
; NONEON-NOSVE-NEXT: lsr w11, w9, #24
; NONEON-NOSVE-NEXT: ldr w9, [sp]
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
; NONEON-NOSVE-NEXT: sub w9, w9, w10
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
; NONEON-NOSVE-NEXT: add w9, w9, w10
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
; NONEON-NOSVE-NEXT: mul w8, w9, w8
; NONEON-NOSVE-NEXT: lsr w8, w8, #24
; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #16]
; NONEON-NOSVE-NEXT: ldr q0, [sp, #16]
; NONEON-NOSVE-NEXT: add sp, sp, #32
; NONEON-NOSVE-NEXT: ret
  %res = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %op)
  ret <4 x i32> %res
}
|
|
|
|
; Population count of an <8 x i32> vector loaded from and stored back to %a.
; SVE/SME runs: the 256-bit value is processed as two q-register halves,
; each with a predicated CNT on 32-bit elements (vl4).
; Run without SVE/SME: both halves are copied to the stack, the eight lanes
; are counted in general-purpose registers with the shift/mask/multiply
; sequence, results are written back two lanes at a time with stp, and the
; result pair is reloaded and stored through x0.
define void @ctpop_v8i32(ptr %a) {
; CHECK-LABEL: ctpop_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: ptrue p0.s, vl4
; CHECK-NEXT: cnt z0.s, p0/m, z0.s
; CHECK-NEXT: cnt z1.s, p0/m, z1.s
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: ctpop_v8i32:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
; NONEON-NOSVE-NEXT: mov w8, #16843009 // =0x1010101
; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64
; NONEON-NOSVE-NEXT: ldr w9, [sp, #28]
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
; NONEON-NOSVE-NEXT: sub w9, w9, w10
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
; NONEON-NOSVE-NEXT: add w9, w9, w10
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
; NONEON-NOSVE-NEXT: mul w9, w9, w8
; NONEON-NOSVE-NEXT: lsr w11, w9, #24
; NONEON-NOSVE-NEXT: ldr w9, [sp, #24]
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
; NONEON-NOSVE-NEXT: sub w9, w9, w10
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
; NONEON-NOSVE-NEXT: add w9, w9, w10
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
; NONEON-NOSVE-NEXT: mul w9, w9, w8
; NONEON-NOSVE-NEXT: lsr w9, w9, #24
; NONEON-NOSVE-NEXT: stp w9, w11, [sp, #56]
; NONEON-NOSVE-NEXT: ldr w9, [sp, #20]
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
; NONEON-NOSVE-NEXT: sub w9, w9, w10
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
; NONEON-NOSVE-NEXT: add w9, w9, w10
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
; NONEON-NOSVE-NEXT: mul w9, w9, w8
; NONEON-NOSVE-NEXT: lsr w11, w9, #24
; NONEON-NOSVE-NEXT: ldr w9, [sp, #16]
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
; NONEON-NOSVE-NEXT: sub w9, w9, w10
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
; NONEON-NOSVE-NEXT: add w9, w9, w10
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
; NONEON-NOSVE-NEXT: mul w9, w9, w8
; NONEON-NOSVE-NEXT: lsr w9, w9, #24
; NONEON-NOSVE-NEXT: stp w9, w11, [sp, #48]
; NONEON-NOSVE-NEXT: ldr w9, [sp, #12]
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
; NONEON-NOSVE-NEXT: sub w9, w9, w10
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
; NONEON-NOSVE-NEXT: add w9, w9, w10
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
; NONEON-NOSVE-NEXT: mul w9, w9, w8
; NONEON-NOSVE-NEXT: lsr w11, w9, #24
; NONEON-NOSVE-NEXT: ldr w9, [sp, #8]
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
; NONEON-NOSVE-NEXT: sub w9, w9, w10
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
; NONEON-NOSVE-NEXT: add w9, w9, w10
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
; NONEON-NOSVE-NEXT: mul w9, w9, w8
; NONEON-NOSVE-NEXT: lsr w9, w9, #24
; NONEON-NOSVE-NEXT: stp w9, w11, [sp, #40]
; NONEON-NOSVE-NEXT: ldr w9, [sp, #4]
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
; NONEON-NOSVE-NEXT: sub w9, w9, w10
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
; NONEON-NOSVE-NEXT: add w9, w9, w10
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
; NONEON-NOSVE-NEXT: mul w9, w9, w8
; NONEON-NOSVE-NEXT: lsr w11, w9, #24
; NONEON-NOSVE-NEXT: ldr w9, [sp]
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
; NONEON-NOSVE-NEXT: sub w9, w9, w10
; NONEON-NOSVE-NEXT: lsr w10, w9, #2
; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
; NONEON-NOSVE-NEXT: add w9, w9, w10
; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
; NONEON-NOSVE-NEXT: mul w8, w9, w8
; NONEON-NOSVE-NEXT: lsr w8, w8, #24
; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #32]
; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32]
; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
; NONEON-NOSVE-NEXT: add sp, sp, #64
; NONEON-NOSVE-NEXT: ret
  %op = load <8 x i32>, ptr %a
  %res = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %op)
  store <8 x i32> %res, ptr %a
  ret void
}
|
|
|
|
; Population count of a <1 x i64> value passed and returned in d0.
; With SVE or SME enabled the expected code is one predicated `cnt` on z0.d
; under a vl1 predicate. Without them (NONEON-NOSVE prefix) the expected code
; is the scalar shift/mask/add bit count on x9, finished by a multiply with
; 0x0101010101010101 and a shift right by 56.
define <1 x i64> @ctpop_v1i64(<1 x i64> %op) {
; CHECK-LABEL: ctpop_v1i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: cnt z0.d, p0/m, z0.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: ctpop_v1i64:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #16
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT: fmov x9, d0
; NONEON-NOSVE-NEXT: mov x8, #72340172838076673 // =0x101010101010101
; NONEON-NOSVE-NEXT: lsr x10, x9, #1
; NONEON-NOSVE-NEXT: and x10, x10, #0x5555555555555555
; NONEON-NOSVE-NEXT: sub x9, x9, x10
; NONEON-NOSVE-NEXT: lsr x10, x9, #2
; NONEON-NOSVE-NEXT: and x9, x9, #0x3333333333333333
; NONEON-NOSVE-NEXT: and x10, x10, #0x3333333333333333
; NONEON-NOSVE-NEXT: add x9, x9, x10
; NONEON-NOSVE-NEXT: add x9, x9, x9, lsr #4
; NONEON-NOSVE-NEXT: and x9, x9, #0xf0f0f0f0f0f0f0f
; NONEON-NOSVE-NEXT: mul x8, x9, x8
; NONEON-NOSVE-NEXT: lsr x8, x8, #56
; NONEON-NOSVE-NEXT: str x8, [sp, #8]
; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
  %res = call <1 x i64> @llvm.ctpop.v1i64(<1 x i64> %op)
  ret <1 x i64> %res
}
|
|
|
|
; Population count of a <2 x i64> value passed and returned in q0.
; With SVE or SME enabled the expected code is one predicated `cnt` on z0.d
; under a vl2 predicate. Without them (NONEON-NOSVE prefix) q0 is spilled to
; the stack and each 64-bit lane goes through the scalar shift/mask/add bit
; count, with both results stored as a pair and reloaded into q0.
define <2 x i64> @ctpop_v2i64(<2 x i64> %op) {
; CHECK-LABEL: ctpop_v2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: cnt z0.d, p0/m, z0.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: ctpop_v2i64:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: str q0, [sp, #-32]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT: ldr x9, [sp, #8]
; NONEON-NOSVE-NEXT: mov x8, #72340172838076673 // =0x101010101010101
; NONEON-NOSVE-NEXT: lsr x10, x9, #1
; NONEON-NOSVE-NEXT: and x10, x10, #0x5555555555555555
; NONEON-NOSVE-NEXT: sub x9, x9, x10
; NONEON-NOSVE-NEXT: lsr x10, x9, #2
; NONEON-NOSVE-NEXT: and x9, x9, #0x3333333333333333
; NONEON-NOSVE-NEXT: and x10, x10, #0x3333333333333333
; NONEON-NOSVE-NEXT: add x9, x9, x10
; NONEON-NOSVE-NEXT: add x9, x9, x9, lsr #4
; NONEON-NOSVE-NEXT: and x9, x9, #0xf0f0f0f0f0f0f0f
; NONEON-NOSVE-NEXT: mul x9, x9, x8
; NONEON-NOSVE-NEXT: lsr x11, x9, #56
; NONEON-NOSVE-NEXT: ldr x9, [sp]
; NONEON-NOSVE-NEXT: lsr x10, x9, #1
; NONEON-NOSVE-NEXT: and x10, x10, #0x5555555555555555
; NONEON-NOSVE-NEXT: sub x9, x9, x10
; NONEON-NOSVE-NEXT: lsr x10, x9, #2
; NONEON-NOSVE-NEXT: and x9, x9, #0x3333333333333333
; NONEON-NOSVE-NEXT: and x10, x10, #0x3333333333333333
; NONEON-NOSVE-NEXT: add x9, x9, x10
; NONEON-NOSVE-NEXT: add x9, x9, x9, lsr #4
; NONEON-NOSVE-NEXT: and x9, x9, #0xf0f0f0f0f0f0f0f
; NONEON-NOSVE-NEXT: mul x8, x9, x8
; NONEON-NOSVE-NEXT: lsr x8, x8, #56
; NONEON-NOSVE-NEXT: stp x8, x11, [sp, #16]
; NONEON-NOSVE-NEXT: ldr q0, [sp, #16]
; NONEON-NOSVE-NEXT: add sp, sp, #32
; NONEON-NOSVE-NEXT: ret
  %res = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %op)
  ret <2 x i64> %res
}
|
|
|
|
; Population count of a <4 x i64> vector loaded from and stored back to %a.
; With SVE or SME enabled the expected code loads the two 128-bit halves and
; applies a predicated `cnt` (vl2) to each. Without them (NONEON-NOSVE prefix)
; the halves are copied to the stack and each of the four 64-bit lanes goes
; through the scalar shift/mask/add bit count before the result is reloaded
; as two q registers and stored to [x0].
define void @ctpop_v4i64(ptr %a) {
; CHECK-LABEL: ctpop_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: cnt z0.d, p0/m, z0.d
; CHECK-NEXT: cnt z1.d, p0/m, z1.d
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: ctpop_v4i64:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
; NONEON-NOSVE-NEXT: mov x8, #72340172838076673 // =0x101010101010101
; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64
; NONEON-NOSVE-NEXT: ldr x9, [sp, #24]
; NONEON-NOSVE-NEXT: lsr x10, x9, #1
; NONEON-NOSVE-NEXT: and x10, x10, #0x5555555555555555
; NONEON-NOSVE-NEXT: sub x9, x9, x10
; NONEON-NOSVE-NEXT: lsr x10, x9, #2
; NONEON-NOSVE-NEXT: and x9, x9, #0x3333333333333333
; NONEON-NOSVE-NEXT: and x10, x10, #0x3333333333333333
; NONEON-NOSVE-NEXT: add x9, x9, x10
; NONEON-NOSVE-NEXT: add x9, x9, x9, lsr #4
; NONEON-NOSVE-NEXT: and x9, x9, #0xf0f0f0f0f0f0f0f
; NONEON-NOSVE-NEXT: mul x9, x9, x8
; NONEON-NOSVE-NEXT: lsr x11, x9, #56
; NONEON-NOSVE-NEXT: ldr x9, [sp, #16]
; NONEON-NOSVE-NEXT: lsr x10, x9, #1
; NONEON-NOSVE-NEXT: and x10, x10, #0x5555555555555555
; NONEON-NOSVE-NEXT: sub x9, x9, x10
; NONEON-NOSVE-NEXT: lsr x10, x9, #2
; NONEON-NOSVE-NEXT: and x9, x9, #0x3333333333333333
; NONEON-NOSVE-NEXT: and x10, x10, #0x3333333333333333
; NONEON-NOSVE-NEXT: add x9, x9, x10
; NONEON-NOSVE-NEXT: add x9, x9, x9, lsr #4
; NONEON-NOSVE-NEXT: and x9, x9, #0xf0f0f0f0f0f0f0f
; NONEON-NOSVE-NEXT: mul x9, x9, x8
; NONEON-NOSVE-NEXT: lsr x9, x9, #56
; NONEON-NOSVE-NEXT: stp x9, x11, [sp, #48]
; NONEON-NOSVE-NEXT: ldr x9, [sp, #8]
; NONEON-NOSVE-NEXT: lsr x10, x9, #1
; NONEON-NOSVE-NEXT: and x10, x10, #0x5555555555555555
; NONEON-NOSVE-NEXT: sub x9, x9, x10
; NONEON-NOSVE-NEXT: lsr x10, x9, #2
; NONEON-NOSVE-NEXT: and x9, x9, #0x3333333333333333
; NONEON-NOSVE-NEXT: and x10, x10, #0x3333333333333333
; NONEON-NOSVE-NEXT: add x9, x9, x10
; NONEON-NOSVE-NEXT: add x9, x9, x9, lsr #4
; NONEON-NOSVE-NEXT: and x9, x9, #0xf0f0f0f0f0f0f0f
; NONEON-NOSVE-NEXT: mul x9, x9, x8
; NONEON-NOSVE-NEXT: lsr x11, x9, #56
; NONEON-NOSVE-NEXT: ldr x9, [sp]
; NONEON-NOSVE-NEXT: lsr x10, x9, #1
; NONEON-NOSVE-NEXT: and x10, x10, #0x5555555555555555
; NONEON-NOSVE-NEXT: sub x9, x9, x10
; NONEON-NOSVE-NEXT: lsr x10, x9, #2
; NONEON-NOSVE-NEXT: and x9, x9, #0x3333333333333333
; NONEON-NOSVE-NEXT: and x10, x10, #0x3333333333333333
; NONEON-NOSVE-NEXT: add x9, x9, x10
; NONEON-NOSVE-NEXT: add x9, x9, x9, lsr #4
; NONEON-NOSVE-NEXT: and x9, x9, #0xf0f0f0f0f0f0f0f
; NONEON-NOSVE-NEXT: mul x8, x9, x8
; NONEON-NOSVE-NEXT: lsr x8, x8, #56
; NONEON-NOSVE-NEXT: stp x8, x11, [sp, #32]
; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32]
; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
; NONEON-NOSVE-NEXT: add sp, sp, #64
; NONEON-NOSVE-NEXT: ret
  %op = load <4 x i64>, ptr %a
  %res = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %op)
  store <4 x i64> %res, ptr %a
  ret void
}
|
|
|
|
;
; Count trailing zeros
;
|
|
|
|
; Trailing-zero count of a <4 x i8> value passed and returned in d0.
; Both expected sequences treat the elements as 16-bit lanes and OR in #0x100
; before the bit-reverse + clz pair, so a lane never yields a count above 8.
; With SVE or SME enabled this is done on z0.h under a vl4 predicate; without
; them (NONEON-NOSVE prefix) each halfword is loaded from the stack copy,
; processed in w8 and stored back.
define <4 x i8> @cttz_v4i8(<4 x i8> %op) {
; CHECK-LABEL: cttz_v4i8:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: orr z0.h, z0.h, #0x100
; CHECK-NEXT: rbit z0.h, p0/m, z0.h
; CHECK-NEXT: clz z0.h, p0/m, z0.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: cttz_v4i8:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: str d0, [sp, #-16]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #6]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x100
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: strh w8, [sp, #14]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #4]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x100
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: strh w8, [sp, #12]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #2]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x100
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: strh w8, [sp, #10]
; NONEON-NOSVE-NEXT: ldrh w8, [sp]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x100
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: strh w8, [sp, #8]
; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
  %res = call <4 x i8> @llvm.cttz.v4i8(<4 x i8> %op)
  ret <4 x i8> %res
}
|
|
|
|
; Trailing-zero count of a <8 x i8> value passed and returned in d0.
; With SVE or SME enabled the expected code is a predicated rbit + clz pair
; on z0.b under a vl8 predicate. Without them (NONEON-NOSVE prefix) each byte
; is loaded from the stack copy, ORed with #0x100 so the count cannot exceed
; 8, bit-reversed, passed to clz and stored to the result slot.
define <8 x i8> @cttz_v8i8(<8 x i8> %op) {
; CHECK-LABEL: cttz_v8i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl8
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: rbit z0.b, p0/m, z0.b
; CHECK-NEXT: clz z0.b, p0/m, z0.b
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: cttz_v8i8:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: str d0, [sp, #-16]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #7]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x100
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #15]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #6]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x100
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #14]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #5]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x100
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #13]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #4]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x100
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #12]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #3]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x100
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #11]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #2]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x100
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #10]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #1]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x100
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #9]
; NONEON-NOSVE-NEXT: ldrb w8, [sp]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x100
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #8]
; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
  %res = call <8 x i8> @llvm.cttz.v8i8(<8 x i8> %op)
  ret <8 x i8> %res
}
|
|
|
|
; Trailing-zero count of a <16 x i8> value passed and returned in q0.
; With SVE or SME enabled the expected code is a predicated rbit + clz pair
; on z0.b under a vl16 predicate. Without them (NONEON-NOSVE prefix) each of
; the 16 bytes is loaded from the stack copy, ORed with #0x100 so the count
; cannot exceed 8, bit-reversed, passed to clz and stored 16 bytes higher.
define <16 x i8> @cttz_v16i8(<16 x i8> %op) {
; CHECK-LABEL: cttz_v16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl16
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: rbit z0.b, p0/m, z0.b
; CHECK-NEXT: clz z0.b, p0/m, z0.b
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: cttz_v16i8:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: str q0, [sp, #-32]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #15]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x100
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #31]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #14]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x100
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #30]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #13]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x100
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #29]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #12]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x100
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #28]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #11]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x100
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #27]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #10]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x100
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #26]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #9]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x100
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #25]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #8]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x100
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #24]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #7]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x100
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #23]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #6]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x100
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #22]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #5]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x100
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #21]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #4]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x100
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #20]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #3]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x100
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #19]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #2]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x100
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #18]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #1]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x100
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #17]
; NONEON-NOSVE-NEXT: ldrb w8, [sp]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x100
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #16]
; NONEON-NOSVE-NEXT: ldr q0, [sp, #16]
; NONEON-NOSVE-NEXT: add sp, sp, #32
; NONEON-NOSVE-NEXT: ret
  %res = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %op)
  ret <16 x i8> %res
}
|
|
|
|
; Trailing-zero count of a <32 x i8> vector loaded from and stored back to %a.
; With SVE or SME enabled the expected code loads the two 128-bit halves and
; applies a predicated rbit + clz pair (vl16) to each. Without them
; (NONEON-NOSVE prefix) the halves are copied to the stack and each of the 32
; bytes is ORed with #0x100 so the count cannot exceed 8, bit-reversed, passed
; to clz and stored 32 bytes higher before the result is reloaded and written
; to [x0].
define void @cttz_v32i8(ptr %a) {
; CHECK-LABEL: cttz_v32i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: ptrue p0.b, vl16
; CHECK-NEXT: rbit z0.b, p0/m, z0.b
; CHECK-NEXT: rbit z1.b, p0/m, z1.b
; CHECK-NEXT: clz z0.b, p0/m, z0.b
; CHECK-NEXT: clz z1.b, p0/m, z1.b
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: cttz_v32i8:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #31]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x100
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #63]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #30]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x100
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #62]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #29]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x100
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #61]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #28]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x100
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #60]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #27]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x100
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #59]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #26]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x100
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #58]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #25]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x100
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #57]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #24]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x100
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #56]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #23]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x100
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #55]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x100
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #54]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x100
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #53]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x100
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #52]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #19]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x100
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #51]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #18]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x100
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #50]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x100
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #49]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x100
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #48]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #15]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x100
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #47]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #14]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x100
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #46]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #13]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x100
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #45]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #12]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x100
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #44]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #11]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x100
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #43]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #10]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x100
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #42]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #9]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x100
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #41]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #8]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x100
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #40]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #7]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x100
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #39]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #6]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x100
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #38]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #5]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x100
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #37]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #4]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x100
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #36]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #3]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x100
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #35]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #2]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x100
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #34]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #1]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x100
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #33]
; NONEON-NOSVE-NEXT: ldrb w8, [sp]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x100
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #32]
; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32]
; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
; NONEON-NOSVE-NEXT: add sp, sp, #64
; NONEON-NOSVE-NEXT: ret
  %op = load <32 x i8>, ptr %a
  %res = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %op)
  store <32 x i8> %res, ptr %a
  ret void
}
|
|
|
|
; Trailing-zero count of a <2 x i16> value passed and returned in d0.
; Both expected sequences treat the elements as 32-bit lanes and OR in
; #0x10000 before the bit-reverse + clz pair, so a lane never yields a count
; above 16. With SVE or SME enabled this is done on z0.s under a vl2
; predicate; without them (NONEON-NOSVE prefix) each word is loaded from the
; stack copy and the two results are stored as a pair.
define <2 x i16> @cttz_v2i16(<2 x i16> %op) {
; CHECK-LABEL: cttz_v2i16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ptrue p0.s, vl2
; CHECK-NEXT: orr z0.s, z0.s, #0x10000
; CHECK-NEXT: rbit z0.s, p0/m, z0.s
; CHECK-NEXT: clz z0.s, p0/m, z0.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: cttz_v2i16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: str d0, [sp, #-16]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT: ldr w8, [sp, #4]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x10000
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w9, w8
; NONEON-NOSVE-NEXT: ldr w8, [sp]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x10000
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #8]
; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
  %res = call <2 x i16> @llvm.cttz.v2i16(<2 x i16> %op)
  ret <2 x i16> %res
}
|
|
|
|
; Trailing-zero count of a <4 x i16> value passed and returned in d0.
; With SVE or SME enabled the expected code is a predicated rbit + clz pair
; on z0.h under a vl4 predicate. Without them (NONEON-NOSVE prefix) each
; halfword is loaded from the stack copy, ORed with #0x10000 so the count
; cannot exceed 16, bit-reversed, passed to clz and stored to the result slot.
define <4 x i16> @cttz_v4i16(<4 x i16> %op) {
; CHECK-LABEL: cttz_v4i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: rbit z0.h, p0/m, z0.h
; CHECK-NEXT: clz z0.h, p0/m, z0.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: cttz_v4i16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: str d0, [sp, #-16]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #6]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x10000
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: strh w8, [sp, #14]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #4]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x10000
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: strh w8, [sp, #12]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #2]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x10000
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: strh w8, [sp, #10]
; NONEON-NOSVE-NEXT: ldrh w8, [sp]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x10000
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: strh w8, [sp, #8]
; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
  %res = call <4 x i16> @llvm.cttz.v4i16(<4 x i16> %op)
  ret <4 x i16> %res
}
|
|
|
|
; Trailing-zero count of a <8 x i16> value passed and returned in q0.
; With SVE or SME enabled the expected code is a predicated rbit + clz pair
; on z0.h under a vl8 predicate. Without them (NONEON-NOSVE prefix) each of
; the 8 halfwords is loaded from the stack copy, ORed with #0x10000 so the
; count cannot exceed 16, bit-reversed, passed to clz and stored 16 bytes
; higher.
define <8 x i16> @cttz_v8i16(<8 x i16> %op) {
; CHECK-LABEL: cttz_v8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl8
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: rbit z0.h, p0/m, z0.h
; CHECK-NEXT: clz z0.h, p0/m, z0.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: cttz_v8i16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: str q0, [sp, #-32]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #14]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x10000
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: strh w8, [sp, #30]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #12]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x10000
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: strh w8, [sp, #28]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #10]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x10000
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: strh w8, [sp, #26]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #8]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x10000
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: strh w8, [sp, #24]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #6]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x10000
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: strh w8, [sp, #22]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #4]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x10000
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: strh w8, [sp, #20]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #2]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x10000
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: strh w8, [sp, #18]
; NONEON-NOSVE-NEXT: ldrh w8, [sp]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x10000
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: strh w8, [sp, #16]
; NONEON-NOSVE-NEXT: ldr q0, [sp, #16]
; NONEON-NOSVE-NEXT: add sp, sp, #32
; NONEON-NOSVE-NEXT: ret
  %res = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %op)
  ret <8 x i16> %res
}
|
|
|
|
; Trailing-zero count of a <16 x i16> vector loaded from and stored back to
; %a. With SVE or SME enabled the expected code loads the two 128-bit halves
; and applies a predicated rbit + clz pair (vl8) to each. Without them
; (NONEON-NOSVE prefix) the halves are copied to the stack and each of the 16
; halfwords is ORed with #0x10000 so the count cannot exceed 16, bit-reversed,
; passed to clz and stored 32 bytes higher before the result is reloaded and
; written to [x0].
define void @cttz_v16i16(ptr %a) {
; CHECK-LABEL: cttz_v16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: ptrue p0.h, vl8
; CHECK-NEXT: rbit z0.h, p0/m, z0.h
; CHECK-NEXT: rbit z1.h, p0/m, z1.h
; CHECK-NEXT: clz z0.h, p0/m, z0.h
; CHECK-NEXT: clz z1.h, p0/m, z1.h
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: cttz_v16i16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #30]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x10000
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: strh w8, [sp, #62]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #28]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x10000
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: strh w8, [sp, #60]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #26]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x10000
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: strh w8, [sp, #58]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #24]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x10000
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: strh w8, [sp, #56]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #22]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x10000
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: strh w8, [sp, #54]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x10000
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: strh w8, [sp, #52]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x10000
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: strh w8, [sp, #50]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x10000
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: strh w8, [sp, #48]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #14]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x10000
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: strh w8, [sp, #46]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #12]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x10000
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: strh w8, [sp, #44]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #10]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x10000
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: strh w8, [sp, #42]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #8]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x10000
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: strh w8, [sp, #40]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #6]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x10000
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: strh w8, [sp, #38]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #4]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x10000
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: strh w8, [sp, #36]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #2]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x10000
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: strh w8, [sp, #34]
; NONEON-NOSVE-NEXT: ldrh w8, [sp]
; NONEON-NOSVE-NEXT: orr w8, w8, #0x10000
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: strh w8, [sp, #32]
; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32]
; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
; NONEON-NOSVE-NEXT: add sp, sp, #64
; NONEON-NOSVE-NEXT: ret
  %op = load <16 x i16>, ptr %a
  %res = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %op)
  store <16 x i16> %res, ptr %a
  ret void
}
|
|
|
|
; Trailing-zero count of a <2 x i32> value passed and returned in d0.
; With SVE or SME enabled the expected code is a predicated rbit + clz pair
; on z0.s under a vl2 predicate. Without them (NONEON-NOSVE prefix) each word
; is loaded from the stack copy, bit-reversed and passed to clz, and the two
; results are stored as a pair and reloaded into d0.
define <2 x i32> @cttz_v2i32(<2 x i32> %op) {
; CHECK-LABEL: cttz_v2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl2
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: rbit z0.s, p0/m, z0.s
; CHECK-NEXT: clz z0.s, p0/m, z0.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: cttz_v2i32:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: str d0, [sp, #-16]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT: ldr w8, [sp, #4]
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w9, w8
; NONEON-NOSVE-NEXT: ldr w8, [sp]
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #8]
; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
  %res = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %op)
  ret <2 x i32> %res
}
|
|
|
|
; cttz_v4i32: per-lane count-trailing-zeros of a <4 x i32> argument through
; @llvm.cttz.v4i32; the result vector is returned by value.
; Expected code with SVE/SME: a predicated rbit followed by clz on .s lanes,
; governed by "ptrue p0.s, vl4".
; Expected code without NEON and SVE: q0 is stored to a 32-byte stack frame,
; the four lanes are processed with scalar rbit + clz (two lanes per stp),
; and the result is reloaded into q0 from [sp, #16].
define <4 x i32> @cttz_v4i32(<4 x i32> %op) {
; CHECK-LABEL: cttz_v4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl4
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: rbit z0.s, p0/m, z0.s
; CHECK-NEXT: clz z0.s, p0/m, z0.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: cttz_v4i32:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: str q0, [sp, #-32]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT: ldr w8, [sp, #12]
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w9, w8
; NONEON-NOSVE-NEXT: ldr w8, [sp, #8]
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #4]
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w9, w8
; NONEON-NOSVE-NEXT: ldr w8, [sp]
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #16]
; NONEON-NOSVE-NEXT: ldr q0, [sp, #16]
; NONEON-NOSVE-NEXT: add sp, sp, #32
; NONEON-NOSVE-NEXT: ret
  %res = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %op)
  ret <4 x i32> %res
}
|
|
|
|
; cttz_v8i32: loads a <8 x i32> vector from %a, applies @llvm.cttz.v8i32 to
; it, and stores the result back to the same pointer.
; Expected code with SVE/SME: the 256-bit value is handled as two q-register
; halves (ldp/stp), each run through predicated rbit + clz on .s lanes under
; "ptrue p0.s, vl4".
; Expected code without NEON and SVE: both halves are copied to a 64-byte
; stack frame, all eight lanes are processed with scalar rbit + clz, and the
; results are written back to [x0] via ldp/stp of q0, q1.
define void @cttz_v8i32(ptr %a) {
; CHECK-LABEL: cttz_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: ptrue p0.s, vl4
; CHECK-NEXT: rbit z0.s, p0/m, z0.s
; CHECK-NEXT: rbit z1.s, p0/m, z1.s
; CHECK-NEXT: clz z0.s, p0/m, z0.s
; CHECK-NEXT: clz z1.s, p0/m, z1.s
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: cttz_v8i32:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64
; NONEON-NOSVE-NEXT: ldr w8, [sp, #28]
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w9, w8
; NONEON-NOSVE-NEXT: ldr w8, [sp, #24]
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #56]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #20]
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w9, w8
; NONEON-NOSVE-NEXT: ldr w8, [sp, #16]
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #48]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #12]
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w9, w8
; NONEON-NOSVE-NEXT: ldr w8, [sp, #8]
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #40]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #4]
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w9, w8
; NONEON-NOSVE-NEXT: ldr w8, [sp]
; NONEON-NOSVE-NEXT: rbit w8, w8
; NONEON-NOSVE-NEXT: clz w8, w8
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #32]
; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32]
; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
; NONEON-NOSVE-NEXT: add sp, sp, #64
; NONEON-NOSVE-NEXT: ret
  %op = load <8 x i32>, ptr %a
  %res = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %op)
  store <8 x i32> %res, ptr %a
  ret void
}
|
|
|
|
; cttz_v1i64: count-trailing-zeros of the single lane of a <1 x i64> argument
; through @llvm.cttz.v1i64; the result vector is returned by value.
; Expected code with SVE/SME: a predicated rbit followed by clz on .d lanes,
; governed by "ptrue p0.d, vl1".
; Expected code without NEON and SVE: the lane is moved to x8 with fmov,
; processed with scalar rbit + clz, and passed back into d0 through a stack
; slot at [sp, #8].
define <1 x i64> @cttz_v1i64(<1 x i64> %op) {
; CHECK-LABEL: cttz_v1i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: rbit z0.d, p0/m, z0.d
; CHECK-NEXT: clz z0.d, p0/m, z0.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: cttz_v1i64:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #16
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT: fmov x8, d0
; NONEON-NOSVE-NEXT: rbit x8, x8
; NONEON-NOSVE-NEXT: clz x8, x8
; NONEON-NOSVE-NEXT: str x8, [sp, #8]
; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
  %res = call <1 x i64> @llvm.cttz.v1i64(<1 x i64> %op)
  ret <1 x i64> %res
}
|
|
|
|
; cttz_v2i64: per-lane count-trailing-zeros of a <2 x i64> argument through
; @llvm.cttz.v2i64; the result vector is returned by value.
; Expected code with SVE/SME: a predicated rbit followed by clz on .d lanes,
; governed by "ptrue p0.d, vl2".
; Expected code without NEON and SVE: q0 is stored to a 32-byte stack frame,
; both 64-bit lanes are processed with scalar rbit + clz, and the result is
; reloaded into q0 from [sp, #16].
define <2 x i64> @cttz_v2i64(<2 x i64> %op) {
; CHECK-LABEL: cttz_v2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: rbit z0.d, p0/m, z0.d
; CHECK-NEXT: clz z0.d, p0/m, z0.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: cttz_v2i64:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: str q0, [sp, #-32]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT: ldr x8, [sp, #8]
; NONEON-NOSVE-NEXT: rbit x8, x8
; NONEON-NOSVE-NEXT: clz x9, x8
; NONEON-NOSVE-NEXT: ldr x8, [sp]
; NONEON-NOSVE-NEXT: rbit x8, x8
; NONEON-NOSVE-NEXT: clz x8, x8
; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #16]
; NONEON-NOSVE-NEXT: ldr q0, [sp, #16]
; NONEON-NOSVE-NEXT: add sp, sp, #32
; NONEON-NOSVE-NEXT: ret
  %res = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %op)
  ret <2 x i64> %res
}
|
|
|
|
; cttz_v4i64: loads a <4 x i64> vector from %a, applies @llvm.cttz.v4i64 to
; it, and stores the result back to the same pointer.
; Expected code with SVE/SME: the 256-bit value is handled as two q-register
; halves (ldp/stp), each run through predicated rbit + clz on .d lanes under
; "ptrue p0.d, vl2".
; Expected code without NEON and SVE: both halves are copied to a 64-byte
; stack frame, all four lanes are processed with scalar rbit + clz, and the
; results are written back to [x0] via ldp/stp of q0, q1.
define void @cttz_v4i64(ptr %a) {
; CHECK-LABEL: cttz_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: rbit z0.d, p0/m, z0.d
; CHECK-NEXT: rbit z1.d, p0/m, z1.d
; CHECK-NEXT: clz z0.d, p0/m, z0.d
; CHECK-NEXT: clz z1.d, p0/m, z1.d
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: cttz_v4i64:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64
; NONEON-NOSVE-NEXT: ldr x8, [sp, #24]
; NONEON-NOSVE-NEXT: rbit x8, x8
; NONEON-NOSVE-NEXT: clz x9, x8
; NONEON-NOSVE-NEXT: ldr x8, [sp, #16]
; NONEON-NOSVE-NEXT: rbit x8, x8
; NONEON-NOSVE-NEXT: clz x8, x8
; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #48]
; NONEON-NOSVE-NEXT: ldr x8, [sp, #8]
; NONEON-NOSVE-NEXT: rbit x8, x8
; NONEON-NOSVE-NEXT: clz x9, x8
; NONEON-NOSVE-NEXT: ldr x8, [sp]
; NONEON-NOSVE-NEXT: rbit x8, x8
; NONEON-NOSVE-NEXT: clz x8, x8
; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #32]
; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32]
; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
; NONEON-NOSVE-NEXT: add sp, sp, #64
; NONEON-NOSVE-NEXT: ret
  %op = load <4 x i64>, ptr %a
  %res = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %op)
  store <4 x i64> %res, ptr %a
  ret void
}
|
|
|
|
|
|
; Count-leading-zeros intrinsic declarations, one per fixed-length vector type
; (i8, i16, i32 and i64 elements).
; NOTE(review): these use a one-operand signature; presumably LLVM's bitcode/IR
; auto-upgrade supplies the second (zero-is-poison) operand as false - confirm
; before converting to the two-operand form, and change the calls in step.
declare <4 x i8> @llvm.ctlz.v4i8(<4 x i8>)
declare <8 x i8> @llvm.ctlz.v8i8(<8 x i8>)
declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8>)
declare <32 x i8> @llvm.ctlz.v32i8(<32 x i8>)
declare <2 x i16> @llvm.ctlz.v2i16(<2 x i16>)
declare <4 x i16> @llvm.ctlz.v4i16(<4 x i16>)
declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16>)
declare <16 x i16> @llvm.ctlz.v16i16(<16 x i16>)
declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>)
declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>)
declare <8 x i32> @llvm.ctlz.v8i32(<8 x i32>)
declare <1 x i64> @llvm.ctlz.v1i64(<1 x i64>)
declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>)
declare <4 x i64> @llvm.ctlz.v4i64(<4 x i64>)
|
|
|
|
; Population-count intrinsic declarations, one per fixed-length vector type
; (i8, i16, i32 and i64 elements).
declare <4 x i8> @llvm.ctpop.v4i8(<4 x i8>)
declare <8 x i8> @llvm.ctpop.v8i8(<8 x i8>)
declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8>)
declare <32 x i8> @llvm.ctpop.v32i8(<32 x i8>)
declare <2 x i16> @llvm.ctpop.v2i16(<2 x i16>)
declare <4 x i16> @llvm.ctpop.v4i16(<4 x i16>)
declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16>)
declare <16 x i16> @llvm.ctpop.v16i16(<16 x i16>)
declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32>)
declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>)
declare <8 x i32> @llvm.ctpop.v8i32(<8 x i32>)
declare <1 x i64> @llvm.ctpop.v1i64(<1 x i64>)
declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>)
declare <4 x i64> @llvm.ctpop.v4i64(<4 x i64>)
|
|
|
|
; Count-trailing-zeros intrinsic declarations, one per fixed-length vector
; type (i8, i16, i32 and i64 elements).
; NOTE(review): these use a one-operand signature; presumably LLVM's bitcode/IR
; auto-upgrade supplies the second (zero-is-poison) operand as false - confirm
; before converting to the two-operand form, and change the calls in step.
declare <4 x i8> @llvm.cttz.v4i8(<4 x i8>)
declare <8 x i8> @llvm.cttz.v8i8(<8 x i8>)
declare <16 x i8> @llvm.cttz.v16i8(<16 x i8>)
declare <32 x i8> @llvm.cttz.v32i8(<32 x i8>)
declare <2 x i16> @llvm.cttz.v2i16(<2 x i16>)
declare <4 x i16> @llvm.cttz.v4i16(<4 x i16>)
declare <8 x i16> @llvm.cttz.v8i16(<8 x i16>)
declare <16 x i16> @llvm.cttz.v16i16(<16 x i16>)
declare <2 x i32> @llvm.cttz.v2i32(<2 x i32>)
declare <4 x i32> @llvm.cttz.v4i32(<4 x i32>)
declare <8 x i32> @llvm.cttz.v8i32(<8 x i32>)
declare <1 x i64> @llvm.cttz.v1i64(<1 x i64>)
declare <2 x i64> @llvm.cttz.v2i64(<2 x i64>)
declare <4 x i64> @llvm.cttz.v4i64(<4 x i64>)