
calculateByteProvider only cares about scalars or a single element within a vector. For the latter there is the VectorIndex parameter to identify the element. All other properties, and specifically Index, are related to the underlying scalar type and thus when taking the size of a type it's the scalar size that matters. Fixes https://github.com/llvm/llvm-project/issues/148387
895 lines
27 KiB
LLVM
895 lines
27 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc < %s -mtriple=arm64-unknown | FileCheck %s
|
|
|
|
; ptr p; // p is 1 byte aligned
|
|
; (i32) p[0] | ((i32) p[1] << 8) | ((i32) p[2] << 16) | ((i32) p[3] << 24)
|
|
; Builds an i32 from four consecutive i8 loads at %arg+0..+3 (every load is
; `align 1`), zero-extending byte k and shifting it left by 8*k before OR-ing.
; The assertions below expect the four loads to be merged into one 32-bit `ldr`.
define i32 @load_i32_by_i8_unaligned(ptr %arg) {
|
|
; CHECK-LABEL: load_i32_by_i8_unaligned:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ldr w0, [x0]
|
|
; CHECK-NEXT: ret
|
|
%tmp2 = load i8, ptr %arg, align 1
|
|
%tmp3 = zext i8 %tmp2 to i32
|
|
%tmp4 = getelementptr inbounds i8, ptr %arg, i32 1
|
|
%tmp5 = load i8, ptr %tmp4, align 1
|
|
%tmp6 = zext i8 %tmp5 to i32
|
|
%tmp7 = shl nuw nsw i32 %tmp6, 8
|
|
%tmp8 = or i32 %tmp7, %tmp3
|
|
%tmp9 = getelementptr inbounds i8, ptr %arg, i32 2
|
|
%tmp10 = load i8, ptr %tmp9, align 1
|
|
%tmp11 = zext i8 %tmp10 to i32
|
|
%tmp12 = shl nuw nsw i32 %tmp11, 16
|
|
%tmp13 = or i32 %tmp8, %tmp12
|
|
%tmp14 = getelementptr inbounds i8, ptr %arg, i32 3
|
|
%tmp15 = load i8, ptr %tmp14, align 1
|
|
%tmp16 = zext i8 %tmp15 to i32
|
|
%tmp17 = shl nuw nsw i32 %tmp16, 24
|
|
%tmp18 = or i32 %tmp13, %tmp17
|
|
ret i32 %tmp18
|
|
}
|
|
|
|
; ptr p; // p is 4 byte aligned
|
|
; (i32) p[0] | ((i32) p[1] << 8) | ((i32) p[2] << 16) | ((i32) p[3] << 24)
|
|
; Same byte-assembly pattern as the unaligned variant (byte k shifted left by
; 8*k), except that the first i8 load is marked `align 4`.
; The assertions below expect a single 32-bit `ldr`.
define i32 @load_i32_by_i8_aligned(ptr %arg) {
|
|
; CHECK-LABEL: load_i32_by_i8_aligned:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ldr w0, [x0]
|
|
; CHECK-NEXT: ret
|
|
%tmp2 = load i8, ptr %arg, align 4
|
|
%tmp3 = zext i8 %tmp2 to i32
|
|
%tmp4 = getelementptr inbounds i8, ptr %arg, i32 1
|
|
%tmp5 = load i8, ptr %tmp4, align 1
|
|
%tmp6 = zext i8 %tmp5 to i32
|
|
%tmp7 = shl nuw nsw i32 %tmp6, 8
|
|
%tmp8 = or i32 %tmp7, %tmp3
|
|
%tmp9 = getelementptr inbounds i8, ptr %arg, i32 2
|
|
%tmp10 = load i8, ptr %tmp9, align 1
|
|
%tmp11 = zext i8 %tmp10 to i32
|
|
%tmp12 = shl nuw nsw i32 %tmp11, 16
|
|
%tmp13 = or i32 %tmp8, %tmp12
|
|
%tmp14 = getelementptr inbounds i8, ptr %arg, i32 3
|
|
%tmp15 = load i8, ptr %tmp14, align 1
|
|
%tmp16 = zext i8 %tmp15 to i32
|
|
%tmp17 = shl nuw nsw i32 %tmp16, 24
|
|
%tmp18 = or i32 %tmp13, %tmp17
|
|
ret i32 %tmp18
|
|
}
|
|
|
|
; ptr p; // p is 4 byte aligned
|
|
; ((i32) p[0] << 24) | ((i32) p[1] << 16) | ((i32) p[2] << 8) | (i32) p[3]
|
|
; Builds an i32 from four consecutive i8 loads with the byte order reversed:
; the bytes at %arg+0, +1, +2, +3 are shifted left by 24, 16, 8 and 0 bits.
; The assertions below expect one 32-bit `ldr` followed by a `rev`.
define i32 @load_i32_by_i8_bswap(ptr %arg) {
|
|
; CHECK-LABEL: load_i32_by_i8_bswap:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ldr w8, [x0]
|
|
; CHECK-NEXT: rev w0, w8
|
|
; CHECK-NEXT: ret
|
|
%tmp1 = load i8, ptr %arg, align 4
|
|
%tmp2 = zext i8 %tmp1 to i32
|
|
%tmp3 = shl nuw nsw i32 %tmp2, 24
|
|
%tmp4 = getelementptr inbounds i8, ptr %arg, i32 1
|
|
%tmp5 = load i8, ptr %tmp4, align 1
|
|
%tmp6 = zext i8 %tmp5 to i32
|
|
%tmp7 = shl nuw nsw i32 %tmp6, 16
|
|
%tmp8 = or i32 %tmp7, %tmp3
|
|
%tmp9 = getelementptr inbounds i8, ptr %arg, i32 2
|
|
%tmp10 = load i8, ptr %tmp9, align 1
|
|
%tmp11 = zext i8 %tmp10 to i32
|
|
%tmp12 = shl nuw nsw i32 %tmp11, 8
|
|
%tmp13 = or i32 %tmp8, %tmp12
|
|
%tmp14 = getelementptr inbounds i8, ptr %arg, i32 3
|
|
%tmp15 = load i8, ptr %tmp14, align 1
|
|
%tmp16 = zext i8 %tmp15 to i32
|
|
%tmp17 = or i32 %tmp13, %tmp16
|
|
ret i32 %tmp17
|
|
}
|
|
|
|
; ptr p; // p is 8 byte aligned
|
|
; (i64) p[0] | ((i64) p[1] << 8) | ((i64) p[2] << 16) | ((i64) p[3] << 24) | ((i64) p[4] << 32) | ((i64) p[5] << 40) | ((i64) p[6] << 48) | ((i64) p[7] << 56)
|
|
; Builds an i64 from eight consecutive i8 loads at %arg+0..+7 (the first load is
; `align 8`), zero-extending byte k and shifting it left by 8*k before OR-ing.
; The assertions below expect a single 64-bit `ldr`.
define i64 @load_i64_by_i8(ptr %arg) {
|
|
; CHECK-LABEL: load_i64_by_i8:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ldr x0, [x0]
|
|
; CHECK-NEXT: ret
|
|
%tmp1 = load i8, ptr %arg, align 8
|
|
%tmp2 = zext i8 %tmp1 to i64
|
|
%tmp3 = getelementptr inbounds i8, ptr %arg, i64 1
|
|
%tmp4 = load i8, ptr %tmp3, align 1
|
|
%tmp5 = zext i8 %tmp4 to i64
|
|
%tmp6 = shl nuw nsw i64 %tmp5, 8
|
|
%tmp7 = or i64 %tmp6, %tmp2
|
|
%tmp8 = getelementptr inbounds i8, ptr %arg, i64 2
|
|
%tmp9 = load i8, ptr %tmp8, align 1
|
|
%tmp10 = zext i8 %tmp9 to i64
|
|
%tmp11 = shl nuw nsw i64 %tmp10, 16
|
|
%tmp12 = or i64 %tmp7, %tmp11
|
|
%tmp13 = getelementptr inbounds i8, ptr %arg, i64 3
|
|
%tmp14 = load i8, ptr %tmp13, align 1
|
|
%tmp15 = zext i8 %tmp14 to i64
|
|
%tmp16 = shl nuw nsw i64 %tmp15, 24
|
|
%tmp17 = or i64 %tmp12, %tmp16
|
|
%tmp18 = getelementptr inbounds i8, ptr %arg, i64 4
|
|
%tmp19 = load i8, ptr %tmp18, align 1
|
|
%tmp20 = zext i8 %tmp19 to i64
|
|
%tmp21 = shl nuw nsw i64 %tmp20, 32
|
|
%tmp22 = or i64 %tmp17, %tmp21
|
|
%tmp23 = getelementptr inbounds i8, ptr %arg, i64 5
|
|
%tmp24 = load i8, ptr %tmp23, align 1
|
|
%tmp25 = zext i8 %tmp24 to i64
|
|
%tmp26 = shl nuw nsw i64 %tmp25, 40
|
|
%tmp27 = or i64 %tmp22, %tmp26
|
|
%tmp28 = getelementptr inbounds i8, ptr %arg, i64 6
|
|
%tmp29 = load i8, ptr %tmp28, align 1
|
|
%tmp30 = zext i8 %tmp29 to i64
|
|
%tmp31 = shl nuw nsw i64 %tmp30, 48
|
|
%tmp32 = or i64 %tmp27, %tmp31
|
|
%tmp33 = getelementptr inbounds i8, ptr %arg, i64 7
|
|
%tmp34 = load i8, ptr %tmp33, align 1
|
|
%tmp35 = zext i8 %tmp34 to i64
|
|
%tmp36 = shl nuw i64 %tmp35, 56
|
|
%tmp37 = or i64 %tmp32, %tmp36
|
|
ret i64 %tmp37
|
|
}
|
|
|
|
; ptr p; // p is 8 byte aligned
|
|
; ((i64) p[0] << 56) | ((i64) p[1] << 48) | ((i64) p[2] << 40) | ((i64) p[3] << 32) | ((i64) p[4] << 24) | ((i64) p[5] << 16) | ((i64) p[6] << 8) | (i64) p[7]
|
|
; Builds an i64 from eight consecutive i8 loads with the byte order reversed:
; the byte at %arg+k is shifted left by 56 - 8*k bits before OR-ing.
; The assertions below expect one 64-bit `ldr` followed by a `rev`.
define i64 @load_i64_by_i8_bswap(ptr %arg) {
|
|
; CHECK-LABEL: load_i64_by_i8_bswap:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ldr x8, [x0]
|
|
; CHECK-NEXT: rev x0, x8
|
|
; CHECK-NEXT: ret
|
|
%tmp1 = load i8, ptr %arg, align 8
|
|
%tmp2 = zext i8 %tmp1 to i64
|
|
%tmp3 = shl nuw i64 %tmp2, 56
|
|
%tmp4 = getelementptr inbounds i8, ptr %arg, i64 1
|
|
%tmp5 = load i8, ptr %tmp4, align 1
|
|
%tmp6 = zext i8 %tmp5 to i64
|
|
%tmp7 = shl nuw nsw i64 %tmp6, 48
|
|
%tmp8 = or i64 %tmp7, %tmp3
|
|
%tmp9 = getelementptr inbounds i8, ptr %arg, i64 2
|
|
%tmp10 = load i8, ptr %tmp9, align 1
|
|
%tmp11 = zext i8 %tmp10 to i64
|
|
%tmp12 = shl nuw nsw i64 %tmp11, 40
|
|
%tmp13 = or i64 %tmp8, %tmp12
|
|
%tmp14 = getelementptr inbounds i8, ptr %arg, i64 3
|
|
%tmp15 = load i8, ptr %tmp14, align 1
|
|
%tmp16 = zext i8 %tmp15 to i64
|
|
%tmp17 = shl nuw nsw i64 %tmp16, 32
|
|
%tmp18 = or i64 %tmp13, %tmp17
|
|
%tmp19 = getelementptr inbounds i8, ptr %arg, i64 4
|
|
%tmp20 = load i8, ptr %tmp19, align 1
|
|
%tmp21 = zext i8 %tmp20 to i64
|
|
%tmp22 = shl nuw nsw i64 %tmp21, 24
|
|
%tmp23 = or i64 %tmp18, %tmp22
|
|
%tmp24 = getelementptr inbounds i8, ptr %arg, i64 5
|
|
%tmp25 = load i8, ptr %tmp24, align 1
|
|
%tmp26 = zext i8 %tmp25 to i64
|
|
%tmp27 = shl nuw nsw i64 %tmp26, 16
|
|
%tmp28 = or i64 %tmp23, %tmp27
|
|
%tmp29 = getelementptr inbounds i8, ptr %arg, i64 6
|
|
%tmp30 = load i8, ptr %tmp29, align 1
|
|
%tmp31 = zext i8 %tmp30 to i64
|
|
%tmp32 = shl nuw nsw i64 %tmp31, 8
|
|
%tmp33 = or i64 %tmp28, %tmp32
|
|
%tmp34 = getelementptr inbounds i8, ptr %arg, i64 7
|
|
%tmp35 = load i8, ptr %tmp34, align 1
|
|
%tmp36 = zext i8 %tmp35 to i64
|
|
%tmp37 = or i64 %tmp33, %tmp36
|
|
ret i64 %tmp37
|
|
}
|
|
|
|
; ptr p; // p[1] is 4 byte aligned
|
|
; (i32) p[1] | ((i32) p[2] << 8) | ((i32) p[3] << 16) | ((i32) p[4] << 24)
|
|
; Builds an i32 from the four bytes at %arg+1..+4, with the byte at %arg+1 in
; the low position and each following byte shifted a further 8 bits left.
; The assertions below expect a single `ldur` from [x0, #1].
define i32 @load_i32_by_i8_nonzero_offset(ptr %arg) {
|
|
; CHECK-LABEL: load_i32_by_i8_nonzero_offset:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ldur w0, [x0, #1]
|
|
; CHECK-NEXT: ret
|
|
|
|
%tmp1 = getelementptr inbounds i8, ptr %arg, i32 1
|
|
%tmp2 = load i8, ptr %tmp1, align 4
|
|
%tmp3 = zext i8 %tmp2 to i32
|
|
%tmp4 = getelementptr inbounds i8, ptr %arg, i32 2
|
|
%tmp5 = load i8, ptr %tmp4, align 1
|
|
%tmp6 = zext i8 %tmp5 to i32
|
|
%tmp7 = shl nuw nsw i32 %tmp6, 8
|
|
%tmp8 = or i32 %tmp7, %tmp3
|
|
%tmp9 = getelementptr inbounds i8, ptr %arg, i32 3
|
|
%tmp10 = load i8, ptr %tmp9, align 1
|
|
%tmp11 = zext i8 %tmp10 to i32
|
|
%tmp12 = shl nuw nsw i32 %tmp11, 16
|
|
%tmp13 = or i32 %tmp8, %tmp12
|
|
%tmp14 = getelementptr inbounds i8, ptr %arg, i32 4
|
|
%tmp15 = load i8, ptr %tmp14, align 1
|
|
%tmp16 = zext i8 %tmp15 to i32
|
|
%tmp17 = shl nuw nsw i32 %tmp16, 24
|
|
%tmp18 = or i32 %tmp13, %tmp17
|
|
ret i32 %tmp18
|
|
}
|
|
|
|
; ptr p; // p[-4] is 4 byte aligned
|
|
; (i32) p[-4] | ((i32) p[-3] << 8) | ((i32) p[-2] << 16) | ((i32) p[-1] << 24)
|
|
; Builds an i32 from the four bytes at %arg-4..-1, with the byte at %arg-4 in
; the low position and each following byte shifted a further 8 bits left.
; The assertions below expect a single `ldur` from [x0, #-4].
define i32 @load_i32_by_i8_neg_offset(ptr %arg) {
|
|
; CHECK-LABEL: load_i32_by_i8_neg_offset:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ldur w0, [x0, #-4]
|
|
; CHECK-NEXT: ret
|
|
|
|
%tmp1 = getelementptr inbounds i8, ptr %arg, i32 -4
|
|
%tmp2 = load i8, ptr %tmp1, align 4
|
|
%tmp3 = zext i8 %tmp2 to i32
|
|
%tmp4 = getelementptr inbounds i8, ptr %arg, i32 -3
|
|
%tmp5 = load i8, ptr %tmp4, align 1
|
|
%tmp6 = zext i8 %tmp5 to i32
|
|
%tmp7 = shl nuw nsw i32 %tmp6, 8
|
|
%tmp8 = or i32 %tmp7, %tmp3
|
|
%tmp9 = getelementptr inbounds i8, ptr %arg, i32 -2
|
|
%tmp10 = load i8, ptr %tmp9, align 1
|
|
%tmp11 = zext i8 %tmp10 to i32
|
|
%tmp12 = shl nuw nsw i32 %tmp11, 16
|
|
%tmp13 = or i32 %tmp8, %tmp12
|
|
%tmp14 = getelementptr inbounds i8, ptr %arg, i32 -1
|
|
%tmp15 = load i8, ptr %tmp14, align 1
|
|
%tmp16 = zext i8 %tmp15 to i32
|
|
%tmp17 = shl nuw nsw i32 %tmp16, 24
|
|
%tmp18 = or i32 %tmp13, %tmp17
|
|
ret i32 %tmp18
|
|
}
|
|
|
|
; ptr p; // p[1] is 4 byte aligned
|
|
; (i32) p[4] | ((i32) p[3] << 8) | ((i32) p[2] << 16) | ((i32) p[1] << 24)
|
|
; Byte-reversed variant of the nonzero-offset pattern: the byte at %arg+4 is
; left unshifted and the bytes at %arg+3, +2, +1 are shifted by 8, 16, 24 bits.
; The assertions below expect an `ldur` from [x0, #1] followed by a `rev`.
define i32 @load_i32_by_i8_nonzero_offset_bswap(ptr %arg) {
|
|
; CHECK-LABEL: load_i32_by_i8_nonzero_offset_bswap:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ldur w8, [x0, #1]
|
|
; CHECK-NEXT: rev w0, w8
|
|
; CHECK-NEXT: ret
|
|
|
|
%tmp1 = getelementptr inbounds i8, ptr %arg, i32 4
|
|
%tmp2 = load i8, ptr %tmp1, align 1
|
|
%tmp3 = zext i8 %tmp2 to i32
|
|
%tmp4 = getelementptr inbounds i8, ptr %arg, i32 3
|
|
%tmp5 = load i8, ptr %tmp4, align 1
|
|
%tmp6 = zext i8 %tmp5 to i32
|
|
%tmp7 = shl nuw nsw i32 %tmp6, 8
|
|
%tmp8 = or i32 %tmp7, %tmp3
|
|
%tmp9 = getelementptr inbounds i8, ptr %arg, i32 2
|
|
%tmp10 = load i8, ptr %tmp9, align 1
|
|
%tmp11 = zext i8 %tmp10 to i32
|
|
%tmp12 = shl nuw nsw i32 %tmp11, 16
|
|
%tmp13 = or i32 %tmp8, %tmp12
|
|
%tmp14 = getelementptr inbounds i8, ptr %arg, i32 1
|
|
%tmp15 = load i8, ptr %tmp14, align 4
|
|
%tmp16 = zext i8 %tmp15 to i32
|
|
%tmp17 = shl nuw nsw i32 %tmp16, 24
|
|
%tmp18 = or i32 %tmp13, %tmp17
|
|
ret i32 %tmp18
|
|
}
|
|
|
|
; ptr p; // p[-4] is 4 byte aligned
|
|
; (i32) p[-1] | ((i32) p[-2] << 8) | ((i32) p[-3] << 16) | ((i32) p[-4] << 24)
|
|
; Byte-reversed variant of the negative-offset pattern: the byte at %arg-1 is
; left unshifted and the bytes at %arg-2, -3, -4 are shifted by 8, 16, 24 bits.
; The assertions below expect an `ldur` from [x0, #-4] followed by a `rev`.
define i32 @load_i32_by_i8_neg_offset_bswap(ptr %arg) {
|
|
; CHECK-LABEL: load_i32_by_i8_neg_offset_bswap:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ldur w8, [x0, #-4]
|
|
; CHECK-NEXT: rev w0, w8
|
|
; CHECK-NEXT: ret
|
|
|
|
%tmp1 = getelementptr inbounds i8, ptr %arg, i32 -1
|
|
%tmp2 = load i8, ptr %tmp1, align 1
|
|
%tmp3 = zext i8 %tmp2 to i32
|
|
%tmp4 = getelementptr inbounds i8, ptr %arg, i32 -2
|
|
%tmp5 = load i8, ptr %tmp4, align 1
|
|
%tmp6 = zext i8 %tmp5 to i32
|
|
%tmp7 = shl nuw nsw i32 %tmp6, 8
|
|
%tmp8 = or i32 %tmp7, %tmp3
|
|
%tmp9 = getelementptr inbounds i8, ptr %arg, i32 -3
|
|
%tmp10 = load i8, ptr %tmp9, align 1
|
|
%tmp11 = zext i8 %tmp10 to i32
|
|
%tmp12 = shl nuw nsw i32 %tmp11, 16
|
|
%tmp13 = or i32 %tmp8, %tmp12
|
|
%tmp14 = getelementptr inbounds i8, ptr %arg, i32 -4
|
|
%tmp15 = load i8, ptr %tmp14, align 4
|
|
%tmp16 = zext i8 %tmp15 to i32
|
|
%tmp17 = shl nuw nsw i32 %tmp16, 24
|
|
%tmp18 = or i32 %tmp13, %tmp17
|
|
ret i32 %tmp18
|
|
}
|
|
|
|
; Declaration of the 16-bit byte-swap intrinsic called by @load_i32_by_bswap_i16.
declare i16 @llvm.bswap.i16(i16)
|
|
|
|
; ptr p; // p is 4 byte aligned
|
|
; (i32) bswap(p[1]) | ((i32) bswap(p[0]) << 16)
|
|
; Loads two adjacent i16 values, byte-swaps each with @llvm.bswap.i16 and
; zero-extends them; the swapped first half is shifted left by 16 and OR-ed with
; the swapped second half.
; The assertions below expect one 32-bit `ldr` followed by a `rev`.
define i32 @load_i32_by_bswap_i16(ptr %arg) {
|
|
; CHECK-LABEL: load_i32_by_bswap_i16:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ldr w8, [x0]
|
|
; CHECK-NEXT: rev w0, w8
|
|
; CHECK-NEXT: ret
|
|
|
|
%tmp1 = load i16, ptr %arg, align 4
|
|
%tmp11 = call i16 @llvm.bswap.i16(i16 %tmp1)
|
|
%tmp2 = zext i16 %tmp11 to i32
|
|
%tmp3 = getelementptr inbounds i16, ptr %arg, i32 1
|
|
%tmp4 = load i16, ptr %tmp3, align 1
|
|
%tmp41 = call i16 @llvm.bswap.i16(i16 %tmp4)
|
|
%tmp5 = zext i16 %tmp41 to i32
|
|
%tmp6 = shl nuw nsw i32 %tmp2, 16
|
|
%tmp7 = or i32 %tmp6, %tmp5
|
|
ret i32 %tmp7
|
|
}
|
|
|
|
; ptr p; // p is 4 byte aligned
|
|
; (i32) p[0] | ((sext(p[1]) to i32) << 16)
|
|
; Loads two adjacent i16 values: the first is zero-extended, the second is
; sign-extended to i32 and then shifted left by 16 before the two are OR-ed.
; The assertions below expect a single 32-bit `ldr`.
define i32 @load_i32_by_sext_i16(ptr %arg) {
|
|
; CHECK-LABEL: load_i32_by_sext_i16:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ldr w0, [x0]
|
|
; CHECK-NEXT: ret
|
|
%tmp1 = load i16, ptr %arg, align 4
|
|
%tmp2 = zext i16 %tmp1 to i32
|
|
%tmp3 = getelementptr inbounds i16, ptr %arg, i32 1
|
|
%tmp4 = load i16, ptr %tmp3, align 1
|
|
%tmp5 = sext i16 %tmp4 to i32
|
|
%tmp6 = shl nuw nsw i32 %tmp5, 16
|
|
%tmp7 = or i32 %tmp6, %tmp2
|
|
ret i32 %tmp7
|
|
}
|
|
|
|
; ptr arg; i32 i;
|
|
; p = arg + 12;
|
|
; (i32) p[i] | ((i32) p[i + 1] << 8) | ((i32) p[i + 2] << 16) | ((i32) p[i + 3] << 24)
|
|
; Builds an i32 from four bytes addressed as (%arg + 12) + zext(%i + k) for
; k = 0..3; the indices %i+1..%i+3 are computed as i32 adds (`nuw nsw`) and
; zero-extended to i64 before indexing. Byte k is shifted left by 8*k.
; The assertions below expect one `add` (uxtw-extended index) and one `ldr`
; with immediate offset #12.
define i32 @load_i32_by_i8_base_offset_index(ptr %arg, i32 %i) {
|
|
; CHECK-LABEL: load_i32_by_i8_base_offset_index:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: add x8, x0, w1, uxtw
|
|
; CHECK-NEXT: ldr w0, [x8, #12]
|
|
; CHECK-NEXT: ret
|
|
%tmp = add nuw nsw i32 %i, 3
|
|
%tmp2 = add nuw nsw i32 %i, 2
|
|
%tmp3 = add nuw nsw i32 %i, 1
|
|
%tmp4 = getelementptr inbounds i8, ptr %arg, i64 12
|
|
%tmp5 = zext i32 %i to i64
|
|
%tmp6 = getelementptr inbounds i8, ptr %tmp4, i64 %tmp5
|
|
%tmp7 = load i8, ptr %tmp6, align 4
|
|
%tmp8 = zext i8 %tmp7 to i32
|
|
%tmp9 = zext i32 %tmp3 to i64
|
|
%tmp10 = getelementptr inbounds i8, ptr %tmp4, i64 %tmp9
|
|
%tmp11 = load i8, ptr %tmp10, align 1
|
|
%tmp12 = zext i8 %tmp11 to i32
|
|
%tmp13 = shl nuw nsw i32 %tmp12, 8
|
|
%tmp14 = or i32 %tmp13, %tmp8
|
|
%tmp15 = zext i32 %tmp2 to i64
|
|
%tmp16 = getelementptr inbounds i8, ptr %tmp4, i64 %tmp15
|
|
%tmp17 = load i8, ptr %tmp16, align 1
|
|
%tmp18 = zext i8 %tmp17 to i32
|
|
%tmp19 = shl nuw nsw i32 %tmp18, 16
|
|
%tmp20 = or i32 %tmp14, %tmp19
|
|
%tmp21 = zext i32 %tmp to i64
|
|
%tmp22 = getelementptr inbounds i8, ptr %tmp4, i64 %tmp21
|
|
%tmp23 = load i8, ptr %tmp22, align 1
|
|
%tmp24 = zext i8 %tmp23 to i32
|
|
%tmp25 = shl nuw i32 %tmp24, 24
|
|
%tmp26 = or i32 %tmp20, %tmp25
|
|
ret i32 %tmp26
|
|
}
|
|
|
|
; ptr arg; i32 i;
|
|
; p = arg + 12;
|
|
; (i32) p[i + 1] | ((i32) p[i + 2] << 8) | ((i32) p[i + 3] << 16) | ((i32) p[i + 4] << 24)
|
|
; Like @load_i32_by_i8_base_offset_index, but the four bytes are addressed as
; (%arg + 12) + zext(%i + k) for k = 1..4, so the lowest byte sits one past the
; base-plus-index address. The byte at index %i+1 is unshifted; the following
; bytes are shifted by 8, 16 and 24 bits.
; The assertions below expect one `add` (uxtw-extended index) and one `ldur`
; with immediate offset #13.
define i32 @load_i32_by_i8_base_offset_index_2(ptr %arg, i32 %i) {
|
|
; CHECK-LABEL: load_i32_by_i8_base_offset_index_2:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: add x8, x0, w1, uxtw
|
|
; CHECK-NEXT: ldur w0, [x8, #13]
|
|
; CHECK-NEXT: ret
|
|
%tmp = add nuw nsw i32 %i, 4
|
|
%tmp2 = add nuw nsw i32 %i, 3
|
|
%tmp3 = add nuw nsw i32 %i, 2
|
|
%tmp4 = getelementptr inbounds i8, ptr %arg, i64 12
|
|
%tmp5 = add nuw nsw i32 %i, 1
|
|
%tmp27 = zext i32 %tmp5 to i64
|
|
%tmp28 = getelementptr inbounds i8, ptr %tmp4, i64 %tmp27
|
|
%tmp29 = load i8, ptr %tmp28, align 4
|
|
%tmp30 = zext i8 %tmp29 to i32
|
|
%tmp31 = zext i32 %tmp3 to i64
|
|
%tmp32 = getelementptr inbounds i8, ptr %tmp4, i64 %tmp31
|
|
%tmp33 = load i8, ptr %tmp32, align 1
|
|
%tmp34 = zext i8 %tmp33 to i32
|
|
%tmp35 = shl nuw nsw i32 %tmp34, 8
|
|
%tmp36 = or i32 %tmp35, %tmp30
|
|
%tmp37 = zext i32 %tmp2 to i64
|
|
%tmp38 = getelementptr inbounds i8, ptr %tmp4, i64 %tmp37
|
|
%tmp39 = load i8, ptr %tmp38, align 1
|
|
%tmp40 = zext i8 %tmp39 to i32
|
|
%tmp41 = shl nuw nsw i32 %tmp40, 16
|
|
%tmp42 = or i32 %tmp36, %tmp41
|
|
%tmp43 = zext i32 %tmp to i64
|
|
%tmp44 = getelementptr inbounds i8, ptr %tmp4, i64 %tmp43
|
|
%tmp45 = load i8, ptr %tmp44, align 1
|
|
%tmp46 = zext i8 %tmp45 to i32
|
|
%tmp47 = shl nuw i32 %tmp46, 24
|
|
%tmp48 = or i32 %tmp42, %tmp47
|
|
ret i32 %tmp48
|
|
}
|
|
|
|
; ptr p; // p is 2 byte aligned
|
|
; (i32) p[0] | ((i32) p[1] << 8)
|
|
; Builds an i32 from only two bytes: the byte at %arg+0 unshifted OR-ed with the
; byte at %arg+1 shifted left by 8, so the upper 16 bits of the result are zero.
; The assertions below expect a single halfword load (`ldrh`).
define i32 @zext_load_i32_by_i8(ptr %arg) {
|
|
; CHECK-LABEL: zext_load_i32_by_i8:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ldrh w0, [x0]
|
|
; CHECK-NEXT: ret
|
|
|
|
%tmp2 = load i8, ptr %arg, align 2
|
|
%tmp3 = zext i8 %tmp2 to i32
|
|
%tmp4 = getelementptr inbounds i8, ptr %arg, i32 1
|
|
%tmp5 = load i8, ptr %tmp4, align 1
|
|
%tmp6 = zext i8 %tmp5 to i32
|
|
%tmp7 = shl nuw nsw i32 %tmp6, 8
|
|
%tmp8 = or i32 %tmp7, %tmp3
|
|
ret i32 %tmp8
|
|
}
|
|
|
|
; ptr p; // p is 2 byte aligned
|
|
; ((i32) p[0] << 8) | ((i32) p[1] << 16)
|
|
; Two-byte pattern placed one byte up: the byte at %arg+0 is shifted left by 8
; and the byte at %arg+1 by 16, leaving the low 8 bits of the result zero.
; The assertions below show the loads are NOT merged here: two `ldrb` loads are
; combined with `lsl` and a shifted `orr`.
define i32 @zext_load_i32_by_i8_shl_8(ptr %arg) {
|
|
; CHECK-LABEL: zext_load_i32_by_i8_shl_8:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ldrb w8, [x0]
|
|
; CHECK-NEXT: ldrb w9, [x0, #1]
|
|
; CHECK-NEXT: lsl w8, w8, #8
|
|
; CHECK-NEXT: orr w0, w8, w9, lsl #16
|
|
; CHECK-NEXT: ret
|
|
|
|
%tmp2 = load i8, ptr %arg, align 2
|
|
%tmp3 = zext i8 %tmp2 to i32
|
|
%tmp30 = shl nuw nsw i32 %tmp3, 8
|
|
%tmp4 = getelementptr inbounds i8, ptr %arg, i32 1
|
|
%tmp5 = load i8, ptr %tmp4, align 1
|
|
%tmp6 = zext i8 %tmp5 to i32
|
|
%tmp7 = shl nuw nsw i32 %tmp6, 16
|
|
%tmp8 = or i32 %tmp7, %tmp30
|
|
ret i32 %tmp8
|
|
}
|
|
|
|
; ptr p; // p is 2 byte aligned
|
|
; ((i32) p[0] << 16) | ((i32) p[1] << 24)
|
|
; Two-byte pattern placed in the upper half: the byte at %arg+0 is shifted left
; by 16 and the byte at %arg+1 by 24, leaving the low 16 bits of the result zero.
; The assertions below show the loads are NOT merged here: two `ldrb` loads are
; combined with `lsl` and a shifted `orr`.
define i32 @zext_load_i32_by_i8_shl_16(ptr %arg) {
|
|
; CHECK-LABEL: zext_load_i32_by_i8_shl_16:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ldrb w8, [x0]
|
|
; CHECK-NEXT: ldrb w9, [x0, #1]
|
|
; CHECK-NEXT: lsl w8, w8, #16
|
|
; CHECK-NEXT: orr w0, w8, w9, lsl #24
|
|
; CHECK-NEXT: ret
|
|
|
|
%tmp2 = load i8, ptr %arg, align 2
|
|
%tmp3 = zext i8 %tmp2 to i32
|
|
%tmp30 = shl nuw nsw i32 %tmp3, 16
|
|
%tmp4 = getelementptr inbounds i8, ptr %arg, i32 1
|
|
%tmp5 = load i8, ptr %tmp4, align 1
|
|
%tmp6 = zext i8 %tmp5 to i32
|
|
%tmp7 = shl nuw nsw i32 %tmp6, 24
|
|
%tmp8 = or i32 %tmp7, %tmp30
|
|
ret i32 %tmp8
|
|
}
|
|
; ptr p; // p is 2 byte aligned
|
|
; (i32) p[1] | ((i32) p[0] << 8)
|
|
; Byte-reversed two-byte pattern: the byte at %arg+1 is unshifted and the byte
; at %arg+0 is shifted left by 8; the upper 16 bits of the result are zero.
; The assertions below expect a halfword load (`ldrh`) followed by `rev16`.
define i32 @zext_load_i32_by_i8_bswap(ptr %arg) {
|
|
; CHECK-LABEL: zext_load_i32_by_i8_bswap:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ldrh w8, [x0]
|
|
; CHECK-NEXT: rev16 w0, w8
|
|
; CHECK-NEXT: ret
|
|
|
|
%tmp1 = getelementptr inbounds i8, ptr %arg, i32 1
|
|
%tmp2 = load i8, ptr %tmp1, align 1
|
|
%tmp3 = zext i8 %tmp2 to i32
|
|
%tmp5 = load i8, ptr %arg, align 2
|
|
%tmp6 = zext i8 %tmp5 to i32
|
|
%tmp7 = shl nuw nsw i32 %tmp6, 8
|
|
%tmp8 = or i32 %tmp7, %tmp3
|
|
ret i32 %tmp8
|
|
}
|
|
|
|
; ptr p; // p is 2 byte aligned
|
|
; ((i32) p[1] << 8) | ((i32) p[0] << 16)
|
|
; Byte-reversed two-byte pattern placed one byte up: the byte at %arg+1 is
; shifted left by 8 and the byte at %arg+0 by 16.
; The assertions below show the loads are NOT merged here: two `ldrb` loads are
; combined with `lsl` and a shifted `orr`.
define i32 @zext_load_i32_by_i8_bswap_shl_8(ptr %arg) {
|
|
; CHECK-LABEL: zext_load_i32_by_i8_bswap_shl_8:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ldrb w8, [x0, #1]
|
|
; CHECK-NEXT: ldrb w9, [x0]
|
|
; CHECK-NEXT: lsl w8, w8, #8
|
|
; CHECK-NEXT: orr w0, w8, w9, lsl #16
|
|
; CHECK-NEXT: ret
|
|
|
|
%tmp1 = getelementptr inbounds i8, ptr %arg, i32 1
|
|
%tmp2 = load i8, ptr %tmp1, align 1
|
|
%tmp3 = zext i8 %tmp2 to i32
|
|
%tmp30 = shl nuw nsw i32 %tmp3, 8
|
|
%tmp5 = load i8, ptr %arg, align 2
|
|
%tmp6 = zext i8 %tmp5 to i32
|
|
%tmp7 = shl nuw nsw i32 %tmp6, 16
|
|
%tmp8 = or i32 %tmp7, %tmp30
|
|
ret i32 %tmp8
|
|
}
|
|
|
|
; ptr p; // p is 2 byte aligned
|
|
; ((i32) p[1] << 16) | ((i32) p[0] << 24)
|
|
; Byte-reversed two-byte pattern placed in the upper half: the byte at %arg+1 is
; shifted left by 16 and the byte at %arg+0 by 24.
; The assertions below show the loads are NOT merged here: two `ldrb` loads are
; combined with `lsl` and a shifted `orr`.
define i32 @zext_load_i32_by_i8_bswap_shl_16(ptr %arg) {
|
|
; CHECK-LABEL: zext_load_i32_by_i8_bswap_shl_16:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ldrb w8, [x0, #1]
|
|
; CHECK-NEXT: ldrb w9, [x0]
|
|
; CHECK-NEXT: lsl w8, w8, #16
|
|
; CHECK-NEXT: orr w0, w8, w9, lsl #24
|
|
; CHECK-NEXT: ret
|
|
|
|
%tmp1 = getelementptr inbounds i8, ptr %arg, i32 1
|
|
%tmp2 = load i8, ptr %tmp1, align 1
|
|
%tmp3 = zext i8 %tmp2 to i32
|
|
%tmp30 = shl nuw nsw i32 %tmp3, 16
|
|
%tmp5 = load i8, ptr %arg, align 2
|
|
%tmp6 = zext i8 %tmp5 to i32
|
|
%tmp7 = shl nuw nsw i32 %tmp6, 24
|
|
%tmp8 = or i32 %tmp7, %tmp30
|
|
ret i32 %tmp8
|
|
}
|
|
|
|
; x1 = x0
|
|
; Loads a <4 x i8> vector from %in, extracts all four elements, zero-extends
; them and ORs element k shifted left by 8*k into one i32 that is stored to
; %out. The %p argument is not referenced in the body.
; The assertions below expect a plain 32-bit `ldr` / `str` pair.
define void @short_vector_to_i32(ptr %in, ptr %out, ptr %p) {
|
|
; CHECK-LABEL: short_vector_to_i32:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ldr w8, [x0]
|
|
; CHECK-NEXT: str w8, [x1]
|
|
; CHECK-NEXT: ret
|
|
%ld = load <4 x i8>, ptr %in, align 4
|
|
|
|
%e1 = extractelement <4 x i8> %ld, i32 0
|
|
%e2 = extractelement <4 x i8> %ld, i32 1
|
|
%e3 = extractelement <4 x i8> %ld, i32 2
|
|
%e4 = extractelement <4 x i8> %ld, i32 3
|
|
|
|
%z0 = zext i8 %e1 to i32
|
|
%z1 = zext i8 %e2 to i32
|
|
%z2 = zext i8 %e3 to i32
|
|
%z3 = zext i8 %e4 to i32
|
|
|
|
%s1 = shl nuw nsw i32 %z1, 8
|
|
%s2 = shl nuw nsw i32 %z2, 16
|
|
%s3 = shl nuw i32 %z3, 24
|
|
|
|
%i1 = or i32 %s1, %z0
|
|
%i2 = or i32 %i1, %s2
|
|
%i3 = or i32 %i2, %s3
|
|
|
|
store i32 %i3, ptr %out
|
|
ret void
|
|
}
|
|
|
|
; Variant of @short_vector_to_i32 that omits element 0: only elements 1, 2 and 3
; of the <4 x i8> load are combined (shifted by 8, 16 and 24), so the low byte
; of the stored i32 is zero. The %p argument is not referenced in the body.
; The assertions below show no scalar load merge: the vector is loaded into s0,
; widened with `ushll`, and each lane is moved out with `umov`.
define void @short_vector_to_i32_unused_low_i8(ptr %in, ptr %out, ptr %p) {
|
|
; CHECK-LABEL: short_vector_to_i32_unused_low_i8:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ldr s0, [x0]
|
|
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
|
|
; CHECK-NEXT: umov w8, v0.h[2]
|
|
; CHECK-NEXT: umov w9, v0.h[1]
|
|
; CHECK-NEXT: umov w10, v0.h[3]
|
|
; CHECK-NEXT: lsl w8, w8, #16
|
|
; CHECK-NEXT: bfi w8, w9, #8, #8
|
|
; CHECK-NEXT: orr w8, w8, w10, lsl #24
|
|
; CHECK-NEXT: str w8, [x1]
|
|
; CHECK-NEXT: ret
|
|
%ld = load <4 x i8>, ptr %in, align 4
|
|
|
|
%e2 = extractelement <4 x i8> %ld, i32 1
|
|
%e3 = extractelement <4 x i8> %ld, i32 2
|
|
%e4 = extractelement <4 x i8> %ld, i32 3
|
|
|
|
%z1 = zext i8 %e2 to i32
|
|
%z2 = zext i8 %e3 to i32
|
|
%z3 = zext i8 %e4 to i32
|
|
|
|
%s1 = shl nuw nsw i32 %z1, 8
|
|
%s2 = shl nuw nsw i32 %z2, 16
|
|
%s3 = shl nuw i32 %z3, 24
|
|
|
|
%i2 = or i32 %s1, %s2
|
|
%i3 = or i32 %i2, %s3
|
|
|
|
store i32 %i3, ptr %out
|
|
ret void
|
|
}
|
|
|
|
; Variant of @short_vector_to_i32 that omits element 3: only elements 0, 1 and 2
; of the <4 x i8> load are combined (shifted by 0, 8 and 16), so the top byte of
; the stored i32 is zero. The %p argument is not referenced in the body.
; The assertions below show a partial merge: an `ldrh` supplies the low two
; bytes while element 2 is still taken from the vector load via `ushll`/`umov`.
define void @short_vector_to_i32_unused_high_i8(ptr %in, ptr %out, ptr %p) {
|
|
; CHECK-LABEL: short_vector_to_i32_unused_high_i8:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ldr s0, [x0]
|
|
; CHECK-NEXT: ldrh w9, [x0]
|
|
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
|
|
; CHECK-NEXT: umov w8, v0.h[2]
|
|
; CHECK-NEXT: orr w8, w9, w8, lsl #16
|
|
; CHECK-NEXT: str w8, [x1]
|
|
; CHECK-NEXT: ret
|
|
%ld = load <4 x i8>, ptr %in, align 4
|
|
|
|
%e1 = extractelement <4 x i8> %ld, i32 0
|
|
%e2 = extractelement <4 x i8> %ld, i32 1
|
|
%e3 = extractelement <4 x i8> %ld, i32 2
|
|
|
|
%z0 = zext i8 %e1 to i32
|
|
%z1 = zext i8 %e2 to i32
|
|
%z2 = zext i8 %e3 to i32
|
|
|
|
%s1 = shl nuw nsw i32 %z1, 8
|
|
%s2 = shl nuw nsw i32 %z2, 16
|
|
|
|
%i1 = or i32 %s1, %z0
|
|
%i2 = or i32 %i1, %s2
|
|
|
|
store i32 %i2, ptr %out
|
|
ret void
|
|
}
|
|
|
|
; Variant of @short_vector_to_i32 that uses only elements 2 and 3 of the
; <4 x i8> load (shifted by 16 and 24), so the low 16 bits of the stored i32 are
; zero. The %p argument is not referenced in the body.
; The assertions below show no scalar load merge: the vector is loaded into s0,
; widened with `ushll`, and the two lanes are moved out with `umov`.
define void @short_vector_to_i32_unused_low_i16(ptr %in, ptr %out, ptr %p) {
|
|
; CHECK-LABEL: short_vector_to_i32_unused_low_i16:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ldr s0, [x0]
|
|
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
|
|
; CHECK-NEXT: umov w8, v0.h[3]
|
|
; CHECK-NEXT: umov w9, v0.h[2]
|
|
; CHECK-NEXT: lsl w8, w8, #24
|
|
; CHECK-NEXT: orr w8, w8, w9, lsl #16
|
|
; CHECK-NEXT: str w8, [x1]
|
|
; CHECK-NEXT: ret
|
|
%ld = load <4 x i8>, ptr %in, align 4
|
|
|
|
%e3 = extractelement <4 x i8> %ld, i32 2
|
|
%e4 = extractelement <4 x i8> %ld, i32 3
|
|
|
|
%z2 = zext i8 %e3 to i32
|
|
%z3 = zext i8 %e4 to i32
|
|
|
|
%s2 = shl nuw nsw i32 %z2, 16
|
|
%s3 = shl nuw i32 %z3, 24
|
|
|
|
%i3 = or i32 %s2, %s3
|
|
|
|
store i32 %i3, ptr %out
|
|
ret void
|
|
}
|
|
|
|
; x1 = x0[0:1]
|
|
; Variant of @short_vector_to_i32 that uses only elements 0 and 1 of the
; <4 x i8> load (shifted by 0 and 8), so the upper 16 bits of the stored i32 are
; zero. The %p argument is not referenced in the body.
; The assertions below expect a single `ldrh` followed by a 32-bit `str`.
define void @short_vector_to_i32_unused_high_i16(ptr %in, ptr %out, ptr %p) {
|
|
; CHECK-LABEL: short_vector_to_i32_unused_high_i16:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ldrh w8, [x0]
|
|
; CHECK-NEXT: str w8, [x1]
|
|
; CHECK-NEXT: ret
|
|
%ld = load <4 x i8>, ptr %in, align 4
|
|
|
|
%e1 = extractelement <4 x i8> %ld, i32 0
|
|
%e2 = extractelement <4 x i8> %ld, i32 1
|
|
|
|
%z0 = zext i8 %e1 to i32
|
|
%z1 = zext i8 %e2 to i32
|
|
|
|
%s1 = shl nuw nsw i32 %z1, 8
|
|
|
|
%i1 = or i32 %s1, %z0
|
|
|
|
store i32 %i1, ptr %out
|
|
ret void
|
|
}
|
|
|
|
; x1 = x0
|
|
; Same four-element combine as @short_vector_to_i32, but the <4 x i8> elements
; are zero-extended to i64 and the result is stored as an i64, so only its low
; 32 bits can be non-zero. The %p argument is not referenced in the body.
; The assertions below expect a 32-bit `ldr` into w8 and a 64-bit `str` of x8.
define void @short_vector_to_i64(ptr %in, ptr %out, ptr %p) {
|
|
; CHECK-LABEL: short_vector_to_i64:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ldr w8, [x0]
|
|
; CHECK-NEXT: str x8, [x1]
|
|
; CHECK-NEXT: ret
|
|
%ld = load <4 x i8>, ptr %in, align 4
|
|
|
|
%e1 = extractelement <4 x i8> %ld, i32 0
|
|
%e2 = extractelement <4 x i8> %ld, i32 1
|
|
%e3 = extractelement <4 x i8> %ld, i32 2
|
|
%e4 = extractelement <4 x i8> %ld, i32 3
|
|
|
|
%z0 = zext i8 %e1 to i64
|
|
%z1 = zext i8 %e2 to i64
|
|
%z2 = zext i8 %e3 to i64
|
|
%z3 = zext i8 %e4 to i64
|
|
|
|
%s1 = shl nuw nsw i64 %z1, 8
|
|
%s2 = shl nuw nsw i64 %z2, 16
|
|
%s3 = shl nuw i64 %z3, 24
|
|
|
|
%i1 = or i64 %s1, %z0
|
|
%i2 = or i64 %i1, %s2
|
|
%i3 = or i64 %i2, %s3
|
|
|
|
store i64 %i3, ptr %out
|
|
ret void
|
|
}
|
|
|
|
; x1 = x0
|
|
; Scalable-vector counterpart of @short_vector_to_i32: loads a
; <vscale x 4 x i8> from %in, extracts elements 0..3, zero-extends them and ORs
; element k shifted left by 8*k into one i32 stored to %out. The function
; carries attribute group #0; %p is not referenced in the body.
; The assertions below expect a plain 32-bit `ldr` / `str` pair.
define void @scalable_vector_to_i32(ptr %in, ptr %out, ptr %p) #0 {
|
|
; CHECK-LABEL: scalable_vector_to_i32:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ldr w8, [x0]
|
|
; CHECK-NEXT: str w8, [x1]
|
|
; CHECK-NEXT: ret
|
|
%ld = load <vscale x 4 x i8>, ptr %in, align 4
|
|
|
|
%e1 = extractelement <vscale x 4 x i8> %ld, i32 0
|
|
%e2 = extractelement <vscale x 4 x i8> %ld, i32 1
|
|
%e3 = extractelement <vscale x 4 x i8> %ld, i32 2
|
|
%e4 = extractelement <vscale x 4 x i8> %ld, i32 3
|
|
|
|
%z0 = zext i8 %e1 to i32
|
|
%z1 = zext i8 %e2 to i32
|
|
%z2 = zext i8 %e3 to i32
|
|
%z3 = zext i8 %e4 to i32
|
|
|
|
%s1 = shl nuw nsw i32 %z1, 8
|
|
%s2 = shl nuw nsw i32 %z2, 16
|
|
%s3 = shl nuw i32 %z3, 24
|
|
|
|
%i1 = or i32 %s1, %z0
|
|
%i2 = or i32 %i1, %s2
|
|
%i3 = or i32 %i2, %s3
|
|
|
|
store i32 %i3, ptr %out
|
|
ret void
|
|
}
|
|
|
|
; Scalable-vector variant that omits element 0: only elements 1, 2 and 3 of the
; <vscale x 4 x i8> load are combined (shifted by 8, 16 and 24), so the low byte
; of the stored i32 is zero. The function carries attribute group #0; %p is not
; referenced in the body.
; The assertions below show no scalar load merge: a predicated `ld1b` loads the
; vector and each lane is moved out individually before the shift/OR sequence.
define void @scalable_vector_to_i32_unused_low_i8(ptr %in, ptr %out, ptr %p) #0 {
|
|
; CHECK-LABEL: scalable_vector_to_i32_unused_low_i8:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ptrue p0.s
|
|
; CHECK-NEXT: ld1b { z0.s }, p0/z, [x0]
|
|
; CHECK-NEXT: mov w8, v0.s[1]
|
|
; CHECK-NEXT: mov w9, v0.s[2]
|
|
; CHECK-NEXT: mov w10, v0.s[3]
|
|
; CHECK-NEXT: lsl w8, w8, #8
|
|
; CHECK-NEXT: orr w8, w8, w9, lsl #16
|
|
; CHECK-NEXT: orr w8, w8, w10, lsl #24
|
|
; CHECK-NEXT: str w8, [x1]
|
|
; CHECK-NEXT: ret
|
|
%ld = load <vscale x 4 x i8>, ptr %in, align 4
|
|
|
|
%e2 = extractelement <vscale x 4 x i8> %ld, i32 1
|
|
%e3 = extractelement <vscale x 4 x i8> %ld, i32 2
|
|
%e4 = extractelement <vscale x 4 x i8> %ld, i32 3
|
|
|
|
%z1 = zext i8 %e2 to i32
|
|
%z2 = zext i8 %e3 to i32
|
|
%z3 = zext i8 %e4 to i32
|
|
|
|
%s1 = shl nuw nsw i32 %z1, 8
|
|
%s2 = shl nuw nsw i32 %z2, 16
|
|
%s3 = shl nuw i32 %z3, 24
|
|
|
|
%i2 = or i32 %s1, %s2
|
|
%i3 = or i32 %i2, %s3
|
|
|
|
store i32 %i3, ptr %out
|
|
ret void
|
|
}
|
|
|
|
; Scalable-vector variant that omits element 3: only elements 0, 1 and 2 of the
; <vscale x 4 x i8> load are combined (shifted by 0, 8 and 16), so the top byte
; of the stored i32 is zero. The function carries attribute group #0; %p is not
; referenced in the body.
; The assertions below show a partial merge: an `ldrh` supplies the low two
; bytes while element 2 is still taken from the predicated `ld1b` vector load.
define void @scalable_vector_to_i32_unused_high_i8(ptr %in, ptr %out, ptr %p) #0 {
|
|
; CHECK-LABEL: scalable_vector_to_i32_unused_high_i8:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ptrue p0.s
|
|
; CHECK-NEXT: ldrh w9, [x0]
|
|
; CHECK-NEXT: ld1b { z0.s }, p0/z, [x0]
|
|
; CHECK-NEXT: mov w8, v0.s[2]
|
|
; CHECK-NEXT: orr w8, w9, w8, lsl #16
|
|
; CHECK-NEXT: str w8, [x1]
|
|
; CHECK-NEXT: ret
|
|
%ld = load <vscale x 4 x i8>, ptr %in, align 4
|
|
|
|
%e1 = extractelement <vscale x 4 x i8> %ld, i32 0
|
|
%e2 = extractelement <vscale x 4 x i8> %ld, i32 1
|
|
%e3 = extractelement <vscale x 4 x i8> %ld, i32 2
|
|
|
|
%z0 = zext i8 %e1 to i32
|
|
%z1 = zext i8 %e2 to i32
|
|
%z2 = zext i8 %e3 to i32
|
|
|
|
%s1 = shl nuw nsw i32 %z1, 8
|
|
%s2 = shl nuw nsw i32 %z2, 16
|
|
|
|
%i1 = or i32 %s1, %z0
|
|
%i2 = or i32 %i1, %s2
|
|
|
|
store i32 %i2, ptr %out
|
|
ret void
|
|
}
|
|
|
|
; Scalable-vector variant that uses only elements 2 and 3 of the
; <vscale x 4 x i8> load (shifted by 16 and 24), so the low 16 bits of the
; stored i32 are zero. The function carries attribute group #0; %p is not
; referenced in the body.
; The assertions below show no scalar load merge: a predicated `ld1b` loads the
; vector and the two lanes are moved out individually.
define void @scalable_vector_to_i32_unused_low_i16(ptr %in, ptr %out, ptr %p) #0 {
|
|
; CHECK-LABEL: scalable_vector_to_i32_unused_low_i16:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ptrue p0.s
|
|
; CHECK-NEXT: ld1b { z0.s }, p0/z, [x0]
|
|
; CHECK-NEXT: mov w8, v0.s[2]
|
|
; CHECK-NEXT: mov w9, v0.s[3]
|
|
; CHECK-NEXT: lsl w8, w8, #16
|
|
; CHECK-NEXT: orr w8, w8, w9, lsl #24
|
|
; CHECK-NEXT: str w8, [x1]
|
|
; CHECK-NEXT: ret
|
|
%ld = load <vscale x 4 x i8>, ptr %in, align 4
|
|
|
|
%e3 = extractelement <vscale x 4 x i8> %ld, i32 2
|
|
%e4 = extractelement <vscale x 4 x i8> %ld, i32 3
|
|
|
|
%z2 = zext i8 %e3 to i32
|
|
%z3 = zext i8 %e4 to i32
|
|
|
|
%s2 = shl nuw nsw i32 %z2, 16
|
|
%s3 = shl nuw i32 %z3, 24
|
|
|
|
%i3 = or i32 %s2, %s3
|
|
|
|
store i32 %i3, ptr %out
|
|
ret void
|
|
}
|
|
|
|
; x1 = x0[0:1]
|
|
; Scalable-vector variant that uses only elements 0 and 1 of the
; <vscale x 4 x i8> load (shifted by 0 and 8), so the upper 16 bits of the
; stored i32 are zero. The function carries attribute group #0; %p is not
; referenced in the body.
; The assertions below expect a single `ldrh` followed by a 32-bit `str`.
define void @scalable_vector_to_i32_unused_high_i16(ptr %in, ptr %out, ptr %p) #0 {
|
|
; CHECK-LABEL: scalable_vector_to_i32_unused_high_i16:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ldrh w8, [x0]
|
|
; CHECK-NEXT: str w8, [x1]
|
|
; CHECK-NEXT: ret
|
|
%ld = load <vscale x 4 x i8>, ptr %in, align 4
|
|
|
|
%e1 = extractelement <vscale x 4 x i8> %ld, i32 0
|
|
%e2 = extractelement <vscale x 4 x i8> %ld, i32 1
|
|
|
|
%z0 = zext i8 %e1 to i32
|
|
%z1 = zext i8 %e2 to i32
|
|
|
|
%s1 = shl nuw nsw i32 %z1, 8
|
|
|
|
%i1 = or i32 %s1, %z0
|
|
|
|
store i32 %i1, ptr %out
|
|
ret void
|
|
}
|
|
|
|
; x1 = x0
|
|
; Scalable-vector counterpart of @short_vector_to_i64: elements 0..3 of the
; <vscale x 4 x i8> load are zero-extended to i64, combined with element k
; shifted left by 8*k, and stored as an i64, so only its low 32 bits can be
; non-zero. The function carries attribute group #0; %p is not referenced in
; the body.
; The assertions below expect a 32-bit `ldr` into w8 and a 64-bit `str` of x8.
define void @scalable_vector_to_i64(ptr %in, ptr %out, ptr %p) #0 {
|
|
; CHECK-LABEL: scalable_vector_to_i64:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ldr w8, [x0]
|
|
; CHECK-NEXT: str x8, [x1]
|
|
; CHECK-NEXT: ret
|
|
%ld = load <vscale x 4 x i8>, ptr %in, align 4
|
|
|
|
%e1 = extractelement <vscale x 4 x i8> %ld, i32 0
|
|
%e2 = extractelement <vscale x 4 x i8> %ld, i32 1
|
|
%e3 = extractelement <vscale x 4 x i8> %ld, i32 2
|
|
%e4 = extractelement <vscale x 4 x i8> %ld, i32 3
|
|
|
|
%z0 = zext i8 %e1 to i64
|
|
%z1 = zext i8 %e2 to i64
|
|
%z2 = zext i8 %e3 to i64
|
|
%z3 = zext i8 %e4 to i64
|
|
|
|
%s1 = shl nuw nsw i64 %z1, 8
|
|
%s2 = shl nuw nsw i64 %z2, 16
|
|
%s3 = shl nuw i64 %z3, 24
|
|
|
|
%i1 = or i64 %s1, %z0
|
|
%i2 = or i64 %i1, %s2
|
|
%i3 = or i64 %i2, %s3
|
|
|
|
store i64 %i3, ptr %out
|
|
ret void
|
|
}
|
|
|
|
; Attribute group #0 adds the "+sve" target feature; it is attached to the
; scalable_vector_* functions defined above.
attributes #0 = { "target-features"="+sve" }
|