
The wasm64 versions of the v128.storeX_lane instructions were incorrectly defined as returning a v128 value, which resulted in spurious drop instructions being emitted and caused validation to fail. This was not caught earlier because wasm64 has been experimental and not well tested. Update the relevant test file to test both wasm32 and wasm64.

Fixes #62443.

Differential Revision: https://reviews.llvm.org/D149780
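To illustrate the failure mode (a minimal sketch, not output reproduced from the compiler): because the instruction was modeled as pushing a v128 result, the backend emitted a drop to discard a value that the real instruction never produces, e.g. for a wasm64 store:

  local.get 1              # i64 address operand
  local.get 0              # v128 operand
  v128.store64_lane 0, 0   # consumes both operands, pushes nothing
  drop                     # spurious: the stack is empty here, so validation fails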
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -verify-machineinstrs -mattr=+simd128 -mtriple=wasm32-unknown-unknown | FileCheck %s
; RUN: llc < %s -verify-machineinstrs -mattr=+simd128 -mtriple=wasm64-unknown-unknown | FileCheck %s --check-prefix MEM64

; Test SIMD v128.load{8,16,32,64}_lane instructions.

; TODO: Use the offset field by supporting more patterns. Right now only the
; equivalents of LoadPatNoOffset/StorePatNoOffset are supported.

;===----------------------------------------------------------------------------
; v128.load8_lane / v128.store8_lane
;===----------------------------------------------------------------------------

define <16 x i8> @load_lane_i8_no_offset(ptr %p, <16 x i8> %v) {
; CHECK-LABEL: load_lane_i8_no_offset:
; CHECK: .functype load_lane_i8_no_offset (i32, v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 1
; CHECK-NEXT: v128.load8_lane 0, 0
; CHECK-NEXT: # fallthrough-return
;
; MEM64-LABEL: load_lane_i8_no_offset:
; MEM64: .functype load_lane_i8_no_offset (i64, v128) -> (v128)
; MEM64-NEXT: # %bb.0:
; MEM64-NEXT: local.get 0
; MEM64-NEXT: local.get 1
; MEM64-NEXT: v128.load8_lane 0, 0
; MEM64-NEXT: # fallthrough-return
  %x = load i8, ptr %p
  %t = insertelement <16 x i8> %v, i8 %x, i32 0
  ret <16 x i8> %t
}

define <16 x i8> @load_lane_i8_with_folded_offset(ptr %p, <16 x i8> %v) {
; CHECK-LABEL: load_lane_i8_with_folded_offset:
; CHECK: .functype load_lane_i8_with_folded_offset (i32, v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 24
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 1
; CHECK-NEXT: v128.load8_lane 0, 0
; CHECK-NEXT: # fallthrough-return
;
; MEM64-LABEL: load_lane_i8_with_folded_offset:
; MEM64: .functype load_lane_i8_with_folded_offset (i64, v128) -> (v128)
; MEM64-NEXT: # %bb.0:
; MEM64-NEXT: local.get 0
; MEM64-NEXT: i32.wrap_i64
; MEM64-NEXT: i32.const 24
; MEM64-NEXT: i32.add
; MEM64-NEXT: i64.extend_i32_u
; MEM64-NEXT: local.get 1
; MEM64-NEXT: v128.load8_lane 0, 0
; MEM64-NEXT: # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to ptr
  %x = load i8, ptr %s
  %t = insertelement <16 x i8> %v, i8 %x, i32 0
  ret <16 x i8> %t
}

define <16 x i8> @load_lane_i8_with_folded_gep_offset(ptr %p, <16 x i8> %v) {
; CHECK-LABEL: load_lane_i8_with_folded_gep_offset:
; CHECK: .functype load_lane_i8_with_folded_gep_offset (i32, v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 6
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 1
; CHECK-NEXT: v128.load8_lane 0, 0
; CHECK-NEXT: # fallthrough-return
;
; MEM64-LABEL: load_lane_i8_with_folded_gep_offset:
; MEM64: .functype load_lane_i8_with_folded_gep_offset (i64, v128) -> (v128)
; MEM64-NEXT: # %bb.0:
; MEM64-NEXT: local.get 0
; MEM64-NEXT: i64.const 6
; MEM64-NEXT: i64.add
; MEM64-NEXT: local.get 1
; MEM64-NEXT: v128.load8_lane 0, 0
; MEM64-NEXT: # fallthrough-return
  %s = getelementptr inbounds i8, ptr %p, i32 6
  %x = load i8, ptr %s
  %t = insertelement <16 x i8> %v, i8 %x, i32 0
  ret <16 x i8> %t
}

define <16 x i8> @load_lane_i8_with_unfolded_gep_negative_offset(ptr %p, <16 x i8> %v) {
; CHECK-LABEL: load_lane_i8_with_unfolded_gep_negative_offset:
; CHECK: .functype load_lane_i8_with_unfolded_gep_negative_offset (i32, v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const -6
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 1
; CHECK-NEXT: v128.load8_lane 0, 0
; CHECK-NEXT: # fallthrough-return
;
; MEM64-LABEL: load_lane_i8_with_unfolded_gep_negative_offset:
; MEM64: .functype load_lane_i8_with_unfolded_gep_negative_offset (i64, v128) -> (v128)
; MEM64-NEXT: # %bb.0:
; MEM64-NEXT: local.get 0
; MEM64-NEXT: i64.const -6
; MEM64-NEXT: i64.add
; MEM64-NEXT: local.get 1
; MEM64-NEXT: v128.load8_lane 0, 0
; MEM64-NEXT: # fallthrough-return
  %s = getelementptr inbounds i8, ptr %p, i32 -6
  %x = load i8, ptr %s
  %t = insertelement <16 x i8> %v, i8 %x, i32 0
  ret <16 x i8> %t
}

define <16 x i8> @load_lane_i8_with_unfolded_offset(ptr %p, <16 x i8> %v) {
; CHECK-LABEL: load_lane_i8_with_unfolded_offset:
; CHECK: .functype load_lane_i8_with_unfolded_offset (i32, v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 24
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 1
; CHECK-NEXT: v128.load8_lane 0, 0
; CHECK-NEXT: # fallthrough-return
;
; MEM64-LABEL: load_lane_i8_with_unfolded_offset:
; MEM64: .functype load_lane_i8_with_unfolded_offset (i64, v128) -> (v128)
; MEM64-NEXT: # %bb.0:
; MEM64-NEXT: local.get 0
; MEM64-NEXT: i32.wrap_i64
; MEM64-NEXT: i32.const 24
; MEM64-NEXT: i32.add
; MEM64-NEXT: i64.extend_i32_u
; MEM64-NEXT: local.get 1
; MEM64-NEXT: v128.load8_lane 0, 0
; MEM64-NEXT: # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nsw i32 %q, 24
  %s = inttoptr i32 %r to ptr
  %x = load i8, ptr %s
  %t = insertelement <16 x i8> %v, i8 %x, i32 0
  ret <16 x i8> %t
}

define <16 x i8> @load_lane_i8_with_unfolded_gep_offset(ptr %p, <16 x i8> %v) {
; CHECK-LABEL: load_lane_i8_with_unfolded_gep_offset:
; CHECK: .functype load_lane_i8_with_unfolded_gep_offset (i32, v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 6
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 1
; CHECK-NEXT: v128.load8_lane 0, 0
; CHECK-NEXT: # fallthrough-return
;
; MEM64-LABEL: load_lane_i8_with_unfolded_gep_offset:
; MEM64: .functype load_lane_i8_with_unfolded_gep_offset (i64, v128) -> (v128)
; MEM64-NEXT: # %bb.0:
; MEM64-NEXT: local.get 0
; MEM64-NEXT: i64.const 6
; MEM64-NEXT: i64.add
; MEM64-NEXT: local.get 1
; MEM64-NEXT: v128.load8_lane 0, 0
; MEM64-NEXT: # fallthrough-return
  %s = getelementptr i8, ptr %p, i32 6
  %x = load i8, ptr %s
  %t = insertelement <16 x i8> %v, i8 %x, i32 0
  ret <16 x i8> %t
}

define <16 x i8> @load_lane_i8_from_numeric_address(<16 x i8> %v) {
; CHECK-LABEL: load_lane_i8_from_numeric_address:
; CHECK: .functype load_lane_i8_from_numeric_address (v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 42
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load8_lane 0, 0
; CHECK-NEXT: # fallthrough-return
;
; MEM64-LABEL: load_lane_i8_from_numeric_address:
; MEM64: .functype load_lane_i8_from_numeric_address (v128) -> (v128)
; MEM64-NEXT: # %bb.0:
; MEM64-NEXT: i64.const 42
; MEM64-NEXT: local.get 0
; MEM64-NEXT: v128.load8_lane 0, 0
; MEM64-NEXT: # fallthrough-return
  %s = inttoptr i32 42 to ptr
  %x = load i8, ptr %s
  %t = insertelement <16 x i8> %v, i8 %x, i32 0
  ret <16 x i8> %t
}

@gv_i8 = global i8 0
define <16 x i8> @load_lane_i8_from_global_address(<16 x i8> %v) {
; CHECK-LABEL: load_lane_i8_from_global_address:
; CHECK: .functype load_lane_i8_from_global_address (v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const gv_i8
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load8_lane 0, 0
; CHECK-NEXT: # fallthrough-return
;
; MEM64-LABEL: load_lane_i8_from_global_address:
; MEM64: .functype load_lane_i8_from_global_address (v128) -> (v128)
; MEM64-NEXT: # %bb.0:
; MEM64-NEXT: i64.const gv_i8
; MEM64-NEXT: local.get 0
; MEM64-NEXT: v128.load8_lane 0, 0
; MEM64-NEXT: # fallthrough-return
  %x = load i8, ptr @gv_i8
  %t = insertelement <16 x i8> %v, i8 %x, i32 0
  ret <16 x i8> %t
}

define void @store_lane_i8_no_offset(<16 x i8> %v, ptr %p) {
; CHECK-LABEL: store_lane_i8_no_offset:
; CHECK: .functype store_lane_i8_no_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store8_lane 0, 0
; CHECK-NEXT: # fallthrough-return
;
; MEM64-LABEL: store_lane_i8_no_offset:
; MEM64: .functype store_lane_i8_no_offset (v128, i64) -> ()
; MEM64-NEXT: # %bb.0:
; MEM64-NEXT: local.get 1
; MEM64-NEXT: local.get 0
; MEM64-NEXT: v128.store8_lane 0, 0
; MEM64-NEXT: # fallthrough-return
  %x = extractelement <16 x i8> %v, i32 0
  store i8 %x, ptr %p
  ret void
}

define void @store_lane_i8_with_folded_offset(<16 x i8> %v, ptr %p) {
; CHECK-LABEL: store_lane_i8_with_folded_offset:
; CHECK: .functype store_lane_i8_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store8_lane 24, 0
; CHECK-NEXT: # fallthrough-return
;
; MEM64-LABEL: store_lane_i8_with_folded_offset:
; MEM64: .functype store_lane_i8_with_folded_offset (v128, i64) -> ()
; MEM64-NEXT: # %bb.0:
; MEM64-NEXT: local.get 1
; MEM64-NEXT: i32.wrap_i64
; MEM64-NEXT: i32.const 24
; MEM64-NEXT: i32.add
; MEM64-NEXT: i64.extend_i32_u
; MEM64-NEXT: local.get 0
; MEM64-NEXT: v128.store8_lane 0, 0
; MEM64-NEXT: # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to ptr
  %x = extractelement <16 x i8> %v, i32 0
  store i8 %x, ptr %s
  ret void
}

define void @store_lane_i8_with_folded_gep_offset(<16 x i8> %v, ptr %p) {
; CHECK-LABEL: store_lane_i8_with_folded_gep_offset:
; CHECK: .functype store_lane_i8_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store8_lane 6, 0
; CHECK-NEXT: # fallthrough-return
;
; MEM64-LABEL: store_lane_i8_with_folded_gep_offset:
; MEM64: .functype store_lane_i8_with_folded_gep_offset (v128, i64) -> ()
; MEM64-NEXT: # %bb.0:
; MEM64-NEXT: local.get 1
; MEM64-NEXT: local.get 0
; MEM64-NEXT: v128.store8_lane 6, 0
; MEM64-NEXT: # fallthrough-return
  %s = getelementptr inbounds i8, ptr %p, i32 6
  %x = extractelement <16 x i8> %v, i32 0
  store i8 %x, ptr %s
  ret void
}

define void @store_lane_i8_with_unfolded_gep_negative_offset(<16 x i8> %v, ptr %p) {
; CHECK-LABEL: store_lane_i8_with_unfolded_gep_negative_offset:
; CHECK: .functype store_lane_i8_with_unfolded_gep_negative_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const -6
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store8_lane 0, 0
; CHECK-NEXT: # fallthrough-return
;
; MEM64-LABEL: store_lane_i8_with_unfolded_gep_negative_offset:
; MEM64: .functype store_lane_i8_with_unfolded_gep_negative_offset (v128, i64) -> ()
; MEM64-NEXT: # %bb.0:
; MEM64-NEXT: local.get 1
; MEM64-NEXT: i64.const -6
; MEM64-NEXT: i64.add
; MEM64-NEXT: local.get 0
; MEM64-NEXT: v128.store8_lane 0, 0
; MEM64-NEXT: # fallthrough-return
  %s = getelementptr inbounds i8, ptr %p, i32 -6
  %x = extractelement <16 x i8> %v, i32 0
  store i8 %x, ptr %s
  ret void
}

define void @store_lane_i8_with_unfolded_offset(<16 x i8> %v, ptr %p) {
; CHECK-LABEL: store_lane_i8_with_unfolded_offset:
; CHECK: .functype store_lane_i8_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const 24
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store8_lane 0, 0
; CHECK-NEXT: # fallthrough-return
;
; MEM64-LABEL: store_lane_i8_with_unfolded_offset:
; MEM64: .functype store_lane_i8_with_unfolded_offset (v128, i64) -> ()
; MEM64-NEXT: # %bb.0:
; MEM64-NEXT: local.get 1
; MEM64-NEXT: i32.wrap_i64
; MEM64-NEXT: i32.const 24
; MEM64-NEXT: i32.add
; MEM64-NEXT: i64.extend_i32_u
; MEM64-NEXT: local.get 0
; MEM64-NEXT: v128.store8_lane 0, 0
; MEM64-NEXT: # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nsw i32 %q, 24
  %s = inttoptr i32 %r to ptr
  %x = extractelement <16 x i8> %v, i32 0
  store i8 %x, ptr %s
  ret void
}

define void @store_lane_i8_with_unfolded_gep_offset(<16 x i8> %v, ptr %p) {
; CHECK-LABEL: store_lane_i8_with_unfolded_gep_offset:
; CHECK: .functype store_lane_i8_with_unfolded_gep_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const 6
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store8_lane 0, 0
; CHECK-NEXT: # fallthrough-return
;
; MEM64-LABEL: store_lane_i8_with_unfolded_gep_offset:
; MEM64: .functype store_lane_i8_with_unfolded_gep_offset (v128, i64) -> ()
; MEM64-NEXT: # %bb.0:
; MEM64-NEXT: local.get 1
; MEM64-NEXT: i64.const 6
; MEM64-NEXT: i64.add
; MEM64-NEXT: local.get 0
; MEM64-NEXT: v128.store8_lane 0, 0
; MEM64-NEXT: # fallthrough-return
  %s = getelementptr i8, ptr %p, i32 6
  %x = extractelement <16 x i8> %v, i32 0
  store i8 %x, ptr %s
  ret void
}

define void @store_lane_i8_to_numeric_address(<16 x i8> %v) {
; CHECK-LABEL: store_lane_i8_to_numeric_address:
; CHECK: .functype store_lane_i8_to_numeric_address (v128) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store8_lane 42, 0
; CHECK-NEXT: # fallthrough-return
;
; MEM64-LABEL: store_lane_i8_to_numeric_address:
; MEM64: .functype store_lane_i8_to_numeric_address (v128) -> ()
; MEM64-NEXT: # %bb.0:
; MEM64-NEXT: i64.const 0
; MEM64-NEXT: local.get 0
; MEM64-NEXT: v128.store8_lane 42, 0
; MEM64-NEXT: # fallthrough-return
  %s = inttoptr i32 42 to ptr
  %x = extractelement <16 x i8> %v, i32 0
  store i8 %x, ptr %s
  ret void
}

define void @store_lane_i8_from_global_address(<16 x i8> %v) {
; CHECK-LABEL: store_lane_i8_from_global_address:
; CHECK: .functype store_lane_i8_from_global_address (v128) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store8_lane gv_i8, 0
; CHECK-NEXT: # fallthrough-return
;
; MEM64-LABEL: store_lane_i8_from_global_address:
; MEM64: .functype store_lane_i8_from_global_address (v128) -> ()
; MEM64-NEXT: # %bb.0:
; MEM64-NEXT: i64.const 0
; MEM64-NEXT: local.get 0
; MEM64-NEXT: v128.store8_lane gv_i8, 0
; MEM64-NEXT: # fallthrough-return
  %x = extractelement <16 x i8> %v, i32 0
  store i8 %x, ptr @gv_i8
  ret void
}

;===----------------------------------------------------------------------------
; v128.load16_lane / v128.store16_lane
;===----------------------------------------------------------------------------

define <8 x i16> @load_lane_i16_no_offset(ptr %p, <8 x i16> %v) {
; CHECK-LABEL: load_lane_i16_no_offset:
; CHECK: .functype load_lane_i16_no_offset (i32, v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 1
; CHECK-NEXT: v128.load16_lane 0, 0
; CHECK-NEXT: # fallthrough-return
;
; MEM64-LABEL: load_lane_i16_no_offset:
; MEM64: .functype load_lane_i16_no_offset (i64, v128) -> (v128)
; MEM64-NEXT: # %bb.0:
; MEM64-NEXT: local.get 0
; MEM64-NEXT: local.get 1
; MEM64-NEXT: v128.load16_lane 0, 0
; MEM64-NEXT: # fallthrough-return
  %x = load i16, ptr %p
  %t = insertelement <8 x i16> %v, i16 %x, i32 0
  ret <8 x i16> %t
}

define <8 x i16> @load_lane_i16_with_folded_offset(ptr %p, <8 x i16> %v) {
; CHECK-LABEL: load_lane_i16_with_folded_offset:
; CHECK: .functype load_lane_i16_with_folded_offset (i32, v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 24
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 1
; CHECK-NEXT: v128.load16_lane 0, 0
; CHECK-NEXT: # fallthrough-return
;
; MEM64-LABEL: load_lane_i16_with_folded_offset:
; MEM64: .functype load_lane_i16_with_folded_offset (i64, v128) -> (v128)
; MEM64-NEXT: # %bb.0:
; MEM64-NEXT: local.get 0
; MEM64-NEXT: i32.wrap_i64
; MEM64-NEXT: i32.const 24
; MEM64-NEXT: i32.add
; MEM64-NEXT: i64.extend_i32_u
; MEM64-NEXT: local.get 1
; MEM64-NEXT: v128.load16_lane 0, 0
; MEM64-NEXT: # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to ptr
  %x = load i16, ptr %s
  %t = insertelement <8 x i16> %v, i16 %x, i32 0
  ret <8 x i16> %t
}

define <8 x i16> @load_lane_i16_with_folded_gep_offset(ptr %p, <8 x i16> %v) {
; CHECK-LABEL: load_lane_i16_with_folded_gep_offset:
; CHECK: .functype load_lane_i16_with_folded_gep_offset (i32, v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 12
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 1
; CHECK-NEXT: v128.load16_lane 0, 0
; CHECK-NEXT: # fallthrough-return
;
; MEM64-LABEL: load_lane_i16_with_folded_gep_offset:
; MEM64: .functype load_lane_i16_with_folded_gep_offset (i64, v128) -> (v128)
; MEM64-NEXT: # %bb.0:
; MEM64-NEXT: local.get 0
; MEM64-NEXT: i64.const 12
; MEM64-NEXT: i64.add
; MEM64-NEXT: local.get 1
; MEM64-NEXT: v128.load16_lane 0, 0
; MEM64-NEXT: # fallthrough-return
  %s = getelementptr inbounds i16, ptr %p, i32 6
  %x = load i16, ptr %s
  %t = insertelement <8 x i16> %v, i16 %x, i32 0
  ret <8 x i16> %t
}

define <8 x i16> @load_lane_i16_with_unfolded_gep_negative_offset(ptr %p, <8 x i16> %v) {
; CHECK-LABEL: load_lane_i16_with_unfolded_gep_negative_offset:
; CHECK: .functype load_lane_i16_with_unfolded_gep_negative_offset (i32, v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const -12
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 1
; CHECK-NEXT: v128.load16_lane 0, 0
; CHECK-NEXT: # fallthrough-return
;
; MEM64-LABEL: load_lane_i16_with_unfolded_gep_negative_offset:
; MEM64: .functype load_lane_i16_with_unfolded_gep_negative_offset (i64, v128) -> (v128)
; MEM64-NEXT: # %bb.0:
; MEM64-NEXT: local.get 0
; MEM64-NEXT: i64.const -12
; MEM64-NEXT: i64.add
; MEM64-NEXT: local.get 1
; MEM64-NEXT: v128.load16_lane 0, 0
; MEM64-NEXT: # fallthrough-return
  %s = getelementptr inbounds i16, ptr %p, i32 -6
  %x = load i16, ptr %s
  %t = insertelement <8 x i16> %v, i16 %x, i32 0
  ret <8 x i16> %t
}

define <8 x i16> @load_lane_i16_with_unfolded_offset(ptr %p, <8 x i16> %v) {
; CHECK-LABEL: load_lane_i16_with_unfolded_offset:
; CHECK: .functype load_lane_i16_with_unfolded_offset (i32, v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 24
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 1
; CHECK-NEXT: v128.load16_lane 0, 0
; CHECK-NEXT: # fallthrough-return
;
; MEM64-LABEL: load_lane_i16_with_unfolded_offset:
; MEM64: .functype load_lane_i16_with_unfolded_offset (i64, v128) -> (v128)
; MEM64-NEXT: # %bb.0:
; MEM64-NEXT: local.get 0
; MEM64-NEXT: i32.wrap_i64
; MEM64-NEXT: i32.const 24
; MEM64-NEXT: i32.add
; MEM64-NEXT: i64.extend_i32_u
; MEM64-NEXT: local.get 1
; MEM64-NEXT: v128.load16_lane 0, 0
; MEM64-NEXT: # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nsw i32 %q, 24
  %s = inttoptr i32 %r to ptr
  %x = load i16, ptr %s
  %t = insertelement <8 x i16> %v, i16 %x, i32 0
  ret <8 x i16> %t
}

define <8 x i16> @load_lane_i16_with_unfolded_gep_offset(ptr %p, <8 x i16> %v) {
; CHECK-LABEL: load_lane_i16_with_unfolded_gep_offset:
; CHECK: .functype load_lane_i16_with_unfolded_gep_offset (i32, v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 12
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 1
; CHECK-NEXT: v128.load16_lane 0, 0
; CHECK-NEXT: # fallthrough-return
;
; MEM64-LABEL: load_lane_i16_with_unfolded_gep_offset:
; MEM64: .functype load_lane_i16_with_unfolded_gep_offset (i64, v128) -> (v128)
; MEM64-NEXT: # %bb.0:
; MEM64-NEXT: local.get 0
; MEM64-NEXT: i64.const 12
; MEM64-NEXT: i64.add
; MEM64-NEXT: local.get 1
; MEM64-NEXT: v128.load16_lane 0, 0
; MEM64-NEXT: # fallthrough-return
  %s = getelementptr i16, ptr %p, i32 6
  %x = load i16, ptr %s
  %t = insertelement <8 x i16> %v, i16 %x, i32 0
  ret <8 x i16> %t
}

define <8 x i16> @load_lane_i16_from_numeric_address(<8 x i16> %v) {
; CHECK-LABEL: load_lane_i16_from_numeric_address:
; CHECK: .functype load_lane_i16_from_numeric_address (v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 42
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load16_lane 0, 0
; CHECK-NEXT: # fallthrough-return
;
; MEM64-LABEL: load_lane_i16_from_numeric_address:
; MEM64: .functype load_lane_i16_from_numeric_address (v128) -> (v128)
; MEM64-NEXT: # %bb.0:
; MEM64-NEXT: i64.const 42
; MEM64-NEXT: local.get 0
; MEM64-NEXT: v128.load16_lane 0, 0
; MEM64-NEXT: # fallthrough-return
  %s = inttoptr i32 42 to ptr
  %x = load i16, ptr %s
  %t = insertelement <8 x i16> %v, i16 %x, i32 0
  ret <8 x i16> %t
}

@gv_i16 = global i16 0
define <8 x i16> @load_lane_i16_from_global_address(<8 x i16> %v) {
; CHECK-LABEL: load_lane_i16_from_global_address:
; CHECK: .functype load_lane_i16_from_global_address (v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const gv_i16
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load16_lane 0, 0
; CHECK-NEXT: # fallthrough-return
;
; MEM64-LABEL: load_lane_i16_from_global_address:
; MEM64: .functype load_lane_i16_from_global_address (v128) -> (v128)
; MEM64-NEXT: # %bb.0:
; MEM64-NEXT: i64.const gv_i16
; MEM64-NEXT: local.get 0
; MEM64-NEXT: v128.load16_lane 0, 0
; MEM64-NEXT: # fallthrough-return
  %x = load i16, ptr @gv_i16
  %t = insertelement <8 x i16> %v, i16 %x, i32 0
  ret <8 x i16> %t
}

define void @store_lane_i16_no_offset(<8 x i16> %v, ptr %p) {
; CHECK-LABEL: store_lane_i16_no_offset:
; CHECK: .functype store_lane_i16_no_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store16_lane 0, 0
; CHECK-NEXT: # fallthrough-return
;
; MEM64-LABEL: store_lane_i16_no_offset:
; MEM64: .functype store_lane_i16_no_offset (v128, i64) -> ()
; MEM64-NEXT: # %bb.0:
; MEM64-NEXT: local.get 1
; MEM64-NEXT: local.get 0
; MEM64-NEXT: v128.store16_lane 0, 0
; MEM64-NEXT: # fallthrough-return
  %x = extractelement <8 x i16> %v, i32 0
  store i16 %x, ptr %p
  ret void
}

define void @store_lane_i16_with_folded_offset(<8 x i16> %v, ptr %p) {
; CHECK-LABEL: store_lane_i16_with_folded_offset:
; CHECK: .functype store_lane_i16_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store16_lane 24, 0
; CHECK-NEXT: # fallthrough-return
;
; MEM64-LABEL: store_lane_i16_with_folded_offset:
; MEM64: .functype store_lane_i16_with_folded_offset (v128, i64) -> ()
; MEM64-NEXT: # %bb.0:
; MEM64-NEXT: local.get 1
; MEM64-NEXT: i32.wrap_i64
; MEM64-NEXT: i32.const 24
; MEM64-NEXT: i32.add
; MEM64-NEXT: i64.extend_i32_u
; MEM64-NEXT: local.get 0
; MEM64-NEXT: v128.store16_lane 0, 0
; MEM64-NEXT: # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to ptr
  %x = extractelement <8 x i16> %v, i32 0
  store i16 %x, ptr %s
  ret void
}

define void @store_lane_i16_with_folded_gep_offset(<8 x i16> %v, ptr %p) {
; CHECK-LABEL: store_lane_i16_with_folded_gep_offset:
; CHECK: .functype store_lane_i16_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store16_lane 12, 0
; CHECK-NEXT: # fallthrough-return
;
; MEM64-LABEL: store_lane_i16_with_folded_gep_offset:
; MEM64: .functype store_lane_i16_with_folded_gep_offset (v128, i64) -> ()
; MEM64-NEXT: # %bb.0:
; MEM64-NEXT: local.get 1
; MEM64-NEXT: local.get 0
; MEM64-NEXT: v128.store16_lane 12, 0
; MEM64-NEXT: # fallthrough-return
  %s = getelementptr inbounds i16, ptr %p, i32 6
  %x = extractelement <8 x i16> %v, i32 0
  store i16 %x, ptr %s
  ret void
}

define void @store_lane_i16_with_unfolded_gep_negative_offset(<8 x i16> %v, ptr %p) {
; CHECK-LABEL: store_lane_i16_with_unfolded_gep_negative_offset:
; CHECK: .functype store_lane_i16_with_unfolded_gep_negative_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const -12
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store16_lane 0, 0
; CHECK-NEXT: # fallthrough-return
;
; MEM64-LABEL: store_lane_i16_with_unfolded_gep_negative_offset:
; MEM64: .functype store_lane_i16_with_unfolded_gep_negative_offset (v128, i64) -> ()
; MEM64-NEXT: # %bb.0:
; MEM64-NEXT: local.get 1
; MEM64-NEXT: i64.const -12
; MEM64-NEXT: i64.add
; MEM64-NEXT: local.get 0
; MEM64-NEXT: v128.store16_lane 0, 0
; MEM64-NEXT: # fallthrough-return
  %s = getelementptr inbounds i16, ptr %p, i32 -6
  %x = extractelement <8 x i16> %v, i32 0
  store i16 %x, ptr %s
  ret void
}

define void @store_lane_i16_with_unfolded_offset(<8 x i16> %v, ptr %p) {
; CHECK-LABEL: store_lane_i16_with_unfolded_offset:
; CHECK: .functype store_lane_i16_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const 24
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store16_lane 0, 0
; CHECK-NEXT: # fallthrough-return
;
; MEM64-LABEL: store_lane_i16_with_unfolded_offset:
; MEM64: .functype store_lane_i16_with_unfolded_offset (v128, i64) -> ()
; MEM64-NEXT: # %bb.0:
; MEM64-NEXT: local.get 1
; MEM64-NEXT: i32.wrap_i64
; MEM64-NEXT: i32.const 24
; MEM64-NEXT: i32.add
; MEM64-NEXT: i64.extend_i32_u
; MEM64-NEXT: local.get 0
; MEM64-NEXT: v128.store16_lane 0, 0
; MEM64-NEXT: # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nsw i32 %q, 24
  %s = inttoptr i32 %r to ptr
  %x = extractelement <8 x i16> %v, i32 0
  store i16 %x, ptr %s
  ret void
}

define void @store_lane_i16_with_unfolded_gep_offset(<8 x i16> %v, ptr %p) {
; CHECK-LABEL: store_lane_i16_with_unfolded_gep_offset:
; CHECK: .functype store_lane_i16_with_unfolded_gep_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const 12
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store16_lane 0, 0
; CHECK-NEXT: # fallthrough-return
;
; MEM64-LABEL: store_lane_i16_with_unfolded_gep_offset:
; MEM64: .functype store_lane_i16_with_unfolded_gep_offset (v128, i64) -> ()
; MEM64-NEXT: # %bb.0:
; MEM64-NEXT: local.get 1
; MEM64-NEXT: i64.const 12
; MEM64-NEXT: i64.add
; MEM64-NEXT: local.get 0
; MEM64-NEXT: v128.store16_lane 0, 0
; MEM64-NEXT: # fallthrough-return
  %s = getelementptr i16, ptr %p, i32 6
  %x = extractelement <8 x i16> %v, i32 0
  store i16 %x, ptr %s
  ret void
}

define void @store_lane_i16_to_numeric_address(<8 x i16> %v) {
; CHECK-LABEL: store_lane_i16_to_numeric_address:
; CHECK: .functype store_lane_i16_to_numeric_address (v128) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store16_lane 42, 0
; CHECK-NEXT: # fallthrough-return
;
; MEM64-LABEL: store_lane_i16_to_numeric_address:
; MEM64: .functype store_lane_i16_to_numeric_address (v128) -> ()
; MEM64-NEXT: # %bb.0:
; MEM64-NEXT: i64.const 0
; MEM64-NEXT: local.get 0
; MEM64-NEXT: v128.store16_lane 42, 0
; MEM64-NEXT: # fallthrough-return
  %s = inttoptr i32 42 to ptr
  %x = extractelement <8 x i16> %v, i32 0
  store i16 %x, ptr %s
  ret void
}

define void @store_lane_i16_from_global_address(<8 x i16> %v) {
; CHECK-LABEL: store_lane_i16_from_global_address:
; CHECK: .functype store_lane_i16_from_global_address (v128) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store16_lane gv_i16, 0
; CHECK-NEXT: # fallthrough-return
;
; MEM64-LABEL: store_lane_i16_from_global_address:
; MEM64: .functype store_lane_i16_from_global_address (v128) -> ()
; MEM64-NEXT: # %bb.0:
; MEM64-NEXT: i64.const 0
; MEM64-NEXT: local.get 0
; MEM64-NEXT: v128.store16_lane gv_i16, 0
; MEM64-NEXT: # fallthrough-return
  %x = extractelement <8 x i16> %v, i32 0
  store i16 %x, ptr @gv_i16
  ret void
}

;===----------------------------------------------------------------------------
; v128.load32_lane / v128.store32_lane
;===----------------------------------------------------------------------------

define <4 x i32> @load_lane_i32_no_offset(ptr %p, <4 x i32> %v) {
; CHECK-LABEL: load_lane_i32_no_offset:
; CHECK: .functype load_lane_i32_no_offset (i32, v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 1
; CHECK-NEXT: v128.load32_lane 0, 0
; CHECK-NEXT: # fallthrough-return
;
; MEM64-LABEL: load_lane_i32_no_offset:
; MEM64: .functype load_lane_i32_no_offset (i64, v128) -> (v128)
; MEM64-NEXT: # %bb.0:
; MEM64-NEXT: local.get 0
; MEM64-NEXT: local.get 1
; MEM64-NEXT: v128.load32_lane 0, 0
; MEM64-NEXT: # fallthrough-return
  %x = load i32, ptr %p
  %t = insertelement <4 x i32> %v, i32 %x, i32 0
  ret <4 x i32> %t
}

define <4 x i32> @load_lane_i32_with_folded_offset(ptr %p, <4 x i32> %v) {
; CHECK-LABEL: load_lane_i32_with_folded_offset:
; CHECK: .functype load_lane_i32_with_folded_offset (i32, v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 24
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 1
; CHECK-NEXT: v128.load32_lane 0, 0
; CHECK-NEXT: # fallthrough-return
;
; MEM64-LABEL: load_lane_i32_with_folded_offset:
; MEM64: .functype load_lane_i32_with_folded_offset (i64, v128) -> (v128)
; MEM64-NEXT: # %bb.0:
; MEM64-NEXT: local.get 0
; MEM64-NEXT: i32.wrap_i64
; MEM64-NEXT: i32.const 24
; MEM64-NEXT: i32.add
; MEM64-NEXT: i64.extend_i32_u
; MEM64-NEXT: local.get 1
; MEM64-NEXT: v128.load32_lane 0, 0
; MEM64-NEXT: # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to ptr
  %x = load i32, ptr %s
  %t = insertelement <4 x i32> %v, i32 %x, i32 0
  ret <4 x i32> %t
}

define <4 x i32> @load_lane_i32_with_folded_gep_offset(ptr %p, <4 x i32> %v) {
; CHECK-LABEL: load_lane_i32_with_folded_gep_offset:
; CHECK: .functype load_lane_i32_with_folded_gep_offset (i32, v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 24
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 1
; CHECK-NEXT: v128.load32_lane 0, 0
; CHECK-NEXT: # fallthrough-return
;
; MEM64-LABEL: load_lane_i32_with_folded_gep_offset:
; MEM64: .functype load_lane_i32_with_folded_gep_offset (i64, v128) -> (v128)
; MEM64-NEXT: # %bb.0:
; MEM64-NEXT: local.get 0
; MEM64-NEXT: i64.const 24
; MEM64-NEXT: i64.add
; MEM64-NEXT: local.get 1
; MEM64-NEXT: v128.load32_lane 0, 0
; MEM64-NEXT: # fallthrough-return
  %s = getelementptr inbounds i32, ptr %p, i32 6
  %x = load i32, ptr %s
  %t = insertelement <4 x i32> %v, i32 %x, i32 0
  ret <4 x i32> %t
}

define <4 x i32> @load_lane_i32_with_unfolded_gep_negative_offset(ptr %p, <4 x i32> %v) {
; CHECK-LABEL: load_lane_i32_with_unfolded_gep_negative_offset:
; CHECK: .functype load_lane_i32_with_unfolded_gep_negative_offset (i32, v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const -24
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 1
; CHECK-NEXT: v128.load32_lane 0, 0
; CHECK-NEXT: # fallthrough-return
;
; MEM64-LABEL: load_lane_i32_with_unfolded_gep_negative_offset:
; MEM64: .functype load_lane_i32_with_unfolded_gep_negative_offset (i64, v128) -> (v128)
; MEM64-NEXT: # %bb.0:
; MEM64-NEXT: local.get 0
; MEM64-NEXT: i64.const -24
; MEM64-NEXT: i64.add
; MEM64-NEXT: local.get 1
; MEM64-NEXT: v128.load32_lane 0, 0
; MEM64-NEXT: # fallthrough-return
  %s = getelementptr inbounds i32, ptr %p, i32 -6
  %x = load i32, ptr %s
  %t = insertelement <4 x i32> %v, i32 %x, i32 0
  ret <4 x i32> %t
}

define <4 x i32> @load_lane_i32_with_unfolded_offset(ptr %p, <4 x i32> %v) {
; CHECK-LABEL: load_lane_i32_with_unfolded_offset:
; CHECK: .functype load_lane_i32_with_unfolded_offset (i32, v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 24
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 1
; CHECK-NEXT: v128.load32_lane 0, 0
; CHECK-NEXT: # fallthrough-return
;
; MEM64-LABEL: load_lane_i32_with_unfolded_offset:
; MEM64: .functype load_lane_i32_with_unfolded_offset (i64, v128) -> (v128)
; MEM64-NEXT: # %bb.0:
; MEM64-NEXT: local.get 0
; MEM64-NEXT: i32.wrap_i64
; MEM64-NEXT: i32.const 24
; MEM64-NEXT: i32.add
; MEM64-NEXT: i64.extend_i32_u
; MEM64-NEXT: local.get 1
; MEM64-NEXT: v128.load32_lane 0, 0
; MEM64-NEXT: # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nsw i32 %q, 24
  %s = inttoptr i32 %r to ptr
  %x = load i32, ptr %s
  %t = insertelement <4 x i32> %v, i32 %x, i32 0
  ret <4 x i32> %t
}

define <4 x i32> @load_lane_i32_with_unfolded_gep_offset(ptr %p, <4 x i32> %v) {
; CHECK-LABEL: load_lane_i32_with_unfolded_gep_offset:
; CHECK: .functype load_lane_i32_with_unfolded_gep_offset (i32, v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 24
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 1
; CHECK-NEXT: v128.load32_lane 0, 0
; CHECK-NEXT: # fallthrough-return
;
; MEM64-LABEL: load_lane_i32_with_unfolded_gep_offset:
; MEM64: .functype load_lane_i32_with_unfolded_gep_offset (i64, v128) -> (v128)
; MEM64-NEXT: # %bb.0:
; MEM64-NEXT: local.get 0
; MEM64-NEXT: i64.const 24
; MEM64-NEXT: i64.add
; MEM64-NEXT: local.get 1
; MEM64-NEXT: v128.load32_lane 0, 0
; MEM64-NEXT: # fallthrough-return
  %s = getelementptr i32, ptr %p, i32 6
  %x = load i32, ptr %s
  %t = insertelement <4 x i32> %v, i32 %x, i32 0
  ret <4 x i32> %t
}

define <4 x i32> @load_lane_i32_from_numeric_address(<4 x i32> %v) {
; CHECK-LABEL: load_lane_i32_from_numeric_address:
; CHECK: .functype load_lane_i32_from_numeric_address (v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 42
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load32_lane 0, 0
; CHECK-NEXT: # fallthrough-return
;
; MEM64-LABEL: load_lane_i32_from_numeric_address:
; MEM64: .functype load_lane_i32_from_numeric_address (v128) -> (v128)
; MEM64-NEXT: # %bb.0:
; MEM64-NEXT: i64.const 42
; MEM64-NEXT: local.get 0
; MEM64-NEXT: v128.load32_lane 0, 0
; MEM64-NEXT: # fallthrough-return
  %s = inttoptr i32 42 to ptr
  %x = load i32, ptr %s
  %t = insertelement <4 x i32> %v, i32 %x, i32 0
  ret <4 x i32> %t
}

@gv_i32 = global i32 0
define <4 x i32> @load_lane_i32_from_global_address(<4 x i32> %v) {
; CHECK-LABEL: load_lane_i32_from_global_address:
; CHECK: .functype load_lane_i32_from_global_address (v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const gv_i32
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load32_lane 0, 0
; CHECK-NEXT: # fallthrough-return
;
; MEM64-LABEL: load_lane_i32_from_global_address:
; MEM64: .functype load_lane_i32_from_global_address (v128) -> (v128)
; MEM64-NEXT: # %bb.0:
; MEM64-NEXT: i64.const gv_i32
; MEM64-NEXT: local.get 0
; MEM64-NEXT: v128.load32_lane 0, 0
; MEM64-NEXT: # fallthrough-return
  %x = load i32, ptr @gv_i32
  %t = insertelement <4 x i32> %v, i32 %x, i32 0
  ret <4 x i32> %t
}

define void @store_lane_i32_no_offset(<4 x i32> %v, ptr %p) {
; CHECK-LABEL: store_lane_i32_no_offset:
; CHECK: .functype store_lane_i32_no_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store32_lane 0, 0
; CHECK-NEXT: # fallthrough-return
;
; MEM64-LABEL: store_lane_i32_no_offset:
; MEM64: .functype store_lane_i32_no_offset (v128, i64) -> ()
; MEM64-NEXT: # %bb.0:
; MEM64-NEXT: local.get 1
; MEM64-NEXT: local.get 0
; MEM64-NEXT: v128.store32_lane 0, 0
; MEM64-NEXT: # fallthrough-return
  %x = extractelement <4 x i32> %v, i32 0
  store i32 %x, ptr %p
  ret void
}

define void @store_lane_i32_with_folded_offset(<4 x i32> %v, ptr %p) {
; CHECK-LABEL: store_lane_i32_with_folded_offset:
; CHECK: .functype store_lane_i32_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store32_lane 24, 0
; CHECK-NEXT: # fallthrough-return
;
; MEM64-LABEL: store_lane_i32_with_folded_offset:
; MEM64: .functype store_lane_i32_with_folded_offset (v128, i64) -> ()
; MEM64-NEXT: # %bb.0:
; MEM64-NEXT: local.get 1
; MEM64-NEXT: i32.wrap_i64
; MEM64-NEXT: i32.const 24
; MEM64-NEXT: i32.add
; MEM64-NEXT: i64.extend_i32_u
; MEM64-NEXT: local.get 0
; MEM64-NEXT: v128.store32_lane 0, 0
; MEM64-NEXT: # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to ptr
  %x = extractelement <4 x i32> %v, i32 0
  store i32 %x, ptr %s
  ret void
}

define void @store_lane_i32_with_folded_gep_offset(<4 x i32> %v, ptr %p) {
; CHECK-LABEL: store_lane_i32_with_folded_gep_offset:
; CHECK: .functype store_lane_i32_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store32_lane 24, 0
; CHECK-NEXT: # fallthrough-return
;
; MEM64-LABEL: store_lane_i32_with_folded_gep_offset:
; MEM64: .functype store_lane_i32_with_folded_gep_offset (v128, i64) -> ()
; MEM64-NEXT: # %bb.0:
; MEM64-NEXT: local.get 1
; MEM64-NEXT: local.get 0
; MEM64-NEXT: v128.store32_lane 24, 0
; MEM64-NEXT: # fallthrough-return
  %s = getelementptr inbounds i32, ptr %p, i32 6
  %x = extractelement <4 x i32> %v, i32 0
  store i32 %x, ptr %s
  ret void
}

define void @store_lane_i32_with_unfolded_gep_negative_offset(<4 x i32> %v, ptr %p) {
; CHECK-LABEL: store_lane_i32_with_unfolded_gep_negative_offset:
; CHECK: .functype store_lane_i32_with_unfolded_gep_negative_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const -24
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store32_lane 0, 0
; CHECK-NEXT: # fallthrough-return
;
; MEM64-LABEL: store_lane_i32_with_unfolded_gep_negative_offset:
; MEM64: .functype store_lane_i32_with_unfolded_gep_negative_offset (v128, i64) -> ()
; MEM64-NEXT: # %bb.0:
; MEM64-NEXT: local.get 1
; MEM64-NEXT: i64.const -24
; MEM64-NEXT: i64.add
; MEM64-NEXT: local.get 0
; MEM64-NEXT: v128.store32_lane 0, 0
; MEM64-NEXT: # fallthrough-return
  %s = getelementptr inbounds i32, ptr %p, i32 -6
  %x = extractelement <4 x i32> %v, i32 0
  store i32 %x, ptr %s
  ret void
}

define void @store_lane_i32_with_unfolded_offset(<4 x i32> %v, ptr %p) {
; CHECK-LABEL: store_lane_i32_with_unfolded_offset:
; CHECK: .functype store_lane_i32_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const 24
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store32_lane 0, 0
; CHECK-NEXT: # fallthrough-return
;
; MEM64-LABEL: store_lane_i32_with_unfolded_offset:
; MEM64: .functype store_lane_i32_with_unfolded_offset (v128, i64) -> ()
; MEM64-NEXT: # %bb.0:
; MEM64-NEXT: local.get 1
; MEM64-NEXT: i32.wrap_i64
; MEM64-NEXT: i32.const 24
; MEM64-NEXT: i32.add
; MEM64-NEXT: i64.extend_i32_u
; MEM64-NEXT: local.get 0
; MEM64-NEXT: v128.store32_lane 0, 0
; MEM64-NEXT: # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nsw i32 %q, 24
  %s = inttoptr i32 %r to ptr
  %x = extractelement <4 x i32> %v, i32 0
  store i32 %x, ptr %s
  ret void
}

define void @store_lane_i32_with_unfolded_gep_offset(<4 x i32> %v, ptr %p) {
; CHECK-LABEL: store_lane_i32_with_unfolded_gep_offset:
; CHECK: .functype store_lane_i32_with_unfolded_gep_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const 24
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store32_lane 0, 0
; CHECK-NEXT: # fallthrough-return
;
; MEM64-LABEL: store_lane_i32_with_unfolded_gep_offset:
; MEM64: .functype store_lane_i32_with_unfolded_gep_offset (v128, i64) -> ()
; MEM64-NEXT: # %bb.0:
; MEM64-NEXT: local.get 1
; MEM64-NEXT: i64.const 24
; MEM64-NEXT: i64.add
; MEM64-NEXT: local.get 0
; MEM64-NEXT: v128.store32_lane 0, 0
; MEM64-NEXT: # fallthrough-return
  %s = getelementptr i32, ptr %p, i32 6
  %x = extractelement <4 x i32> %v, i32 0
  store i32 %x, ptr %s
  ret void
}

define void @store_lane_i32_to_numeric_address(<4 x i32> %v) {
; CHECK-LABEL: store_lane_i32_to_numeric_address:
; CHECK: .functype store_lane_i32_to_numeric_address (v128) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store32_lane 42, 0
; CHECK-NEXT: # fallthrough-return
;
; MEM64-LABEL: store_lane_i32_to_numeric_address:
; MEM64: .functype store_lane_i32_to_numeric_address (v128) -> ()
; MEM64-NEXT: # %bb.0:
; MEM64-NEXT: i64.const 0
; MEM64-NEXT: local.get 0
; MEM64-NEXT: v128.store32_lane 42, 0
; MEM64-NEXT: # fallthrough-return
  %s = inttoptr i32 42 to ptr
  %x = extractelement <4 x i32> %v, i32 0
  store i32 %x, ptr %s
  ret void
}

define void @store_lane_i32_from_global_address(<4 x i32> %v) {
; CHECK-LABEL: store_lane_i32_from_global_address:
; CHECK: .functype store_lane_i32_from_global_address (v128) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store32_lane gv_i32, 0
; CHECK-NEXT: # fallthrough-return
;
; MEM64-LABEL: store_lane_i32_from_global_address:
; MEM64: .functype store_lane_i32_from_global_address (v128) -> ()
; MEM64-NEXT: # %bb.0:
; MEM64-NEXT: i64.const 0
; MEM64-NEXT: local.get 0
; MEM64-NEXT: v128.store32_lane gv_i32, 0
; MEM64-NEXT: # fallthrough-return
  %x = extractelement <4 x i32> %v, i32 0
  store i32 %x, ptr @gv_i32
  ret void
}

;===----------------------------------------------------------------------------
; v128.load64_lane / v128.store64_lane
;===----------------------------------------------------------------------------

define <2 x i64> @load_lane_i64_no_offset(ptr %p, <2 x i64> %v) {
; CHECK-LABEL: load_lane_i64_no_offset:
; CHECK: .functype load_lane_i64_no_offset (i32, v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 1
; CHECK-NEXT: v128.load64_lane 0, 0
; CHECK-NEXT: # fallthrough-return
;
; MEM64-LABEL: load_lane_i64_no_offset:
; MEM64: .functype load_lane_i64_no_offset (i64, v128) -> (v128)
; MEM64-NEXT: # %bb.0:
; MEM64-NEXT: local.get 0
; MEM64-NEXT: local.get 1
; MEM64-NEXT: v128.load64_lane 0, 0
; MEM64-NEXT: # fallthrough-return
  %x = load i64, ptr %p
  %t = insertelement <2 x i64> %v, i64 %x, i32 0
  ret <2 x i64> %t
}

define <2 x i64> @load_lane_i64_with_folded_offset(ptr %p, <2 x i64> %v) {
; CHECK-LABEL: load_lane_i64_with_folded_offset:
; CHECK: .functype load_lane_i64_with_folded_offset (i32, v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 24
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 1
; CHECK-NEXT: v128.load64_lane 0, 0
; CHECK-NEXT: # fallthrough-return
;
; MEM64-LABEL: load_lane_i64_with_folded_offset:
; MEM64: .functype load_lane_i64_with_folded_offset (i64, v128) -> (v128)
; MEM64-NEXT: # %bb.0:
; MEM64-NEXT: local.get 0
; MEM64-NEXT: i32.wrap_i64
; MEM64-NEXT: i32.const 24
; MEM64-NEXT: i32.add
; MEM64-NEXT: i64.extend_i32_u
; MEM64-NEXT: local.get 1
; MEM64-NEXT: v128.load64_lane 0, 0
; MEM64-NEXT: # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to ptr
  %x = load i64, ptr %s
  %t = insertelement <2 x i64> %v, i64 %x, i32 0
  ret <2 x i64> %t
}

define <2 x i64> @load_lane_i64_with_folded_gep_offset(ptr %p, <2 x i64> %v) {
; CHECK-LABEL: load_lane_i64_with_folded_gep_offset:
; CHECK: .functype load_lane_i64_with_folded_gep_offset (i32, v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 48
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 1
; CHECK-NEXT: v128.load64_lane 0, 0
; CHECK-NEXT: # fallthrough-return
;
; MEM64-LABEL: load_lane_i64_with_folded_gep_offset:
; MEM64: .functype load_lane_i64_with_folded_gep_offset (i64, v128) -> (v128)
; MEM64-NEXT: # %bb.0:
; MEM64-NEXT: local.get 0
; MEM64-NEXT: i64.const 48
; MEM64-NEXT: i64.add
; MEM64-NEXT: local.get 1
; MEM64-NEXT: v128.load64_lane 0, 0
; MEM64-NEXT: # fallthrough-return
  %s = getelementptr inbounds i64, ptr %p, i32 6
  %x = load i64, ptr %s
  %t = insertelement <2 x i64> %v, i64 %x, i32 0
  ret <2 x i64> %t
}

define <2 x i64> @load_lane_i64_with_unfolded_gep_negative_offset(ptr %p, <2 x i64> %v) {
; CHECK-LABEL: load_lane_i64_with_unfolded_gep_negative_offset:
; CHECK: .functype load_lane_i64_with_unfolded_gep_negative_offset (i32, v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const -48
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 1
; CHECK-NEXT: v128.load64_lane 0, 0
; CHECK-NEXT: # fallthrough-return
;
; MEM64-LABEL: load_lane_i64_with_unfolded_gep_negative_offset:
; MEM64: .functype load_lane_i64_with_unfolded_gep_negative_offset (i64, v128) -> (v128)
; MEM64-NEXT: # %bb.0:
; MEM64-NEXT: local.get 0
; MEM64-NEXT: i64.const -48
; MEM64-NEXT: i64.add
; MEM64-NEXT: local.get 1
; MEM64-NEXT: v128.load64_lane 0, 0
; MEM64-NEXT: # fallthrough-return
  %s = getelementptr inbounds i64, ptr %p, i32 -6
  %x = load i64, ptr %s
  %t = insertelement <2 x i64> %v, i64 %x, i32 0
  ret <2 x i64> %t
}

define <2 x i64> @load_lane_i64_with_unfolded_offset(ptr %p, <2 x i64> %v) {
; CHECK-LABEL: load_lane_i64_with_unfolded_offset:
; CHECK: .functype load_lane_i64_with_unfolded_offset (i32, v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 24
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 1
; CHECK-NEXT: v128.load64_lane 0, 0
; CHECK-NEXT: # fallthrough-return
;
; MEM64-LABEL: load_lane_i64_with_unfolded_offset:
; MEM64: .functype load_lane_i64_with_unfolded_offset (i64, v128) -> (v128)
; MEM64-NEXT: # %bb.0:
; MEM64-NEXT: local.get 0
; MEM64-NEXT: i32.wrap_i64
; MEM64-NEXT: i32.const 24
; MEM64-NEXT: i32.add
; MEM64-NEXT: i64.extend_i32_u
; MEM64-NEXT: local.get 1
; MEM64-NEXT: v128.load64_lane 0, 0
; MEM64-NEXT: # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nsw i32 %q, 24
  %s = inttoptr i32 %r to ptr
  %x = load i64, ptr %s
  %t = insertelement <2 x i64> %v, i64 %x, i32 0
  ret <2 x i64> %t
}

define <2 x i64> @load_lane_i64_with_unfolded_gep_offset(ptr %p, <2 x i64> %v) {
; CHECK-LABEL: load_lane_i64_with_unfolded_gep_offset:
; CHECK: .functype load_lane_i64_with_unfolded_gep_offset (i32, v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 48
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 1
; CHECK-NEXT: v128.load64_lane 0, 0
; CHECK-NEXT: # fallthrough-return
;
; MEM64-LABEL: load_lane_i64_with_unfolded_gep_offset:
; MEM64: .functype load_lane_i64_with_unfolded_gep_offset (i64, v128) -> (v128)
; MEM64-NEXT: # %bb.0:
; MEM64-NEXT: local.get 0
; MEM64-NEXT: i64.const 48
; MEM64-NEXT: i64.add
; MEM64-NEXT: local.get 1
; MEM64-NEXT: v128.load64_lane 0, 0
; MEM64-NEXT: # fallthrough-return
  %s = getelementptr i64, ptr %p, i32 6
  %x = load i64, ptr %s
  %t = insertelement <2 x i64> %v, i64 %x, i32 0
  ret <2 x i64> %t
}

define <2 x i64> @load_lane_i64_from_numeric_address(<2 x i64> %v) {
; CHECK-LABEL: load_lane_i64_from_numeric_address:
; CHECK: .functype load_lane_i64_from_numeric_address (v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 42
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load64_lane 0, 0
; CHECK-NEXT: # fallthrough-return
;
; MEM64-LABEL: load_lane_i64_from_numeric_address:
; MEM64: .functype load_lane_i64_from_numeric_address (v128) -> (v128)
; MEM64-NEXT: # %bb.0:
; MEM64-NEXT: i64.const 42
; MEM64-NEXT: local.get 0
; MEM64-NEXT: v128.load64_lane 0, 0
; MEM64-NEXT: # fallthrough-return
  %s = inttoptr i32 42 to ptr
  %x = load i64, ptr %s
  %t = insertelement <2 x i64> %v, i64 %x, i32 0
  ret <2 x i64> %t
}

@gv_i64 = global i64 0
define <2 x i64> @load_lane_i64_from_global_address(<2 x i64> %v) {
; CHECK-LABEL: load_lane_i64_from_global_address:
; CHECK: .functype load_lane_i64_from_global_address (v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const gv_i64
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load64_lane 0, 0
; CHECK-NEXT: # fallthrough-return
;
; MEM64-LABEL: load_lane_i64_from_global_address:
; MEM64: .functype load_lane_i64_from_global_address (v128) -> (v128)
; MEM64-NEXT: # %bb.0:
; MEM64-NEXT: i64.const gv_i64
; MEM64-NEXT: local.get 0
; MEM64-NEXT: v128.load64_lane 0, 0
; MEM64-NEXT: # fallthrough-return
  %x = load i64, ptr @gv_i64
  %t = insertelement <2 x i64> %v, i64 %x, i32 0
  ret <2 x i64> %t
}

define void @store_lane_i64_no_offset(<2 x i64> %v, ptr %p) {
; CHECK-LABEL: store_lane_i64_no_offset:
; CHECK: .functype store_lane_i64_no_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store64_lane 0, 0
; CHECK-NEXT: # fallthrough-return
;
; MEM64-LABEL: store_lane_i64_no_offset:
; MEM64: .functype store_lane_i64_no_offset (v128, i64) -> ()
; MEM64-NEXT: # %bb.0:
; MEM64-NEXT: local.get 1
; MEM64-NEXT: local.get 0
; MEM64-NEXT: v128.store64_lane 0, 0
; MEM64-NEXT: # fallthrough-return
  %x = extractelement <2 x i64> %v, i32 0
  store i64 %x, ptr %p
  ret void
}

define void @store_lane_i64_with_folded_offset(<2 x i64> %v, ptr %p) {
; CHECK-LABEL: store_lane_i64_with_folded_offset:
; CHECK: .functype store_lane_i64_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store64_lane 24, 0
; CHECK-NEXT: # fallthrough-return
;
; MEM64-LABEL: store_lane_i64_with_folded_offset:
; MEM64: .functype store_lane_i64_with_folded_offset (v128, i64) -> ()
; MEM64-NEXT: # %bb.0:
; MEM64-NEXT: local.get 1
; MEM64-NEXT: i32.wrap_i64
; MEM64-NEXT: i32.const 24
; MEM64-NEXT: i32.add
; MEM64-NEXT: i64.extend_i32_u
; MEM64-NEXT: local.get 0
; MEM64-NEXT: v128.store64_lane 0, 0
; MEM64-NEXT: # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to ptr
  %x = extractelement <2 x i64> %v, i32 0
  store i64 %x, ptr %s
  ret void
}

define void @store_lane_i64_with_folded_gep_offset(<2 x i64> %v, ptr %p) {
; CHECK-LABEL: store_lane_i64_with_folded_gep_offset:
; CHECK: .functype store_lane_i64_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store64_lane 48, 0
; CHECK-NEXT: # fallthrough-return
;
; MEM64-LABEL: store_lane_i64_with_folded_gep_offset:
; MEM64: .functype store_lane_i64_with_folded_gep_offset (v128, i64) -> ()
; MEM64-NEXT: # %bb.0:
; MEM64-NEXT: local.get 1
; MEM64-NEXT: local.get 0
; MEM64-NEXT: v128.store64_lane 48, 0
; MEM64-NEXT: # fallthrough-return
  %s = getelementptr inbounds i64, ptr %p, i32 6
  %x = extractelement <2 x i64> %v, i32 0
  store i64 %x, ptr %s
  ret void
}

define void @store_lane_i64_with_unfolded_gep_negative_offset(<2 x i64> %v, ptr %p) {
; CHECK-LABEL: store_lane_i64_with_unfolded_gep_negative_offset:
; CHECK: .functype store_lane_i64_with_unfolded_gep_negative_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const -48
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store64_lane 0, 0
; CHECK-NEXT: # fallthrough-return
;
; MEM64-LABEL: store_lane_i64_with_unfolded_gep_negative_offset:
; MEM64: .functype store_lane_i64_with_unfolded_gep_negative_offset (v128, i64) -> ()
; MEM64-NEXT: # %bb.0:
; MEM64-NEXT: local.get 1
; MEM64-NEXT: i64.const -48
; MEM64-NEXT: i64.add
; MEM64-NEXT: local.get 0
; MEM64-NEXT: v128.store64_lane 0, 0
; MEM64-NEXT: # fallthrough-return
  %s = getelementptr inbounds i64, ptr %p, i32 -6
  %x = extractelement <2 x i64> %v, i32 0
  store i64 %x, ptr %s
  ret void
}

define void @store_lane_i64_with_unfolded_offset(<2 x i64> %v, ptr %p) {
; CHECK-LABEL: store_lane_i64_with_unfolded_offset:
; CHECK: .functype store_lane_i64_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const 24
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store64_lane 0, 0
; CHECK-NEXT: # fallthrough-return
;
; MEM64-LABEL: store_lane_i64_with_unfolded_offset:
; MEM64: .functype store_lane_i64_with_unfolded_offset (v128, i64) -> ()
; MEM64-NEXT: # %bb.0:
; MEM64-NEXT: local.get 1
; MEM64-NEXT: i32.wrap_i64
; MEM64-NEXT: i32.const 24
; MEM64-NEXT: i32.add
; MEM64-NEXT: i64.extend_i32_u
; MEM64-NEXT: local.get 0
; MEM64-NEXT: v128.store64_lane 0, 0
; MEM64-NEXT: # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nsw i32 %q, 24
  %s = inttoptr i32 %r to ptr
  %x = extractelement <2 x i64> %v, i32 0
  store i64 %x, ptr %s
  ret void
}

define void @store_lane_i64_with_unfolded_gep_offset(<2 x i64> %v, ptr %p) {
; CHECK-LABEL: store_lane_i64_with_unfolded_gep_offset:
; CHECK: .functype store_lane_i64_with_unfolded_gep_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const 48
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store64_lane 0, 0
; CHECK-NEXT: # fallthrough-return
;
; MEM64-LABEL: store_lane_i64_with_unfolded_gep_offset:
; MEM64: .functype store_lane_i64_with_unfolded_gep_offset (v128, i64) -> ()
; MEM64-NEXT: # %bb.0:
; MEM64-NEXT: local.get 1
; MEM64-NEXT: i64.const 48
; MEM64-NEXT: i64.add
; MEM64-NEXT: local.get 0
; MEM64-NEXT: v128.store64_lane 0, 0
; MEM64-NEXT: # fallthrough-return
  %s = getelementptr i64, ptr %p, i32 6
  %x = extractelement <2 x i64> %v, i32 0
  store i64 %x, ptr %s
  ret void
}

define void @store_lane_i64_to_numeric_address(<2 x i64> %v) {
; CHECK-LABEL: store_lane_i64_to_numeric_address:
; CHECK: .functype store_lane_i64_to_numeric_address (v128) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store64_lane 42, 0
; CHECK-NEXT: # fallthrough-return
;
; MEM64-LABEL: store_lane_i64_to_numeric_address:
; MEM64: .functype store_lane_i64_to_numeric_address (v128) -> ()
; MEM64-NEXT: # %bb.0:
; MEM64-NEXT: i64.const 0
; MEM64-NEXT: local.get 0
; MEM64-NEXT: v128.store64_lane 42, 0
; MEM64-NEXT: # fallthrough-return
  %s = inttoptr i32 42 to ptr
  %x = extractelement <2 x i64> %v, i32 0
  store i64 %x, ptr %s
  ret void
}

define void @store_lane_i64_from_global_address(<2 x i64> %v) {
; CHECK-LABEL: store_lane_i64_from_global_address:
; CHECK: .functype store_lane_i64_from_global_address (v128) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store64_lane gv_i64, 0
; CHECK-NEXT: # fallthrough-return
;
; MEM64-LABEL: store_lane_i64_from_global_address:
; MEM64: .functype store_lane_i64_from_global_address (v128) -> ()
; MEM64-NEXT: # %bb.0:
; MEM64-NEXT: i64.const 0
; MEM64-NEXT: local.get 0
; MEM64-NEXT: v128.store64_lane gv_i64, 0
; MEM64-NEXT: # fallthrough-return
  %x = extractelement <2 x i64> %v, i32 0
  store i64 %x, ptr @gv_i64
  ret void
}