Now that we legalize by widening, the element types here won't change. Previously these were modeled as the elements being widened and then the instruction might become an AND or SHL/ASHR pair. But now they'll become something like a ZERO_EXTEND_VECTOR_INREG/SIGN_EXTEND_VECTOR_INREG. For AVX2, when the destination type is legal, it's clear the cost should be 1 since we have extend instructions that can produce 256 bit vectors from less than 128 bit vectors. I'm a little less sure about AVX1 costs; I think the ones I changed were definitely too high, and they might still be too high. Differential Revision: https://reviews.llvm.org/D66169 llvm-svn: 368858
889 lines
48 KiB
LLVM
889 lines
48 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mtriple=x86_64-unknown -basicaa -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,SSE,SSE2
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -basicaa -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,SSE,SLM
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -basicaa -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX1
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -basicaa -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX2
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -basicaa -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX512,AVX512F
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=+avx512bw -basicaa -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX512,AVX512BW

;
; vXi8
;

; Loads two adjacent i8 values starting at %p0, sign-extends each to i64 and
; packs them into a <2 x i64> result.
; Expected output per the assertions below - the SSE2 run leaves the scalar
; code untouched, while the SLM and AVX runs use a single <2 x i8> load
; followed by a single vector sext.
define <2 x i64> @loadext_2i8_to_2i64(i8* %p0) {
; SSE2-LABEL: @loadext_2i8_to_2i64(
; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
; SSE2-NEXT: [[I0:%.*]] = load i8, i8* [[P0]], align 1
; SSE2-NEXT: [[I1:%.*]] = load i8, i8* [[P1]], align 1
; SSE2-NEXT: [[X0:%.*]] = sext i8 [[I0]] to i64
; SSE2-NEXT: [[X1:%.*]] = sext i8 [[I1]] to i64
; SSE2-NEXT: [[V0:%.*]] = insertelement <2 x i64> undef, i64 [[X0]], i32 0
; SSE2-NEXT: [[V1:%.*]] = insertelement <2 x i64> [[V0]], i64 [[X1]], i32 1
; SSE2-NEXT: ret <2 x i64> [[V1]]
;
; SLM-LABEL: @loadext_2i8_to_2i64(
; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
; SLM-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <2 x i8>*
; SLM-NEXT: [[TMP2:%.*]] = load <2 x i8>, <2 x i8>* [[TMP1]], align 1
; SLM-NEXT: [[TMP3:%.*]] = sext <2 x i8> [[TMP2]] to <2 x i64>
; SLM-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
; SLM-NEXT: [[V0:%.*]] = insertelement <2 x i64> undef, i64 [[TMP4]], i32 0
; SLM-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1
; SLM-NEXT: [[V1:%.*]] = insertelement <2 x i64> [[V0]], i64 [[TMP5]], i32 1
; SLM-NEXT: ret <2 x i64> [[V1]]
;
; AVX-LABEL: @loadext_2i8_to_2i64(
; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
; AVX-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <2 x i8>*
; AVX-NEXT: [[TMP2:%.*]] = load <2 x i8>, <2 x i8>* [[TMP1]], align 1
; AVX-NEXT: [[TMP3:%.*]] = sext <2 x i8> [[TMP2]] to <2 x i64>
; AVX-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
; AVX-NEXT: [[V0:%.*]] = insertelement <2 x i64> undef, i64 [[TMP4]], i32 0
; AVX-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1
; AVX-NEXT: [[V1:%.*]] = insertelement <2 x i64> [[V0]], i64 [[TMP5]], i32 1
; AVX-NEXT: ret <2 x i64> [[V1]]
;
  %p1 = getelementptr inbounds i8, i8* %p0, i64 1
  %i0 = load i8, i8* %p0, align 1
  %i1 = load i8, i8* %p1, align 1
  %x0 = sext i8 %i0 to i64
  %x1 = sext i8 %i1 to i64
  %v0 = insertelement <2 x i64> undef, i64 %x0, i32 0
  %v1 = insertelement <2 x i64> %v0, i64 %x1, i32 1
  ret <2 x i64> %v1
}
|
|
|
|
; Loads four adjacent i8 values starting at %p0, sign-extends each to i32 and
; packs them into a <4 x i32> result.
; Expected output per the assertions below - the SSE2 run leaves the scalar
; code untouched, while the SLM and AVX runs use a single <4 x i8> load
; followed by a single vector sext.
define <4 x i32> @loadext_4i8_to_4i32(i8* %p0) {
; SSE2-LABEL: @loadext_4i8_to_4i32(
; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
; SSE2-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2
; SSE2-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3
; SSE2-NEXT: [[I0:%.*]] = load i8, i8* [[P0]], align 1
; SSE2-NEXT: [[I1:%.*]] = load i8, i8* [[P1]], align 1
; SSE2-NEXT: [[I2:%.*]] = load i8, i8* [[P2]], align 1
; SSE2-NEXT: [[I3:%.*]] = load i8, i8* [[P3]], align 1
; SSE2-NEXT: [[X0:%.*]] = sext i8 [[I0]] to i32
; SSE2-NEXT: [[X1:%.*]] = sext i8 [[I1]] to i32
; SSE2-NEXT: [[X2:%.*]] = sext i8 [[I2]] to i32
; SSE2-NEXT: [[X3:%.*]] = sext i8 [[I3]] to i32
; SSE2-NEXT: [[V0:%.*]] = insertelement <4 x i32> undef, i32 [[X0]], i32 0
; SSE2-NEXT: [[V1:%.*]] = insertelement <4 x i32> [[V0]], i32 [[X1]], i32 1
; SSE2-NEXT: [[V2:%.*]] = insertelement <4 x i32> [[V1]], i32 [[X2]], i32 2
; SSE2-NEXT: [[V3:%.*]] = insertelement <4 x i32> [[V2]], i32 [[X3]], i32 3
; SSE2-NEXT: ret <4 x i32> [[V3]]
;
; SLM-LABEL: @loadext_4i8_to_4i32(
; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
; SLM-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2
; SLM-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3
; SLM-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <4 x i8>*
; SLM-NEXT: [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1
; SLM-NEXT: [[TMP3:%.*]] = sext <4 x i8> [[TMP2]] to <4 x i32>
; SLM-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP3]], i32 0
; SLM-NEXT: [[V0:%.*]] = insertelement <4 x i32> undef, i32 [[TMP4]], i32 0
; SLM-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP3]], i32 1
; SLM-NEXT: [[V1:%.*]] = insertelement <4 x i32> [[V0]], i32 [[TMP5]], i32 1
; SLM-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP3]], i32 2
; SLM-NEXT: [[V2:%.*]] = insertelement <4 x i32> [[V1]], i32 [[TMP6]], i32 2
; SLM-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP3]], i32 3
; SLM-NEXT: [[V3:%.*]] = insertelement <4 x i32> [[V2]], i32 [[TMP7]], i32 3
; SLM-NEXT: ret <4 x i32> [[V3]]
;
; AVX-LABEL: @loadext_4i8_to_4i32(
; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
; AVX-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2
; AVX-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3
; AVX-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <4 x i8>*
; AVX-NEXT: [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1
; AVX-NEXT: [[TMP3:%.*]] = sext <4 x i8> [[TMP2]] to <4 x i32>
; AVX-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP3]], i32 0
; AVX-NEXT: [[V0:%.*]] = insertelement <4 x i32> undef, i32 [[TMP4]], i32 0
; AVX-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP3]], i32 1
; AVX-NEXT: [[V1:%.*]] = insertelement <4 x i32> [[V0]], i32 [[TMP5]], i32 1
; AVX-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP3]], i32 2
; AVX-NEXT: [[V2:%.*]] = insertelement <4 x i32> [[V1]], i32 [[TMP6]], i32 2
; AVX-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP3]], i32 3
; AVX-NEXT: [[V3:%.*]] = insertelement <4 x i32> [[V2]], i32 [[TMP7]], i32 3
; AVX-NEXT: ret <4 x i32> [[V3]]
;
  %p1 = getelementptr inbounds i8, i8* %p0, i64 1
  %p2 = getelementptr inbounds i8, i8* %p0, i64 2
  %p3 = getelementptr inbounds i8, i8* %p0, i64 3
  %i0 = load i8, i8* %p0, align 1
  %i1 = load i8, i8* %p1, align 1
  %i2 = load i8, i8* %p2, align 1
  %i3 = load i8, i8* %p3, align 1
  %x0 = sext i8 %i0 to i32
  %x1 = sext i8 %i1 to i32
  %x2 = sext i8 %i2 to i32
  %x3 = sext i8 %i3 to i32
  %v0 = insertelement <4 x i32> undef, i32 %x0, i32 0
  %v1 = insertelement <4 x i32> %v0, i32 %x1, i32 1
  %v2 = insertelement <4 x i32> %v1, i32 %x2, i32 2
  %v3 = insertelement <4 x i32> %v2, i32 %x3, i32 3
  ret <4 x i32> %v3
}
|
|
|
|
; Loads four adjacent i8 values starting at %p0, sign-extends each to i64 and
; packs them into a <4 x i64> result.
; Expected output per the assertions below -
;   * the SSE2 run leaves the scalar code untouched;
;   * the SLM, AVX2 and AVX512 runs use one <4 x i8> load and one vector sext;
;   * the AVX1 run only vectorizes lanes 0 and 1 (a <2 x i8> load and a
;     <2 x i64> sext) and keeps lanes 2 and 3 as scalar load + sext.
define <4 x i64> @loadext_4i8_to_4i64(i8* %p0) {
; SSE2-LABEL: @loadext_4i8_to_4i64(
; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
; SSE2-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2
; SSE2-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3
; SSE2-NEXT: [[I0:%.*]] = load i8, i8* [[P0]], align 1
; SSE2-NEXT: [[I1:%.*]] = load i8, i8* [[P1]], align 1
; SSE2-NEXT: [[I2:%.*]] = load i8, i8* [[P2]], align 1
; SSE2-NEXT: [[I3:%.*]] = load i8, i8* [[P3]], align 1
; SSE2-NEXT: [[X0:%.*]] = sext i8 [[I0]] to i64
; SSE2-NEXT: [[X1:%.*]] = sext i8 [[I1]] to i64
; SSE2-NEXT: [[X2:%.*]] = sext i8 [[I2]] to i64
; SSE2-NEXT: [[X3:%.*]] = sext i8 [[I3]] to i64
; SSE2-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[X0]], i32 0
; SSE2-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[X1]], i32 1
; SSE2-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[X2]], i32 2
; SSE2-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[X3]], i32 3
; SSE2-NEXT: ret <4 x i64> [[V3]]
;
; SLM-LABEL: @loadext_4i8_to_4i64(
; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
; SLM-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2
; SLM-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3
; SLM-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <4 x i8>*
; SLM-NEXT: [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1
; SLM-NEXT: [[TMP3:%.*]] = sext <4 x i8> [[TMP2]] to <4 x i64>
; SLM-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0
; SLM-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0
; SLM-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1
; SLM-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1
; SLM-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2
; SLM-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[TMP6]], i32 2
; SLM-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3
; SLM-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[TMP7]], i32 3
; SLM-NEXT: ret <4 x i64> [[V3]]
;
; AVX1-LABEL: @loadext_4i8_to_4i64(
; AVX1-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
; AVX1-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2
; AVX1-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3
; AVX1-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <2 x i8>*
; AVX1-NEXT: [[TMP2:%.*]] = load <2 x i8>, <2 x i8>* [[TMP1]], align 1
; AVX1-NEXT: [[I2:%.*]] = load i8, i8* [[P2]], align 1
; AVX1-NEXT: [[I3:%.*]] = load i8, i8* [[P3]], align 1
; AVX1-NEXT: [[TMP3:%.*]] = sext <2 x i8> [[TMP2]] to <2 x i64>
; AVX1-NEXT: [[X2:%.*]] = sext i8 [[I2]] to i64
; AVX1-NEXT: [[X3:%.*]] = sext i8 [[I3]] to i64
; AVX1-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
; AVX1-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0
; AVX1-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1
; AVX1-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1
; AVX1-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[X2]], i32 2
; AVX1-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[X3]], i32 3
; AVX1-NEXT: ret <4 x i64> [[V3]]
;
; AVX2-LABEL: @loadext_4i8_to_4i64(
; AVX2-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
; AVX2-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2
; AVX2-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3
; AVX2-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <4 x i8>*
; AVX2-NEXT: [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1
; AVX2-NEXT: [[TMP3:%.*]] = sext <4 x i8> [[TMP2]] to <4 x i64>
; AVX2-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0
; AVX2-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0
; AVX2-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1
; AVX2-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1
; AVX2-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2
; AVX2-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[TMP6]], i32 2
; AVX2-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3
; AVX2-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[TMP7]], i32 3
; AVX2-NEXT: ret <4 x i64> [[V3]]
;
; AVX512-LABEL: @loadext_4i8_to_4i64(
; AVX512-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
; AVX512-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2
; AVX512-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3
; AVX512-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <4 x i8>*
; AVX512-NEXT: [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1
; AVX512-NEXT: [[TMP3:%.*]] = sext <4 x i8> [[TMP2]] to <4 x i64>
; AVX512-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0
; AVX512-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0
; AVX512-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1
; AVX512-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1
; AVX512-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2
; AVX512-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[TMP6]], i32 2
; AVX512-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3
; AVX512-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[TMP7]], i32 3
; AVX512-NEXT: ret <4 x i64> [[V3]]
;
  %p1 = getelementptr inbounds i8, i8* %p0, i64 1
  %p2 = getelementptr inbounds i8, i8* %p0, i64 2
  %p3 = getelementptr inbounds i8, i8* %p0, i64 3
  %i0 = load i8, i8* %p0, align 1
  %i1 = load i8, i8* %p1, align 1
  %i2 = load i8, i8* %p2, align 1
  %i3 = load i8, i8* %p3, align 1
  %x0 = sext i8 %i0 to i64
  %x1 = sext i8 %i1 to i64
  %x2 = sext i8 %i2 to i64
  %x3 = sext i8 %i3 to i64
  %v0 = insertelement <4 x i64> undef, i64 %x0, i32 0
  %v1 = insertelement <4 x i64> %v0, i64 %x1, i32 1
  %v2 = insertelement <4 x i64> %v1, i64 %x2, i32 2
  %v3 = insertelement <4 x i64> %v2, i64 %x3, i32 3
  ret <4 x i64> %v3
}
|
|
|
|
; Loads eight adjacent i8 values starting at %p0, sign-extends each to i16 and
; packs them into an <8 x i16> result.
; The assertions below use the prefix shared by every RUN line, so all
; configurations are expected to produce a single <8 x i8> load followed by a
; single vector sext.
define <8 x i16> @loadext_8i8_to_8i16(i8* %p0) {
; CHECK-LABEL: @loadext_8i8_to_8i16(
; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2
; CHECK-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3
; CHECK-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4
; CHECK-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5
; CHECK-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6
; CHECK-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <8 x i8>*
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1
; CHECK-NEXT: [[TMP3:%.*]] = sext <8 x i8> [[TMP2]] to <8 x i16>
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x i16> [[TMP3]], i32 0
; CHECK-NEXT: [[V0:%.*]] = insertelement <8 x i16> undef, i16 [[TMP4]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i16> [[TMP3]], i32 1
; CHECK-NEXT: [[V1:%.*]] = insertelement <8 x i16> [[V0]], i16 [[TMP5]], i32 1
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i16> [[TMP3]], i32 2
; CHECK-NEXT: [[V2:%.*]] = insertelement <8 x i16> [[V1]], i16 [[TMP6]], i32 2
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <8 x i16> [[TMP3]], i32 3
; CHECK-NEXT: [[V3:%.*]] = insertelement <8 x i16> [[V2]], i16 [[TMP7]], i32 3
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x i16> [[TMP3]], i32 4
; CHECK-NEXT: [[V4:%.*]] = insertelement <8 x i16> [[V3]], i16 [[TMP8]], i32 4
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x i16> [[TMP3]], i32 5
; CHECK-NEXT: [[V5:%.*]] = insertelement <8 x i16> [[V4]], i16 [[TMP9]], i32 5
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <8 x i16> [[TMP3]], i32 6
; CHECK-NEXT: [[V6:%.*]] = insertelement <8 x i16> [[V5]], i16 [[TMP10]], i32 6
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <8 x i16> [[TMP3]], i32 7
; CHECK-NEXT: [[V7:%.*]] = insertelement <8 x i16> [[V6]], i16 [[TMP11]], i32 7
; CHECK-NEXT: ret <8 x i16> [[V7]]
;
  %p1 = getelementptr inbounds i8, i8* %p0, i64 1
  %p2 = getelementptr inbounds i8, i8* %p0, i64 2
  %p3 = getelementptr inbounds i8, i8* %p0, i64 3
  %p4 = getelementptr inbounds i8, i8* %p0, i64 4
  %p5 = getelementptr inbounds i8, i8* %p0, i64 5
  %p6 = getelementptr inbounds i8, i8* %p0, i64 6
  %p7 = getelementptr inbounds i8, i8* %p0, i64 7
  %i0 = load i8, i8* %p0, align 1
  %i1 = load i8, i8* %p1, align 1
  %i2 = load i8, i8* %p2, align 1
  %i3 = load i8, i8* %p3, align 1
  %i4 = load i8, i8* %p4, align 1
  %i5 = load i8, i8* %p5, align 1
  %i6 = load i8, i8* %p6, align 1
  %i7 = load i8, i8* %p7, align 1
  %x0 = sext i8 %i0 to i16
  %x1 = sext i8 %i1 to i16
  %x2 = sext i8 %i2 to i16
  %x3 = sext i8 %i3 to i16
  %x4 = sext i8 %i4 to i16
  %x5 = sext i8 %i5 to i16
  %x6 = sext i8 %i6 to i16
  %x7 = sext i8 %i7 to i16
  %v0 = insertelement <8 x i16> undef, i16 %x0, i32 0
  %v1 = insertelement <8 x i16> %v0, i16 %x1, i32 1
  %v2 = insertelement <8 x i16> %v1, i16 %x2, i32 2
  %v3 = insertelement <8 x i16> %v2, i16 %x3, i32 3
  %v4 = insertelement <8 x i16> %v3, i16 %x4, i32 4
  %v5 = insertelement <8 x i16> %v4, i16 %x5, i32 5
  %v6 = insertelement <8 x i16> %v5, i16 %x6, i32 6
  %v7 = insertelement <8 x i16> %v6, i16 %x7, i32 7
  ret <8 x i16> %v7
}
|
|
|
|
; Loads eight adjacent i8 values starting at %p0, sign-extends each to i32 and
; packs them into an <8 x i32> result.
; The assertions below use the prefix shared by every RUN line, so all
; configurations are expected to produce a single <8 x i8> load followed by a
; single vector sext.
define <8 x i32> @loadext_8i8_to_8i32(i8* %p0) {
; CHECK-LABEL: @loadext_8i8_to_8i32(
; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2
; CHECK-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3
; CHECK-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4
; CHECK-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5
; CHECK-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6
; CHECK-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <8 x i8>*
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1
; CHECK-NEXT: [[TMP3:%.*]] = sext <8 x i8> [[TMP2]] to <8 x i32>
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x i32> [[TMP3]], i32 0
; CHECK-NEXT: [[V0:%.*]] = insertelement <8 x i32> undef, i32 [[TMP4]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i32> [[TMP3]], i32 1
; CHECK-NEXT: [[V1:%.*]] = insertelement <8 x i32> [[V0]], i32 [[TMP5]], i32 1
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i32> [[TMP3]], i32 2
; CHECK-NEXT: [[V2:%.*]] = insertelement <8 x i32> [[V1]], i32 [[TMP6]], i32 2
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <8 x i32> [[TMP3]], i32 3
; CHECK-NEXT: [[V3:%.*]] = insertelement <8 x i32> [[V2]], i32 [[TMP7]], i32 3
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x i32> [[TMP3]], i32 4
; CHECK-NEXT: [[V4:%.*]] = insertelement <8 x i32> [[V3]], i32 [[TMP8]], i32 4
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x i32> [[TMP3]], i32 5
; CHECK-NEXT: [[V5:%.*]] = insertelement <8 x i32> [[V4]], i32 [[TMP9]], i32 5
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <8 x i32> [[TMP3]], i32 6
; CHECK-NEXT: [[V6:%.*]] = insertelement <8 x i32> [[V5]], i32 [[TMP10]], i32 6
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <8 x i32> [[TMP3]], i32 7
; CHECK-NEXT: [[V7:%.*]] = insertelement <8 x i32> [[V6]], i32 [[TMP11]], i32 7
; CHECK-NEXT: ret <8 x i32> [[V7]]
;
  %p1 = getelementptr inbounds i8, i8* %p0, i64 1
  %p2 = getelementptr inbounds i8, i8* %p0, i64 2
  %p3 = getelementptr inbounds i8, i8* %p0, i64 3
  %p4 = getelementptr inbounds i8, i8* %p0, i64 4
  %p5 = getelementptr inbounds i8, i8* %p0, i64 5
  %p6 = getelementptr inbounds i8, i8* %p0, i64 6
  %p7 = getelementptr inbounds i8, i8* %p0, i64 7
  %i0 = load i8, i8* %p0, align 1
  %i1 = load i8, i8* %p1, align 1
  %i2 = load i8, i8* %p2, align 1
  %i3 = load i8, i8* %p3, align 1
  %i4 = load i8, i8* %p4, align 1
  %i5 = load i8, i8* %p5, align 1
  %i6 = load i8, i8* %p6, align 1
  %i7 = load i8, i8* %p7, align 1
  %x0 = sext i8 %i0 to i32
  %x1 = sext i8 %i1 to i32
  %x2 = sext i8 %i2 to i32
  %x3 = sext i8 %i3 to i32
  %x4 = sext i8 %i4 to i32
  %x5 = sext i8 %i5 to i32
  %x6 = sext i8 %i6 to i32
  %x7 = sext i8 %i7 to i32
  %v0 = insertelement <8 x i32> undef, i32 %x0, i32 0
  %v1 = insertelement <8 x i32> %v0, i32 %x1, i32 1
  %v2 = insertelement <8 x i32> %v1, i32 %x2, i32 2
  %v3 = insertelement <8 x i32> %v2, i32 %x3, i32 3
  %v4 = insertelement <8 x i32> %v3, i32 %x4, i32 4
  %v5 = insertelement <8 x i32> %v4, i32 %x5, i32 5
  %v6 = insertelement <8 x i32> %v5, i32 %x6, i32 6
  %v7 = insertelement <8 x i32> %v6, i32 %x7, i32 7
  ret <8 x i32> %v7
}
|
|
|
|
; Loads sixteen adjacent i8 values starting at %p0, sign-extends each to i16
; and packs them into a <16 x i16> result.
; The assertions below use the prefix shared by every RUN line, so all
; configurations are expected to produce a single <16 x i8> load followed by a
; single vector sext.
define <16 x i16> @loadext_16i8_to_16i16(i8* %p0) {
; CHECK-LABEL: @loadext_16i8_to_16i16(
; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2
; CHECK-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3
; CHECK-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4
; CHECK-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5
; CHECK-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6
; CHECK-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7
; CHECK-NEXT: [[P8:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 8
; CHECK-NEXT: [[P9:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 9
; CHECK-NEXT: [[P10:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 10
; CHECK-NEXT: [[P11:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 11
; CHECK-NEXT: [[P12:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 12
; CHECK-NEXT: [[P13:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 13
; CHECK-NEXT: [[P14:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 14
; CHECK-NEXT: [[P15:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 15
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <16 x i8>*
; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[TMP1]], align 1
; CHECK-NEXT: [[TMP3:%.*]] = sext <16 x i8> [[TMP2]] to <16 x i16>
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <16 x i16> [[TMP3]], i32 0
; CHECK-NEXT: [[V0:%.*]] = insertelement <16 x i16> undef, i16 [[TMP4]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <16 x i16> [[TMP3]], i32 1
; CHECK-NEXT: [[V1:%.*]] = insertelement <16 x i16> [[V0]], i16 [[TMP5]], i32 1
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <16 x i16> [[TMP3]], i32 2
; CHECK-NEXT: [[V2:%.*]] = insertelement <16 x i16> [[V1]], i16 [[TMP6]], i32 2
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <16 x i16> [[TMP3]], i32 3
; CHECK-NEXT: [[V3:%.*]] = insertelement <16 x i16> [[V2]], i16 [[TMP7]], i32 3
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <16 x i16> [[TMP3]], i32 4
; CHECK-NEXT: [[V4:%.*]] = insertelement <16 x i16> [[V3]], i16 [[TMP8]], i32 4
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <16 x i16> [[TMP3]], i32 5
; CHECK-NEXT: [[V5:%.*]] = insertelement <16 x i16> [[V4]], i16 [[TMP9]], i32 5
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <16 x i16> [[TMP3]], i32 6
; CHECK-NEXT: [[V6:%.*]] = insertelement <16 x i16> [[V5]], i16 [[TMP10]], i32 6
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <16 x i16> [[TMP3]], i32 7
; CHECK-NEXT: [[V7:%.*]] = insertelement <16 x i16> [[V6]], i16 [[TMP11]], i32 7
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <16 x i16> [[TMP3]], i32 8
; CHECK-NEXT: [[V8:%.*]] = insertelement <16 x i16> [[V7]], i16 [[TMP12]], i32 8
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <16 x i16> [[TMP3]], i32 9
; CHECK-NEXT: [[V9:%.*]] = insertelement <16 x i16> [[V8]], i16 [[TMP13]], i32 9
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <16 x i16> [[TMP3]], i32 10
; CHECK-NEXT: [[V10:%.*]] = insertelement <16 x i16> [[V9]], i16 [[TMP14]], i32 10
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <16 x i16> [[TMP3]], i32 11
; CHECK-NEXT: [[V11:%.*]] = insertelement <16 x i16> [[V10]], i16 [[TMP15]], i32 11
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <16 x i16> [[TMP3]], i32 12
; CHECK-NEXT: [[V12:%.*]] = insertelement <16 x i16> [[V11]], i16 [[TMP16]], i32 12
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <16 x i16> [[TMP3]], i32 13
; CHECK-NEXT: [[V13:%.*]] = insertelement <16 x i16> [[V12]], i16 [[TMP17]], i32 13
; CHECK-NEXT: [[TMP18:%.*]] = extractelement <16 x i16> [[TMP3]], i32 14
; CHECK-NEXT: [[V14:%.*]] = insertelement <16 x i16> [[V13]], i16 [[TMP18]], i32 14
; CHECK-NEXT: [[TMP19:%.*]] = extractelement <16 x i16> [[TMP3]], i32 15
; CHECK-NEXT: [[V15:%.*]] = insertelement <16 x i16> [[V14]], i16 [[TMP19]], i32 15
; CHECK-NEXT: ret <16 x i16> [[V15]]
;
  %p1 = getelementptr inbounds i8, i8* %p0, i64 1
  %p2 = getelementptr inbounds i8, i8* %p0, i64 2
  %p3 = getelementptr inbounds i8, i8* %p0, i64 3
  %p4 = getelementptr inbounds i8, i8* %p0, i64 4
  %p5 = getelementptr inbounds i8, i8* %p0, i64 5
  %p6 = getelementptr inbounds i8, i8* %p0, i64 6
  %p7 = getelementptr inbounds i8, i8* %p0, i64 7
  %p8 = getelementptr inbounds i8, i8* %p0, i64 8
  %p9 = getelementptr inbounds i8, i8* %p0, i64 9
  %p10 = getelementptr inbounds i8, i8* %p0, i64 10
  %p11 = getelementptr inbounds i8, i8* %p0, i64 11
  %p12 = getelementptr inbounds i8, i8* %p0, i64 12
  %p13 = getelementptr inbounds i8, i8* %p0, i64 13
  %p14 = getelementptr inbounds i8, i8* %p0, i64 14
  %p15 = getelementptr inbounds i8, i8* %p0, i64 15
  %i0 = load i8, i8* %p0, align 1
  %i1 = load i8, i8* %p1, align 1
  %i2 = load i8, i8* %p2, align 1
  %i3 = load i8, i8* %p3, align 1
  %i4 = load i8, i8* %p4, align 1
  %i5 = load i8, i8* %p5, align 1
  %i6 = load i8, i8* %p6, align 1
  %i7 = load i8, i8* %p7, align 1
  %i8 = load i8, i8* %p8, align 1
  %i9 = load i8, i8* %p9, align 1
  %i10 = load i8, i8* %p10, align 1
  %i11 = load i8, i8* %p11, align 1
  %i12 = load i8, i8* %p12, align 1
  %i13 = load i8, i8* %p13, align 1
  %i14 = load i8, i8* %p14, align 1
  %i15 = load i8, i8* %p15, align 1
  %x0 = sext i8 %i0 to i16
  %x1 = sext i8 %i1 to i16
  %x2 = sext i8 %i2 to i16
  %x3 = sext i8 %i3 to i16
  %x4 = sext i8 %i4 to i16
  %x5 = sext i8 %i5 to i16
  %x6 = sext i8 %i6 to i16
  %x7 = sext i8 %i7 to i16
  %x8 = sext i8 %i8 to i16
  %x9 = sext i8 %i9 to i16
  %x10 = sext i8 %i10 to i16
  %x11 = sext i8 %i11 to i16
  %x12 = sext i8 %i12 to i16
  %x13 = sext i8 %i13 to i16
  %x14 = sext i8 %i14 to i16
  %x15 = sext i8 %i15 to i16
  %v0 = insertelement <16 x i16> undef, i16 %x0, i32 0
  %v1 = insertelement <16 x i16> %v0, i16 %x1, i32 1
  %v2 = insertelement <16 x i16> %v1, i16 %x2, i32 2
  %v3 = insertelement <16 x i16> %v2, i16 %x3, i32 3
  %v4 = insertelement <16 x i16> %v3, i16 %x4, i32 4
  %v5 = insertelement <16 x i16> %v4, i16 %x5, i32 5
  %v6 = insertelement <16 x i16> %v5, i16 %x6, i32 6
  %v7 = insertelement <16 x i16> %v6, i16 %x7, i32 7
  %v8 = insertelement <16 x i16> %v7, i16 %x8, i32 8
  %v9 = insertelement <16 x i16> %v8, i16 %x9, i32 9
  %v10 = insertelement <16 x i16> %v9, i16 %x10, i32 10
  %v11 = insertelement <16 x i16> %v10, i16 %x11, i32 11
  %v12 = insertelement <16 x i16> %v11, i16 %x12, i32 12
  %v13 = insertelement <16 x i16> %v12, i16 %x13, i32 13
  %v14 = insertelement <16 x i16> %v13, i16 %x14, i32 14
  %v15 = insertelement <16 x i16> %v14, i16 %x15, i32 15
  ret <16 x i16> %v15
}

;
; vXi16
;

; Loads two adjacent i16 values starting at %p0 (both loads are marked
; align 1), sign-extends each to i64 and packs them into a <2 x i64> result.
; Expected output per the assertions below - the SSE2 run leaves the scalar
; code untouched, while the SLM and AVX runs use a single <2 x i16> load
; followed by a single vector sext.
define <2 x i64> @loadext_2i16_to_2i64(i16* %p0) {
; SSE2-LABEL: @loadext_2i16_to_2i64(
; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1
; SSE2-NEXT: [[I0:%.*]] = load i16, i16* [[P0]], align 1
; SSE2-NEXT: [[I1:%.*]] = load i16, i16* [[P1]], align 1
; SSE2-NEXT: [[X0:%.*]] = sext i16 [[I0]] to i64
; SSE2-NEXT: [[X1:%.*]] = sext i16 [[I1]] to i64
; SSE2-NEXT: [[V0:%.*]] = insertelement <2 x i64> undef, i64 [[X0]], i32 0
; SSE2-NEXT: [[V1:%.*]] = insertelement <2 x i64> [[V0]], i64 [[X1]], i32 1
; SSE2-NEXT: ret <2 x i64> [[V1]]
;
; SLM-LABEL: @loadext_2i16_to_2i64(
; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1
; SLM-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <2 x i16>*
; SLM-NEXT: [[TMP2:%.*]] = load <2 x i16>, <2 x i16>* [[TMP1]], align 1
; SLM-NEXT: [[TMP3:%.*]] = sext <2 x i16> [[TMP2]] to <2 x i64>
; SLM-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
; SLM-NEXT: [[V0:%.*]] = insertelement <2 x i64> undef, i64 [[TMP4]], i32 0
; SLM-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1
; SLM-NEXT: [[V1:%.*]] = insertelement <2 x i64> [[V0]], i64 [[TMP5]], i32 1
; SLM-NEXT: ret <2 x i64> [[V1]]
;
; AVX-LABEL: @loadext_2i16_to_2i64(
; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1
; AVX-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <2 x i16>*
; AVX-NEXT: [[TMP2:%.*]] = load <2 x i16>, <2 x i16>* [[TMP1]], align 1
; AVX-NEXT: [[TMP3:%.*]] = sext <2 x i16> [[TMP2]] to <2 x i64>
; AVX-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
; AVX-NEXT: [[V0:%.*]] = insertelement <2 x i64> undef, i64 [[TMP4]], i32 0
; AVX-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1
; AVX-NEXT: [[V1:%.*]] = insertelement <2 x i64> [[V0]], i64 [[TMP5]], i32 1
; AVX-NEXT: ret <2 x i64> [[V1]]
;
  %p1 = getelementptr inbounds i16, i16* %p0, i64 1
  %i0 = load i16, i16* %p0, align 1
  %i1 = load i16, i16* %p1, align 1
  %x0 = sext i16 %i0 to i64
  %x1 = sext i16 %i1 to i64
  %v0 = insertelement <2 x i64> undef, i64 %x0, i32 0
  %v1 = insertelement <2 x i64> %v0, i64 %x1, i32 1
  ret <2 x i64> %v1
}
|
|
|
|
; Loads four adjacent i16 values starting at %p0 (all loads are marked
; align 1), sign-extends each to i32 and packs them into a <4 x i32> result.
; The assertions below use the prefix shared by every RUN line, so all
; configurations are expected to produce a single <4 x i16> load followed by a
; single vector sext.
define <4 x i32> @loadext_4i16_to_4i32(i16* %p0) {
; CHECK-LABEL: @loadext_4i16_to_4i32(
; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1
; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2
; CHECK-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <4 x i16>*
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 1
; CHECK-NEXT: [[TMP3:%.*]] = sext <4 x i16> [[TMP2]] to <4 x i32>
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP3]], i32 0
; CHECK-NEXT: [[V0:%.*]] = insertelement <4 x i32> undef, i32 [[TMP4]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP3]], i32 1
; CHECK-NEXT: [[V1:%.*]] = insertelement <4 x i32> [[V0]], i32 [[TMP5]], i32 1
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP3]], i32 2
; CHECK-NEXT: [[V2:%.*]] = insertelement <4 x i32> [[V1]], i32 [[TMP6]], i32 2
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP3]], i32 3
; CHECK-NEXT: [[V3:%.*]] = insertelement <4 x i32> [[V2]], i32 [[TMP7]], i32 3
; CHECK-NEXT: ret <4 x i32> [[V3]]
;
  %p1 = getelementptr inbounds i16, i16* %p0, i64 1
  %p2 = getelementptr inbounds i16, i16* %p0, i64 2
  %p3 = getelementptr inbounds i16, i16* %p0, i64 3
  %i0 = load i16, i16* %p0, align 1
  %i1 = load i16, i16* %p1, align 1
  %i2 = load i16, i16* %p2, align 1
  %i3 = load i16, i16* %p3, align 1
  %x0 = sext i16 %i0 to i32
  %x1 = sext i16 %i1 to i32
  %x2 = sext i16 %i2 to i32
  %x3 = sext i16 %i3 to i32
  %v0 = insertelement <4 x i32> undef, i32 %x0, i32 0
  %v1 = insertelement <4 x i32> %v0, i32 %x1, i32 1
  %v2 = insertelement <4 x i32> %v1, i32 %x2, i32 2
  %v3 = insertelement <4 x i32> %v2, i32 %x3, i32 3
  ret <4 x i32> %v3
}
|
|
|
|
; Sign-extending load test: four consecutive i16 values starting at %p0 are
; loaded (align 1), each is sign-extended to i64, and the results are
; inserted lane by lane into a <4 x i64>.
; Expected output differs per check prefix:
;   - the SSE2 checks keep all four scalar loads and scalar sexts;
;   - the SLM, AVX2 and AVX512 checks expect one <4 x i16> load and one
;     sext to <4 x i64>;
;   - the AVX1 checks expect only lanes 0 and 1 to be vectorized
;     (<2 x i16> load, sext to <2 x i64>) while lanes 2 and 3 stay scalar.
define <4 x i64> @loadext_4i16_to_4i64(i16* %p0) {
|
|
; SSE2-LABEL: @loadext_4i16_to_4i64(
|
|
; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1
|
|
; SSE2-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2
|
|
; SSE2-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3
|
|
; SSE2-NEXT: [[I0:%.*]] = load i16, i16* [[P0]], align 1
|
|
; SSE2-NEXT: [[I1:%.*]] = load i16, i16* [[P1]], align 1
|
|
; SSE2-NEXT: [[I2:%.*]] = load i16, i16* [[P2]], align 1
|
|
; SSE2-NEXT: [[I3:%.*]] = load i16, i16* [[P3]], align 1
|
|
; SSE2-NEXT: [[X0:%.*]] = sext i16 [[I0]] to i64
|
|
; SSE2-NEXT: [[X1:%.*]] = sext i16 [[I1]] to i64
|
|
; SSE2-NEXT: [[X2:%.*]] = sext i16 [[I2]] to i64
|
|
; SSE2-NEXT: [[X3:%.*]] = sext i16 [[I3]] to i64
|
|
; SSE2-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[X0]], i32 0
|
|
; SSE2-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[X1]], i32 1
|
|
; SSE2-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[X2]], i32 2
|
|
; SSE2-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[X3]], i32 3
|
|
; SSE2-NEXT: ret <4 x i64> [[V3]]
|
|
;
|
|
; SLM-LABEL: @loadext_4i16_to_4i64(
|
|
; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1
|
|
; SLM-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2
|
|
; SLM-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3
|
|
; SLM-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <4 x i16>*
|
|
; SLM-NEXT: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 1
|
|
; SLM-NEXT: [[TMP3:%.*]] = sext <4 x i16> [[TMP2]] to <4 x i64>
|
|
; SLM-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0
|
|
; SLM-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0
|
|
; SLM-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1
|
|
; SLM-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1
|
|
; SLM-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2
|
|
; SLM-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[TMP6]], i32 2
|
|
; SLM-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3
|
|
; SLM-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[TMP7]], i32 3
|
|
; SLM-NEXT: ret <4 x i64> [[V3]]
|
|
;
|
|
; AVX1-LABEL: @loadext_4i16_to_4i64(
|
|
; AVX1-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1
|
|
; AVX1-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2
|
|
; AVX1-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3
|
|
; AVX1-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <2 x i16>*
|
|
; AVX1-NEXT: [[TMP2:%.*]] = load <2 x i16>, <2 x i16>* [[TMP1]], align 1
|
|
; AVX1-NEXT: [[I2:%.*]] = load i16, i16* [[P2]], align 1
|
|
; AVX1-NEXT: [[I3:%.*]] = load i16, i16* [[P3]], align 1
|
|
; AVX1-NEXT: [[TMP3:%.*]] = sext <2 x i16> [[TMP2]] to <2 x i64>
|
|
; AVX1-NEXT: [[X2:%.*]] = sext i16 [[I2]] to i64
|
|
; AVX1-NEXT: [[X3:%.*]] = sext i16 [[I3]] to i64
|
|
; AVX1-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
|
|
; AVX1-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0
|
|
; AVX1-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1
|
|
; AVX1-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1
|
|
; AVX1-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[X2]], i32 2
|
|
; AVX1-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[X3]], i32 3
|
|
; AVX1-NEXT: ret <4 x i64> [[V3]]
|
|
;
|
|
; AVX2-LABEL: @loadext_4i16_to_4i64(
|
|
; AVX2-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1
|
|
; AVX2-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2
|
|
; AVX2-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3
|
|
; AVX2-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <4 x i16>*
|
|
; AVX2-NEXT: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 1
|
|
; AVX2-NEXT: [[TMP3:%.*]] = sext <4 x i16> [[TMP2]] to <4 x i64>
|
|
; AVX2-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0
|
|
; AVX2-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0
|
|
; AVX2-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1
|
|
; AVX2-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1
|
|
; AVX2-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2
|
|
; AVX2-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[TMP6]], i32 2
|
|
; AVX2-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3
|
|
; AVX2-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[TMP7]], i32 3
|
|
; AVX2-NEXT: ret <4 x i64> [[V3]]
|
|
;
|
|
; AVX512-LABEL: @loadext_4i16_to_4i64(
|
|
; AVX512-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1
|
|
; AVX512-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2
|
|
; AVX512-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3
|
|
; AVX512-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <4 x i16>*
|
|
; AVX512-NEXT: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 1
|
|
; AVX512-NEXT: [[TMP3:%.*]] = sext <4 x i16> [[TMP2]] to <4 x i64>
|
|
; AVX512-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0
|
|
; AVX512-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0
|
|
; AVX512-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1
|
|
; AVX512-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1
|
|
; AVX512-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2
|
|
; AVX512-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[TMP6]], i32 2
|
|
; AVX512-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3
|
|
; AVX512-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[TMP7]], i32 3
|
|
; AVX512-NEXT: ret <4 x i64> [[V3]]
|
|
;
|
|
%p1 = getelementptr inbounds i16, i16* %p0, i64 1
|
|
%p2 = getelementptr inbounds i16, i16* %p0, i64 2
|
|
%p3 = getelementptr inbounds i16, i16* %p0, i64 3
|
|
%i0 = load i16, i16* %p0, align 1
|
|
%i1 = load i16, i16* %p1, align 1
|
|
%i2 = load i16, i16* %p2, align 1
|
|
%i3 = load i16, i16* %p3, align 1
|
|
%x0 = sext i16 %i0 to i64
|
|
%x1 = sext i16 %i1 to i64
|
|
%x2 = sext i16 %i2 to i64
|
|
%x3 = sext i16 %i3 to i64
|
|
%v0 = insertelement <4 x i64> undef, i64 %x0, i32 0
|
|
%v1 = insertelement <4 x i64> %v0, i64 %x1, i32 1
|
|
%v2 = insertelement <4 x i64> %v1, i64 %x2, i32 2
|
|
%v3 = insertelement <4 x i64> %v2, i64 %x3, i32 3
|
|
ret <4 x i64> %v3
|
|
}
|
|
|
|
; Sign-extending load test: eight consecutive i16 values starting at %p0 are
; loaded (align 1), each is sign-extended to i32, and the results are
; inserted lane by lane into an <8 x i32>.
; The assertions use the common CHECK prefix shared by every RUN line, so all
; tested configurations expect one <8 x i16> load feeding one sext to
; <8 x i32>, followed by the extractelement/insertelement chain.
define <8 x i32> @loadext_8i16_to_8i32(i16* %p0) {
|
|
; CHECK-LABEL: @loadext_8i16_to_8i32(
|
|
; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1
|
|
; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2
|
|
; CHECK-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3
|
|
; CHECK-NEXT: [[P4:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 4
|
|
; CHECK-NEXT: [[P5:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 5
|
|
; CHECK-NEXT: [[P6:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 6
|
|
; CHECK-NEXT: [[P7:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 7
|
|
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <8 x i16>*
|
|
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 1
|
|
; CHECK-NEXT: [[TMP3:%.*]] = sext <8 x i16> [[TMP2]] to <8 x i32>
|
|
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x i32> [[TMP3]], i32 0
|
|
; CHECK-NEXT: [[V0:%.*]] = insertelement <8 x i32> undef, i32 [[TMP4]], i32 0
|
|
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i32> [[TMP3]], i32 1
|
|
; CHECK-NEXT: [[V1:%.*]] = insertelement <8 x i32> [[V0]], i32 [[TMP5]], i32 1
|
|
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i32> [[TMP3]], i32 2
|
|
; CHECK-NEXT: [[V2:%.*]] = insertelement <8 x i32> [[V1]], i32 [[TMP6]], i32 2
|
|
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <8 x i32> [[TMP3]], i32 3
|
|
; CHECK-NEXT: [[V3:%.*]] = insertelement <8 x i32> [[V2]], i32 [[TMP7]], i32 3
|
|
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x i32> [[TMP3]], i32 4
|
|
; CHECK-NEXT: [[V4:%.*]] = insertelement <8 x i32> [[V3]], i32 [[TMP8]], i32 4
|
|
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x i32> [[TMP3]], i32 5
|
|
; CHECK-NEXT: [[V5:%.*]] = insertelement <8 x i32> [[V4]], i32 [[TMP9]], i32 5
|
|
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <8 x i32> [[TMP3]], i32 6
|
|
; CHECK-NEXT: [[V6:%.*]] = insertelement <8 x i32> [[V5]], i32 [[TMP10]], i32 6
|
|
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <8 x i32> [[TMP3]], i32 7
|
|
; CHECK-NEXT: [[V7:%.*]] = insertelement <8 x i32> [[V6]], i32 [[TMP11]], i32 7
|
|
; CHECK-NEXT: ret <8 x i32> [[V7]]
|
|
;
|
|
%p1 = getelementptr inbounds i16, i16* %p0, i64 1
|
|
%p2 = getelementptr inbounds i16, i16* %p0, i64 2
|
|
%p3 = getelementptr inbounds i16, i16* %p0, i64 3
|
|
%p4 = getelementptr inbounds i16, i16* %p0, i64 4
|
|
%p5 = getelementptr inbounds i16, i16* %p0, i64 5
|
|
%p6 = getelementptr inbounds i16, i16* %p0, i64 6
|
|
%p7 = getelementptr inbounds i16, i16* %p0, i64 7
|
|
%i0 = load i16, i16* %p0, align 1
|
|
%i1 = load i16, i16* %p1, align 1
|
|
%i2 = load i16, i16* %p2, align 1
|
|
%i3 = load i16, i16* %p3, align 1
|
|
%i4 = load i16, i16* %p4, align 1
|
|
%i5 = load i16, i16* %p5, align 1
|
|
%i6 = load i16, i16* %p6, align 1
|
|
%i7 = load i16, i16* %p7, align 1
|
|
%x0 = sext i16 %i0 to i32
|
|
%x1 = sext i16 %i1 to i32
|
|
%x2 = sext i16 %i2 to i32
|
|
%x3 = sext i16 %i3 to i32
|
|
%x4 = sext i16 %i4 to i32
|
|
%x5 = sext i16 %i5 to i32
|
|
%x6 = sext i16 %i6 to i32
|
|
%x7 = sext i16 %i7 to i32
|
|
%v0 = insertelement <8 x i32> undef, i32 %x0, i32 0
|
|
%v1 = insertelement <8 x i32> %v0, i32 %x1, i32 1
|
|
%v2 = insertelement <8 x i32> %v1, i32 %x2, i32 2
|
|
%v3 = insertelement <8 x i32> %v2, i32 %x3, i32 3
|
|
%v4 = insertelement <8 x i32> %v3, i32 %x4, i32 4
|
|
%v5 = insertelement <8 x i32> %v4, i32 %x5, i32 5
|
|
%v6 = insertelement <8 x i32> %v5, i32 %x6, i32 6
|
|
%v7 = insertelement <8 x i32> %v6, i32 %x7, i32 7
|
|
ret <8 x i32> %v7
|
|
}
|
|
|
|
;
|
|
; vXi32
|
|
;
|
|
|
|
; Sign-extending load test: two consecutive i32 values starting at %p0 are
; loaded (align 1), each is sign-extended to i64, and the results are
; inserted lane by lane into a <2 x i64>.
; Expected output differs per check prefix:
;   - the SSE2 checks keep both scalar loads and scalar sexts;
;   - the SLM checks and the shared AVX checks expect one <2 x i32> load and
;     one sext to <2 x i64>.
define <2 x i64> @loadext_2i32_to_2i64(i32* %p0) {
|
|
; SSE2-LABEL: @loadext_2i32_to_2i64(
|
|
; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i32, i32* [[P0:%.*]], i64 1
|
|
; SSE2-NEXT: [[I0:%.*]] = load i32, i32* [[P0]], align 1
|
|
; SSE2-NEXT: [[I1:%.*]] = load i32, i32* [[P1]], align 1
|
|
; SSE2-NEXT: [[X0:%.*]] = sext i32 [[I0]] to i64
|
|
; SSE2-NEXT: [[X1:%.*]] = sext i32 [[I1]] to i64
|
|
; SSE2-NEXT: [[V0:%.*]] = insertelement <2 x i64> undef, i64 [[X0]], i32 0
|
|
; SSE2-NEXT: [[V1:%.*]] = insertelement <2 x i64> [[V0]], i64 [[X1]], i32 1
|
|
; SSE2-NEXT: ret <2 x i64> [[V1]]
|
|
;
|
|
; SLM-LABEL: @loadext_2i32_to_2i64(
|
|
; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i32, i32* [[P0:%.*]], i64 1
|
|
; SLM-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0]] to <2 x i32>*
|
|
; SLM-NEXT: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]], align 1
|
|
; SLM-NEXT: [[TMP3:%.*]] = sext <2 x i32> [[TMP2]] to <2 x i64>
|
|
; SLM-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
|
|
; SLM-NEXT: [[V0:%.*]] = insertelement <2 x i64> undef, i64 [[TMP4]], i32 0
|
|
; SLM-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1
|
|
; SLM-NEXT: [[V1:%.*]] = insertelement <2 x i64> [[V0]], i64 [[TMP5]], i32 1
|
|
; SLM-NEXT: ret <2 x i64> [[V1]]
|
|
;
|
|
; AVX-LABEL: @loadext_2i32_to_2i64(
|
|
; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i32, i32* [[P0:%.*]], i64 1
|
|
; AVX-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0]] to <2 x i32>*
|
|
; AVX-NEXT: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]], align 1
|
|
; AVX-NEXT: [[TMP3:%.*]] = sext <2 x i32> [[TMP2]] to <2 x i64>
|
|
; AVX-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
|
|
; AVX-NEXT: [[V0:%.*]] = insertelement <2 x i64> undef, i64 [[TMP4]], i32 0
|
|
; AVX-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1
|
|
; AVX-NEXT: [[V1:%.*]] = insertelement <2 x i64> [[V0]], i64 [[TMP5]], i32 1
|
|
; AVX-NEXT: ret <2 x i64> [[V1]]
|
|
;
|
|
%p1 = getelementptr inbounds i32, i32* %p0, i64 1
|
|
%i0 = load i32, i32* %p0, align 1
|
|
%i1 = load i32, i32* %p1, align 1
|
|
%x0 = sext i32 %i0 to i64
|
|
%x1 = sext i32 %i1 to i64
|
|
%v0 = insertelement <2 x i64> undef, i64 %x0, i32 0
|
|
%v1 = insertelement <2 x i64> %v0, i64 %x1, i32 1
|
|
ret <2 x i64> %v1
|
|
}
|
|
|
|
; Sign-extending load test: four consecutive i32 values starting at %p0 are
; loaded (align 1), each is sign-extended to i64, and the results are
; inserted lane by lane into a <4 x i64>.
; Expected output differs per check prefix:
;   - the SSE2 checks keep all four scalar loads and scalar sexts;
;   - the SLM, AVX2 and AVX512 checks expect one <4 x i32> load and one
;     sext to <4 x i64>;
;   - the AVX1 checks expect only lanes 0 and 1 to be vectorized
;     (<2 x i32> load, sext to <2 x i64>) while lanes 2 and 3 stay scalar.
define <4 x i64> @loadext_4i32_to_4i64(i32* %p0) {
|
|
; SSE2-LABEL: @loadext_4i32_to_4i64(
|
|
; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i32, i32* [[P0:%.*]], i64 1
|
|
; SSE2-NEXT: [[P2:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 2
|
|
; SSE2-NEXT: [[P3:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 3
|
|
; SSE2-NEXT: [[I0:%.*]] = load i32, i32* [[P0]], align 1
|
|
; SSE2-NEXT: [[I1:%.*]] = load i32, i32* [[P1]], align 1
|
|
; SSE2-NEXT: [[I2:%.*]] = load i32, i32* [[P2]], align 1
|
|
; SSE2-NEXT: [[I3:%.*]] = load i32, i32* [[P3]], align 1
|
|
; SSE2-NEXT: [[X0:%.*]] = sext i32 [[I0]] to i64
|
|
; SSE2-NEXT: [[X1:%.*]] = sext i32 [[I1]] to i64
|
|
; SSE2-NEXT: [[X2:%.*]] = sext i32 [[I2]] to i64
|
|
; SSE2-NEXT: [[X3:%.*]] = sext i32 [[I3]] to i64
|
|
; SSE2-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[X0]], i32 0
|
|
; SSE2-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[X1]], i32 1
|
|
; SSE2-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[X2]], i32 2
|
|
; SSE2-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[X3]], i32 3
|
|
; SSE2-NEXT: ret <4 x i64> [[V3]]
|
|
;
|
|
; SLM-LABEL: @loadext_4i32_to_4i64(
|
|
; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i32, i32* [[P0:%.*]], i64 1
|
|
; SLM-NEXT: [[P2:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 2
|
|
; SLM-NEXT: [[P3:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 3
|
|
; SLM-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0]] to <4 x i32>*
|
|
; SLM-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 1
|
|
; SLM-NEXT: [[TMP3:%.*]] = sext <4 x i32> [[TMP2]] to <4 x i64>
|
|
; SLM-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0
|
|
; SLM-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0
|
|
; SLM-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1
|
|
; SLM-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1
|
|
; SLM-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2
|
|
; SLM-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[TMP6]], i32 2
|
|
; SLM-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3
|
|
; SLM-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[TMP7]], i32 3
|
|
; SLM-NEXT: ret <4 x i64> [[V3]]
|
|
;
|
|
; AVX1-LABEL: @loadext_4i32_to_4i64(
|
|
; AVX1-NEXT: [[P1:%.*]] = getelementptr inbounds i32, i32* [[P0:%.*]], i64 1
|
|
; AVX1-NEXT: [[P2:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 2
|
|
; AVX1-NEXT: [[P3:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 3
|
|
; AVX1-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0]] to <2 x i32>*
|
|
; AVX1-NEXT: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]], align 1
|
|
; AVX1-NEXT: [[I2:%.*]] = load i32, i32* [[P2]], align 1
|
|
; AVX1-NEXT: [[I3:%.*]] = load i32, i32* [[P3]], align 1
|
|
; AVX1-NEXT: [[TMP3:%.*]] = sext <2 x i32> [[TMP2]] to <2 x i64>
|
|
; AVX1-NEXT: [[X2:%.*]] = sext i32 [[I2]] to i64
|
|
; AVX1-NEXT: [[X3:%.*]] = sext i32 [[I3]] to i64
|
|
; AVX1-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
|
|
; AVX1-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0
|
|
; AVX1-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1
|
|
; AVX1-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1
|
|
; AVX1-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[X2]], i32 2
|
|
; AVX1-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[X3]], i32 3
|
|
; AVX1-NEXT: ret <4 x i64> [[V3]]
|
|
;
|
|
; AVX2-LABEL: @loadext_4i32_to_4i64(
|
|
; AVX2-NEXT: [[P1:%.*]] = getelementptr inbounds i32, i32* [[P0:%.*]], i64 1
|
|
; AVX2-NEXT: [[P2:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 2
|
|
; AVX2-NEXT: [[P3:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 3
|
|
; AVX2-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0]] to <4 x i32>*
|
|
; AVX2-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 1
|
|
; AVX2-NEXT: [[TMP3:%.*]] = sext <4 x i32> [[TMP2]] to <4 x i64>
|
|
; AVX2-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0
|
|
; AVX2-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0
|
|
; AVX2-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1
|
|
; AVX2-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1
|
|
; AVX2-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2
|
|
; AVX2-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[TMP6]], i32 2
|
|
; AVX2-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3
|
|
; AVX2-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[TMP7]], i32 3
|
|
; AVX2-NEXT: ret <4 x i64> [[V3]]
|
|
;
|
|
; AVX512-LABEL: @loadext_4i32_to_4i64(
|
|
; AVX512-NEXT: [[P1:%.*]] = getelementptr inbounds i32, i32* [[P0:%.*]], i64 1
|
|
; AVX512-NEXT: [[P2:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 2
|
|
; AVX512-NEXT: [[P3:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 3
|
|
; AVX512-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0]] to <4 x i32>*
|
|
; AVX512-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 1
|
|
; AVX512-NEXT: [[TMP3:%.*]] = sext <4 x i32> [[TMP2]] to <4 x i64>
|
|
; AVX512-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0
|
|
; AVX512-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0
|
|
; AVX512-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1
|
|
; AVX512-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1
|
|
; AVX512-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2
|
|
; AVX512-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[TMP6]], i32 2
|
|
; AVX512-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3
|
|
; AVX512-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[TMP7]], i32 3
|
|
; AVX512-NEXT: ret <4 x i64> [[V3]]
|
|
;
|
|
%p1 = getelementptr inbounds i32, i32* %p0, i64 1
|
|
%p2 = getelementptr inbounds i32, i32* %p0, i64 2
|
|
%p3 = getelementptr inbounds i32, i32* %p0, i64 3
|
|
%i0 = load i32, i32* %p0, align 1
|
|
%i1 = load i32, i32* %p1, align 1
|
|
%i2 = load i32, i32* %p2, align 1
|
|
%i3 = load i32, i32* %p3, align 1
|
|
%x0 = sext i32 %i0 to i64
|
|
%x1 = sext i32 %i1 to i64
|
|
%x2 = sext i32 %i2 to i64
|
|
%x3 = sext i32 %i3 to i64
|
|
%v0 = insertelement <4 x i64> undef, i64 %x0, i32 0
|
|
%v1 = insertelement <4 x i64> %v0, i64 %x1, i32 1
|
|
%v2 = insertelement <4 x i64> %v1, i64 %x2, i32 2
|
|
%v3 = insertelement <4 x i64> %v2, i64 %x3, i32 3
|
|
ret <4 x i64> %v3
|
|
}
|