The original commit exposed several missing dependencies (e.g. latent bugs in SLP scheduling). Most of these were fixed over the weekend and have had several days to bake. The last was fixed this morning after being noticed in manual review of test changes yesterday. See the review thread for links to each change. Original commit message follows: SLP currently schedules all instructions within a scheduling window which stretches from the first instruction potentially vectorized to the last. This window can include a very large number of unrelated instructions which are not being considered for vectorization. This change switches the code to only schedule the sub-graph consisting of the instructions being vectorized and their transitive users. This has the effect of greatly reducing the amount of work performed in large basic blocks, and thus greatly improves compile time on degenerate examples. To understand the effects, I added some statistics (not planned for upstream contribution). Here's an illustration from my motivating example: Before this patch: 704357 SLP - Number of calcDeps actions 699021 SLP - Number of schedule calls 5598 SLP - Number of ReSchedule actions 59 SLP - Number of ReScheduleOnFail actions 10084 SLP - Number of schedule resets 8523 SLP - Number of vector instructions generated After this patch: 102895 SLP - Number of calcDeps actions 161916 SLP - Number of schedule calls 5637 SLP - Number of ReSchedule actions 55 SLP - Number of ReScheduleOnFail actions 10083 SLP - Number of schedule resets 8403 SLP - Number of vector instructions generated I do want to highlight that there is a small difference in number of generated vector instructions. This example is hitting the bailout due to maximum window size, and the change in scheduling is slightly perturbing when and how we hit it. This can be seen in the RescheduleOnFail counter change. Given that, I think we can safely ignore. The downside of this change can be seen in the large test diff. We group all vectorizable instructions together at the bottom of the scheduling region. This means that vector instructions can move quite far from their original point in code. While maybe undesirable, I don't see this as being a major problem as this pass is not intended to be a general scheduling pass. For context, it's worth noting that the pre-scheduling that SLP does while building the vector tree is exactly the sub-graph scheduling implemented by this patch. Differential Revision: https://reviews.llvm.org/D118538
1152 lines
84 KiB
LLVM
1152 lines
84 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
|
; RUN: opt < %s -mtriple=x86_64-unknown -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,SSE
|
|
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX256,AVX256NODQ
|
|
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=bdver1 -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX256,AVX256NODQ
|
|
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX256,AVX256NODQ
|
|
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skylake-avx512 -mattr=-prefer-256-bit -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX512
|
|
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skylake-avx512 -mattr=+prefer-256-bit -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX256,AVX256DQ
|
|
|
|
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
|
|
|
@src64 = common global [8 x i64] zeroinitializer, align 64
|
|
@src32 = common global [16 x i32] zeroinitializer, align 64
|
|
@src16 = common global [32 x i16] zeroinitializer, align 64
|
|
@src8 = common global [64 x i8] zeroinitializer, align 64
|
|
|
|
@dst64 = common global [8 x double] zeroinitializer, align 64
|
|
@dst32 = common global [16 x float] zeroinitializer, align 64
|
|
|
|
;
|
|
; SITOFP to vXf64
|
|
;
|
|
|
|
define void @sitofp_2i64_2f64() #0 {
|
|
; SSE-LABEL: @sitofp_2i64_2f64(
|
|
; SSE-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
|
|
; SSE-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
|
|
; SSE-NEXT: [[CVT0:%.*]] = sitofp i64 [[LD0]] to double
|
|
; SSE-NEXT: [[CVT1:%.*]] = sitofp i64 [[LD1]] to double
|
|
; SSE-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
|
|
; SSE-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
|
|
; SSE-NEXT: ret void
|
|
;
|
|
; AVX256NODQ-LABEL: @sitofp_2i64_2f64(
|
|
; AVX256NODQ-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
|
|
; AVX256NODQ-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
|
|
; AVX256NODQ-NEXT: [[CVT0:%.*]] = sitofp i64 [[LD0]] to double
|
|
; AVX256NODQ-NEXT: [[CVT1:%.*]] = sitofp i64 [[LD1]] to double
|
|
; AVX256NODQ-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
|
|
; AVX256NODQ-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
|
|
; AVX256NODQ-NEXT: ret void
|
|
;
|
|
; AVX512-LABEL: @sitofp_2i64_2f64(
|
|
; AVX512-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @src64 to <2 x i64>*), align 64
|
|
; AVX512-NEXT: [[TMP2:%.*]] = sitofp <2 x i64> [[TMP1]] to <2 x double>
|
|
; AVX512-NEXT: store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
|
|
; AVX512-NEXT: ret void
|
|
;
|
|
; AVX256DQ-LABEL: @sitofp_2i64_2f64(
|
|
; AVX256DQ-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @src64 to <2 x i64>*), align 64
|
|
; AVX256DQ-NEXT: [[TMP2:%.*]] = sitofp <2 x i64> [[TMP1]] to <2 x double>
|
|
; AVX256DQ-NEXT: store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
|
|
; AVX256DQ-NEXT: ret void
|
|
;
|
|
%ld0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
|
|
%ld1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
|
|
%cvt0 = sitofp i64 %ld0 to double
|
|
%cvt1 = sitofp i64 %ld1 to double
|
|
store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
|
|
store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
|
|
ret void
|
|
}
|
|
|
|
define void @sitofp_4i64_4f64() #0 {
|
|
; SSE-LABEL: @sitofp_4i64_4f64(
|
|
; SSE-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
|
|
; SSE-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
|
|
; SSE-NEXT: [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16
|
|
; SSE-NEXT: [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8
|
|
; SSE-NEXT: [[CVT0:%.*]] = sitofp i64 [[LD0]] to double
|
|
; SSE-NEXT: [[CVT1:%.*]] = sitofp i64 [[LD1]] to double
|
|
; SSE-NEXT: [[CVT2:%.*]] = sitofp i64 [[LD2]] to double
|
|
; SSE-NEXT: [[CVT3:%.*]] = sitofp i64 [[LD3]] to double
|
|
; SSE-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
|
|
; SSE-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
|
|
; SSE-NEXT: store double [[CVT2]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
|
|
; SSE-NEXT: store double [[CVT3]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
|
|
; SSE-NEXT: ret void
|
|
;
|
|
; AVX256NODQ-LABEL: @sitofp_4i64_4f64(
|
|
; AVX256NODQ-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
|
|
; AVX256NODQ-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
|
|
; AVX256NODQ-NEXT: [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16
|
|
; AVX256NODQ-NEXT: [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8
|
|
; AVX256NODQ-NEXT: [[CVT0:%.*]] = sitofp i64 [[LD0]] to double
|
|
; AVX256NODQ-NEXT: [[CVT1:%.*]] = sitofp i64 [[LD1]] to double
|
|
; AVX256NODQ-NEXT: [[CVT2:%.*]] = sitofp i64 [[LD2]] to double
|
|
; AVX256NODQ-NEXT: [[CVT3:%.*]] = sitofp i64 [[LD3]] to double
|
|
; AVX256NODQ-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
|
|
; AVX256NODQ-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
|
|
; AVX256NODQ-NEXT: store double [[CVT2]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
|
|
; AVX256NODQ-NEXT: store double [[CVT3]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
|
|
; AVX256NODQ-NEXT: ret void
|
|
;
|
|
; AVX512-LABEL: @sitofp_4i64_4f64(
|
|
; AVX512-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @src64 to <4 x i64>*), align 64
|
|
; AVX512-NEXT: [[TMP2:%.*]] = sitofp <4 x i64> [[TMP1]] to <4 x double>
|
|
; AVX512-NEXT: store <4 x double> [[TMP2]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 64
|
|
; AVX512-NEXT: ret void
|
|
;
|
|
; AVX256DQ-LABEL: @sitofp_4i64_4f64(
|
|
; AVX256DQ-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @src64 to <4 x i64>*), align 64
|
|
; AVX256DQ-NEXT: [[TMP2:%.*]] = sitofp <4 x i64> [[TMP1]] to <4 x double>
|
|
; AVX256DQ-NEXT: store <4 x double> [[TMP2]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 64
|
|
; AVX256DQ-NEXT: ret void
|
|
;
|
|
%ld0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
|
|
%ld1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
|
|
%ld2 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16
|
|
%ld3 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8
|
|
%cvt0 = sitofp i64 %ld0 to double
|
|
%cvt1 = sitofp i64 %ld1 to double
|
|
%cvt2 = sitofp i64 %ld2 to double
|
|
%cvt3 = sitofp i64 %ld3 to double
|
|
store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
|
|
store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
|
|
store double %cvt2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
|
|
store double %cvt3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
|
|
ret void
|
|
}
|
|
|
|
define void @sitofp_8i64_8f64() #0 {
|
|
; SSE-LABEL: @sitofp_8i64_8f64(
|
|
; SSE-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
|
|
; SSE-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
|
|
; SSE-NEXT: [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16
|
|
; SSE-NEXT: [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8
|
|
; SSE-NEXT: [[LD4:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 4), align 32
|
|
; SSE-NEXT: [[LD5:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 5), align 8
|
|
; SSE-NEXT: [[LD6:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 6), align 16
|
|
; SSE-NEXT: [[LD7:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 7), align 8
|
|
; SSE-NEXT: [[CVT0:%.*]] = sitofp i64 [[LD0]] to double
|
|
; SSE-NEXT: [[CVT1:%.*]] = sitofp i64 [[LD1]] to double
|
|
; SSE-NEXT: [[CVT2:%.*]] = sitofp i64 [[LD2]] to double
|
|
; SSE-NEXT: [[CVT3:%.*]] = sitofp i64 [[LD3]] to double
|
|
; SSE-NEXT: [[CVT4:%.*]] = sitofp i64 [[LD4]] to double
|
|
; SSE-NEXT: [[CVT5:%.*]] = sitofp i64 [[LD5]] to double
|
|
; SSE-NEXT: [[CVT6:%.*]] = sitofp i64 [[LD6]] to double
|
|
; SSE-NEXT: [[CVT7:%.*]] = sitofp i64 [[LD7]] to double
|
|
; SSE-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
|
|
; SSE-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
|
|
; SSE-NEXT: store double [[CVT2]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
|
|
; SSE-NEXT: store double [[CVT3]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
|
|
; SSE-NEXT: store double [[CVT4]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4), align 32
|
|
; SSE-NEXT: store double [[CVT5]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 5), align 8
|
|
; SSE-NEXT: store double [[CVT6]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6), align 16
|
|
; SSE-NEXT: store double [[CVT7]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 7), align 8
|
|
; SSE-NEXT: ret void
|
|
;
|
|
; AVX256NODQ-LABEL: @sitofp_8i64_8f64(
|
|
; AVX256NODQ-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
|
|
; AVX256NODQ-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
|
|
; AVX256NODQ-NEXT: [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16
|
|
; AVX256NODQ-NEXT: [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8
|
|
; AVX256NODQ-NEXT: [[LD4:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 4), align 32
|
|
; AVX256NODQ-NEXT: [[LD5:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 5), align 8
|
|
; AVX256NODQ-NEXT: [[LD6:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 6), align 16
|
|
; AVX256NODQ-NEXT: [[LD7:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 7), align 8
|
|
; AVX256NODQ-NEXT: [[CVT0:%.*]] = sitofp i64 [[LD0]] to double
|
|
; AVX256NODQ-NEXT: [[CVT1:%.*]] = sitofp i64 [[LD1]] to double
|
|
; AVX256NODQ-NEXT: [[CVT2:%.*]] = sitofp i64 [[LD2]] to double
|
|
; AVX256NODQ-NEXT: [[CVT3:%.*]] = sitofp i64 [[LD3]] to double
|
|
; AVX256NODQ-NEXT: [[CVT4:%.*]] = sitofp i64 [[LD4]] to double
|
|
; AVX256NODQ-NEXT: [[CVT5:%.*]] = sitofp i64 [[LD5]] to double
|
|
; AVX256NODQ-NEXT: [[CVT6:%.*]] = sitofp i64 [[LD6]] to double
|
|
; AVX256NODQ-NEXT: [[CVT7:%.*]] = sitofp i64 [[LD7]] to double
|
|
; AVX256NODQ-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
|
|
; AVX256NODQ-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
|
|
; AVX256NODQ-NEXT: store double [[CVT2]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
|
|
; AVX256NODQ-NEXT: store double [[CVT3]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
|
|
; AVX256NODQ-NEXT: store double [[CVT4]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4), align 32
|
|
; AVX256NODQ-NEXT: store double [[CVT5]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 5), align 8
|
|
; AVX256NODQ-NEXT: store double [[CVT6]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6), align 16
|
|
; AVX256NODQ-NEXT: store double [[CVT7]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 7), align 8
|
|
; AVX256NODQ-NEXT: ret void
|
|
;
|
|
; AVX512-LABEL: @sitofp_8i64_8f64(
|
|
; AVX512-NEXT: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @src64 to <8 x i64>*), align 64
|
|
; AVX512-NEXT: [[TMP2:%.*]] = sitofp <8 x i64> [[TMP1]] to <8 x double>
|
|
; AVX512-NEXT: store <8 x double> [[TMP2]], <8 x double>* bitcast ([8 x double]* @dst64 to <8 x double>*), align 64
|
|
; AVX512-NEXT: ret void
|
|
;
|
|
; AVX256DQ-LABEL: @sitofp_8i64_8f64(
|
|
; AVX256DQ-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @src64 to <4 x i64>*), align 64
|
|
; AVX256DQ-NEXT: [[TMP2:%.*]] = sitofp <4 x i64> [[TMP1]] to <4 x double>
|
|
; AVX256DQ-NEXT: store <4 x double> [[TMP2]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 64
|
|
; AVX256DQ-NEXT: [[TMP3:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 4) to <4 x i64>*), align 32
|
|
; AVX256DQ-NEXT: [[TMP4:%.*]] = sitofp <4 x i64> [[TMP3]] to <4 x double>
|
|
; AVX256DQ-NEXT: store <4 x double> [[TMP4]], <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <4 x double>*), align 32
|
|
; AVX256DQ-NEXT: ret void
|
|
;
|
|
%ld0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
|
|
%ld1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
|
|
%ld2 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16
|
|
%ld3 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8
|
|
%ld4 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 4), align 32
|
|
%ld5 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 5), align 8
|
|
%ld6 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 6), align 16
|
|
%ld7 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 7), align 8
|
|
%cvt0 = sitofp i64 %ld0 to double
|
|
%cvt1 = sitofp i64 %ld1 to double
|
|
%cvt2 = sitofp i64 %ld2 to double
|
|
%cvt3 = sitofp i64 %ld3 to double
|
|
%cvt4 = sitofp i64 %ld4 to double
|
|
%cvt5 = sitofp i64 %ld5 to double
|
|
%cvt6 = sitofp i64 %ld6 to double
|
|
%cvt7 = sitofp i64 %ld7 to double
|
|
store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
|
|
store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
|
|
store double %cvt2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
|
|
store double %cvt3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
|
|
store double %cvt4, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4), align 32
|
|
store double %cvt5, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 5), align 8
|
|
store double %cvt6, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6), align 16
|
|
store double %cvt7, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 7), align 8
|
|
ret void
|
|
}
|
|
|
|
define void @sitofp_2i32_2f64() #0 {
|
|
; CHECK-LABEL: @sitofp_2i32_2f64(
|
|
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* bitcast ([16 x i32]* @src32 to <2 x i32>*), align 64
|
|
; CHECK-NEXT: [[TMP2:%.*]] = sitofp <2 x i32> [[TMP1]] to <2 x double>
|
|
; CHECK-NEXT: store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
%ld0 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
|
|
%ld1 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
|
|
%cvt0 = sitofp i32 %ld0 to double
|
|
%cvt1 = sitofp i32 %ld1 to double
|
|
store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
|
|
store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
|
|
ret void
|
|
}
|
|
|
|
define void @sitofp_4i32_4f64() #0 {
|
|
; SSE-LABEL: @sitofp_4i32_4f64(
|
|
; SSE-NEXT: [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* bitcast ([16 x i32]* @src32 to <2 x i32>*), align 64
|
|
; SSE-NEXT: [[TMP2:%.*]] = sitofp <2 x i32> [[TMP1]] to <2 x double>
|
|
; SSE-NEXT: store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
|
|
; SSE-NEXT: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 2) to <2 x i32>*), align 8
|
|
; SSE-NEXT: [[TMP4:%.*]] = sitofp <2 x i32> [[TMP3]] to <2 x double>
|
|
; SSE-NEXT: store <2 x double> [[TMP4]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2) to <2 x double>*), align 16
|
|
; SSE-NEXT: ret void
|
|
;
|
|
; AVX-LABEL: @sitofp_4i32_4f64(
|
|
; AVX-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @src32 to <4 x i32>*), align 64
|
|
; AVX-NEXT: [[TMP2:%.*]] = sitofp <4 x i32> [[TMP1]] to <4 x double>
|
|
; AVX-NEXT: store <4 x double> [[TMP2]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 64
|
|
; AVX-NEXT: ret void
|
|
;
|
|
%ld0 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
|
|
%ld1 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
|
|
%ld2 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 2), align 8
|
|
%ld3 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 3), align 4
|
|
%cvt0 = sitofp i32 %ld0 to double
|
|
%cvt1 = sitofp i32 %ld1 to double
|
|
%cvt2 = sitofp i32 %ld2 to double
|
|
%cvt3 = sitofp i32 %ld3 to double
|
|
store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
|
|
store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
|
|
store double %cvt2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
|
|
store double %cvt3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
|
|
ret void
|
|
}
|
|
|
|
define void @sitofp_8i32_8f64() #0 {
|
|
; SSE-LABEL: @sitofp_8i32_8f64(
|
|
; SSE-NEXT: [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* bitcast ([16 x i32]* @src32 to <2 x i32>*), align 64
|
|
; SSE-NEXT: [[TMP2:%.*]] = sitofp <2 x i32> [[TMP1]] to <2 x double>
|
|
; SSE-NEXT: store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
|
|
; SSE-NEXT: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 2) to <2 x i32>*), align 8
|
|
; SSE-NEXT: [[TMP4:%.*]] = sitofp <2 x i32> [[TMP3]] to <2 x double>
|
|
; SSE-NEXT: store <2 x double> [[TMP4]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2) to <2 x double>*), align 16
|
|
; SSE-NEXT: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 4) to <2 x i32>*), align 16
|
|
; SSE-NEXT: [[TMP6:%.*]] = sitofp <2 x i32> [[TMP5]] to <2 x double>
|
|
; SSE-NEXT: store <2 x double> [[TMP6]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <2 x double>*), align 32
|
|
; SSE-NEXT: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 6) to <2 x i32>*), align 8
|
|
; SSE-NEXT: [[TMP8:%.*]] = sitofp <2 x i32> [[TMP7]] to <2 x double>
|
|
; SSE-NEXT: store <2 x double> [[TMP8]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6) to <2 x double>*), align 16
|
|
; SSE-NEXT: ret void
|
|
;
|
|
; AVX256-LABEL: @sitofp_8i32_8f64(
|
|
; AVX256-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @src32 to <4 x i32>*), align 64
|
|
; AVX256-NEXT: [[TMP2:%.*]] = sitofp <4 x i32> [[TMP1]] to <4 x double>
|
|
; AVX256-NEXT: store <4 x double> [[TMP2]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 64
|
|
; AVX256-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 4) to <4 x i32>*), align 16
|
|
; AVX256-NEXT: [[TMP4:%.*]] = sitofp <4 x i32> [[TMP3]] to <4 x double>
|
|
; AVX256-NEXT: store <4 x double> [[TMP4]], <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <4 x double>*), align 32
|
|
; AVX256-NEXT: ret void
|
|
;
|
|
; AVX512-LABEL: @sitofp_8i32_8f64(
|
|
; AVX512-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @src32 to <8 x i32>*), align 64
|
|
; AVX512-NEXT: [[TMP2:%.*]] = sitofp <8 x i32> [[TMP1]] to <8 x double>
|
|
; AVX512-NEXT: store <8 x double> [[TMP2]], <8 x double>* bitcast ([8 x double]* @dst64 to <8 x double>*), align 64
|
|
; AVX512-NEXT: ret void
|
|
;
|
|
%ld0 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
|
|
%ld1 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
|
|
%ld2 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 2), align 8
|
|
%ld3 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 3), align 4
|
|
%ld4 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 4), align 16
|
|
%ld5 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 5), align 4
|
|
%ld6 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 6), align 8
|
|
%ld7 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 7), align 4
|
|
%cvt0 = sitofp i32 %ld0 to double
|
|
%cvt1 = sitofp i32 %ld1 to double
|
|
%cvt2 = sitofp i32 %ld2 to double
|
|
%cvt3 = sitofp i32 %ld3 to double
|
|
%cvt4 = sitofp i32 %ld4 to double
|
|
%cvt5 = sitofp i32 %ld5 to double
|
|
%cvt6 = sitofp i32 %ld6 to double
|
|
%cvt7 = sitofp i32 %ld7 to double
|
|
store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
|
|
store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
|
|
store double %cvt2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
|
|
store double %cvt3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
|
|
store double %cvt4, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4), align 32
|
|
store double %cvt5, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 5), align 8
|
|
store double %cvt6, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6), align 16
|
|
store double %cvt7, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 7), align 8
|
|
ret void
|
|
}
|
|
|
|
define void @sitofp_2i16_2f64() #0 {
|
|
; CHECK-LABEL: @sitofp_2i16_2f64(
|
|
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i16>, <2 x i16>* bitcast ([32 x i16]* @src16 to <2 x i16>*), align 64
|
|
; CHECK-NEXT: [[TMP2:%.*]] = sitofp <2 x i16> [[TMP1]] to <2 x double>
|
|
; CHECK-NEXT: store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
%ld0 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64
|
|
%ld1 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2
|
|
%cvt0 = sitofp i16 %ld0 to double
|
|
%cvt1 = sitofp i16 %ld1 to double
|
|
store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
|
|
store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
|
|
ret void
|
|
}
|
|
|
|
define void @sitofp_4i16_4f64() #0 {
|
|
; SSE-LABEL: @sitofp_4i16_4f64(
|
|
; SSE-NEXT: [[TMP1:%.*]] = load <2 x i16>, <2 x i16>* bitcast ([32 x i16]* @src16 to <2 x i16>*), align 64
|
|
; SSE-NEXT: [[TMP2:%.*]] = sitofp <2 x i16> [[TMP1]] to <2 x double>
|
|
; SSE-NEXT: store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
|
|
; SSE-NEXT: [[TMP3:%.*]] = load <2 x i16>, <2 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 2) to <2 x i16>*), align 4
|
|
; SSE-NEXT: [[TMP4:%.*]] = sitofp <2 x i16> [[TMP3]] to <2 x double>
|
|
; SSE-NEXT: store <2 x double> [[TMP4]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2) to <2 x double>*), align 16
|
|
; SSE-NEXT: ret void
|
|
;
|
|
; AVX-LABEL: @sitofp_4i16_4f64(
|
|
; AVX-NEXT: [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* bitcast ([32 x i16]* @src16 to <4 x i16>*), align 64
|
|
; AVX-NEXT: [[TMP2:%.*]] = sitofp <4 x i16> [[TMP1]] to <4 x double>
|
|
; AVX-NEXT: store <4 x double> [[TMP2]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 64
|
|
; AVX-NEXT: ret void
|
|
;
|
|
%ld0 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64
|
|
%ld1 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2
|
|
%ld2 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 2), align 4
|
|
%ld3 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 3), align 2
|
|
%cvt0 = sitofp i16 %ld0 to double
|
|
%cvt1 = sitofp i16 %ld1 to double
|
|
%cvt2 = sitofp i16 %ld2 to double
|
|
%cvt3 = sitofp i16 %ld3 to double
|
|
store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
|
|
store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
|
|
store double %cvt2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
|
|
store double %cvt3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
|
|
ret void
|
|
}
|
|
|
|
define void @sitofp_8i16_8f64() #0 {
|
|
; SSE-LABEL: @sitofp_8i16_8f64(
|
|
; SSE-NEXT: [[TMP1:%.*]] = load <2 x i16>, <2 x i16>* bitcast ([32 x i16]* @src16 to <2 x i16>*), align 64
|
|
; SSE-NEXT: [[TMP2:%.*]] = sitofp <2 x i16> [[TMP1]] to <2 x double>
|
|
; SSE-NEXT: store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
|
|
; SSE-NEXT: [[TMP3:%.*]] = load <2 x i16>, <2 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 2) to <2 x i16>*), align 4
|
|
; SSE-NEXT: [[TMP4:%.*]] = sitofp <2 x i16> [[TMP3]] to <2 x double>
|
|
; SSE-NEXT: store <2 x double> [[TMP4]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2) to <2 x double>*), align 16
|
|
; SSE-NEXT: [[TMP5:%.*]] = load <2 x i16>, <2 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 4) to <2 x i16>*), align 8
|
|
; SSE-NEXT: [[TMP6:%.*]] = sitofp <2 x i16> [[TMP5]] to <2 x double>
|
|
; SSE-NEXT: store <2 x double> [[TMP6]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <2 x double>*), align 32
|
|
; SSE-NEXT: [[TMP7:%.*]] = load <2 x i16>, <2 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 6) to <2 x i16>*), align 4
|
|
; SSE-NEXT: [[TMP8:%.*]] = sitofp <2 x i16> [[TMP7]] to <2 x double>
|
|
; SSE-NEXT: store <2 x double> [[TMP8]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6) to <2 x double>*), align 16
|
|
; SSE-NEXT: ret void
|
|
;
|
|
; AVX256-LABEL: @sitofp_8i16_8f64(
|
|
; AVX256-NEXT: [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* bitcast ([32 x i16]* @src16 to <4 x i16>*), align 64
|
|
; AVX256-NEXT: [[TMP2:%.*]] = sitofp <4 x i16> [[TMP1]] to <4 x double>
|
|
; AVX256-NEXT: store <4 x double> [[TMP2]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 64
|
|
; AVX256-NEXT: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 4) to <4 x i16>*), align 8
|
|
; AVX256-NEXT: [[TMP4:%.*]] = sitofp <4 x i16> [[TMP3]] to <4 x double>
|
|
; AVX256-NEXT: store <4 x double> [[TMP4]], <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <4 x double>*), align 32
|
|
; AVX256-NEXT: ret void
|
|
;
|
|
; AVX512-LABEL: @sitofp_8i16_8f64(
|
|
; AVX512-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([32 x i16]* @src16 to <8 x i16>*), align 64
|
|
; AVX512-NEXT: [[TMP2:%.*]] = sitofp <8 x i16> [[TMP1]] to <8 x double>
|
|
; AVX512-NEXT: store <8 x double> [[TMP2]], <8 x double>* bitcast ([8 x double]* @dst64 to <8 x double>*), align 64
|
|
; AVX512-NEXT: ret void
|
|
;
|
|
%ld0 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64
|
|
%ld1 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2
|
|
%ld2 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 2), align 4
|
|
%ld3 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 3), align 2
|
|
%ld4 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 4), align 8
|
|
%ld5 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 5), align 2
|
|
%ld6 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 6), align 4
|
|
%ld7 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 7), align 2
|
|
%cvt0 = sitofp i16 %ld0 to double
|
|
%cvt1 = sitofp i16 %ld1 to double
|
|
%cvt2 = sitofp i16 %ld2 to double
|
|
%cvt3 = sitofp i16 %ld3 to double
|
|
%cvt4 = sitofp i16 %ld4 to double
|
|
%cvt5 = sitofp i16 %ld5 to double
|
|
%cvt6 = sitofp i16 %ld6 to double
|
|
%cvt7 = sitofp i16 %ld7 to double
|
|
store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
|
|
store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
|
|
store double %cvt2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
|
|
store double %cvt3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
|
|
store double %cvt4, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4), align 32
|
|
store double %cvt5, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 5), align 8
|
|
store double %cvt6, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6), align 16
|
|
store double %cvt7, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 7), align 8
|
|
ret void
|
|
}
|
|
|
|
define void @sitofp_2i8_2f64() #0 {
|
|
; CHECK-LABEL: @sitofp_2i8_2f64(
|
|
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i8>, <2 x i8>* bitcast ([64 x i8]* @src8 to <2 x i8>*), align 64
|
|
; CHECK-NEXT: [[TMP2:%.*]] = sitofp <2 x i8> [[TMP1]] to <2 x double>
|
|
; CHECK-NEXT: store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
%ld0 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 0), align 64
|
|
%ld1 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 1), align 1
|
|
%cvt0 = sitofp i8 %ld0 to double
|
|
%cvt1 = sitofp i8 %ld1 to double
|
|
store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
|
|
store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
|
|
ret void
|
|
}
|
|
|
|
define void @sitofp_4i8_4f64() #0 {
|
|
; SSE-LABEL: @sitofp_4i8_4f64(
|
|
; SSE-NEXT: [[TMP1:%.*]] = load <2 x i8>, <2 x i8>* bitcast ([64 x i8]* @src8 to <2 x i8>*), align 64
|
|
; SSE-NEXT: [[TMP2:%.*]] = sitofp <2 x i8> [[TMP1]] to <2 x double>
|
|
; SSE-NEXT: store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
|
|
; SSE-NEXT: [[TMP3:%.*]] = load <2 x i8>, <2 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 2) to <2 x i8>*), align 2
|
|
; SSE-NEXT: [[TMP4:%.*]] = sitofp <2 x i8> [[TMP3]] to <2 x double>
|
|
; SSE-NEXT: store <2 x double> [[TMP4]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2) to <2 x double>*), align 16
|
|
; SSE-NEXT: ret void
|
|
;
|
|
; AVX-LABEL: @sitofp_4i8_4f64(
|
|
; AVX-NEXT: [[TMP1:%.*]] = load <4 x i8>, <4 x i8>* bitcast ([64 x i8]* @src8 to <4 x i8>*), align 64
|
|
; AVX-NEXT: [[TMP2:%.*]] = sitofp <4 x i8> [[TMP1]] to <4 x double>
|
|
; AVX-NEXT: store <4 x double> [[TMP2]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 64
|
|
; AVX-NEXT: ret void
|
|
;
|
|
%ld0 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 0), align 64
|
|
%ld1 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 1), align 1
|
|
%ld2 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 2), align 2
|
|
%ld3 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 3), align 1
|
|
%cvt0 = sitofp i8 %ld0 to double
|
|
%cvt1 = sitofp i8 %ld1 to double
|
|
%cvt2 = sitofp i8 %ld2 to double
|
|
%cvt3 = sitofp i8 %ld3 to double
|
|
store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
|
|
store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
|
|
store double %cvt2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
|
|
store double %cvt3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
|
|
ret void
|
|
}
|
|
|
|
define void @sitofp_8i8_8f64() #0 {
|
|
; SSE-LABEL: @sitofp_8i8_8f64(
|
|
; SSE-NEXT: [[TMP1:%.*]] = load <2 x i8>, <2 x i8>* bitcast ([64 x i8]* @src8 to <2 x i8>*), align 64
|
|
; SSE-NEXT: [[TMP2:%.*]] = sitofp <2 x i8> [[TMP1]] to <2 x double>
|
|
; SSE-NEXT: store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
|
|
; SSE-NEXT: [[TMP3:%.*]] = load <2 x i8>, <2 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 2) to <2 x i8>*), align 2
|
|
; SSE-NEXT: [[TMP4:%.*]] = sitofp <2 x i8> [[TMP3]] to <2 x double>
|
|
; SSE-NEXT: store <2 x double> [[TMP4]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2) to <2 x double>*), align 16
|
|
; SSE-NEXT: [[TMP5:%.*]] = load <2 x i8>, <2 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 4) to <2 x i8>*), align 4
|
|
; SSE-NEXT: [[TMP6:%.*]] = sitofp <2 x i8> [[TMP5]] to <2 x double>
|
|
; SSE-NEXT: store <2 x double> [[TMP6]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <2 x double>*), align 32
|
|
; SSE-NEXT: [[TMP7:%.*]] = load <2 x i8>, <2 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 6) to <2 x i8>*), align 2
|
|
; SSE-NEXT: [[TMP8:%.*]] = sitofp <2 x i8> [[TMP7]] to <2 x double>
|
|
; SSE-NEXT: store <2 x double> [[TMP8]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6) to <2 x double>*), align 16
|
|
; SSE-NEXT: ret void
|
|
;
|
|
; AVX256-LABEL: @sitofp_8i8_8f64(
|
|
; AVX256-NEXT: [[TMP1:%.*]] = load <4 x i8>, <4 x i8>* bitcast ([64 x i8]* @src8 to <4 x i8>*), align 64
|
|
; AVX256-NEXT: [[TMP2:%.*]] = sitofp <4 x i8> [[TMP1]] to <4 x double>
|
|
; AVX256-NEXT: store <4 x double> [[TMP2]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 64
|
|
; AVX256-NEXT: [[TMP3:%.*]] = load <4 x i8>, <4 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 4) to <4 x i8>*), align 4
|
|
; AVX256-NEXT: [[TMP4:%.*]] = sitofp <4 x i8> [[TMP3]] to <4 x double>
|
|
; AVX256-NEXT: store <4 x double> [[TMP4]], <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <4 x double>*), align 32
|
|
; AVX256-NEXT: ret void
|
|
;
|
|
; AVX512-LABEL: @sitofp_8i8_8f64(
|
|
; AVX512-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* bitcast ([64 x i8]* @src8 to <8 x i8>*), align 64
|
|
; AVX512-NEXT: [[TMP2:%.*]] = sitofp <8 x i8> [[TMP1]] to <8 x double>
|
|
; AVX512-NEXT: store <8 x double> [[TMP2]], <8 x double>* bitcast ([8 x double]* @dst64 to <8 x double>*), align 64
|
|
; AVX512-NEXT: ret void
|
|
;
|
|
%ld0 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 0), align 64
|
|
%ld1 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 1), align 1
|
|
%ld2 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 2), align 2
|
|
%ld3 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 3), align 1
|
|
%ld4 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 4), align 4
|
|
%ld5 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 5), align 1
|
|
%ld6 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 6), align 2
|
|
%ld7 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 7), align 1
|
|
%cvt0 = sitofp i8 %ld0 to double
|
|
%cvt1 = sitofp i8 %ld1 to double
|
|
%cvt2 = sitofp i8 %ld2 to double
|
|
%cvt3 = sitofp i8 %ld3 to double
|
|
%cvt4 = sitofp i8 %ld4 to double
|
|
%cvt5 = sitofp i8 %ld5 to double
|
|
%cvt6 = sitofp i8 %ld6 to double
|
|
%cvt7 = sitofp i8 %ld7 to double
|
|
store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
|
|
store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
|
|
store double %cvt2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
|
|
store double %cvt3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
|
|
store double %cvt4, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4), align 32
|
|
store double %cvt5, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 5), align 8
|
|
store double %cvt6, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6), align 16
|
|
store double %cvt7, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 7), align 8
|
|
ret void
|
|
}
|
|
|
|
;
|
|
; SITOFP to vXf32
|
|
;
|
|
|
|
define void @sitofp_2i64_2f32() #0 {
|
|
; CHECK-LABEL: @sitofp_2i64_2f32(
|
|
; CHECK-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
|
|
; CHECK-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
|
|
; CHECK-NEXT: [[CVT0:%.*]] = sitofp i64 [[LD0]] to float
|
|
; CHECK-NEXT: [[CVT1:%.*]] = sitofp i64 [[LD1]] to float
|
|
; CHECK-NEXT: store float [[CVT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
|
|
; CHECK-NEXT: store float [[CVT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
%ld0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
|
|
%ld1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
|
|
%cvt0 = sitofp i64 %ld0 to float
|
|
%cvt1 = sitofp i64 %ld1 to float
|
|
store float %cvt0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
|
|
store float %cvt1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
|
|
ret void
|
|
}
|
|
|
|
define void @sitofp_4i64_4f32() #0 {
|
|
; CHECK-LABEL: @sitofp_4i64_4f32(
|
|
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @src64 to <4 x i64>*), align 64
|
|
; CHECK-NEXT: [[TMP2:%.*]] = sitofp <4 x i64> [[TMP1]] to <4 x float>
|
|
; CHECK-NEXT: store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
%ld0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
|
|
%ld1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
|
|
%ld2 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16
|
|
%ld3 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8
|
|
%cvt0 = sitofp i64 %ld0 to float
|
|
%cvt1 = sitofp i64 %ld1 to float
|
|
%cvt2 = sitofp i64 %ld2 to float
|
|
%cvt3 = sitofp i64 %ld3 to float
|
|
store float %cvt0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
|
|
store float %cvt1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
|
|
store float %cvt2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
|
|
store float %cvt3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
|
|
ret void
|
|
}
|
|
|
|
define void @sitofp_8i64_8f32() #0 {
|
|
; SSE-LABEL: @sitofp_8i64_8f32(
|
|
; SSE-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @src64 to <4 x i64>*), align 64
|
|
; SSE-NEXT: [[TMP2:%.*]] = sitofp <4 x i64> [[TMP1]] to <4 x float>
|
|
; SSE-NEXT: store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
|
|
; SSE-NEXT: [[TMP3:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 4) to <4 x i64>*), align 32
|
|
; SSE-NEXT: [[TMP4:%.*]] = sitofp <4 x i64> [[TMP3]] to <4 x float>
|
|
; SSE-NEXT: store <4 x float> [[TMP4]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 16
|
|
; SSE-NEXT: ret void
|
|
;
|
|
; AVX-LABEL: @sitofp_8i64_8f32(
|
|
; AVX-NEXT: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @src64 to <8 x i64>*), align 64
|
|
; AVX-NEXT: [[TMP2:%.*]] = sitofp <8 x i64> [[TMP1]] to <8 x float>
|
|
; AVX-NEXT: store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 64
|
|
; AVX-NEXT: ret void
|
|
;
|
|
%ld0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
|
|
%ld1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
|
|
%ld2 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16
|
|
%ld3 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8
|
|
%ld4 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 4), align 32
|
|
%ld5 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 5), align 8
|
|
%ld6 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 6), align 16
|
|
%ld7 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 7), align 8
|
|
%cvt0 = sitofp i64 %ld0 to float
|
|
%cvt1 = sitofp i64 %ld1 to float
|
|
%cvt2 = sitofp i64 %ld2 to float
|
|
%cvt3 = sitofp i64 %ld3 to float
|
|
%cvt4 = sitofp i64 %ld4 to float
|
|
%cvt5 = sitofp i64 %ld5 to float
|
|
%cvt6 = sitofp i64 %ld6 to float
|
|
%cvt7 = sitofp i64 %ld7 to float
|
|
store float %cvt0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
|
|
store float %cvt1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
|
|
store float %cvt2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
|
|
store float %cvt3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
|
|
store float %cvt4, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 16
|
|
store float %cvt5, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
|
|
store float %cvt6, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 8
|
|
store float %cvt7, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
|
|
ret void
|
|
}
|
|
|
|
define void @sitofp_4i32_4f32() #0 {
|
|
; CHECK-LABEL: @sitofp_4i32_4f32(
|
|
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @src32 to <4 x i32>*), align 64
|
|
; CHECK-NEXT: [[TMP2:%.*]] = sitofp <4 x i32> [[TMP1]] to <4 x float>
|
|
; CHECK-NEXT: store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
%ld0 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
|
|
%ld1 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
|
|
%ld2 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 2), align 8
|
|
%ld3 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 3), align 4
|
|
%cvt0 = sitofp i32 %ld0 to float
|
|
%cvt1 = sitofp i32 %ld1 to float
|
|
%cvt2 = sitofp i32 %ld2 to float
|
|
%cvt3 = sitofp i32 %ld3 to float
|
|
store float %cvt0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
|
|
store float %cvt1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
|
|
store float %cvt2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
|
|
store float %cvt3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
|
|
ret void
|
|
}
|
|
|
|
define void @sitofp_8i32_8f32() #0 {
|
|
; SSE-LABEL: @sitofp_8i32_8f32(
|
|
; SSE-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @src32 to <4 x i32>*), align 64
|
|
; SSE-NEXT: [[TMP2:%.*]] = sitofp <4 x i32> [[TMP1]] to <4 x float>
|
|
; SSE-NEXT: store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
|
|
; SSE-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 4) to <4 x i32>*), align 16
|
|
; SSE-NEXT: [[TMP4:%.*]] = sitofp <4 x i32> [[TMP3]] to <4 x float>
|
|
; SSE-NEXT: store <4 x float> [[TMP4]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 16
|
|
; SSE-NEXT: ret void
|
|
;
|
|
; AVX-LABEL: @sitofp_8i32_8f32(
|
|
; AVX-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @src32 to <8 x i32>*), align 64
|
|
; AVX-NEXT: [[TMP2:%.*]] = sitofp <8 x i32> [[TMP1]] to <8 x float>
|
|
; AVX-NEXT: store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 64
|
|
; AVX-NEXT: ret void
|
|
;
|
|
%ld0 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
|
|
%ld1 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
|
|
%ld2 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 2), align 8
|
|
%ld3 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 3), align 4
|
|
%ld4 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 4), align 16
|
|
%ld5 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 5), align 4
|
|
%ld6 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 6), align 8
|
|
%ld7 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 7), align 4
|
|
%cvt0 = sitofp i32 %ld0 to float
|
|
%cvt1 = sitofp i32 %ld1 to float
|
|
%cvt2 = sitofp i32 %ld2 to float
|
|
%cvt3 = sitofp i32 %ld3 to float
|
|
%cvt4 = sitofp i32 %ld4 to float
|
|
%cvt5 = sitofp i32 %ld5 to float
|
|
%cvt6 = sitofp i32 %ld6 to float
|
|
%cvt7 = sitofp i32 %ld7 to float
|
|
store float %cvt0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
|
|
store float %cvt1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
|
|
store float %cvt2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
|
|
store float %cvt3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
|
|
store float %cvt4, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 16
|
|
store float %cvt5, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
|
|
store float %cvt6, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 8
|
|
store float %cvt7, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
|
|
ret void
|
|
}
|
|
|
|
define void @sitofp_16i32_16f32() #0 {
|
|
; SSE-LABEL: @sitofp_16i32_16f32(
|
|
; SSE-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @src32 to <4 x i32>*), align 64
|
|
; SSE-NEXT: [[TMP2:%.*]] = sitofp <4 x i32> [[TMP1]] to <4 x float>
|
|
; SSE-NEXT: store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
|
|
; SSE-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 4) to <4 x i32>*), align 16
|
|
; SSE-NEXT: [[TMP4:%.*]] = sitofp <4 x i32> [[TMP3]] to <4 x float>
|
|
; SSE-NEXT: store <4 x float> [[TMP4]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 16
|
|
; SSE-NEXT: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 8) to <4 x i32>*), align 32
|
|
; SSE-NEXT: [[TMP6:%.*]] = sitofp <4 x i32> [[TMP5]] to <4 x float>
|
|
; SSE-NEXT: store <4 x float> [[TMP6]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <4 x float>*), align 32
|
|
; SSE-NEXT: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 12) to <4 x i32>*), align 16
|
|
; SSE-NEXT: [[TMP8:%.*]] = sitofp <4 x i32> [[TMP7]] to <4 x float>
|
|
; SSE-NEXT: store <4 x float> [[TMP8]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12) to <4 x float>*), align 16
|
|
; SSE-NEXT: ret void
|
|
;
|
|
; AVX256-LABEL: @sitofp_16i32_16f32(
|
|
; AVX256-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @src32 to <8 x i32>*), align 64
|
|
; AVX256-NEXT: [[TMP2:%.*]] = sitofp <8 x i32> [[TMP1]] to <8 x float>
|
|
; AVX256-NEXT: store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 64
|
|
; AVX256-NEXT: [[TMP3:%.*]] = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 8) to <8 x i32>*), align 32
|
|
; AVX256-NEXT: [[TMP4:%.*]] = sitofp <8 x i32> [[TMP3]] to <8 x float>
|
|
; AVX256-NEXT: store <8 x float> [[TMP4]], <8 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <8 x float>*), align 32
|
|
; AVX256-NEXT: ret void
|
|
;
|
|
; AVX512-LABEL: @sitofp_16i32_16f32(
|
|
; AVX512-NEXT: [[TMP1:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([16 x i32]* @src32 to <16 x i32>*), align 64
|
|
; AVX512-NEXT: [[TMP2:%.*]] = sitofp <16 x i32> [[TMP1]] to <16 x float>
|
|
; AVX512-NEXT: store <16 x float> [[TMP2]], <16 x float>* bitcast ([16 x float]* @dst32 to <16 x float>*), align 64
|
|
; AVX512-NEXT: ret void
|
|
;
|
|
%ld0 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0 ), align 64
|
|
%ld1 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1 ), align 4
|
|
%ld2 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 2 ), align 8
|
|
%ld3 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 3 ), align 4
|
|
%ld4 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 4 ), align 16
|
|
%ld5 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 5 ), align 4
|
|
%ld6 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 6 ), align 8
|
|
%ld7 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 7 ), align 4
|
|
%ld8 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 8 ), align 32
|
|
%ld9 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 9 ), align 4
|
|
%ld10 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 10), align 8
|
|
%ld11 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 11), align 4
|
|
%ld12 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 12), align 16
|
|
%ld13 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 13), align 4
|
|
%ld14 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 14), align 8
|
|
%ld15 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 15), align 4
|
|
%cvt0 = sitofp i32 %ld0 to float
|
|
%cvt1 = sitofp i32 %ld1 to float
|
|
%cvt2 = sitofp i32 %ld2 to float
|
|
%cvt3 = sitofp i32 %ld3 to float
|
|
%cvt4 = sitofp i32 %ld4 to float
|
|
%cvt5 = sitofp i32 %ld5 to float
|
|
%cvt6 = sitofp i32 %ld6 to float
|
|
%cvt7 = sitofp i32 %ld7 to float
|
|
%cvt8 = sitofp i32 %ld8 to float
|
|
%cvt9 = sitofp i32 %ld9 to float
|
|
%cvt10 = sitofp i32 %ld10 to float
|
|
%cvt11 = sitofp i32 %ld11 to float
|
|
%cvt12 = sitofp i32 %ld12 to float
|
|
%cvt13 = sitofp i32 %ld13 to float
|
|
%cvt14 = sitofp i32 %ld14 to float
|
|
%cvt15 = sitofp i32 %ld15 to float
|
|
store float %cvt0 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0 ), align 64
|
|
store float %cvt1 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1 ), align 4
|
|
store float %cvt2 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2 ), align 8
|
|
store float %cvt3 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3 ), align 4
|
|
store float %cvt4 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4 ), align 16
|
|
store float %cvt5 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5 ), align 4
|
|
store float %cvt6 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6 ), align 8
|
|
store float %cvt7 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7 ), align 4
|
|
store float %cvt8 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8 ), align 32
|
|
store float %cvt9 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 9 ), align 4
|
|
store float %cvt10, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 10), align 8
|
|
store float %cvt11, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 11), align 4
|
|
store float %cvt12, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12), align 16
|
|
store float %cvt13, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 13), align 4
|
|
store float %cvt14, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 14), align 8
|
|
store float %cvt15, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 15), align 4
|
|
ret void
|
|
}
|
|
|
|
define void @sitofp_4i16_4f32() #0 {
|
|
; CHECK-LABEL: @sitofp_4i16_4f32(
|
|
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* bitcast ([32 x i16]* @src16 to <4 x i16>*), align 64
|
|
; CHECK-NEXT: [[TMP2:%.*]] = sitofp <4 x i16> [[TMP1]] to <4 x float>
|
|
; CHECK-NEXT: store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
%ld0 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64
|
|
%ld1 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2
|
|
%ld2 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 2), align 4
|
|
%ld3 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 3), align 2
|
|
%cvt0 = sitofp i16 %ld0 to float
|
|
%cvt1 = sitofp i16 %ld1 to float
|
|
%cvt2 = sitofp i16 %ld2 to float
|
|
%cvt3 = sitofp i16 %ld3 to float
|
|
store float %cvt0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
|
|
store float %cvt1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
|
|
store float %cvt2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
|
|
store float %cvt3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
|
|
ret void
|
|
}
|
|
|
|
define void @sitofp_8i16_8f32() #0 {
|
|
; SSE-LABEL: @sitofp_8i16_8f32(
|
|
; SSE-NEXT: [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* bitcast ([32 x i16]* @src16 to <4 x i16>*), align 64
|
|
; SSE-NEXT: [[TMP2:%.*]] = sitofp <4 x i16> [[TMP1]] to <4 x float>
|
|
; SSE-NEXT: store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
|
|
; SSE-NEXT: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 4) to <4 x i16>*), align 8
|
|
; SSE-NEXT: [[TMP4:%.*]] = sitofp <4 x i16> [[TMP3]] to <4 x float>
|
|
; SSE-NEXT: store <4 x float> [[TMP4]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 16
|
|
; SSE-NEXT: ret void
|
|
;
|
|
; AVX-LABEL: @sitofp_8i16_8f32(
|
|
; AVX-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([32 x i16]* @src16 to <8 x i16>*), align 64
|
|
; AVX-NEXT: [[TMP2:%.*]] = sitofp <8 x i16> [[TMP1]] to <8 x float>
|
|
; AVX-NEXT: store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 64
|
|
; AVX-NEXT: ret void
|
|
;
|
|
%ld0 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64
|
|
%ld1 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2
|
|
%ld2 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 2), align 4
|
|
%ld3 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 3), align 2
|
|
%ld4 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 4), align 8
|
|
%ld5 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 5), align 2
|
|
%ld6 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 6), align 4
|
|
%ld7 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 7), align 2
|
|
%cvt0 = sitofp i16 %ld0 to float
|
|
%cvt1 = sitofp i16 %ld1 to float
|
|
%cvt2 = sitofp i16 %ld2 to float
|
|
%cvt3 = sitofp i16 %ld3 to float
|
|
%cvt4 = sitofp i16 %ld4 to float
|
|
%cvt5 = sitofp i16 %ld5 to float
|
|
%cvt6 = sitofp i16 %ld6 to float
|
|
%cvt7 = sitofp i16 %ld7 to float
|
|
store float %cvt0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
|
|
store float %cvt1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
|
|
store float %cvt2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
|
|
store float %cvt3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
|
|
store float %cvt4, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 16
|
|
store float %cvt5, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
|
|
store float %cvt6, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 8
|
|
store float %cvt7, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
|
|
ret void
|
|
}
|
|
|
|
define void @sitofp_16i16_16f32() #0 {
|
|
; SSE-LABEL: @sitofp_16i16_16f32(
|
|
; SSE-NEXT: [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* bitcast ([32 x i16]* @src16 to <4 x i16>*), align 64
|
|
; SSE-NEXT: [[TMP2:%.*]] = sitofp <4 x i16> [[TMP1]] to <4 x float>
|
|
; SSE-NEXT: store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
|
|
; SSE-NEXT: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 4) to <4 x i16>*), align 8
|
|
; SSE-NEXT: [[TMP4:%.*]] = sitofp <4 x i16> [[TMP3]] to <4 x float>
|
|
; SSE-NEXT: store <4 x float> [[TMP4]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 16
|
|
; SSE-NEXT: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 8) to <4 x i16>*), align 16
|
|
; SSE-NEXT: [[TMP6:%.*]] = sitofp <4 x i16> [[TMP5]] to <4 x float>
|
|
; SSE-NEXT: store <4 x float> [[TMP6]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <4 x float>*), align 32
|
|
; SSE-NEXT: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 12) to <4 x i16>*), align 8
|
|
; SSE-NEXT: [[TMP8:%.*]] = sitofp <4 x i16> [[TMP7]] to <4 x float>
|
|
; SSE-NEXT: store <4 x float> [[TMP8]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12) to <4 x float>*), align 16
|
|
; SSE-NEXT: ret void
|
|
;
|
|
; AVX256-LABEL: @sitofp_16i16_16f32(
|
|
; AVX256-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([32 x i16]* @src16 to <8 x i16>*), align 64
|
|
; AVX256-NEXT: [[TMP2:%.*]] = sitofp <8 x i16> [[TMP1]] to <8 x float>
|
|
; AVX256-NEXT: store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 64
|
|
; AVX256-NEXT: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 8) to <8 x i16>*), align 16
|
|
; AVX256-NEXT: [[TMP4:%.*]] = sitofp <8 x i16> [[TMP3]] to <8 x float>
|
|
; AVX256-NEXT: store <8 x float> [[TMP4]], <8 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <8 x float>*), align 32
|
|
; AVX256-NEXT: ret void
|
|
;
|
|
; AVX512-LABEL: @sitofp_16i16_16f32(
|
|
; AVX512-NEXT: [[TMP1:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([32 x i16]* @src16 to <16 x i16>*), align 64
|
|
; AVX512-NEXT: [[TMP2:%.*]] = sitofp <16 x i16> [[TMP1]] to <16 x float>
|
|
; AVX512-NEXT: store <16 x float> [[TMP2]], <16 x float>* bitcast ([16 x float]* @dst32 to <16 x float>*), align 64
|
|
; AVX512-NEXT: ret void
|
|
;
|
|
%ld0 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0 ), align 64
|
|
%ld1 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1 ), align 2
|
|
%ld2 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 2 ), align 4
|
|
%ld3 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 3 ), align 2
|
|
%ld4 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 4 ), align 8
|
|
%ld5 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 5 ), align 2
|
|
%ld6 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 6 ), align 4
|
|
%ld7 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 7 ), align 2
|
|
%ld8 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 8 ), align 16
|
|
%ld9 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 9 ), align 2
|
|
%ld10 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 10), align 4
|
|
%ld11 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 11), align 2
|
|
%ld12 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 12), align 8
|
|
%ld13 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 13), align 2
|
|
%ld14 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 14), align 4
|
|
%ld15 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 15), align 2
|
|
%cvt0 = sitofp i16 %ld0 to float
|
|
%cvt1 = sitofp i16 %ld1 to float
|
|
%cvt2 = sitofp i16 %ld2 to float
|
|
%cvt3 = sitofp i16 %ld3 to float
|
|
%cvt4 = sitofp i16 %ld4 to float
|
|
%cvt5 = sitofp i16 %ld5 to float
|
|
%cvt6 = sitofp i16 %ld6 to float
|
|
%cvt7 = sitofp i16 %ld7 to float
|
|
%cvt8 = sitofp i16 %ld8 to float
|
|
%cvt9 = sitofp i16 %ld9 to float
|
|
%cvt10 = sitofp i16 %ld10 to float
|
|
%cvt11 = sitofp i16 %ld11 to float
|
|
%cvt12 = sitofp i16 %ld12 to float
|
|
%cvt13 = sitofp i16 %ld13 to float
|
|
%cvt14 = sitofp i16 %ld14 to float
|
|
%cvt15 = sitofp i16 %ld15 to float
|
|
store float %cvt0 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0 ), align 64
|
|
store float %cvt1 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1 ), align 4
|
|
store float %cvt2 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2 ), align 8
|
|
store float %cvt3 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3 ), align 4
|
|
store float %cvt4 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4 ), align 16
|
|
store float %cvt5 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5 ), align 4
|
|
store float %cvt6 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6 ), align 8
|
|
store float %cvt7 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7 ), align 4
|
|
store float %cvt8 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8 ), align 32
|
|
store float %cvt9 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 9 ), align 4
|
|
store float %cvt10, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 10), align 8
|
|
store float %cvt11, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 11), align 4
|
|
store float %cvt12, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12), align 16
|
|
store float %cvt13, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 13), align 4
|
|
store float %cvt14, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 14), align 8
|
|
store float %cvt15, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 15), align 4
|
|
ret void
|
|
}
|
|
|
|
define void @sitofp_4i8_4f32() #0 {
|
|
; CHECK-LABEL: @sitofp_4i8_4f32(
|
|
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, <4 x i8>* bitcast ([64 x i8]* @src8 to <4 x i8>*), align 64
|
|
; CHECK-NEXT: [[TMP2:%.*]] = sitofp <4 x i8> [[TMP1]] to <4 x float>
|
|
; CHECK-NEXT: store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
%ld0 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 0), align 64
|
|
%ld1 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 1), align 1
|
|
%ld2 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 2), align 2
|
|
%ld3 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 3), align 1
|
|
%cvt0 = sitofp i8 %ld0 to float
|
|
%cvt1 = sitofp i8 %ld1 to float
|
|
%cvt2 = sitofp i8 %ld2 to float
|
|
%cvt3 = sitofp i8 %ld3 to float
|
|
store float %cvt0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
|
|
store float %cvt1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
|
|
store float %cvt2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
|
|
store float %cvt3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
|
|
ret void
|
|
}
|
|
|
|
define void @sitofp_8i8_8f32() #0 {
|
|
; SSE-LABEL: @sitofp_8i8_8f32(
|
|
; SSE-NEXT: [[TMP1:%.*]] = load <4 x i8>, <4 x i8>* bitcast ([64 x i8]* @src8 to <4 x i8>*), align 64
|
|
; SSE-NEXT: [[TMP2:%.*]] = sitofp <4 x i8> [[TMP1]] to <4 x float>
|
|
; SSE-NEXT: store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
|
|
; SSE-NEXT: [[TMP3:%.*]] = load <4 x i8>, <4 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 4) to <4 x i8>*), align 4
|
|
; SSE-NEXT: [[TMP4:%.*]] = sitofp <4 x i8> [[TMP3]] to <4 x float>
|
|
; SSE-NEXT: store <4 x float> [[TMP4]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 16
|
|
; SSE-NEXT: ret void
|
|
;
|
|
; AVX-LABEL: @sitofp_8i8_8f32(
|
|
; AVX-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* bitcast ([64 x i8]* @src8 to <8 x i8>*), align 64
|
|
; AVX-NEXT: [[TMP2:%.*]] = sitofp <8 x i8> [[TMP1]] to <8 x float>
|
|
; AVX-NEXT: store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 64
|
|
; AVX-NEXT: ret void
|
|
;
|
|
%ld0 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 0), align 64
|
|
%ld1 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 1), align 1
|
|
%ld2 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 2), align 2
|
|
%ld3 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 3), align 1
|
|
%ld4 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 4), align 4
|
|
%ld5 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 5), align 1
|
|
%ld6 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 6), align 2
|
|
%ld7 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 7), align 1
|
|
%cvt0 = sitofp i8 %ld0 to float
|
|
%cvt1 = sitofp i8 %ld1 to float
|
|
%cvt2 = sitofp i8 %ld2 to float
|
|
%cvt3 = sitofp i8 %ld3 to float
|
|
%cvt4 = sitofp i8 %ld4 to float
|
|
%cvt5 = sitofp i8 %ld5 to float
|
|
%cvt6 = sitofp i8 %ld6 to float
|
|
%cvt7 = sitofp i8 %ld7 to float
|
|
store float %cvt0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
|
|
store float %cvt1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
|
|
store float %cvt2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
|
|
store float %cvt3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
|
|
store float %cvt4, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 16
|
|
store float %cvt5, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
|
|
store float %cvt6, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 8
|
|
store float %cvt7, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
|
|
ret void
|
|
}
|
|
|
|
define void @sitofp_16i8_16f32() #0 {
|
|
; SSE-LABEL: @sitofp_16i8_16f32(
|
|
; SSE-NEXT: [[TMP1:%.*]] = load <4 x i8>, <4 x i8>* bitcast ([64 x i8]* @src8 to <4 x i8>*), align 64
|
|
; SSE-NEXT: [[TMP2:%.*]] = sitofp <4 x i8> [[TMP1]] to <4 x float>
|
|
; SSE-NEXT: store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
|
|
; SSE-NEXT: [[TMP3:%.*]] = load <4 x i8>, <4 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 4) to <4 x i8>*), align 4
|
|
; SSE-NEXT: [[TMP4:%.*]] = sitofp <4 x i8> [[TMP3]] to <4 x float>
|
|
; SSE-NEXT: store <4 x float> [[TMP4]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 16
|
|
; SSE-NEXT: [[TMP5:%.*]] = load <4 x i8>, <4 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 8) to <4 x i8>*), align 8
|
|
; SSE-NEXT: [[TMP6:%.*]] = sitofp <4 x i8> [[TMP5]] to <4 x float>
|
|
; SSE-NEXT: store <4 x float> [[TMP6]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <4 x float>*), align 32
|
|
; SSE-NEXT: [[TMP7:%.*]] = load <4 x i8>, <4 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 12) to <4 x i8>*), align 4
|
|
; SSE-NEXT: [[TMP8:%.*]] = sitofp <4 x i8> [[TMP7]] to <4 x float>
|
|
; SSE-NEXT: store <4 x float> [[TMP8]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12) to <4 x float>*), align 16
|
|
; SSE-NEXT: ret void
|
|
;
|
|
; AVX256-LABEL: @sitofp_16i8_16f32(
|
|
; AVX256-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* bitcast ([64 x i8]* @src8 to <8 x i8>*), align 64
|
|
; AVX256-NEXT: [[TMP2:%.*]] = sitofp <8 x i8> [[TMP1]] to <8 x float>
|
|
; AVX256-NEXT: store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 64
|
|
; AVX256-NEXT: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 8) to <8 x i8>*), align 8
|
|
; AVX256-NEXT: [[TMP4:%.*]] = sitofp <8 x i8> [[TMP3]] to <8 x float>
|
|
; AVX256-NEXT: store <8 x float> [[TMP4]], <8 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <8 x float>*), align 32
|
|
; AVX256-NEXT: ret void
|
|
;
|
|
; AVX512-LABEL: @sitofp_16i8_16f32(
|
|
; AVX512-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* bitcast ([64 x i8]* @src8 to <16 x i8>*), align 64
|
|
; AVX512-NEXT: [[TMP2:%.*]] = sitofp <16 x i8> [[TMP1]] to <16 x float>
|
|
; AVX512-NEXT: store <16 x float> [[TMP2]], <16 x float>* bitcast ([16 x float]* @dst32 to <16 x float>*), align 64
|
|
; AVX512-NEXT: ret void
|
|
;
|
|
%ld0 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 0 ), align 64
|
|
%ld1 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 1 ), align 1
|
|
%ld2 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 2 ), align 2
|
|
%ld3 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 3 ), align 1
|
|
%ld4 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 4 ), align 4
|
|
%ld5 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 5 ), align 1
|
|
%ld6 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 6 ), align 2
|
|
%ld7 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 7 ), align 1
|
|
%ld8 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 8 ), align 8
|
|
%ld9 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 9 ), align 1
|
|
%ld10 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 10), align 2
|
|
%ld11 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 11), align 1
|
|
%ld12 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 12), align 4
|
|
%ld13 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 13), align 1
|
|
%ld14 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 14), align 2
|
|
%ld15 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 15), align 1
|
|
%cvt0 = sitofp i8 %ld0 to float
|
|
%cvt1 = sitofp i8 %ld1 to float
|
|
%cvt2 = sitofp i8 %ld2 to float
|
|
%cvt3 = sitofp i8 %ld3 to float
|
|
%cvt4 = sitofp i8 %ld4 to float
|
|
%cvt5 = sitofp i8 %ld5 to float
|
|
%cvt6 = sitofp i8 %ld6 to float
|
|
%cvt7 = sitofp i8 %ld7 to float
|
|
%cvt8 = sitofp i8 %ld8 to float
|
|
%cvt9 = sitofp i8 %ld9 to float
|
|
%cvt10 = sitofp i8 %ld10 to float
|
|
%cvt11 = sitofp i8 %ld11 to float
|
|
%cvt12 = sitofp i8 %ld12 to float
|
|
%cvt13 = sitofp i8 %ld13 to float
|
|
%cvt14 = sitofp i8 %ld14 to float
|
|
%cvt15 = sitofp i8 %ld15 to float
|
|
store float %cvt0 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0 ), align 64
|
|
store float %cvt1 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1 ), align 4
|
|
store float %cvt2 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2 ), align 8
|
|
store float %cvt3 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3 ), align 4
|
|
store float %cvt4 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4 ), align 16
|
|
store float %cvt5 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5 ), align 4
|
|
store float %cvt6 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6 ), align 8
|
|
store float %cvt7 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7 ), align 4
|
|
store float %cvt8 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8 ), align 32
|
|
store float %cvt9 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 9 ), align 4
|
|
store float %cvt10, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 10), align 8
|
|
store float %cvt11, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 11), align 4
|
|
store float %cvt12, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12), align 16
|
|
store float %cvt13, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 13), align 4
|
|
store float %cvt14, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 14), align 8
|
|
store float %cvt15, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 15), align 4
|
|
ret void
|
|
}
|
|
|
|
;
|
|
; SITOFP BUILDVECTOR
|
|
;
|
|
|
|
define <4 x double> @sitofp_4xi32_4f64(i32 %a0, i32 %a1, i32 %a2, i32 %a3) #0 {
|
|
; SSE-LABEL: @sitofp_4xi32_4f64(
|
|
; SSE-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[A0:%.*]], i32 0
|
|
; SSE-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[A1:%.*]], i32 1
|
|
; SSE-NEXT: [[TMP3:%.*]] = sitofp <2 x i32> [[TMP2]] to <2 x double>
|
|
; SSE-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> poison, i32 [[A2:%.*]], i32 0
|
|
; SSE-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> [[TMP4]], i32 [[A3:%.*]], i32 1
|
|
; SSE-NEXT: [[TMP6:%.*]] = sitofp <2 x i32> [[TMP5]] to <2 x double>
|
|
; SSE-NEXT: [[TMP7:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
|
|
; SSE-NEXT: [[TMP8:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
|
|
; SSE-NEXT: [[RES31:%.*]] = shufflevector <4 x double> [[TMP7]], <4 x double> [[TMP8]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
|
|
; SSE-NEXT: ret <4 x double> [[RES31]]
|
|
;
|
|
; AVX-LABEL: @sitofp_4xi32_4f64(
|
|
; AVX-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[A0:%.*]], i32 0
|
|
; AVX-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[A1:%.*]], i32 1
|
|
; AVX-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[A2:%.*]], i32 2
|
|
; AVX-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[A3:%.*]], i32 3
|
|
; AVX-NEXT: [[TMP5:%.*]] = sitofp <4 x i32> [[TMP4]] to <4 x double>
|
|
; AVX-NEXT: ret <4 x double> [[TMP5]]
|
|
;
|
|
%cvt0 = sitofp i32 %a0 to double
|
|
%cvt1 = sitofp i32 %a1 to double
|
|
%cvt2 = sitofp i32 %a2 to double
|
|
%cvt3 = sitofp i32 %a3 to double
|
|
%res0 = insertelement <4 x double> undef, double %cvt0, i32 0
|
|
%res1 = insertelement <4 x double> %res0, double %cvt1, i32 1
|
|
%res2 = insertelement <4 x double> %res1, double %cvt2, i32 2
|
|
%res3 = insertelement <4 x double> %res2, double %cvt3, i32 3
|
|
ret <4 x double> %res3
|
|
}
|
|
|
|
define <4 x double> @sitofp_with_const_4xi32_4f64(i32 %a2, i32 %a3) #0 {
|
|
; CHECK-LABEL: @sitofp_with_const_4xi32_4f64(
|
|
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[A2:%.*]], i32 0
|
|
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[A3:%.*]], i32 1
|
|
; CHECK-NEXT: [[TMP3:%.*]] = sitofp <2 x i32> [[TMP2]] to <2 x double>
|
|
; CHECK-NEXT: [[RES0:%.*]] = insertelement <4 x double> undef, double 1.000000e+00, i32 3
|
|
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
|
|
; CHECK-NEXT: [[RES31:%.*]] = shufflevector <4 x double> [[RES0]], <4 x double> [[TMP4]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
|
|
; CHECK-NEXT: ret <4 x double> [[RES31]]
|
|
;
|
|
%cvt2 = sitofp i32 %a2 to double
|
|
%cvt3 = sitofp i32 %a3 to double
|
|
%res0 = insertelement <4 x double> undef, double 1.0, i32 3
|
|
%res2 = insertelement <4 x double> %res0, double %cvt2, i32 0
|
|
%res3 = insertelement <4 x double> %res2, double %cvt3, i32 1
|
|
ret <4 x double> %res3
|
|
}
|
|
|
|
define <4 x double> @sitofp_with_undef_4xi32_4f64(i32 %a2, i32 %a3) #0 {
|
|
; CHECK-LABEL: @sitofp_with_undef_4xi32_4f64(
|
|
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[A2:%.*]], i32 0
|
|
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[A3:%.*]], i32 1
|
|
; CHECK-NEXT: [[TMP3:%.*]] = sitofp <2 x i32> [[TMP2]] to <2 x double>
|
|
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
|
|
; CHECK-NEXT: ret <4 x double> [[TMP4]]
|
|
;
|
|
%cvt2 = sitofp i32 %a2 to double
|
|
%cvt3 = sitofp i32 %a3 to double
|
|
%res2 = insertelement <4 x double> undef, double %cvt2, i32 0
|
|
%res3 = insertelement <4 x double> %res2, double %cvt3, i32 1
|
|
ret <4 x double> %res3
|
|
}
|
|
|
|
define <4 x float> @sitofp_4xi32_4f32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) #0 {
|
|
; CHECK-LABEL: @sitofp_4xi32_4f32(
|
|
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[A0:%.*]], i32 0
|
|
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[A1:%.*]], i32 1
|
|
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[A2:%.*]], i32 2
|
|
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[A3:%.*]], i32 3
|
|
; CHECK-NEXT: [[TMP5:%.*]] = sitofp <4 x i32> [[TMP4]] to <4 x float>
|
|
; CHECK-NEXT: ret <4 x float> [[TMP5]]
|
|
;
|
|
%cvt0 = sitofp i32 %a0 to float
|
|
%cvt1 = sitofp i32 %a1 to float
|
|
%cvt2 = sitofp i32 %a2 to float
|
|
%cvt3 = sitofp i32 %a3 to float
|
|
%res0 = insertelement <4 x float> undef, float %cvt0, i32 0
|
|
%res1 = insertelement <4 x float> %res0, float %cvt1, i32 1
|
|
%res2 = insertelement <4 x float> %res1, float %cvt2, i32 2
|
|
%res3 = insertelement <4 x float> %res2, float %cvt3, i32 3
|
|
ret <4 x float> %res3
|
|
}
|
|
|
|
attributes #0 = { nounwind }
|