
This reverts commit cf002847a464c004a57ca4777251b1aafc33d958, i.e., relands ba603b5e4d44f1a25207a2a00196471d2ba93424.

It was reverted because it was subtly wrong: multiplying an uninitialized zero should not result in an initialized zero. This reland fixes the issue by using instrumentation analogous to visitAnd (bitwise AND of an initialized zero and an uninitialized value results in an initialized value). Additionally, this reland expands a test case, fixes the commit message, and optimizes the change to avoid the need for horizontalReduce.

The current instrumentation has false positives: it does not take into account that multiplying an initialized zero value with an uninitialized value results in an initialized zero value. This change fixes the issue during the multiplication step. The horizontal add step is modeled using bitwise OR.

Future work can apply this improved handler to the AVX512 equivalent intrinsics (x86_avx512_pmaddw_d_512, x86_avx512_pmaddubs_w_512) and the AVX VNNI intrinsics.
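For illustration, the shadow computation described above can be sketched against the LLVM IRBuilder API roughly as follows. This is a simplified sketch under stated assumptions, not the actual handler in MemorySanitizer.cpp; the helper name sketchPmaddShadow and the variables Va/Vb (packed operands), Sa/Sb (their shadows), and ResultShadowTy (shadow type of the twice-as-wide result lanes) are placeholders.

  #include "llvm/IR/Constants.h"
  #include "llvm/IR/IRBuilder.h"

  using namespace llvm;

  // Illustrative only (not the real MSan handler): compute the result shadow
  // for a packed multiply-add. The multiply step follows the visitAnd-style
  // rule; the horizontal add step is modeled as a bitwise OR of adjacent lanes.
  static Value *sketchPmaddShadow(IRBuilder<> &IRB, Value *Va, Value *Vb,
                                  Value *Sa, Value *Sb, Type *ResultShadowTy) {
    // Per-lane "is nonzero" masks for the operand values and their shadows.
    Value *SaSet = IRB.CreateICmpNE(Sa, Constant::getNullValue(Sa->getType()));
    Value *SbSet = IRB.CreateICmpNE(Sb, Constant::getNullValue(Sb->getType()));
    Value *VaNZ = IRB.CreateICmpNE(Va, Constant::getNullValue(Va->getType()));
    Value *VbNZ = IRB.CreateICmpNE(Vb, Constant::getNullValue(Vb->getType()));

    // A product lane is poisoned iff both shadows are set, or one shadow is
    // set and the other operand is not an initialized zero:
    //   poisoned = (Sa & Sb) | ((Va != 0) & Sb) | (Sa & (Vb != 0))
    Value *MulPoison = IRB.CreateOr(
        IRB.CreateOr(IRB.CreateAnd(SaSet, SbSet), IRB.CreateAnd(VaNZ, SbSet)),
        IRB.CreateAnd(SaSet, VbNZ));

    // Horizontal add modeled as OR: widen the i1 mask back to the operand
    // element type, reinterpret each adjacent pair as one result lane, and
    // poison the result lane if any bit of the pair is set.
    Value *MulShadow = IRB.CreateSExt(MulPoison, Va->getType());
    Value *Pairs = IRB.CreateBitCast(MulShadow, ResultShadowTy);
    Value *AnyBit =
        IRB.CreateICmpNE(Pairs, Constant::getNullValue(ResultShadowTy));
    return IRB.CreateSExt(AnyBit, ResultShadowTy);
  }

This mirrors the shape of the CHECK lines in the test below (icmp ne on values and shadows, three ANDs, two ORs, sext, bitcast, icmp ne, sext).
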
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt < %s -msan-check-access-address=0 -S -passes=msan 2>&1 | FileCheck %s
; REQUIRES: x86-registered-target

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone
declare <1 x i64> @llvm.x86.ssse3.pmadd.ub.sw(<1 x i64>, <1 x i64>) nounwind readnone
declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone
declare <1 x i64> @llvm.x86.mmx.psad.bw(<1 x i64>, <1 x i64>) nounwind readnone

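; The pmadd.wd shadow is computed per lane following the visitAnd rule (an
; initialized zero operand yields an initialized product); each pair of i16
; shadows is then collapsed (bitcast + icmp ne) into one i32 result lane,
; modeling the horizontal add as a bitwise OR.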
define <4 x i32> @Test_sse2_pmadd_wd(<8 x i16> %a, <8 x i16> %b) sanitize_memory {
; CHECK-LABEL: define <4 x i32> @Test_sse2_pmadd_wd(
; CHECK-SAME: <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]]) #[[ATTR1:[0-9]+]] {
; CHECK-NEXT:  [[ENTRY:.*:]]
; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne <8 x i16> [[TMP0]], zeroinitializer
; CHECK-NEXT:    [[TMP12:%.*]] = icmp ne <8 x i16> [[TMP1]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne <8 x i16> [[A]], zeroinitializer
; CHECK-NEXT:    [[TMP4:%.*]] = icmp ne <8 x i16> [[B]], zeroinitializer
; CHECK-NEXT:    [[TMP6:%.*]] = and <8 x i1> [[TMP2]], [[TMP12]]
; CHECK-NEXT:    [[TMP13:%.*]] = and <8 x i1> [[TMP3]], [[TMP12]]
; CHECK-NEXT:    [[TMP14:%.*]] = and <8 x i1> [[TMP2]], [[TMP4]]
; CHECK-NEXT:    [[TMP15:%.*]] = or <8 x i1> [[TMP6]], [[TMP13]]
; CHECK-NEXT:    [[TMP10:%.*]] = or <8 x i1> [[TMP15]], [[TMP14]]
; CHECK-NEXT:    [[TMP11:%.*]] = sext <8 x i1> [[TMP10]] to <8 x i16>
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <8 x i16> [[TMP11]] to <4 x i32>
; CHECK-NEXT:    [[TMP17:%.*]] = icmp ne <4 x i32> [[TMP16]], zeroinitializer
; CHECK-NEXT:    [[TMP5:%.*]] = sext <4 x i1> [[TMP17]] to <4 x i32>
; CHECK-NEXT:    [[C:%.*]] = tail call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> [[A]], <8 x i16> [[B]]) #[[ATTR2:[0-9]+]]
; CHECK-NEXT:    store <4 x i32> [[TMP5]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x i32> [[C]]
;
entry:
  %c = tail call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a, <8 x i16> %b) nounwind
  ret <4 x i32> %c
}

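; Same pattern as above for pmadd.ub.sw, with the <1 x i64> MMX operands first
; bitcast to <8 x i8> lanes and the result shadow rebuilt as <1 x i64>.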
define <1 x i64> @Test_ssse3_pmadd_ub_sw(<1 x i64> %a, <1 x i64> %b) sanitize_memory {
; CHECK-LABEL: define <1 x i64> @Test_ssse3_pmadd_ub_sw(
; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  [[ENTRY:.*:]]
; CHECK-NEXT:    [[TMP0:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP1:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <1 x i64> [[TMP0]] to <8 x i8>
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <1 x i64> [[TMP1]] to <8 x i8>
; CHECK-NEXT:    [[TMP14:%.*]] = icmp ne <8 x i8> [[TMP4]], zeroinitializer
; CHECK-NEXT:    [[TMP15:%.*]] = icmp ne <8 x i8> [[TMP5]], zeroinitializer
; CHECK-NEXT:    [[TMP17:%.*]] = icmp ne <8 x i8> [[TMP2]], zeroinitializer
; CHECK-NEXT:    [[TMP21:%.*]] = icmp ne <8 x i8> [[TMP3]], zeroinitializer
; CHECK-NEXT:    [[TMP16:%.*]] = and <8 x i1> [[TMP14]], [[TMP15]]
; CHECK-NEXT:    [[TMP11:%.*]] = and <8 x i1> [[TMP17]], [[TMP15]]
; CHECK-NEXT:    [[TMP12:%.*]] = and <8 x i1> [[TMP14]], [[TMP21]]
; CHECK-NEXT:    [[TMP13:%.*]] = or <8 x i1> [[TMP16]], [[TMP11]]
; CHECK-NEXT:    [[TMP22:%.*]] = or <8 x i1> [[TMP13]], [[TMP12]]
; CHECK-NEXT:    [[TMP7:%.*]] = sext <8 x i1> [[TMP22]] to <8 x i8>
; CHECK-NEXT:    [[TMP18:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
; CHECK-NEXT:    [[TMP24:%.*]] = icmp ne <4 x i16> [[TMP18]], zeroinitializer
; CHECK-NEXT:    [[TMP23:%.*]] = sext <4 x i1> [[TMP24]] to <4 x i16>
; CHECK-NEXT:    [[TMP19:%.*]] = bitcast <4 x i16> [[TMP23]] to i64
; CHECK-NEXT:    [[TMP20:%.*]] = bitcast i64 [[TMP19]] to <1 x i64>
; CHECK-NEXT:    [[C:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.pmadd.ub.sw(<1 x i64> [[A]], <1 x i64> [[B]]) #[[ATTR2]]
; CHECK-NEXT:    store <1 x i64> [[TMP20]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <1 x i64> [[C]]
;
entry:
  %c = tail call <1 x i64> @llvm.x86.ssse3.pmadd.ub.sw(<1 x i64> %a, <1 x i64> %b) nounwind
  ret <1 x i64> %c
}

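; psad.bw: the operand shadows are OR-ed together, and any uninitialized bit
; poisons only the low 16 bits of each 64-bit result lane (hence the lshr by 48).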
define <2 x i64> @Test_x86_sse2_psad_bw(<16 x i8> %a, <16 x i8> %b) sanitize_memory {
; CHECK-LABEL: define <2 x i64> @Test_x86_sse2_psad_bw(
; CHECK-SAME: <16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = or <16 x i8> [[TMP1]], [[TMP2]]
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x i64>
; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne <2 x i64> [[TMP4]], zeroinitializer
; CHECK-NEXT:    [[TMP6:%.*]] = sext <2 x i1> [[TMP5]] to <2 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = lshr <2 x i64> [[TMP6]], splat (i64 48)
; CHECK-NEXT:    [[C:%.*]] = tail call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> [[A]], <16 x i8> [[B]])
; CHECK-NEXT:    store <2 x i64> [[TMP7]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <2 x i64> [[C]]
;
  %c = tail call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %a, <16 x i8> %b)
  ret <2 x i64> %c
}

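; MMX variant of psad.bw: the same shadow computation on a single 64-bit lane.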
define <1 x i64> @Test_x86_mmx_psad_bw(<1 x i64> %a, <1 x i64> %b) sanitize_memory {
; CHECK-LABEL: define <1 x i64> @Test_x86_mmx_psad_bw(
; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  [[ENTRY:.*:]]
; CHECK-NEXT:    [[TMP0:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP1:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP2:%.*]] = or <1 x i64> [[TMP0]], [[TMP1]]
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <1 x i64> [[TMP2]] to i64
; CHECK-NEXT:    [[TMP4:%.*]] = icmp ne i64 [[TMP3]], 0
; CHECK-NEXT:    [[TMP5:%.*]] = sext i1 [[TMP4]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = lshr i64 [[TMP5]], 48
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i64 [[TMP6]] to <1 x i64>
; CHECK-NEXT:    [[C:%.*]] = tail call <1 x i64> @llvm.x86.mmx.psad.bw(<1 x i64> [[A]], <1 x i64> [[B]]) #[[ATTR2]]
; CHECK-NEXT:    store <1 x i64> [[TMP7]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <1 x i64> [[C]]
;
entry:
  %c = tail call <1 x i64> @llvm.x86.mmx.psad.bw(<1 x i64> %a, <1 x i64> %b) nounwind
  ret <1 x i64> %c
}