
The addition of v2f32 as a legal type, supported by the B64 register class, has caused performance regressions, broken inline assembly, and resulted in a couple of (now fixed) miscompilations. In order to mitigate these issues, only mark this as a legal type when there exist operations that support it, since for targets where this is not the case, it serves no purpose. To enable further debugging, add an option to disable v2f32. In order to allow for a target-dependent set of legal types, ComputePTXValueVTs has been fully rewritten to take advantage of TargetLowering call-lowering APIs.
50 lines
1.5 KiB
LLVM
50 lines
1.5 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
|
|
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_35 -verify-machineinstrs | FileCheck %s
|
|
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_35 | %ptxas-verify %}
|
|
|
|
; Check loads from constant global variables. These loads should be lowered to
; ld.global.nc (aka ldg).
|
|
|
|
; External constant globals loaded by the test functions in this file:
; a scalar float, a 2-element float vector, and a 4-element float vector.
@gv_float = external constant float
@gv_float2 = external constant <2 x float>
@gv_float4 = external constant <4 x float>
|
|
|
|
; Scalar case: loads a float from the constant global @gv_float and returns it.
; The checks expect a single ld.global.nc.b32 into one 32-bit register, which
; is then stored to the return-value parameter.
define float @test_gv_float() {
; CHECK-LABEL: test_gv_float(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.global.nc.b32 %r1, [gv_float];
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
; CHECK-NEXT: ret;
  %v = load float, ptr @gv_float
  ret float %v
}
|
|
|
|
; Two-element case: loads a <2 x float> from the constant global @gv_float2
; and returns it. The checks expect one vectorized ld.global.nc.v2.b32 into
; two 32-bit registers, returned with a single st.param.v2.b32.
define <2 x float> @test_gv_float2() {
; CHECK-LABEL: test_gv_float2(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.global.nc.v2.b32 {%r1, %r2}, [gv_float2];
; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r1, %r2};
; CHECK-NEXT: ret;
  %v = load <2 x float>, ptr @gv_float2
  ret <2 x float> %v
}
|
|
|
|
; Four-element case: loads a <4 x float> from the constant global @gv_float4
; and returns it. The checks expect one vectorized ld.global.nc.v4.b32 into
; four 32-bit registers, returned with a single st.param.v4.b32.
define <4 x float> @test_gv_float4() {
; CHECK-LABEL: test_gv_float4(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<5>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.global.nc.v4.b32 {%r1, %r2, %r3, %r4}, [gv_float4];
; CHECK-NEXT: st.param.v4.b32 [func_retval0], {%r1, %r2, %r3, %r4};
; CHECK-NEXT: ret;
  %v = load <4 x float>, ptr @gv_float4
  ret <4 x float> %v
}
|