; llvm-project/llvm/test/CodeGen/NVPTX/read-global-variable-constant.ll
; Alex MacLean a9de1ab44d
; [NVPTX] Disable v2f32 registers when no operations supported, or via cl::opt (#154476)
; The addition of v2f32 as a legal type, supported by the B64 register
; class, has caused performance regressions, broken inline assembly, and
; resulted in a couple (now fixed) mis-compilations. In order to mitigate
; these issues, only mark this as a legal type when there exist operations
; that support it, since for targets where this is not the case it serves
; no purpose. To enable further debugging, add an option to disable v2f32.
;
; In order to allow for a target-dependent set of legal types,
; ComputePTXValueVTs has been fully re-written to take advantage of
; TargetLowering call-lowering APIs.
; 2025-08-21 10:30:36 -07:00
;
; 50 lines
; 1.5 KiB
; LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_35 -verify-machineinstrs | FileCheck %s
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_35 | %ptxas-verify %}

; Check loads from constant global variables. Each load in this file is
; expected to be emitted as ld.global.nc (aka ldg): once as a scalar, once as
; a v2 vector load, and once as a v4 vector load.

; Constant globals declared here but defined externally; one per tested type
; (float, <2 x float>, <4 x float>).
@gv_float = external constant float
@gv_float2 = external constant <2 x float>
@gv_float4 = external constant <4 x float>
; Scalar case: loads a float from the constant global @gv_float and returns
; it. The assertions below expect a single ld.global.nc.b32 into one .b32
; register, followed by a st.param.b32 of that register to func_retval0.
define float @test_gv_float() {
; CHECK-LABEL: test_gv_float(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.global.nc.b32 %r1, [gv_float];
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
; CHECK-NEXT: ret;
%v = load float, ptr @gv_float
ret float %v
}
; Two-element case: loads a <2 x float> from the constant global @gv_float2
; and returns it. The assertions below expect one ld.global.nc.v2.b32 that
; fills two .b32 registers (only .b32 registers are declared), followed by a
; st.param.v2.b32 of the same register pair to func_retval0.
define <2 x float> @test_gv_float2() {
; CHECK-LABEL: test_gv_float2(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.global.nc.v2.b32 {%r1, %r2}, [gv_float2];
; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r1, %r2};
; CHECK-NEXT: ret;
%v = load <2 x float>, ptr @gv_float2
ret <2 x float> %v
}
; Four-element case: loads a <4 x float> from the constant global @gv_float4
; and returns it. The assertions below expect one ld.global.nc.v4.b32 that
; fills four .b32 registers, followed by a st.param.v4.b32 of the same four
; registers to func_retval0.
define <4 x float> @test_gv_float4() {
; CHECK-LABEL: test_gv_float4(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<5>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.global.nc.v4.b32 {%r1, %r2, %r3, %r4}, [gv_float4];
; CHECK-NEXT: st.param.v4.b32 [func_retval0], {%r1, %r2, %r3, %r4};
; CHECK-NEXT: ret;
%v = load <4 x float>, ptr @gv_float4
ret <4 x float> %v
}