; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=nvptx | FileCheck %s
; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -verify-machineinstrs | %ptxas-verify %}

; Exercises how an explicit alignstack(8) on a by-value { float, float }
; aggregate is reflected in the PTX emitted for function prototypes, call
; sequences, parameter loads and return-value stores.

; The llc invocations above pass -mtriple=nvptx on the command line, which
; overrides the module-level triple declared here.
target triple = "nvptx64-nvidia-cuda"

; Two 32-bit floats, 8 bytes in total.
%struct.float2 = type { float, float }

; Prototypes that are expected ahead of the function bodies: each callee
; declares its aggregate parameter as an 8-byte array with 8-byte alignment.
; CHECK-LABEL: .visible .func (.param .b32 func_retval0) callee_md
; CHECK-NEXT: (
; CHECK-NEXT: .param .align 8 .b8 callee_md_param_0[8]
; CHECK-NEXT: )
; CHECK-NEXT: ;

; CHECK-LABEL: .visible .func (.param .b32 func_retval0) callee
; CHECK-NEXT: (
; CHECK-NEXT: .param .align 8 .b8 callee_param_0[8]
; CHECK-NEXT: )
; CHECK-NEXT: ;

; Packs %a and %b into a %struct.float2 and passes it by value to @callee_md.
; The expected call sequence declares the outgoing parameter with .align 8 and
; fills both fields with a single st.param.v2.b32.
define float @caller_md(float %a, float %b) {
; CHECK-LABEL: caller_md(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<4>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b32 %r1, [caller_md_param_0];
; CHECK-NEXT:    ld.param.b32 %r2, [caller_md_param_1];
; CHECK-NEXT:    { // callseq 0, 0
; CHECK-NEXT:    .param .align 8 .b8 param0[8];
; CHECK-NEXT:    .param .b32 retval0;
; CHECK-NEXT:    st.param.v2.b32 [param0], {%r1, %r2};
; CHECK-NEXT:    call.uni (retval0), callee_md, (param0);
; CHECK-NEXT:    ld.param.b32 %r3, [retval0];
; CHECK-NEXT:    } // callseq 0
; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
; CHECK-NEXT:    ret;
  %s1 = insertvalue %struct.float2 poison, float %a, 0
  %s2 = insertvalue %struct.float2 %s1, float %b, 1
  %r = call float @callee_md(%struct.float2 %s2)
  ret float %r
}

; Receives the aggregate with alignstack(8) and returns the sum of its two
; fields. Both fields are expected to arrive through one ld.param.v2.b32.
define float @callee_md(%struct.float2 alignstack(8) %a) {
; CHECK-LABEL: callee_md(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<4>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.v2.b32 {%r1, %r2}, [callee_md_param_0];
; CHECK-NEXT:    add.rn.f32 %r3, %r1, %r2;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
; CHECK-NEXT:    ret;
  %v0 = extractvalue %struct.float2 %a, 0
  %v1 = extractvalue %struct.float2 %a, 1
  %2 = fadd float %v0, %v1
  ret float %2
}

; Same shape as @caller_md, but targeting @callee; the expected PTX differs
; only in the symbol names and the call-sequence number.
define float @caller(float %a, float %b) {
; CHECK-LABEL: caller(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<4>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b32 %r1, [caller_param_0];
; CHECK-NEXT:    ld.param.b32 %r2, [caller_param_1];
; CHECK-NEXT:    { // callseq 1, 0
; CHECK-NEXT:    .param .align 8 .b8 param0[8];
; CHECK-NEXT:    .param .b32 retval0;
; CHECK-NEXT:    st.param.v2.b32 [param0], {%r1, %r2};
; CHECK-NEXT:    call.uni (retval0), callee, (param0);
; CHECK-NEXT:    ld.param.b32 %r3, [retval0];
; CHECK-NEXT:    } // callseq 1
; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
; CHECK-NEXT:    ret;
  %s1 = insertvalue %struct.float2 poison, float %a, 0
  %s2 = insertvalue %struct.float2 %s1, float %b, 1
  %r = call float @callee(%struct.float2 %s2)
  ret float %r
}

; Same shape as @callee_md: alignstack(8) aggregate in, sum of the two fields
; out, with a single vectorized parameter load expected.
define float @callee(%struct.float2 alignstack(8) %a ) {
; CHECK-LABEL: callee(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<4>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.v2.b32 {%r1, %r2}, [callee_param_0];
; CHECK-NEXT:    add.rn.f32 %r3, %r1, %r2;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
; CHECK-NEXT:    ret;
  %v0 = extractvalue %struct.float2 %a, 0
  %v1 = extractvalue %struct.float2 %a, 1
  %2 = fadd float %v0, %v1
  ret float %2
}

; Returns its aggregate argument unchanged. Here alignstack(8) is on the
; return value only, not on the parameter; the expected PTX reads the two
; fields with scalar ld.param.b32 at offsets 0 and 4 and writes them back with
; scalar st.param.b32 to func_retval0+4 and func_retval0.
define alignstack(8) %struct.float2 @aligned_return(%struct.float2 %a ) {
; CHECK-LABEL: aligned_return(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<3>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b32 %r1, [aligned_return_param_0];
; CHECK-NEXT:    ld.param.b32 %r2, [aligned_return_param_0+4];
; CHECK-NEXT:    st.param.b32 [func_retval0+4], %r2;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
; CHECK-NEXT:    ret;
  ret %struct.float2 %a
}