; File: llvm-project/llvm/test/CodeGen/NVPTX/unaligned-param-load-store.ll (LLVM IR; listed as 399 lines, 17 KiB)

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; Verifies correctness of load/store of parameters and return values.
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_35 -verify-machineinstrs | FileCheck -allow-deprecated-dag-overlap %s
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_35 -verify-machineinstrs | %ptxas-verify %}
; Struct types used as the parameter and return type of the tests below.
; Each one is { <{ T, i8, T }>, i64 }: a packed (padding-free) inner struct
; followed by an i64. Because the inner struct is packed, its second T field
; starts at byte offset sizeof(T) + 1, which is not a multiple of T's size
; for any of the element types used here.
%s_i8i16p = type { <{ i16, i8, i16 }>, i64 }
%s_i8i32p = type { <{ i32, i8, i32 }>, i64 }
%s_i8i64p = type { <{ i64, i8, i64 }>, i64 }
%s_i8f16p = type { <{ half, i8, half }>, i64 }
%s_i8f16x2p = type { <{ <2 x half>, i8, <2 x half> }>, i64 }
%s_i8f32p = type { <{ float, i8, float }>, i64 }
%s_i8f64p = type { <{ double, i8, double }>, i64 }
; -- All loads/stores of parameter data that is only aligned to one byte
;    must be done one byte at a time.
; -- Notes:
;   -- There are two fields of interest in the packed part of each struct: one
;      at a properly aligned offset and one at a misaligned offset. The former
;      should be loaded or stored as a whole, and the latter byte by byte.
;   -- Only the loads and stores of those fields are of interest in the
;      following series of tests, which keeps the tests concise.
; test_s_i8i16p: forwards %a to a call of itself and returns the call's result.
; %s_i8i16p is { <{ i16, i8, i16 }>, i64 }; in the packed part the second i16
; sits at byte offset 3, so it is not 2-byte aligned.
; Expected PTX for the call result: the i16 at offset 0 is read with one
; ld.param.b16 and stored with one st.param.b16, while the i16 at offset 3 is
; read with two ld.param.b8 (retval0+3, retval0+4) and written back with two
; st.param.b8 (func_retval0+3, func_retval0+4).
define %s_i8i16p @test_s_i8i16p(%s_i8i16p %a) {
; CHECK-LABEL: test_s_i8i16p(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<13>;
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [test_s_i8i16p_param_0];
; CHECK-NEXT: ld.param.b64 %rd1, [test_s_i8i16p_param_0+8];
; CHECK-NEXT: ld.param.b8 %rs1, [test_s_i8i16p_param_0+4];
; CHECK-NEXT: { // callseq 0, 0
; CHECK-NEXT: .param .align 8 .b8 param0[16];
; CHECK-NEXT: .param .align 8 .b8 retval0[16];
; CHECK-NEXT: st.param.b8 [param0+4], %rs1;
; CHECK-NEXT: st.param.b64 [param0+8], %rd1;
; CHECK-NEXT: st.param.b32 [param0], %r1;
; CHECK-NEXT: call.uni (retval0), test_s_i8i16p, (param0);
; CHECK-NEXT: ld.param.b64 %rd2, [retval0+8];
; CHECK-NEXT: ld.param.b8 %rs2, [retval0+2];
; CHECK-NEXT: ld.param.b16 %rs3, [retval0];
; CHECK-NEXT: ld.param.b8 %rs4, [retval0+4];
; CHECK-NEXT: ld.param.b8 %rs5, [retval0+3];
; CHECK-NEXT: } // callseq 0
; CHECK-NEXT: shl.b16 %rs8, %rs4, 8;
; CHECK-NEXT: or.b16 %rs9, %rs8, %rs5;
; CHECK-NEXT: st.param.b8 [func_retval0+3], %rs5;
; CHECK-NEXT: st.param.b64 [func_retval0+8], %rd2;
; CHECK-NEXT: st.param.b8 [func_retval0+2], %rs2;
; CHECK-NEXT: st.param.b16 [func_retval0], %rs3;
; CHECK-NEXT: shr.u16 %rs12, %rs9, 8;
; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs12;
; CHECK-NEXT: ret;
; Pass the incoming struct to this same function and return what comes back.
%r = tail call %s_i8i16p @test_s_i8i16p(%s_i8i16p %a)
ret %s_i8i16p %r
}
; test_s_i8i32p: forwards %a to a call of itself and returns the call's result.
; %s_i8i32p is { <{ i32, i8, i32 }>, i64 }; in the packed part the second i32
; sits at byte offset 5, so it is not 4-byte aligned.
; Expected PTX for the call result: the i32 at offset 0 is read with one
; ld.param.b32 and stored with one st.param.b32, while the i32 at offset 5 is
; read with four ld.param.b8 (retval0+5 .. retval0+8) and written back with
; four st.param.b8 (func_retval0+5 .. func_retval0+8).
define %s_i8i32p @test_s_i8i32p(%s_i8i32p %a) {
; CHECK-LABEL: test_s_i8i32p(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<4>;
; CHECK-NEXT: .reg .b32 %r<24>;
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [test_s_i8i32p_param_0];
; CHECK-NEXT: ld.param.b16 %rs1, [test_s_i8i32p_param_0+4];
; CHECK-NEXT: ld.param.b64 %rd1, [test_s_i8i32p_param_0+16];
; CHECK-NEXT: ld.param.b8 %r2, [test_s_i8i32p_param_0+6];
; CHECK-NEXT: ld.param.b8 %r3, [test_s_i8i32p_param_0+7];
; CHECK-NEXT: ld.param.b8 %r4, [test_s_i8i32p_param_0+8];
; CHECK-NEXT: { // callseq 1, 0
; CHECK-NEXT: .param .align 8 .b8 param0[24];
; CHECK-NEXT: .param .align 8 .b8 retval0[24];
; CHECK-NEXT: st.param.b8 [param0+8], %r4;
; CHECK-NEXT: st.param.b8 [param0+7], %r3;
; CHECK-NEXT: st.param.b8 [param0+6], %r2;
; CHECK-NEXT: st.param.b64 [param0+16], %rd1;
; CHECK-NEXT: st.param.b16 [param0+4], %rs1;
; CHECK-NEXT: st.param.b32 [param0], %r1;
; CHECK-NEXT: call.uni (retval0), test_s_i8i32p, (param0);
; CHECK-NEXT: ld.param.b64 %rd2, [retval0+16];
; CHECK-NEXT: ld.param.b8 %rs2, [retval0+4];
; CHECK-NEXT: ld.param.b32 %r5, [retval0];
; CHECK-NEXT: ld.param.b8 %r6, [retval0+8];
; CHECK-NEXT: ld.param.b8 %r7, [retval0+7];
; CHECK-NEXT: ld.param.b8 %r8, [retval0+6];
; CHECK-NEXT: ld.param.b8 %r9, [retval0+5];
; CHECK-NEXT: } // callseq 1
; CHECK-NEXT: shl.b32 %r12, %r8, 8;
; CHECK-NEXT: or.b32 %r13, %r12, %r9;
; CHECK-NEXT: shl.b32 %r15, %r7, 16;
; CHECK-NEXT: shl.b32 %r17, %r6, 24;
; CHECK-NEXT: or.b32 %r18, %r17, %r15;
; CHECK-NEXT: or.b32 %r19, %r18, %r13;
; CHECK-NEXT: st.param.b8 [func_retval0+5], %r9;
; CHECK-NEXT: st.param.b64 [func_retval0+16], %rd2;
; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs2;
; CHECK-NEXT: st.param.b32 [func_retval0], %r5;
; CHECK-NEXT: shr.u32 %r21, %r19, 24;
; CHECK-NEXT: st.param.b8 [func_retval0+8], %r21;
; CHECK-NEXT: shr.u32 %r22, %r19, 16;
; CHECK-NEXT: st.param.b8 [func_retval0+7], %r22;
; CHECK-NEXT: shr.u32 %r23, %r19, 8;
; CHECK-NEXT: st.param.b8 [func_retval0+6], %r23;
; CHECK-NEXT: ret;
; Pass the incoming struct to this same function and return what comes back.
%r = tail call %s_i8i32p @test_s_i8i32p(%s_i8i32p %a)
ret %s_i8i32p %r
}
; test_s_i8i64p: forwards %a to a call of itself and returns the call's result.
; %s_i8i64p is { <{ i64, i8, i64 }>, i64 }; in the packed part the second i64
; sits at byte offset 9, so it is not 8-byte aligned.
; Expected PTX for the call result: the i64 at offset 0 is read with one
; ld.param.b64 and stored with one st.param.b64, while the i64 at offset 9 is
; read with eight ld.param.b8 (retval0+9 .. retval0+16) and written back with
; eight st.param.b8 (func_retval0+9 .. func_retval0+16).
define %s_i8i64p @test_s_i8i64p(%s_i8i64p %a) {
; CHECK-LABEL: test_s_i8i64p(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<3>;
; CHECK-NEXT: .reg .b64 %rd<46>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [test_s_i8i64p_param_0];
; CHECK-NEXT: ld.param.b64 %rd2, [test_s_i8i64p_param_0+8];
; CHECK-NEXT: ld.param.b64 %rd3, [test_s_i8i64p_param_0+24];
; CHECK-NEXT: ld.param.b8 %rd4, [test_s_i8i64p_param_0+16];
; CHECK-NEXT: { // callseq 2, 0
; CHECK-NEXT: .param .align 8 .b8 param0[32];
; CHECK-NEXT: .param .align 8 .b8 retval0[32];
; CHECK-NEXT: st.param.b8 [param0+16], %rd4;
; CHECK-NEXT: st.param.b64 [param0+24], %rd3;
; CHECK-NEXT: st.param.b64 [param0+8], %rd2;
; CHECK-NEXT: st.param.b64 [param0], %rd1;
; CHECK-NEXT: call.uni (retval0), test_s_i8i64p, (param0);
; CHECK-NEXT: ld.param.b64 %rd5, [retval0+24];
; CHECK-NEXT: ld.param.b8 %rs1, [retval0+8];
; CHECK-NEXT: ld.param.b64 %rd6, [retval0];
; CHECK-NEXT: ld.param.b8 %rd7, [retval0+16];
; CHECK-NEXT: ld.param.b8 %rd8, [retval0+15];
; CHECK-NEXT: ld.param.b8 %rd9, [retval0+14];
; CHECK-NEXT: ld.param.b8 %rd10, [retval0+13];
; CHECK-NEXT: ld.param.b8 %rd11, [retval0+12];
; CHECK-NEXT: ld.param.b8 %rd12, [retval0+11];
; CHECK-NEXT: ld.param.b8 %rd13, [retval0+10];
; CHECK-NEXT: ld.param.b8 %rd14, [retval0+9];
; CHECK-NEXT: } // callseq 2
; CHECK-NEXT: shl.b64 %rd17, %rd13, 8;
; CHECK-NEXT: or.b64 %rd18, %rd17, %rd14;
; CHECK-NEXT: shl.b64 %rd20, %rd12, 16;
; CHECK-NEXT: shl.b64 %rd22, %rd11, 24;
; CHECK-NEXT: or.b64 %rd23, %rd22, %rd20;
; CHECK-NEXT: or.b64 %rd24, %rd23, %rd18;
; CHECK-NEXT: shl.b64 %rd27, %rd9, 8;
; CHECK-NEXT: or.b64 %rd28, %rd27, %rd10;
; CHECK-NEXT: shl.b64 %rd30, %rd8, 16;
; CHECK-NEXT: shl.b64 %rd32, %rd7, 24;
; CHECK-NEXT: or.b64 %rd33, %rd32, %rd30;
; CHECK-NEXT: or.b64 %rd34, %rd33, %rd28;
; CHECK-NEXT: shl.b64 %rd35, %rd34, 32;
; CHECK-NEXT: or.b64 %rd36, %rd35, %rd24;
; CHECK-NEXT: st.param.b8 [func_retval0+9], %rd14;
; CHECK-NEXT: st.param.b64 [func_retval0+24], %rd5;
; CHECK-NEXT: st.param.b8 [func_retval0+8], %rs1;
; CHECK-NEXT: st.param.b64 [func_retval0], %rd6;
; CHECK-NEXT: shr.u64 %rd39, %rd36, 56;
; CHECK-NEXT: st.param.b8 [func_retval0+16], %rd39;
; CHECK-NEXT: shr.u64 %rd40, %rd36, 48;
; CHECK-NEXT: st.param.b8 [func_retval0+15], %rd40;
; CHECK-NEXT: shr.u64 %rd41, %rd36, 40;
; CHECK-NEXT: st.param.b8 [func_retval0+14], %rd41;
; CHECK-NEXT: shr.u64 %rd42, %rd36, 32;
; CHECK-NEXT: st.param.b8 [func_retval0+13], %rd42;
; CHECK-NEXT: shr.u64 %rd43, %rd36, 24;
; CHECK-NEXT: st.param.b8 [func_retval0+12], %rd43;
; CHECK-NEXT: shr.u64 %rd44, %rd36, 16;
; CHECK-NEXT: st.param.b8 [func_retval0+11], %rd44;
; CHECK-NEXT: shr.u64 %rd45, %rd36, 8;
; CHECK-NEXT: st.param.b8 [func_retval0+10], %rd45;
; CHECK-NEXT: ret;
; Pass the incoming struct to this same function and return what comes back.
%r = tail call %s_i8i64p @test_s_i8i64p(%s_i8i64p %a)
ret %s_i8i64p %r
}
; test_s_i8f16p: forwards %a to a call of itself and returns the call's result.
; %s_i8f16p is { <{ half, i8, half }>, i64 }; in the packed part the second
; half sits at byte offset 3, so it is not 2-byte aligned.
; Expected PTX for the call result: the half at offset 0 is read with one
; ld.param.b16 and stored with one st.param.b16, while the half at offset 3 is
; read with two ld.param.b8 (retval0+3, retval0+4) and written back with two
; st.param.b8 (func_retval0+3, func_retval0+4).
define %s_i8f16p @test_s_i8f16p(%s_i8f16p %a) {
; CHECK-LABEL: test_s_i8f16p(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<15>;
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b16 %rs1, [test_s_i8f16p_param_0];
; CHECK-NEXT: ld.param.b16 %rs2, [test_s_i8f16p_param_0+2];
; CHECK-NEXT: ld.param.b64 %rd1, [test_s_i8f16p_param_0+8];
; CHECK-NEXT: ld.param.b8 %rs3, [test_s_i8f16p_param_0+4];
; CHECK-NEXT: { // callseq 3, 0
; CHECK-NEXT: .param .align 8 .b8 param0[16];
; CHECK-NEXT: .param .align 8 .b8 retval0[16];
; CHECK-NEXT: st.param.b8 [param0+4], %rs3;
; CHECK-NEXT: st.param.b64 [param0+8], %rd1;
; CHECK-NEXT: st.param.b16 [param0+2], %rs2;
; CHECK-NEXT: st.param.b16 [param0], %rs1;
; CHECK-NEXT: call.uni (retval0), test_s_i8f16p, (param0);
; CHECK-NEXT: ld.param.b64 %rd2, [retval0+8];
; CHECK-NEXT: ld.param.b8 %rs4, [retval0+2];
; CHECK-NEXT: ld.param.b16 %rs5, [retval0];
; CHECK-NEXT: ld.param.b8 %rs6, [retval0+4];
; CHECK-NEXT: ld.param.b8 %rs7, [retval0+3];
; CHECK-NEXT: } // callseq 3
; CHECK-NEXT: shl.b16 %rs10, %rs6, 8;
; CHECK-NEXT: or.b16 %rs11, %rs10, %rs7;
; CHECK-NEXT: st.param.b8 [func_retval0+3], %rs7;
; CHECK-NEXT: st.param.b64 [func_retval0+8], %rd2;
; CHECK-NEXT: st.param.b8 [func_retval0+2], %rs4;
; CHECK-NEXT: st.param.b16 [func_retval0], %rs5;
; CHECK-NEXT: shr.u16 %rs14, %rs11, 8;
; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs14;
; CHECK-NEXT: ret;
; Pass the incoming struct to this same function and return what comes back.
%r = tail call %s_i8f16p @test_s_i8f16p(%s_i8f16p %a)
ret %s_i8f16p %r
}
; test_s_i8f16x2p: forwards %a to a call of itself and returns the call's
; result.
; %s_i8f16x2p is { <{ <2 x half>, i8, <2 x half> }>, i64 }; in the packed part
; the second <2 x half> sits at byte offset 5, so it is not 4-byte aligned.
; Expected PTX for the call result: the vector at offset 0 is read with one
; ld.param.b32 and stored with one st.param.b32, while the vector at offset 5
; is read with four ld.param.b8 (retval0+5 .. retval0+8) and written back with
; four st.param.b8 (func_retval0+5 .. func_retval0+8).
define %s_i8f16x2p @test_s_i8f16x2p(%s_i8f16x2p %a) {
; CHECK-LABEL: test_s_i8f16x2p(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<4>;
; CHECK-NEXT: .reg .b32 %r<24>;
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [test_s_i8f16x2p_param_0];
; CHECK-NEXT: ld.param.b16 %rs1, [test_s_i8f16x2p_param_0+4];
; CHECK-NEXT: ld.param.b64 %rd1, [test_s_i8f16x2p_param_0+16];
; CHECK-NEXT: ld.param.b8 %r2, [test_s_i8f16x2p_param_0+6];
; CHECK-NEXT: ld.param.b8 %r3, [test_s_i8f16x2p_param_0+7];
; CHECK-NEXT: ld.param.b8 %r4, [test_s_i8f16x2p_param_0+8];
; CHECK-NEXT: { // callseq 4, 0
; CHECK-NEXT: .param .align 8 .b8 param0[24];
; CHECK-NEXT: .param .align 8 .b8 retval0[24];
; CHECK-NEXT: st.param.b8 [param0+8], %r4;
; CHECK-NEXT: st.param.b8 [param0+7], %r3;
; CHECK-NEXT: st.param.b8 [param0+6], %r2;
; CHECK-NEXT: st.param.b64 [param0+16], %rd1;
; CHECK-NEXT: st.param.b16 [param0+4], %rs1;
; CHECK-NEXT: st.param.b32 [param0], %r1;
; CHECK-NEXT: call.uni (retval0), test_s_i8f16x2p, (param0);
; CHECK-NEXT: ld.param.b64 %rd2, [retval0+16];
; CHECK-NEXT: ld.param.b8 %rs2, [retval0+4];
; CHECK-NEXT: ld.param.b32 %r5, [retval0];
; CHECK-NEXT: ld.param.b8 %r6, [retval0+8];
; CHECK-NEXT: ld.param.b8 %r7, [retval0+7];
; CHECK-NEXT: ld.param.b8 %r8, [retval0+6];
; CHECK-NEXT: ld.param.b8 %r9, [retval0+5];
; CHECK-NEXT: } // callseq 4
; CHECK-NEXT: shl.b32 %r12, %r8, 8;
; CHECK-NEXT: or.b32 %r13, %r12, %r9;
; CHECK-NEXT: shl.b32 %r15, %r7, 16;
; CHECK-NEXT: shl.b32 %r17, %r6, 24;
; CHECK-NEXT: or.b32 %r18, %r17, %r15;
; CHECK-NEXT: or.b32 %r19, %r18, %r13;
; CHECK-NEXT: st.param.b8 [func_retval0+5], %r9;
; CHECK-NEXT: st.param.b64 [func_retval0+16], %rd2;
; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs2;
; CHECK-NEXT: st.param.b32 [func_retval0], %r5;
; CHECK-NEXT: shr.u32 %r21, %r19, 24;
; CHECK-NEXT: st.param.b8 [func_retval0+8], %r21;
; CHECK-NEXT: shr.u32 %r22, %r19, 16;
; CHECK-NEXT: st.param.b8 [func_retval0+7], %r22;
; CHECK-NEXT: shr.u32 %r23, %r19, 8;
; CHECK-NEXT: st.param.b8 [func_retval0+6], %r23;
; CHECK-NEXT: ret;
; Pass the incoming struct to this same function and return what comes back.
%r = tail call %s_i8f16x2p @test_s_i8f16x2p(%s_i8f16x2p %a)
ret %s_i8f16x2p %r
}
; test_s_i8f32p: forwards %a to a call of itself and returns the call's result.
; %s_i8f32p is { <{ float, i8, float }>, i64 }; in the packed part the second
; float sits at byte offset 5, so it is not 4-byte aligned.
; Expected PTX for the call result: the float at offset 0 is read with one
; ld.param.b32 and stored with one st.param.b32, while the float at offset 5
; is read with four ld.param.b8 (retval0+5 .. retval0+8) and written back with
; four st.param.b8 (func_retval0+5 .. func_retval0+8).
define %s_i8f32p @test_s_i8f32p(%s_i8f32p %a) {
; CHECK-LABEL: test_s_i8f32p(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<4>;
; CHECK-NEXT: .reg .b32 %r<24>;
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [test_s_i8f32p_param_0];
; CHECK-NEXT: ld.param.b16 %rs1, [test_s_i8f32p_param_0+4];
; CHECK-NEXT: ld.param.b64 %rd1, [test_s_i8f32p_param_0+16];
; CHECK-NEXT: ld.param.b8 %r2, [test_s_i8f32p_param_0+6];
; CHECK-NEXT: ld.param.b8 %r3, [test_s_i8f32p_param_0+7];
; CHECK-NEXT: ld.param.b8 %r4, [test_s_i8f32p_param_0+8];
; CHECK-NEXT: { // callseq 5, 0
; CHECK-NEXT: .param .align 8 .b8 param0[24];
; CHECK-NEXT: .param .align 8 .b8 retval0[24];
; CHECK-NEXT: st.param.b8 [param0+8], %r4;
; CHECK-NEXT: st.param.b8 [param0+7], %r3;
; CHECK-NEXT: st.param.b8 [param0+6], %r2;
; CHECK-NEXT: st.param.b64 [param0+16], %rd1;
; CHECK-NEXT: st.param.b16 [param0+4], %rs1;
; CHECK-NEXT: st.param.b32 [param0], %r1;
; CHECK-NEXT: call.uni (retval0), test_s_i8f32p, (param0);
; CHECK-NEXT: ld.param.b64 %rd2, [retval0+16];
; CHECK-NEXT: ld.param.b8 %rs2, [retval0+4];
; CHECK-NEXT: ld.param.b32 %r5, [retval0];
; CHECK-NEXT: ld.param.b8 %r6, [retval0+8];
; CHECK-NEXT: ld.param.b8 %r7, [retval0+7];
; CHECK-NEXT: ld.param.b8 %r8, [retval0+6];
; CHECK-NEXT: ld.param.b8 %r9, [retval0+5];
; CHECK-NEXT: } // callseq 5
; CHECK-NEXT: shl.b32 %r12, %r8, 8;
; CHECK-NEXT: or.b32 %r13, %r12, %r9;
; CHECK-NEXT: shl.b32 %r15, %r7, 16;
; CHECK-NEXT: shl.b32 %r17, %r6, 24;
; CHECK-NEXT: or.b32 %r18, %r17, %r15;
; CHECK-NEXT: or.b32 %r19, %r18, %r13;
; CHECK-NEXT: st.param.b8 [func_retval0+5], %r9;
; CHECK-NEXT: st.param.b64 [func_retval0+16], %rd2;
; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs2;
; CHECK-NEXT: st.param.b32 [func_retval0], %r5;
; CHECK-NEXT: shr.u32 %r21, %r19, 24;
; CHECK-NEXT: st.param.b8 [func_retval0+8], %r21;
; CHECK-NEXT: shr.u32 %r22, %r19, 16;
; CHECK-NEXT: st.param.b8 [func_retval0+7], %r22;
; CHECK-NEXT: shr.u32 %r23, %r19, 8;
; CHECK-NEXT: st.param.b8 [func_retval0+6], %r23;
; CHECK-NEXT: ret;
; Pass the incoming struct to this same function and return what comes back.
%r = tail call %s_i8f32p @test_s_i8f32p(%s_i8f32p %a)
ret %s_i8f32p %r
}
; test_s_i8f64p: forwards %a to a call of itself and returns the call's result.
; %s_i8f64p is { <{ double, i8, double }>, i64 }; in the packed part the second
; double sits at byte offset 9, so it is not 8-byte aligned.
; Expected PTX for the call result: the double at offset 0 is read with one
; ld.param.b64 and stored with one st.param.b64, while the double at offset 9
; is read with eight ld.param.b8 (retval0+9 .. retval0+16) and written back
; with eight st.param.b8 (func_retval0+9 .. func_retval0+16).
define %s_i8f64p @test_s_i8f64p(%s_i8f64p %a) {
; CHECK-LABEL: test_s_i8f64p(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<3>;
; CHECK-NEXT: .reg .b64 %rd<46>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [test_s_i8f64p_param_0];
; CHECK-NEXT: ld.param.b64 %rd2, [test_s_i8f64p_param_0+8];
; CHECK-NEXT: ld.param.b64 %rd3, [test_s_i8f64p_param_0+24];
; CHECK-NEXT: ld.param.b8 %rd4, [test_s_i8f64p_param_0+16];
; CHECK-NEXT: { // callseq 6, 0
; CHECK-NEXT: .param .align 8 .b8 param0[32];
; CHECK-NEXT: .param .align 8 .b8 retval0[32];
; CHECK-NEXT: st.param.b8 [param0+16], %rd4;
; CHECK-NEXT: st.param.b64 [param0+24], %rd3;
; CHECK-NEXT: st.param.b64 [param0+8], %rd2;
; CHECK-NEXT: st.param.b64 [param0], %rd1;
; CHECK-NEXT: call.uni (retval0), test_s_i8f64p, (param0);
; CHECK-NEXT: ld.param.b64 %rd5, [retval0+24];
; CHECK-NEXT: ld.param.b8 %rs1, [retval0+8];
; CHECK-NEXT: ld.param.b64 %rd6, [retval0];
; CHECK-NEXT: ld.param.b8 %rd7, [retval0+16];
; CHECK-NEXT: ld.param.b8 %rd8, [retval0+15];
; CHECK-NEXT: ld.param.b8 %rd9, [retval0+14];
; CHECK-NEXT: ld.param.b8 %rd10, [retval0+13];
; CHECK-NEXT: ld.param.b8 %rd11, [retval0+12];
; CHECK-NEXT: ld.param.b8 %rd12, [retval0+11];
; CHECK-NEXT: ld.param.b8 %rd13, [retval0+10];
; CHECK-NEXT: ld.param.b8 %rd14, [retval0+9];
; CHECK-NEXT: } // callseq 6
; CHECK-NEXT: shl.b64 %rd17, %rd13, 8;
; CHECK-NEXT: or.b64 %rd18, %rd17, %rd14;
; CHECK-NEXT: shl.b64 %rd20, %rd12, 16;
; CHECK-NEXT: shl.b64 %rd22, %rd11, 24;
; CHECK-NEXT: or.b64 %rd23, %rd22, %rd20;
; CHECK-NEXT: or.b64 %rd24, %rd23, %rd18;
; CHECK-NEXT: shl.b64 %rd27, %rd9, 8;
; CHECK-NEXT: or.b64 %rd28, %rd27, %rd10;
; CHECK-NEXT: shl.b64 %rd30, %rd8, 16;
; CHECK-NEXT: shl.b64 %rd32, %rd7, 24;
; CHECK-NEXT: or.b64 %rd33, %rd32, %rd30;
; CHECK-NEXT: or.b64 %rd34, %rd33, %rd28;
; CHECK-NEXT: shl.b64 %rd35, %rd34, 32;
; CHECK-NEXT: or.b64 %rd36, %rd35, %rd24;
; CHECK-NEXT: st.param.b8 [func_retval0+9], %rd14;
; CHECK-NEXT: st.param.b64 [func_retval0+24], %rd5;
; CHECK-NEXT: st.param.b8 [func_retval0+8], %rs1;
; CHECK-NEXT: st.param.b64 [func_retval0], %rd6;
; CHECK-NEXT: shr.u64 %rd39, %rd36, 56;
; CHECK-NEXT: st.param.b8 [func_retval0+16], %rd39;
; CHECK-NEXT: shr.u64 %rd40, %rd36, 48;
; CHECK-NEXT: st.param.b8 [func_retval0+15], %rd40;
; CHECK-NEXT: shr.u64 %rd41, %rd36, 40;
; CHECK-NEXT: st.param.b8 [func_retval0+14], %rd41;
; CHECK-NEXT: shr.u64 %rd42, %rd36, 32;
; CHECK-NEXT: st.param.b8 [func_retval0+13], %rd42;
; CHECK-NEXT: shr.u64 %rd43, %rd36, 24;
; CHECK-NEXT: st.param.b8 [func_retval0+12], %rd43;
; CHECK-NEXT: shr.u64 %rd44, %rd36, 16;
; CHECK-NEXT: st.param.b8 [func_retval0+11], %rd44;
; CHECK-NEXT: shr.u64 %rd45, %rd36, 8;
; CHECK-NEXT: st.param.b8 [func_retval0+10], %rd45;
; CHECK-NEXT: ret;
; Pass the incoming struct to this same function and return what comes back.
%r = tail call %s_i8f64p @test_s_i8f64p(%s_i8f64p %a)
ret %s_i8f64p %r
}