From d1e3ab9c4b80763c6bc260bbf62598f92e63f8fe Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Fri, 11 Jul 2025 22:16:50 -0700 Subject: [PATCH] [AMDGPU] Use v_mov_b64 in codegen on gfx1250 (#148272) --- llvm/lib/Target/AMDGPU/GCNSubtarget.h | 2 +- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 2 +- llvm/test/CodeGen/AMDGPU/copy_phys_vgpr64.mir | 103 ++++++++++++++++++ llvm/test/CodeGen/AMDGPU/global-load-xcnt.ll | 97 ++++++++--------- .../CodeGen/AMDGPU/v_mov_b64_expansion.mir | 27 +++-- 5 files changed, 168 insertions(+), 63 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index e6dd98a10420..0ad4778875cd 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -1156,7 +1156,7 @@ public: bool hasMadF16() const; - bool hasMovB64() const { return GFX940Insts; } + bool hasMovB64() const { return GFX940Insts || GFX1250Insts; } bool hasLshlAddU64Inst() const { return HasLshlAddU64Inst; } diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index ca3af3b48a60..4c5f93883124 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -2214,7 +2214,7 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { if (ST.hasMovB64()) { MI.setDesc(get(AMDGPU::V_MOV_B64_e32)); if (SrcOp.isReg() || isInlineConstant(MI, 1) || - isUInt<32>(SrcOp.getImm())) + isUInt<32>(SrcOp.getImm()) || ST.has64BitLiterals()) break; } if (SrcOp.isImm()) { diff --git a/llvm/test/CodeGen/AMDGPU/copy_phys_vgpr64.mir b/llvm/test/CodeGen/AMDGPU/copy_phys_vgpr64.mir index d5dfb5dd0848..cc976fe13c47 100644 --- a/llvm/test/CodeGen/AMDGPU/copy_phys_vgpr64.mir +++ b/llvm/test/CodeGen/AMDGPU/copy_phys_vgpr64.mir @@ -4,6 +4,7 @@ # RUN: llc -mtriple=amdgcn -mcpu=gfx942 -run-pass postrapseudos -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX942 %s # RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass postrapseudos -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX10 %s # RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass postrapseudos -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX10 %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -run-pass postrapseudos %s -o - | FileCheck -check-prefix=GFX1250 %s --- name: copy_v64_to_v64 @@ -32,6 +33,10 @@ body: | ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: $vgpr0 = V_MOV_B32_e32 $vgpr2, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr2_vgpr3 ; GFX10-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr3, implicit $exec, implicit killed $vgpr2_vgpr3, implicit $exec + ; GFX1250-LABEL: name: copy_v64_to_v64 + ; GFX1250: liveins: $vgpr2_vgpr3 + ; GFX1250-NEXT: {{ $}} + ; GFX1250-NEXT: $vgpr0_vgpr1 = V_MOV_B64_e32 killed $vgpr2_vgpr3, implicit $exec, implicit $exec $vgpr0_vgpr1 = COPY killed $vgpr2_vgpr3, implicit $exec ... @@ -62,6 +67,10 @@ body: | ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr2_sgpr3 ; GFX10-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr3, implicit $exec, implicit killed $sgpr2_sgpr3, implicit $exec + ; GFX1250-LABEL: name: copy_s64_to_v64 + ; GFX1250: liveins: $sgpr2_sgpr3 + ; GFX1250-NEXT: {{ $}} + ; GFX1250-NEXT: $vgpr0_vgpr1 = V_MOV_B64_e32 killed $sgpr2_sgpr3, implicit $exec, implicit $exec $vgpr0_vgpr1 = COPY killed $sgpr2_sgpr3, implicit $exec ... @@ -94,6 +103,11 @@ body: | ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $agpr2_agpr3 ; GFX10-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit killed $agpr2_agpr3, implicit $exec + ; GFX1250-LABEL: name: copy_a64_to_v64 + ; GFX1250: liveins: $agpr2_agpr3 + ; GFX1250-NEXT: {{ $}} + ; GFX1250-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $agpr2_agpr3 + ; GFX1250-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit killed $agpr2_agpr3, implicit $exec $vgpr0_vgpr1 = COPY killed $agpr2_agpr3, implicit $exec ... @@ -130,6 +144,11 @@ body: | ; GFX10-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr3, implicit $exec, implicit $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX10-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr4, implicit $exec, implicit $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX10-NEXT: $vgpr3 = V_MOV_B32_e32 $vgpr5, implicit $exec, implicit $vgpr2_vgpr3_vgpr4_vgpr5, implicit $exec + ; GFX1250-LABEL: name: copy_v128_to_v128_fwd + ; GFX1250: liveins: $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX1250-NEXT: {{ $}} + ; GFX1250-NEXT: $vgpr0_vgpr1 = V_MOV_B64_e32 $vgpr2_vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX1250-NEXT: $vgpr2_vgpr3 = V_MOV_B64_e32 $vgpr4_vgpr5, implicit $exec, implicit $vgpr2_vgpr3_vgpr4_vgpr5, implicit $exec $vgpr0_vgpr1_vgpr2_vgpr3 = COPY killed $vgpr2_vgpr3_vgpr4_vgpr5, implicit $exec ... @@ -166,6 +185,11 @@ body: | ; GFX10-NEXT: $vgpr4 = V_MOV_B32_e32 $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX10-NEXT: $vgpr3 = V_MOV_B32_e32 $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX10-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3, implicit $exec + ; GFX1250-LABEL: name: copy_v128_to_v128_back + ; GFX1250: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX1250-NEXT: {{ $}} + ; GFX1250-NEXT: $vgpr4_vgpr5 = V_MOV_B64_e32 $vgpr2_vgpr3, implicit $exec, implicit-def $vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX1250-NEXT: $vgpr2_vgpr3 = V_MOV_B64_e32 $vgpr0_vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3, implicit $exec $vgpr2_vgpr3_vgpr4_vgpr5 = COPY killed $vgpr0_vgpr1_vgpr2_vgpr3, implicit $exec ... @@ -202,6 +226,12 @@ body: | ; GFX10-NEXT: $vgpr0 = V_MOV_B32_e32 $vgpr4, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr4_vgpr5_vgpr6 ; GFX10-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr5, implicit $exec, implicit $vgpr4_vgpr5_vgpr6 ; GFX10-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr6, implicit $exec, implicit killed $vgpr4_vgpr5_vgpr6, implicit $exec + ; GFX1250-LABEL: name: copy_v96_to_v96 + ; GFX1250: liveins: $vgpr4_vgpr5_vgpr6 + ; GFX1250-NEXT: {{ $}} + ; GFX1250-NEXT: $vgpr0 = V_MOV_B32_e32 $vgpr4, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr4_vgpr5_vgpr6 + ; GFX1250-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr5, implicit $exec, implicit $vgpr4_vgpr5_vgpr6 + ; GFX1250-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr6, implicit $exec, implicit killed $vgpr4_vgpr5_vgpr6, implicit $exec $vgpr0_vgpr1_vgpr2 = COPY killed $vgpr4_vgpr5_vgpr6, implicit $exec ... @@ -232,6 +262,10 @@ body: | ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: $vgpr0 = V_MOV_B32_e32 $vgpr2, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr2_vgpr3 ; GFX10-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr3, implicit $exec, implicit killed $vgpr2_vgpr3, implicit $exec + ; GFX1250-LABEL: name: copy_v64_to_v64_undef_sub0 + ; GFX1250: liveins: $vgpr3 + ; GFX1250-NEXT: {{ $}} + ; GFX1250-NEXT: $vgpr0_vgpr1 = V_MOV_B64_e32 killed $vgpr2_vgpr3, implicit $exec, implicit $exec $vgpr0_vgpr1 = COPY killed $vgpr2_vgpr3, implicit $exec ... @@ -262,6 +296,10 @@ body: | ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: $vgpr0 = V_MOV_B32_e32 $vgpr2, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr2_vgpr3 ; GFX10-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr3, implicit $exec, implicit killed $vgpr2_vgpr3, implicit $exec + ; GFX1250-LABEL: name: copy_v64_to_v64_undef_sub1 + ; GFX1250: liveins: $vgpr2 + ; GFX1250-NEXT: {{ $}} + ; GFX1250-NEXT: $vgpr0_vgpr1 = V_MOV_B64_e32 killed $vgpr2_vgpr3, implicit $exec, implicit $exec $vgpr0_vgpr1 = COPY killed $vgpr2_vgpr3, implicit $exec ... @@ -298,6 +336,11 @@ body: | ; GFX10-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr5, implicit $exec, implicit $sgpr4_sgpr5_sgpr6_sgpr7 ; GFX10-NEXT: $vgpr2 = V_MOV_B32_e32 $sgpr6, implicit $exec, implicit $sgpr4_sgpr5_sgpr6_sgpr7 ; GFX10-NEXT: $vgpr3 = V_MOV_B32_e32 $sgpr7, implicit $exec, implicit killed $sgpr4_sgpr5_sgpr6_sgpr7 + ; GFX1250-LABEL: name: copy_s128_to_v128_killed + ; GFX1250: liveins: $sgpr4_sgpr5_sgpr6_sgpr7 + ; GFX1250-NEXT: {{ $}} + ; GFX1250-NEXT: $vgpr0_vgpr1 = V_MOV_B64_e32 $sgpr4_sgpr5, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $sgpr4_sgpr5_sgpr6_sgpr7 + ; GFX1250-NEXT: $vgpr2_vgpr3 = V_MOV_B64_e32 $sgpr6_sgpr7, implicit $exec, implicit killed $sgpr4_sgpr5_sgpr6_sgpr7 $vgpr0_vgpr1_vgpr2_vgpr3 = COPY killed $sgpr4_sgpr5_sgpr6_sgpr7 ... @@ -330,6 +373,11 @@ body: | ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr2, implicit $exec, implicit-def $vgpr1_vgpr2, implicit $vgpr2_vgpr3 ; GFX10-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr3, implicit $exec, implicit $vgpr2_vgpr3, implicit $exec + ; GFX1250-LABEL: name: copy_v64_to_v64_unaligned + ; GFX1250: liveins: $vgpr2_vgpr3 + ; GFX1250-NEXT: {{ $}} + ; GFX1250-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr2, implicit $exec, implicit-def $vgpr1_vgpr2, implicit $vgpr2_vgpr3 + ; GFX1250-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr3, implicit $exec, implicit $vgpr2_vgpr3, implicit $exec $vgpr1_vgpr2 = COPY killed $vgpr2_vgpr3, implicit $exec ... @@ -362,6 +410,11 @@ body: | ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: $vgpr0 = V_MOV_B32_e32 $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr3_vgpr4 ; GFX10-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr4, implicit $exec, implicit killed $vgpr3_vgpr4, implicit $exec + ; GFX1250-LABEL: name: copy_v64_unaligned_to_v64 + ; GFX1250: liveins: $vgpr3_vgpr4 + ; GFX1250-NEXT: {{ $}} + ; GFX1250-NEXT: $vgpr0 = V_MOV_B32_e32 $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr3_vgpr4 + ; GFX1250-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr4, implicit $exec, implicit killed $vgpr3_vgpr4, implicit $exec $vgpr0_vgpr1 = COPY killed $vgpr3_vgpr4, implicit $exec ... @@ -402,6 +455,13 @@ body: | ; GFX10-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr9, implicit $exec, implicit $vgpr8_vgpr9_vgpr10_vgpr11 ; GFX10-NEXT: $vgpr3 = V_MOV_B32_e32 $vgpr10, implicit $exec, implicit $vgpr8_vgpr9_vgpr10_vgpr11 ; GFX10-NEXT: $vgpr4 = V_MOV_B32_e32 $vgpr11, implicit $exec, implicit killed $vgpr8_vgpr9_vgpr10_vgpr11, implicit $exec + ; GFX1250-LABEL: name: copy_v128_to_v128_unaligned + ; GFX1250: liveins: $vgpr8_vgpr9_vgpr10_vgpr11 + ; GFX1250-NEXT: {{ $}} + ; GFX1250-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr8, implicit $exec, implicit-def $vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr8_vgpr9_vgpr10_vgpr11 + ; GFX1250-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr9, implicit $exec, implicit $vgpr8_vgpr9_vgpr10_vgpr11 + ; GFX1250-NEXT: $vgpr3 = V_MOV_B32_e32 $vgpr10, implicit $exec, implicit $vgpr8_vgpr9_vgpr10_vgpr11 + ; GFX1250-NEXT: $vgpr4 = V_MOV_B32_e32 $vgpr11, implicit $exec, implicit killed $vgpr8_vgpr9_vgpr10_vgpr11, implicit $exec $vgpr1_vgpr2_vgpr3_vgpr4 = COPY killed $vgpr8_vgpr9_vgpr10_vgpr11, implicit $exec ... @@ -442,6 +502,13 @@ body: | ; GFX10-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr8, implicit $exec, implicit $vgpr7_vgpr8_vgpr9_vgpr10 ; GFX10-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr9, implicit $exec, implicit $vgpr7_vgpr8_vgpr9_vgpr10 ; GFX10-NEXT: $vgpr3 = V_MOV_B32_e32 $vgpr10, implicit $exec, implicit killed $vgpr7_vgpr8_vgpr9_vgpr10, implicit $exec + ; GFX1250-LABEL: name: copy_v128_unaligned_to_v128 + ; GFX1250: liveins: $vgpr7_vgpr8_vgpr9_vgpr10 + ; GFX1250-NEXT: {{ $}} + ; GFX1250-NEXT: $vgpr0 = V_MOV_B32_e32 $vgpr7, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr7_vgpr8_vgpr9_vgpr10 + ; GFX1250-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr8, implicit $exec, implicit $vgpr7_vgpr8_vgpr9_vgpr10 + ; GFX1250-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr9, implicit $exec, implicit $vgpr7_vgpr8_vgpr9_vgpr10 + ; GFX1250-NEXT: $vgpr3 = V_MOV_B32_e32 $vgpr10, implicit $exec, implicit killed $vgpr7_vgpr8_vgpr9_vgpr10, implicit $exec $vgpr0_vgpr1_vgpr2_vgpr3 = COPY killed $vgpr7_vgpr8_vgpr9_vgpr10, implicit $exec ... @@ -474,6 +541,11 @@ body: | ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr8, implicit $exec, implicit-def $vgpr1_vgpr2, implicit $sgpr8_sgpr9 ; GFX10-NEXT: $vgpr2 = V_MOV_B32_e32 $sgpr9, implicit $exec, implicit killed $sgpr8_sgpr9, implicit $exec + ; GFX1250-LABEL: name: copy_s64_to_v64_unaligned + ; GFX1250: liveins: $sgpr8_sgpr9 + ; GFX1250-NEXT: {{ $}} + ; GFX1250-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr8, implicit $exec, implicit-def $vgpr1_vgpr2, implicit $sgpr8_sgpr9 + ; GFX1250-NEXT: $vgpr2 = V_MOV_B32_e32 $sgpr9, implicit $exec, implicit killed $sgpr8_sgpr9, implicit $exec $vgpr1_vgpr2 = COPY killed $sgpr8_sgpr9, implicit $exec ... @@ -514,6 +586,13 @@ body: | ; GFX10-NEXT: $vgpr2 = V_MOV_B32_e32 $sgpr9, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11 ; GFX10-NEXT: $vgpr3 = V_MOV_B32_e32 $sgpr10, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11 ; GFX10-NEXT: $vgpr4 = V_MOV_B32_e32 $sgpr11, implicit $exec, implicit killed $sgpr8_sgpr9_sgpr10_sgpr11, implicit $exec + ; GFX1250-LABEL: name: copy_s128_to_v128_unaligned + ; GFX1250: liveins: $sgpr8_sgpr9_sgpr10_sgpr11 + ; GFX1250-NEXT: {{ $}} + ; GFX1250-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr8, implicit $exec, implicit-def $vgpr1_vgpr2_vgpr3_vgpr4, implicit $sgpr8_sgpr9_sgpr10_sgpr11 + ; GFX1250-NEXT: $vgpr2 = V_MOV_B32_e32 $sgpr9, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11 + ; GFX1250-NEXT: $vgpr3 = V_MOV_B32_e32 $sgpr10, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11 + ; GFX1250-NEXT: $vgpr4 = V_MOV_B32_e32 $sgpr11, implicit $exec, implicit killed $sgpr8_sgpr9_sgpr10_sgpr11, implicit $exec $vgpr1_vgpr2_vgpr3_vgpr4 = COPY killed $sgpr8_sgpr9_sgpr10_sgpr11, implicit $exec ... @@ -550,6 +629,12 @@ body: | ; GFX10-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr8, implicit $exec, implicit-def $vgpr1_vgpr2_vgpr3, implicit $vgpr8_vgpr9_vgpr10 ; GFX10-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr9, implicit $exec, implicit $vgpr8_vgpr9_vgpr10 ; GFX10-NEXT: $vgpr3 = V_MOV_B32_e32 $vgpr10, implicit $exec, implicit killed $vgpr8_vgpr9_vgpr10, implicit $exec + ; GFX1250-LABEL: name: copy_v96_to_v96_unaligned + ; GFX1250: liveins: $vgpr8_vgpr9_vgpr10 + ; GFX1250-NEXT: {{ $}} + ; GFX1250-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr8, implicit $exec, implicit-def $vgpr1_vgpr2_vgpr3, implicit $vgpr8_vgpr9_vgpr10 + ; GFX1250-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr9, implicit $exec, implicit $vgpr8_vgpr9_vgpr10 + ; GFX1250-NEXT: $vgpr3 = V_MOV_B32_e32 $vgpr10, implicit $exec, implicit killed $vgpr8_vgpr9_vgpr10, implicit $exec $vgpr1_vgpr2_vgpr3 = COPY killed $vgpr8_vgpr9_vgpr10, implicit $exec ... @@ -586,6 +671,12 @@ body: | ; GFX10-NEXT: $vgpr0 = V_MOV_B32_e32 $vgpr7, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr7_vgpr8_vgpr9 ; GFX10-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr8, implicit $exec, implicit $vgpr7_vgpr8_vgpr9 ; GFX10-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr9, implicit $exec, implicit killed $vgpr7_vgpr8_vgpr9, implicit $exec + ; GFX1250-LABEL: name: copy_v96_unaligned_to_v96 + ; GFX1250: liveins: $vgpr7_vgpr8_vgpr9 + ; GFX1250-NEXT: {{ $}} + ; GFX1250-NEXT: $vgpr0 = V_MOV_B32_e32 $vgpr7, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr7_vgpr8_vgpr9 + ; GFX1250-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr8, implicit $exec, implicit $vgpr7_vgpr8_vgpr9 + ; GFX1250-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr9, implicit $exec, implicit killed $vgpr7_vgpr8_vgpr9, implicit $exec $vgpr0_vgpr1_vgpr2 = COPY killed $vgpr7_vgpr8_vgpr9, implicit $exec ... @@ -622,6 +713,12 @@ body: | ; GFX10-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $sgpr0_sgpr1_sgpr2 ; GFX10-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2 ; GFX10-NEXT: $vgpr2 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2, implicit $exec + ; GFX1250-LABEL: name: copy_s96_to_v96 + ; GFX1250: liveins: $sgpr0_sgpr1_sgpr2 + ; GFX1250-NEXT: {{ $}} + ; GFX1250-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $sgpr0_sgpr1_sgpr2 + ; GFX1250-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2 + ; GFX1250-NEXT: $vgpr2 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2, implicit $exec $vgpr0_vgpr1_vgpr2 = COPY killed $sgpr0_sgpr1_sgpr2, implicit $exec ... @@ -658,5 +755,11 @@ body: | ; GFX10-NEXT: $vgpr3 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit-def $vgpr1_vgpr2_vgpr3, implicit $sgpr0_sgpr1_sgpr2 ; GFX10-NEXT: $vgpr2 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2 ; GFX10-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2, implicit $exec + ; GFX1250-LABEL: name: copy_s96_to_v96_unaligned + ; GFX1250: liveins: $sgpr0_sgpr1_sgpr2 + ; GFX1250-NEXT: {{ $}} + ; GFX1250-NEXT: $vgpr3 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit-def $vgpr1_vgpr2_vgpr3, implicit $sgpr0_sgpr1_sgpr2 + ; GFX1250-NEXT: $vgpr2 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2 + ; GFX1250-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2, implicit $exec $vgpr1_vgpr2_vgpr3 = COPY killed $sgpr0_sgpr1_sgpr2, implicit $exec ... diff --git a/llvm/test/CodeGen/AMDGPU/global-load-xcnt.ll b/llvm/test/CodeGen/AMDGPU/global-load-xcnt.ll index d8079651787a..5d35adc8cbe0 100644 --- a/llvm/test/CodeGen/AMDGPU/global-load-xcnt.ll +++ b/llvm/test/CodeGen/AMDGPU/global-load-xcnt.ll @@ -65,15 +65,15 @@ define i16 @test_v7i16_load_store(ptr addrspace(1) %ptr1, ptr addrspace(1) %ptr2 ; GCN-SDAG-NEXT: s_wait_kmcnt 0x0 ; GCN-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off ; GCN-SDAG-NEXT: global_load_b128 v[0:3], v[2:3], off +; GCN-SDAG-NEXT: v_mov_b64_e32 v[8:9], 0 ; GCN-SDAG-NEXT: s_wait_loadcnt 0x0 ; GCN-SDAG-NEXT: v_pk_add_u16 v10, v6, v2 ; GCN-SDAG-NEXT: v_pk_add_u16 v11, v7, v3 ; GCN-SDAG-NEXT: s_wait_xcnt 0x0 -; GCN-SDAG-NEXT: v_dual_mov_b32 v2, 12 :: v_dual_mov_b32 v6, 8 -; GCN-SDAG-NEXT: v_dual_mov_b32 v3, 0 :: v_dual_mov_b32 v7, 0 -; GCN-SDAG-NEXT: v_mov_b32_e32 v8, 0 +; GCN-SDAG-NEXT: v_mov_b64_e32 v[2:3], 12 +; GCN-SDAG-NEXT: v_mov_b64_e32 v[6:7], 8 ; GCN-SDAG-NEXT: v_pk_add_u16 v4, v4, v0 -; GCN-SDAG-NEXT: v_dual_mov_b32 v9, 0 :: v_dual_lshrrev_b32 v0, 16, v10 +; GCN-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v10 ; GCN-SDAG-NEXT: v_pk_add_u16 v5, v5, v1 ; GCN-SDAG-NEXT: s_clause 0x2 ; GCN-SDAG-NEXT: global_store_b16 v[2:3], v11, off @@ -87,19 +87,17 @@ define i16 @test_v7i16_load_store(ptr addrspace(1) %ptr1, ptr addrspace(1) %ptr2 ; GCN-GISEL-NEXT: s_wait_kmcnt 0x0 ; GCN-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off ; GCN-GISEL-NEXT: global_load_b128 v[0:3], v[2:3], off -; GCN-GISEL-NEXT: v_dual_mov_b32 v8, 0 :: v_dual_mov_b32 v10, 2 -; GCN-GISEL-NEXT: v_dual_mov_b32 v9, 0 :: v_dual_mov_b32 v11, 0 -; GCN-GISEL-NEXT: v_dual_mov_b32 v12, 4 :: v_dual_mov_b32 v14, 6 -; GCN-GISEL-NEXT: v_dual_mov_b32 v13, 0 :: v_dual_mov_b32 v15, 0 -; GCN-GISEL-NEXT: v_dual_mov_b32 v16, 8 :: v_dual_mov_b32 v18, 10 -; GCN-GISEL-NEXT: v_dual_mov_b32 v17, 0 :: v_dual_mov_b32 v19, 0 +; GCN-GISEL-NEXT: v_mov_b64_e32 v[8:9], 0 +; GCN-GISEL-NEXT: v_mov_b64_e32 v[10:11], 2 +; GCN-GISEL-NEXT: v_mov_b64_e32 v[12:13], 4 +; GCN-GISEL-NEXT: v_mov_b64_e32 v[14:15], 6 +; GCN-GISEL-NEXT: v_mov_b64_e32 v[16:17], 8 +; GCN-GISEL-NEXT: v_mov_b64_e32 v[18:19], 10 +; GCN-GISEL-NEXT: v_mov_b64_e32 v[20:21], 12 ; GCN-GISEL-NEXT: s_wait_loadcnt 0x0 ; GCN-GISEL-NEXT: v_pk_add_u16 v2, v6, v2 ; GCN-GISEL-NEXT: v_pk_add_u16 v4, v4, v0 -; GCN-GISEL-NEXT: v_mov_b32_e32 v20, 12 ; GCN-GISEL-NEXT: v_pk_add_u16 v1, v5, v1 -; GCN-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) -; GCN-GISEL-NEXT: v_dual_mov_b32 v21, 0 :: v_dual_lshrrev_b32 v0, 16, v2 ; GCN-GISEL-NEXT: v_pk_add_u16 v3, v7, v3 ; GCN-GISEL-NEXT: s_clause 0x6 ; GCN-GISEL-NEXT: global_store_b16 v[8:9], v4, off @@ -109,6 +107,7 @@ define i16 @test_v7i16_load_store(ptr addrspace(1) %ptr1, ptr addrspace(1) %ptr2 ; GCN-GISEL-NEXT: global_store_b16 v[16:17], v2, off ; GCN-GISEL-NEXT: global_store_d16_hi_b16 v[18:19], v2, off ; GCN-GISEL-NEXT: global_store_b16 v[20:21], v3, off +; GCN-GISEL-NEXT: v_lshrrev_b32_e32 v0, 16, v2 ; GCN-GISEL-NEXT: s_set_pc_i64 s[30:31] %vec1 = load <7 x i16>, ptr addrspace(1) %ptr1 %insert = insertelement <7 x i16> %vec1, i16 20, i32 4 @@ -313,16 +312,15 @@ define i64 @test_v16i64_load_store(ptr addrspace(1) %ptr_a, ptr addrspace(1) %pt ; GCN-SDAG-NEXT: global_load_b128 v[22:25], v[0:1], off offset:16 ; GCN-SDAG-NEXT: global_load_b128 v[26:29], v[0:1], off ; GCN-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:64 -; GCN-SDAG-NEXT: v_mov_b32_e32 v16, 0x70 -; GCN-SDAG-NEXT: v_dual_mov_b32 v17, 0 :: v_dual_mov_b32 v50, 0x60 -; GCN-SDAG-NEXT: v_dual_mov_b32 v51, 0 :: v_dual_mov_b32 v52, 48 -; GCN-SDAG-NEXT: v_mov_b32_e32 v54, 32 -; GCN-SDAG-NEXT: v_dual_mov_b32 v38, 0x50 :: v_dual_mov_b32 v53, 0 -; GCN-SDAG-NEXT: v_dual_mov_b32 v55, 0 :: v_dual_mov_b32 v14, 0xc8 -; GCN-SDAG-NEXT: v_dual_mov_b32 v15, 0 :: v_dual_mov_b32 v39, 0 -; GCN-SDAG-NEXT: v_dual_mov_b32 v48, 64 :: v_dual_mov_b32 v40, 16 -; GCN-SDAG-NEXT: v_dual_mov_b32 v42, 0 :: v_dual_mov_b32 v49, 0 -; GCN-SDAG-NEXT: v_dual_mov_b32 v41, 0 :: v_dual_mov_b32 v43, 0 +; GCN-SDAG-NEXT: v_mov_b64_e32 v[16:17], 0x70 +; GCN-SDAG-NEXT: v_mov_b64_e32 v[50:51], 0x60 +; GCN-SDAG-NEXT: v_mov_b64_e32 v[52:53], 48 +; GCN-SDAG-NEXT: v_mov_b64_e32 v[38:39], 0x50 +; GCN-SDAG-NEXT: v_mov_b64_e32 v[54:55], 32 +; GCN-SDAG-NEXT: v_mov_b64_e32 v[48:49], 64 +; GCN-SDAG-NEXT: v_mov_b64_e32 v[40:41], 16 +; GCN-SDAG-NEXT: v_dual_mov_b32 v14, 0xc8 :: v_dual_mov_b32 v15, 0 +; GCN-SDAG-NEXT: v_mov_b64_e32 v[42:43], 0 ; GCN-SDAG-NEXT: s_wait_loadcnt 0x7 ; GCN-SDAG-NEXT: global_store_b128 v[16:17], v[6:9], off ; GCN-SDAG-NEXT: s_wait_loadcnt 0x6 @@ -401,21 +399,15 @@ define i64 @test_v16i64_load_store(ptr addrspace(1) %ptr_a, ptr addrspace(1) %pt ; GCN-GISEL-NEXT: global_load_b128 v[26:29], v[0:1], off offset:96 ; GCN-GISEL-NEXT: global_load_b128 v[30:33], v[0:1], off offset:112 ; GCN-GISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:64 -; GCN-GISEL-NEXT: v_mov_b32_e32 v34, 0xc8 -; GCN-GISEL-NEXT: v_dual_mov_b32 v35, 0 :: v_dual_mov_b32 v38, 0 -; GCN-GISEL-NEXT: v_dual_mov_b32 v39, 0 :: v_dual_mov_b32 v48, 16 -; GCN-GISEL-NEXT: v_dual_mov_b32 v50, 32 :: v_dual_mov_b32 v49, 0 -; GCN-GISEL-NEXT: v_dual_mov_b32 v51, 0 :: v_dual_mov_b32 v52, 48 -; GCN-GISEL-NEXT: v_mov_b32_e32 v54, 64 -; GCN-GISEL-NEXT: v_dual_mov_b32 v40, 0x50 :: v_dual_mov_b32 v53, 0 -; GCN-GISEL-NEXT: v_dual_mov_b32 v55, 0 :: v_dual_mov_b32 v41, 0 -; GCN-GISEL-NEXT: v_mov_b32_e32 v42, 0x60 -; GCN-GISEL-NEXT: v_dual_mov_b32 v44, 0x70 :: v_dual_mov_b32 v43, 0 -; GCN-GISEL-NEXT: s_wait_loadcnt 0x7 -; GCN-GISEL-NEXT: v_dual_mov_b32 v45, 0 :: v_dual_mov_b32 v37, v9 -; GCN-GISEL-NEXT: v_mov_b32_e32 v36, v8 -; GCN-GISEL-NEXT: v_lshl_add_u64 v[6:7], v[6:7], 0, 0xc8 -; GCN-GISEL-NEXT: v_lshl_add_u64 v[8:9], v[8:9], 0, v[8:9] +; GCN-GISEL-NEXT: v_mov_b64_e32 v[38:39], 0 +; GCN-GISEL-NEXT: v_mov_b64_e32 v[48:49], 16 +; GCN-GISEL-NEXT: v_mov_b64_e32 v[50:51], 32 +; GCN-GISEL-NEXT: v_mov_b64_e32 v[52:53], 48 +; GCN-GISEL-NEXT: v_mov_b64_e32 v[54:55], 64 +; GCN-GISEL-NEXT: v_mov_b64_e32 v[34:35], 0xc8 +; GCN-GISEL-NEXT: v_mov_b64_e32 v[40:41], 0x50 +; GCN-GISEL-NEXT: v_mov_b64_e32 v[42:43], 0x60 +; GCN-GISEL-NEXT: v_mov_b64_e32 v[44:45], 0x70 ; GCN-GISEL-NEXT: s_wait_loadcnt 0x6 ; GCN-GISEL-NEXT: global_store_b128 v[38:39], v[10:13], off ; GCN-GISEL-NEXT: s_wait_loadcnt 0x5 @@ -428,6 +420,7 @@ define i64 @test_v16i64_load_store(ptr addrspace(1) %ptr_a, ptr addrspace(1) %pt ; GCN-GISEL-NEXT: global_store_b128 v[42:43], v[26:29], off ; GCN-GISEL-NEXT: s_wait_loadcnt 0x1 ; GCN-GISEL-NEXT: global_store_b128 v[44:45], v[30:33], off +; GCN-GISEL-NEXT: v_mov_b64_e32 v[36:37], v[8:9] ; GCN-GISEL-NEXT: s_wait_xcnt 0x5 ; GCN-GISEL-NEXT: v_lshl_add_u64 v[10:11], v[10:11], 0, v[10:11] ; GCN-GISEL-NEXT: v_lshl_add_u64 v[12:13], v[12:13], 0, v[12:13] @@ -443,6 +436,8 @@ define i64 @test_v16i64_load_store(ptr addrspace(1) %ptr_a, ptr addrspace(1) %pt ; GCN-GISEL-NEXT: s_wait_loadcnt 0x0 ; GCN-GISEL-NEXT: v_lshl_add_u64 v[48:49], v[0:1], 0, v[0:1] ; GCN-GISEL-NEXT: v_lshl_add_u64 v[50:51], v[2:3], 0, v[2:3] +; GCN-GISEL-NEXT: v_lshl_add_u64 v[6:7], v[6:7], 0, 0xc8 +; GCN-GISEL-NEXT: v_lshl_add_u64 v[8:9], v[8:9], 0, v[8:9] ; GCN-GISEL-NEXT: s_wait_xcnt 0x1 ; GCN-GISEL-NEXT: v_lshl_add_u64 v[26:27], v[26:27], 0, v[26:27] ; GCN-GISEL-NEXT: v_lshl_add_u64 v[28:29], v[28:29], 0, v[28:29] @@ -490,11 +485,10 @@ define amdgpu_kernel void @test_v7i16_load_store_kernel(ptr addrspace(1) %ptr1, ; GCN-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GCN-SDAG-NEXT: s_wait_xcnt 0x0 ; GCN-SDAG-NEXT: s_load_b64 s[4:5], s[4:5], 0x10 -; GCN-SDAG-NEXT: v_mov_b32_e32 v8, 12 -; GCN-SDAG-NEXT: v_dual_mov_b32 v9, 0 :: v_dual_mov_b32 v10, 8 -; GCN-SDAG-NEXT: v_dual_mov_b32 v11, 0 :: v_dual_lshlrev_b32 v4, 4, v0 -; GCN-SDAG-NEXT: v_mov_b32_e32 v12, 0 -; GCN-SDAG-NEXT: v_mov_b32_e32 v13, 0 +; GCN-SDAG-NEXT: v_mov_b64_e32 v[8:9], 12 +; GCN-SDAG-NEXT: v_mov_b64_e32 v[10:11], 8 +; GCN-SDAG-NEXT: v_mov_b64_e32 v[12:13], 0 +; GCN-SDAG-NEXT: v_lshlrev_b32_e32 v4, 4, v0 ; GCN-SDAG-NEXT: s_wait_kmcnt 0x0 ; GCN-SDAG-NEXT: s_clause 0x1 ; GCN-SDAG-NEXT: global_load_b128 v[0:3], v4, s[0:1] @@ -518,15 +512,14 @@ define amdgpu_kernel void @test_v7i16_load_store_kernel(ptr addrspace(1) %ptr1, ; GCN-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GCN-GISEL-NEXT: s_wait_xcnt 0x0 ; GCN-GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x10 -; GCN-GISEL-NEXT: v_mov_b32_e32 v8, 0 -; GCN-GISEL-NEXT: v_dual_mov_b32 v9, 0 :: v_dual_mov_b32 v10, 2 -; GCN-GISEL-NEXT: v_dual_mov_b32 v11, 0 :: v_dual_lshlrev_b32 v4, 4, v0 -; GCN-GISEL-NEXT: v_mov_b32_e32 v12, 4 -; GCN-GISEL-NEXT: v_dual_mov_b32 v13, 0 :: v_dual_mov_b32 v14, 6 -; GCN-GISEL-NEXT: v_dual_mov_b32 v15, 0 :: v_dual_mov_b32 v16, 8 -; GCN-GISEL-NEXT: v_dual_mov_b32 v17, 0 :: v_dual_mov_b32 v18, 10 -; GCN-GISEL-NEXT: v_dual_mov_b32 v19, 0 :: v_dual_mov_b32 v20, 12 -; GCN-GISEL-NEXT: v_mov_b32_e32 v21, 0 +; GCN-GISEL-NEXT: v_mov_b64_e32 v[8:9], 0 +; GCN-GISEL-NEXT: v_mov_b64_e32 v[10:11], 2 +; GCN-GISEL-NEXT: v_mov_b64_e32 v[12:13], 4 +; GCN-GISEL-NEXT: v_lshlrev_b32_e32 v4, 4, v0 +; GCN-GISEL-NEXT: v_mov_b64_e32 v[14:15], 6 +; GCN-GISEL-NEXT: v_mov_b64_e32 v[16:17], 8 +; GCN-GISEL-NEXT: v_mov_b64_e32 v[18:19], 10 +; GCN-GISEL-NEXT: v_mov_b64_e32 v[20:21], 12 ; GCN-GISEL-NEXT: s_wait_kmcnt 0x0 ; GCN-GISEL-NEXT: s_clause 0x1 ; GCN-GISEL-NEXT: global_load_b128 v[0:3], v4, s[0:1] diff --git a/llvm/test/CodeGen/AMDGPU/v_mov_b64_expansion.mir b/llvm/test/CodeGen/AMDGPU/v_mov_b64_expansion.mir index 7a8feff59c1f..70e298745419 100644 --- a/llvm/test/CodeGen/AMDGPU/v_mov_b64_expansion.mir +++ b/llvm/test/CodeGen/AMDGPU/v_mov_b64_expansion.mir @@ -1,12 +1,14 @@ -# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass postrapseudos -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GCN,GFX900 %s -# RUN: llc -mtriple=amdgcn -mcpu=gfx90a -run-pass postrapseudos -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GCN,GFX90A %s -# RUN: llc -mtriple=amdgcn -mcpu=gfx942 -run-pass postrapseudos -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GCN,GFX942 %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass postrapseudos -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GCN,GFX900,NOT-GFX1250 %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx90a -run-pass postrapseudos -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GCN,GFX90A,NOT-GFX1250 %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx942 -run-pass postrapseudos -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GCN,GFX942,NOT-GFX1250 %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -run-pass postrapseudos %s -o - | FileCheck -check-prefixes=GCN,GFX1250 %s # GCN-LABEL: name: v_mov_b64_from_vgpr # GFX900: $vgpr0 = V_MOV_B32_e32 $vgpr2, implicit $exec, implicit-def $vgpr0_vgpr1 # GFX900: $vgpr1 = V_MOV_B32_e32 $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1 # GFX90A: $vgpr0_vgpr1 = V_PK_MOV_B32 8, $vgpr2_vgpr3, 12, $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec # GFX942: $vgpr0_vgpr1 = V_MOV_B64_e32 $vgpr2_vgpr3, implicit $exec +# GFX1250: $vgpr0_vgpr1 = V_MOV_B64_e32 $vgpr2_vgpr3, implicit $exec name: v_mov_b64_from_vgpr body: | bb.0: @@ -18,6 +20,7 @@ body: | # GFX900: $vgpr1 = V_MOV_B32_e32 $sgpr3, implicit $exec, implicit-def $vgpr0_vgpr1 # GFX90A: $vgpr0_vgpr1 = V_PK_MOV_B32 8, $sgpr2_sgpr3, 12, $sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec # GFX942: $vgpr0_vgpr1 = V_MOV_B64_e32 $sgpr2_sgpr3, implicit $exec +# GFX1250: $vgpr0_vgpr1 = V_MOV_B64_e32 $sgpr2_sgpr3, implicit $exec name: v_mov_b64_from_sgpr body: | bb.0: @@ -30,6 +33,7 @@ body: | # GFX90A: $vgpr0 = V_MOV_B32_e32 -2, implicit $exec, implicit-def $vgpr0_vgpr1 # GFX90A: $vgpr1 = V_MOV_B32_e32 -1, implicit $exec, implicit-def $vgpr0_vgpr1 # GFX942: $vgpr0_vgpr1 = V_MOV_B64_e32 -2, implicit $exec +# GFX1250: $vgpr0_vgpr1 = V_MOV_B64_e32 -2, implicit $exec name: v_mov_b64_from_sext_inline_imm body: | bb.0: @@ -37,8 +41,9 @@ body: | ... # GCN-LABEL: name: v_mov_b64_from_lit -# GCN: $vgpr0 = V_MOV_B32_e32 1430494974, implicit $exec, implicit-def $vgpr0_vgpr1 -# GCN: $vgpr1 = V_MOV_B32_e32 -232831, implicit $exec, implicit-def $vgpr0_vgpr1 +# NOT-GFX1250: $vgpr0 = V_MOV_B32_e32 1430494974, implicit $exec, implicit-def $vgpr0_vgpr1 +# NOT-GFX1250: $vgpr1 = V_MOV_B32_e32 -232831, implicit $exec, implicit-def $vgpr0_vgpr1 +# GFX1250: $vgpr0_vgpr1 = V_MOV_B64_e32 -1000000100000002, implicit $exec name: v_mov_b64_from_lit body: | bb.0: @@ -46,8 +51,9 @@ body: | ... # GCN-LABEL: name: v_mov_b64_from_first_inline_imm -# GCN: $vgpr0 = V_MOV_B32_e32 -1, implicit $exec, implicit-def $vgpr0_vgpr1 -# GCN: $vgpr1 = V_MOV_B32_e32 268435455, implicit $exec, implicit-def $vgpr0_vgpr1 +# NOT-GFX1250: $vgpr0 = V_MOV_B32_e32 -1, implicit $exec, implicit-def $vgpr0_vgpr1 +# NOT-GFX1250: $vgpr1 = V_MOV_B32_e32 268435455, implicit $exec, implicit-def $vgpr0_vgpr1 +# GFX1250: $vgpr0_vgpr1 = V_MOV_B64_e32 1152921504606846975, implicit $exec name: v_mov_b64_from_first_inline_imm body: | bb.0: @@ -55,8 +61,9 @@ body: | ... # GCN-LABEL: name: v_mov_b64_from_second_inline_imm -# GCN: $vgpr0 = V_MOV_B32_e32 268435455, implicit $exec, implicit-def $vgpr0_vgpr1 -# GCN: $vgpr1 = V_MOV_B32_e32 -1, implicit $exec, implicit-def $vgpr0_vgpr1 +# NOT-GFX1250: $vgpr0 = V_MOV_B32_e32 268435455, implicit $exec, implicit-def $vgpr0_vgpr1 +# NOT-GFX1250: $vgpr1 = V_MOV_B32_e32 -1, implicit $exec, implicit-def $vgpr0_vgpr1 +# GFX1250: $vgpr0_vgpr1 = V_MOV_B64_e32 -4026531841, implicit $exec name: v_mov_b64_from_second_inline_imm body: | bb.0: @@ -68,6 +75,7 @@ body: | # GFX900: $vgpr1 = V_MOV_B32_e32 -1, implicit $exec, implicit-def $vgpr0_vgpr1 # GFX90A: $vgpr0_vgpr1 = V_PK_MOV_B32 8, -1, 8, -1, 0, 0, 0, 0, 0, implicit $exec # GFX942: $vgpr0_vgpr1 = V_MOV_B64_e32 -1, implicit $exec +# GFX1250: $vgpr0_vgpr1 = V_MOV_B64_e32 -1, implicit $exec name: v_mov_b64_from_same_sext_inline_imm body: | bb.0: @@ -79,6 +87,7 @@ body: | # GFX900: $vgpr1 = V_MOV_B32_e32 1065353216, implicit $exec, implicit-def $vgpr0_vgpr1 # GFX90A: $vgpr0_vgpr1 = V_PK_MOV_B32 8, 1065353216, 8, 1065353216, 0, 0, 0, 0, 0, implicit $exec # GFX942: $vgpr0_vgpr1 = V_PK_MOV_B32 8, 1065353216, 8, 1065353216, 0, 0, 0, 0, 0, implicit $exec +# GFX1250: $vgpr0_vgpr1 = V_MOV_B64_e32 4575657222473777152, implicit $exec name: v_mov_b64_from_same_fp_inline_imm body: | bb.0: