
https://github.com/llvm/llvm-project/pull/141152 causes an issue in v_s_xxx_f16 lowering in both true16/fake16 flow. V_S_XXX_F16 are special insts which has scalar input/output but in VALU VOP3 format. Need to keep the srcmod/clamp/omod when lower it to its corresponding VALU inst with vector input/output.
79 lines
3.8 KiB
YAML
79 lines
3.8 KiB
YAML
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
|
|
# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -run-pass=si-fix-sgpr-copies -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GCN %s
|
|
|
|
---
|
|
name: v_s_exp_f16
|
|
body: |
|
|
bb.0.entry:
|
|
; GCN-LABEL: name: v_s_exp_f16
|
|
; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
|
; GCN-NEXT: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[DEF]], 0, 0, implicit $mode, implicit $exec
|
|
; GCN-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
|
|
; GCN-NEXT: [[V_EXP_F16_t16_e64_:%[0-9]+]]:vgpr_16 = V_EXP_F16_t16_e64 1, [[V_CVT_F32_U32_e64_]].lo16, 1, 1, 0, implicit $mode, implicit $exec
|
|
%0:vgpr_32 = IMPLICIT_DEF
|
|
%1:vgpr_32 = V_CVT_F32_U32_e64 %0:vgpr_32, 0, 0, implicit $mode, implicit $exec
|
|
%2:sreg_32 = COPY %1:vgpr_32
|
|
%3:sreg_32_xexec = V_S_EXP_F16_e64 1, %2:sreg_32, 1, 1, implicit $mode, implicit $exec
|
|
...
|
|
|
|
---
|
|
name: v_s_log_f16
|
|
body: |
|
|
bb.0.entry:
|
|
; GCN-LABEL: name: v_s_log_f16
|
|
; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
|
; GCN-NEXT: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[DEF]], 0, 0, implicit $mode, implicit $exec
|
|
; GCN-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
|
|
; GCN-NEXT: [[V_LOG_F16_t16_e64_:%[0-9]+]]:vgpr_16 = V_LOG_F16_t16_e64 1, [[V_CVT_F32_U32_e64_]].lo16, 1, 1, 0, implicit $mode, implicit $exec
|
|
%0:vgpr_32 = IMPLICIT_DEF
|
|
%1:vgpr_32 = V_CVT_F32_U32_e64 %0:vgpr_32, 0, 0, implicit $mode, implicit $exec
|
|
%2:sreg_32 = COPY %1:vgpr_32
|
|
%3:sreg_32_xexec = V_S_LOG_F16_e64 1, %2:sreg_32, 1, 1, implicit $mode, implicit $exec
|
|
...
|
|
|
|
---
|
|
name: v_s_rcp_f16
|
|
body: |
|
|
bb.0.entry:
|
|
; GCN-LABEL: name: v_s_rcp_f16
|
|
; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
|
; GCN-NEXT: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[DEF]], 0, 0, implicit $mode, implicit $exec
|
|
; GCN-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
|
|
; GCN-NEXT: [[V_RCP_F16_t16_e64_:%[0-9]+]]:vgpr_16 = V_RCP_F16_t16_e64 1, [[V_CVT_F32_U32_e64_]].lo16, 1, 1, 0, implicit $mode, implicit $exec
|
|
%0:vgpr_32 = IMPLICIT_DEF
|
|
%1:vgpr_32 = V_CVT_F32_U32_e64 %0:vgpr_32, 0, 0, implicit $mode, implicit $exec
|
|
%2:sreg_32 = COPY %1:vgpr_32
|
|
%3:sreg_32_xexec = V_S_RCP_F16_e64 1, %2:sreg_32, 1, 1, implicit $mode, implicit $exec
|
|
...
|
|
|
|
---
|
|
name: v_s_rsq_f16
|
|
body: |
|
|
bb.0.entry:
|
|
; GCN-LABEL: name: v_s_rsq_f16
|
|
; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
|
; GCN-NEXT: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[DEF]], 0, 0, implicit $mode, implicit $exec
|
|
; GCN-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
|
|
; GCN-NEXT: [[V_RSQ_F16_t16_e64_:%[0-9]+]]:vgpr_16 = V_RSQ_F16_t16_e64 1, [[V_CVT_F32_U32_e64_]].lo16, 1, 1, 0, implicit $mode, implicit $exec
|
|
%0:vgpr_32 = IMPLICIT_DEF
|
|
%1:vgpr_32 = V_CVT_F32_U32_e64 %0:vgpr_32, 0, 0, implicit $mode, implicit $exec
|
|
%2:sreg_32 = COPY %1:vgpr_32
|
|
%3:sreg_32_xexec = V_S_RSQ_F16_e64 1, %2:sreg_32, 1, 1, implicit $mode, implicit $exec
|
|
...
|
|
|
|
---
|
|
name: v_s_sqrt_f16
|
|
body: |
|
|
bb.0.entry:
|
|
; GCN-LABEL: name: v_s_sqrt_f16
|
|
; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
|
; GCN-NEXT: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[DEF]], 0, 0, implicit $mode, implicit $exec
|
|
; GCN-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
|
|
; GCN-NEXT: [[V_SQRT_F16_t16_e64_:%[0-9]+]]:vgpr_16 = V_SQRT_F16_t16_e64 1, [[V_CVT_F32_U32_e64_]].lo16, 1, 1, 0, implicit $mode, implicit $exec
|
|
%0:vgpr_32 = IMPLICIT_DEF
|
|
%1:vgpr_32 = V_CVT_F32_U32_e64 %0:vgpr_32, 0, 0, implicit $mode, implicit $exec
|
|
%2:sreg_32 = COPY %1:vgpr_32
|
|
%3:sreg_32_xexec = V_S_SQRT_F16_e64 1, %2:sreg_32, 1, 1, implicit $mode, implicit $exec
|
|
...
|
|
|