llvm-project/llvm/test/CodeGen/SystemZ/fp-half-vector-mem.ll
Jonas Paulsson c999e9a4fe
[SystemZ] Support fp16 vector ABI and basic codegen. (#171066)
- Make v8f16 a legal type so that arguments can be passed in vector
registers. Handle fp16 vectors so that they have the same ABI as other
fp vectors.

- Set the preferred vector action for fp16 vectors to "split". This will
scalarize all operations, which is not always necessary (like with
memory operations), but it avoids the superfluous operations that result
after first widening and then scalarizing a narrow vector (like v4f16).

Fixes #168992
2026-01-26 13:42:25 -06:00

127 lines
3.7 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z16 | FileCheck %s --check-prefix=VECTOR
;
; Test loading and storing of fp16 vectors.
; fun0: copy a <8 x half> (16 bytes) from %Src to %Dst.
; Expected output without -mcpu: every element is handled on its own. Each of
; the eight halfwords at offsets 0..14 of %r2 goes through lgh / sllg 48 / ldgr
; into %f0-%f7, and is then written back through lgdr / srlg 48 / sth to the
; matching offset of %r3.
; Expected output with -mcpu=z16: a single vl from %r2 and a single vst to %r3.
define void @fun0(ptr %Src, ptr %Dst) {
; CHECK-LABEL: fun0:
; CHECK: # %bb.0:
; CHECK-NEXT: lgh %r0, 0(%r2)
; CHECK-NEXT: lgh %r1, 2(%r2)
; CHECK-NEXT: sllg %r0, %r0, 48
; CHECK-NEXT: ldgr %f0, %r0
; CHECK-NEXT: lgh %r0, 4(%r2)
; CHECK-NEXT: sllg %r1, %r1, 48
; CHECK-NEXT: ldgr %f1, %r1
; CHECK-NEXT: lgh %r1, 6(%r2)
; CHECK-NEXT: sllg %r0, %r0, 48
; CHECK-NEXT: ldgr %f2, %r0
; CHECK-NEXT: lgh %r0, 8(%r2)
; CHECK-NEXT: sllg %r1, %r1, 48
; CHECK-NEXT: ldgr %f3, %r1
; CHECK-NEXT: lgh %r1, 10(%r2)
; CHECK-NEXT: sllg %r0, %r0, 48
; CHECK-NEXT: ldgr %f4, %r0
; CHECK-NEXT: lgh %r0, 12(%r2)
; CHECK-NEXT: sllg %r1, %r1, 48
; CHECK-NEXT: lgh %r2, 14(%r2)
; CHECK-NEXT: ldgr %f5, %r1
; CHECK-NEXT: sllg %r0, %r0, 48
; CHECK-NEXT: ldgr %f6, %r0
; CHECK-NEXT: sllg %r0, %r2, 48
; CHECK-NEXT: ldgr %f7, %r0
; CHECK-NEXT: lgdr %r0, %f7
; CHECK-NEXT: srlg %r0, %r0, 48
; CHECK-NEXT: sth %r0, 14(%r3)
; CHECK-NEXT: lgdr %r0, %f6
; CHECK-NEXT: srlg %r0, %r0, 48
; CHECK-NEXT: sth %r0, 12(%r3)
; CHECK-NEXT: lgdr %r0, %f5
; CHECK-NEXT: srlg %r0, %r0, 48
; CHECK-NEXT: sth %r0, 10(%r3)
; CHECK-NEXT: lgdr %r0, %f4
; CHECK-NEXT: srlg %r0, %r0, 48
; CHECK-NEXT: sth %r0, 8(%r3)
; CHECK-NEXT: lgdr %r0, %f3
; CHECK-NEXT: srlg %r0, %r0, 48
; CHECK-NEXT: sth %r0, 6(%r3)
; CHECK-NEXT: lgdr %r0, %f2
; CHECK-NEXT: srlg %r0, %r0, 48
; CHECK-NEXT: sth %r0, 4(%r3)
; CHECK-NEXT: lgdr %r0, %f1
; CHECK-NEXT: srlg %r0, %r0, 48
; CHECK-NEXT: sth %r0, 2(%r3)
; CHECK-NEXT: lgdr %r0, %f0
; CHECK-NEXT: srlg %r0, %r0, 48
; CHECK-NEXT: sth %r0, 0(%r3)
; CHECK-NEXT: br %r14
;
; VECTOR-LABEL: fun0:
; VECTOR: # %bb.0:
; VECTOR-NEXT: vl %v0, 0(%r2), 3
; VECTOR-NEXT: vst %v0, 0(%r3), 3
; VECTOR-NEXT: br %r14
%L = load <8 x half>, ptr %Src
store <8 x half> %L, ptr %Dst
ret void
}
; fun1: copy a <4 x half> from %Src to %Dst.
; Expected output without -mcpu: offset 0 is moved by one l / st pair, while
; the elements at offsets 4 and 6 each go through lgh / sllg 48 / ldgr and are
; written back through lgdr / srlg 48 / sth.
; Expected output with -mcpu=z16: offset 0 is again moved by one l / st pair;
; the elements at offsets 4 and 6 are loaded with vlreph and stored with vsteh
; (element index 0).
define void @fun1(ptr %Src, ptr %Dst) {
; CHECK-LABEL: fun1:
; CHECK: # %bb.0:
; CHECK-NEXT: lgh %r0, 4(%r2)
; CHECK-NEXT: lgh %r1, 6(%r2)
; CHECK-NEXT: l %r2, 0(%r2)
; CHECK-NEXT: sllg %r0, %r0, 48
; CHECK-NEXT: ldgr %f0, %r0
; CHECK-NEXT: sllg %r0, %r1, 48
; CHECK-NEXT: ldgr %f1, %r0
; CHECK-NEXT: st %r2, 0(%r3)
; CHECK-NEXT: lgdr %r0, %f1
; CHECK-NEXT: srlg %r0, %r0, 48
; CHECK-NEXT: sth %r0, 6(%r3)
; CHECK-NEXT: lgdr %r0, %f0
; CHECK-NEXT: srlg %r0, %r0, 48
; CHECK-NEXT: sth %r0, 4(%r3)
; CHECK-NEXT: br %r14
;
; VECTOR-LABEL: fun1:
; VECTOR: # %bb.0:
; VECTOR-NEXT: l %r0, 0(%r2)
; VECTOR-NEXT: vlreph %v0, 4(%r2)
; VECTOR-NEXT: vlreph %v1, 6(%r2)
; VECTOR-NEXT: vsteh %v1, 6(%r3), 0
; VECTOR-NEXT: vsteh %v0, 4(%r3), 0
; VECTOR-NEXT: st %r0, 0(%r3)
; VECTOR-NEXT: br %r14
%L = load <4 x half>, ptr %Src
store <4 x half> %L, ptr %Dst
ret void
}
; fun2: copy a <16 x half> from %Src to %Dst.
; Expected output without -mcpu: four lg / stg pairs covering offsets 0, 8, 16
; and 24; no per-element sequences appear.
; Expected output with -mcpu=z16: two vl / vst pairs covering offsets 0 and 16.
define void @fun2(ptr %Src, ptr %Dst) {
; CHECK-LABEL: fun2:
; CHECK: # %bb.0:
; CHECK-NEXT: lg %r0, 24(%r2)
; CHECK-NEXT: lg %r1, 16(%r2)
; CHECK-NEXT: lg %r4, 8(%r2)
; CHECK-NEXT: lg %r2, 0(%r2)
; CHECK-NEXT: stg %r0, 24(%r3)
; CHECK-NEXT: stg %r1, 16(%r3)
; CHECK-NEXT: stg %r4, 8(%r3)
; CHECK-NEXT: stg %r2, 0(%r3)
; CHECK-NEXT: br %r14
;
; VECTOR-LABEL: fun2:
; VECTOR: # %bb.0:
; VECTOR-NEXT: vl %v0, 16(%r2), 4
; VECTOR-NEXT: vl %v1, 0(%r2), 4
; VECTOR-NEXT: vst %v1, 0(%r3), 4
; VECTOR-NEXT: vst %v0, 16(%r3), 4
; VECTOR-NEXT: br %r14
%L = load <16 x half>, ptr %Src
store <16 x half> %L, ptr %Dst
ret void
}