In review of bbde6b, I had originally proposed that we support the legacy text format. As review evolved, it bacame clear this had been a bad idea (too much complexity), but in order to let that patch finally move forward, I approved the change with the variant. This change undoes the variant, and updates all the tests to just use the array form.
154 lines
8.1 KiB
YAML
154 lines
8.1 KiB
YAML
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
|
# RUN: llc -O2 -mtriple=x86_64-unknown-unknown -mattr=+amx-tile,+amx-bf16,+avx512f, \
|
|
# RUN: -mattr=+amx-transpose -run-pass=greedy,tileconfig -o - %s | FileCheck %s
|
|
|
|
--- |
|
|
@buf = dso_local global [2048 x i8] zeroinitializer, align 16
|
|
@buf2 = dso_local global [2048 x i8] zeroinitializer, align 16
|
|
|
|
define dso_local void @test_tile_2rpntlvwz0(i16 noundef signext %row, i16 noundef signext %col0, i16 noundef signext %col1) local_unnamed_addr #0 {
|
|
entry:
|
|
%0 = tail call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0.internal(i16 %row, i16 %col0, i16 %col1, i8* getelementptr inbounds ([2048 x i8], [2048 x i8]* @buf, i64 0, i64 0), i64 32) #5
|
|
%1 = extractvalue { x86_amx, x86_amx } %0, 0
|
|
%2 = extractvalue { x86_amx, x86_amx } %0, 1
|
|
%3 = tail call x86_amx @llvm.x86.tilezero.internal(i16 %row, i16 %col0) #5
|
|
%4 = tail call x86_amx @llvm.x86.tdpbssd.internal(i16 %row, i16 %col1, i16 %col0, x86_amx %3, x86_amx %1, x86_amx %2) #5
|
|
tail call void @llvm.x86.tilestored64.internal(i16 %row, i16 %col0, i8* getelementptr inbounds ([2048 x i8], [2048 x i8]* @buf2, i64 0, i64 0), i64 32, x86_amx %4) #5
|
|
ret void
|
|
}
|
|
|
|
declare { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0.internal(i16, i16, i16, i8*, i64) #1
|
|
|
|
declare <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx) #2
|
|
|
|
declare x86_amx @llvm.x86.tilezero.internal(i16, i16) #3
|
|
|
|
declare x86_amx @llvm.x86.tdpbssd.internal(i16, i16, i16, x86_amx, x86_amx, x86_amx) #3
|
|
|
|
declare x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32>) #2
|
|
|
|
declare void @llvm.x86.tilestored64.internal(i16, i16, i8*, i64, x86_amx) #4
|
|
|
|
attributes #0 = { nounwind uwtable "frame-pointer"="all" "min-legal-vector-width"="8192" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+amx-bf16,+amx-int8,+amx-tile,+amx-transpose,+avx,+avx2,+avx512f,+crc32,+cx8,+f16c,+fma,+fxsr,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+amx-tile,+amx-bf16,+avx512f,+amx-transpose" "tune-cpu"="generic" }
|
|
attributes #1 = { argmemonly nounwind readonly "target-features"="+amx-tile,+amx-bf16,+avx512f,+amx-transpose" }
|
|
attributes #2 = { nounwind readnone "target-features"="+amx-tile,+amx-bf16,+avx512f,+amx-transpose" }
|
|
attributes #3 = { nounwind "target-features"="+amx-tile,+amx-bf16,+avx512f,+amx-transpose" }
|
|
attributes #4 = { argmemonly nounwind writeonly "target-features"="+amx-tile,+amx-bf16,+avx512f,+amx-transpose" }
|
|
attributes #5 = { nounwind }
|
|
|
|
...
|
|
---
|
|
name: test_tile_2rpntlvwz0
|
|
alignment: 16
|
|
exposesReturnsTwice: false
|
|
legalized: false
|
|
regBankSelected: false
|
|
selected: false
|
|
failedISel: false
|
|
tracksRegLiveness: true
|
|
hasWinCFI: false
|
|
callsEHReturn: false
|
|
callsUnwindInit: false
|
|
hasEHContTarget: false
|
|
hasEHScopes: false
|
|
hasEHFunclets: false
|
|
failsVerification: false
|
|
tracksDebugUserValues: false
|
|
registers:
|
|
- { id: 0, class: gr32, preferred-register: '' }
|
|
- { id: 1, class: gr32, preferred-register: '' }
|
|
- { id: 2, class: gr32, preferred-register: '' }
|
|
- { id: 3, class: gr16, preferred-register: '' }
|
|
- { id: 4, class: gr16, preferred-register: '' }
|
|
- { id: 5, class: gr16, preferred-register: '' }
|
|
- { id: 6, class: gr64, preferred-register: '' }
|
|
- { id: 7, class: gr64_nosp, preferred-register: '' }
|
|
- { id: 8, class: tilepair, preferred-register: '' }
|
|
- { id: 9, class: tile, preferred-register: '' }
|
|
- { id: 10, class: tile, preferred-register: '' }
|
|
- { id: 11, class: tile, preferred-register: '' }
|
|
- { id: 12, class: tile, preferred-register: '' }
|
|
- { id: 13, class: gr64, preferred-register: '' }
|
|
- { id: 14, class: vr512, preferred-register: '' }
|
|
liveins:
|
|
- { reg: '$edi', virtual-reg: '%0' }
|
|
- { reg: '$esi', virtual-reg: '%1' }
|
|
- { reg: '$edx', virtual-reg: '%2' }
|
|
frameInfo:
|
|
isFrameAddressTaken: false
|
|
isReturnAddressTaken: false
|
|
hasStackMap: false
|
|
hasPatchPoint: false
|
|
stackSize: 0
|
|
offsetAdjustment: 0
|
|
maxAlignment: 4
|
|
adjustsStack: false
|
|
hasCalls: false
|
|
stackProtector: ''
|
|
functionContext: ''
|
|
maxCallFrameSize: 4294967295
|
|
cvBytesOfCalleeSavedRegisters: 0
|
|
hasOpaqueSPAdjustment: false
|
|
hasVAStart: false
|
|
hasMustTailInVarArgFunc: false
|
|
hasTailCall: false
|
|
localFrameSize: 0
|
|
savePoint: []
|
|
restorePoint: []
|
|
fixedStack: []
|
|
stack:
|
|
- { id: 0, name: '', type: default, offset: 0, size: 64, alignment: 4,
|
|
stack-id: default, callee-saved-register: '', callee-saved-restored: true,
|
|
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
|
|
callSites: []
|
|
debugValueSubstitutions: []
|
|
constants: []
|
|
machineFunctionInfo:
|
|
amxProgModel: ManagedRA
|
|
body: |
|
|
bb.0.entry:
|
|
liveins: $edi, $esi, $edx
|
|
|
|
|
|
; CHECK-LABEL: name: test_tile_2rpntlvwz0
|
|
; CHECK: liveins: $edi, $esi, $edx
|
|
; CHECK-NEXT: {{ $}}
|
|
; CHECK-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY $edx
|
|
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr32 = COPY $esi
|
|
; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr32 = COPY $edi
|
|
; CHECK-NEXT: [[AVX512_512_SET0_:%[0-9]+]]:vr512 = AVX512_512_SET0
|
|
; CHECK-NEXT: VMOVUPSZmr %stack.0, 1, $noreg, 0, $noreg, [[AVX512_512_SET0_]] :: (store (s512) into %stack.0, align 4)
|
|
; CHECK-NEXT: MOV8mi %stack.0, 1, $noreg, 0, $noreg, 1 :: (store (s512) into %stack.0, align 4)
|
|
; CHECK-NEXT: MOV16mr %stack.0, 1, $noreg, 26, $noreg, [[COPY]].sub_16bit :: (store (s512) into %stack.0 + 26, align 2, basealign 4)
|
|
; CHECK-NEXT: MOV8mr %stack.0, 1, $noreg, 53, $noreg, [[COPY2]].sub_8bit :: (store (s512) into %stack.0 + 53, align 1, basealign 4)
|
|
; CHECK-NEXT: MOV16mr %stack.0, 1, $noreg, 24, $noreg, [[COPY1]].sub_16bit :: (store (s512) into %stack.0 + 24, align 4)
|
|
; CHECK-NEXT: MOV8mr %stack.0, 1, $noreg, 52, $noreg, [[COPY2]].sub_8bit :: (store (s512) into %stack.0 + 52, align 4)
|
|
; CHECK-NEXT: MOV16mr %stack.0, 1, $noreg, 16, $noreg, [[COPY]].sub_16bit :: (store (s512) into %stack.0 + 16, align 4)
|
|
; CHECK-NEXT: MOV8mr %stack.0, 1, $noreg, 48, $noreg, [[COPY2]].sub_8bit :: (store (s512) into %stack.0 + 48, align 4)
|
|
; CHECK-NEXT: PLDTILECFGV %stack.0, 1, $noreg, 0, $noreg, implicit-def dead $tmm0, implicit-def dead $tmm1, implicit-def dead $tmm2, implicit-def dead $tmm3, implicit-def dead $tmm4, implicit-def dead $tmm5, implicit-def dead $tmm6, implicit-def dead $tmm7 :: (load (s512) from %stack.0, align 4)
|
|
; CHECK-NEXT: [[MOV32ri64_:%[0-9]+]]:gr64 = MOV32ri64 @buf
|
|
; CHECK-NEXT: [[MOV32ri64_1:%[0-9]+]]:gr64_nosp = MOV32ri64 32
|
|
; CHECK-NEXT: [[PT2RPNTLVWZ0V:%[0-9]+]]:tilepair = PT2RPNTLVWZ0V [[COPY2]].sub_16bit, [[COPY1]].sub_16bit, [[COPY]].sub_16bit, [[MOV32ri64_]], 1, [[MOV32ri64_1]], 0, $noreg
|
|
; CHECK-NEXT: [[PTILEZEROV:%[0-9]+]]:tile = PTILEZEROV [[COPY2]].sub_16bit, [[COPY1]].sub_16bit
|
|
; CHECK-NEXT: [[PTILEZEROV:%[0-9]+]]:tile = PTDPBSSDV [[COPY2]].sub_16bit, [[COPY]].sub_16bit, [[COPY1]].sub_16bit, [[PTILEZEROV]], [[PT2RPNTLVWZ0V]].sub_t0, [[PT2RPNTLVWZ0V]].sub_t1
|
|
; CHECK-NEXT: [[MOV32ri64_2:%[0-9]+]]:gr64 = MOV32ri64 @buf2
|
|
; CHECK-NEXT: PTILESTOREDV [[COPY2]].sub_16bit, [[COPY1]].sub_16bit, [[MOV32ri64_2]], 1, [[MOV32ri64_1]], 0, $noreg, [[PTILEZEROV]]
|
|
; CHECK-NEXT: RET 0
|
|
%2:gr32 = COPY $edx
|
|
%1:gr32 = COPY $esi
|
|
%0:gr32 = COPY $edi
|
|
%14:vr512 = AVX512_512_SET0
|
|
VMOVUPSZmr %stack.0, 1, $noreg, 0, $noreg, %14 :: (store (s512) into %stack.0, align 4)
|
|
MOV8mi %stack.0, 1, $noreg, 0, $noreg, 1 :: (store (s512) into %stack.0, align 4)
|
|
PLDTILECFGV %stack.0, 1, $noreg, 0, $noreg, implicit-def dead $tmm0, implicit-def dead $tmm1, implicit-def dead $tmm2, implicit-def dead $tmm3, implicit-def dead $tmm4, implicit-def dead $tmm5, implicit-def dead $tmm6, implicit-def dead $tmm7 :: (load (s512) from %stack.0, align 4)
|
|
%6:gr64 = MOV32ri64 @buf
|
|
%7:gr64_nosp = MOV32ri64 32
|
|
%8:tilepair = PT2RPNTLVWZ0V %0.sub_16bit, %1.sub_16bit, %2.sub_16bit, %6, 1, %7, 0, $noreg
|
|
%12:tile = PTILEZEROV %0.sub_16bit, %1.sub_16bit
|
|
%12:tile = PTDPBSSDV %0.sub_16bit, %2.sub_16bit, %1.sub_16bit, %12, %8.sub_t0, %8.sub_t1
|
|
%13:gr64 = MOV32ri64 @buf2
|
|
PTILESTOREDV %0.sub_16bit, %1.sub_16bit, %13, 1, %7, 0, $noreg, %12
|
|
RET 0
|
|
|
|
...
|