Luo, Yuanke 979d876bb4 [X86][AMX] enable amx cast intrinsics in FE.
We have some discission in D99152 and llvm-dev and finially come up with
a solution to add amx specific cast intrinsics. We've support the
intrinsics in llvm IR. This patch is to replace bitcast with amx cast
intrinsics in code emitting in FE.

Differential Revision: https://reviews.llvm.org/D122567
2022-04-02 14:02:35 +08:00

105 lines
4.0 KiB
C

// RUN: %clang_cc1 %s -flax-vector-conversions=none -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx512f -target-feature +amx-int8 \
// RUN: -target-feature +amx-bf16 -emit-llvm -o - -Werror -pedantic | FileCheck %s --check-prefixes=CHECK
#include <immintrin.h>
char buf[1024];
#define STRIDE 32
char buf2[1024];
// This is an example code and integration test.
void test_api(int cond, short row, short col) {
//CHECK-LABEL: @test_api
//CHECK-DAG: call x86_amx @llvm.x86.tileloadd64.internal
//CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
//CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
//CHECK-DAG: call x86_amx @llvm.x86.tdpbssd.internal
//CHECK-DAG: call void @llvm.x86.tilestored64.internal
__tile1024i a = {row, 8};
__tile1024i b = {8, col};
__tile1024i c = {row, col};
if (cond) {
__tile_loadd(&a, buf, STRIDE);
__tile_loadd(&b, buf, STRIDE);
__tile_loadd(&c, buf, STRIDE);
} else {
__tile_loadd(&a, buf2, STRIDE);
__tile_loadd(&b, buf2, STRIDE);
__tile_loadd(&c, buf2, STRIDE);
}
__tile_dpbssd(&c, a, b);
__tile_stored(buf, STRIDE, c);
}
void test_tile_loadd(short row, short col) {
//CHECK-LABEL: @test_tile_loadd
//CHECK-DAG: call x86_amx @llvm.x86.tileloadd64.internal
//CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
__tile1024i a = {row, col};
__tile_loadd(&a, buf, STRIDE);
}
void test_tile_stream_loadd(short row, short col) {
//CHECK-LABEL: @test_tile_stream_loadd
//CHECK-DAG: call x86_amx @llvm.x86.tileloaddt164.internal
//CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
__tile1024i a = {row, col};
__tile_stream_loadd(&a, buf, STRIDE);
}
void test_tile_dpbssd(__tile1024i a, __tile1024i b, __tile1024i c) {
//CHECK-LABEL: @test_tile_dpbssd
//CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
//CHECK-DAG: call x86_amx @llvm.x86.tdpbssd.internal
//CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
__tile_dpbssd(&c, a, b);
}
void test_tile_dpbsud(__tile1024i a, __tile1024i b, __tile1024i c) {
//CHECK-LABEL: @test_tile_dpbsud
//CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
//CHECK-DAG: call x86_amx @llvm.x86.tdpbsud.internal
//CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
__tile_dpbsud(&c, a, b);
}
void test_tile_dpbusd(__tile1024i a, __tile1024i b, __tile1024i c) {
//CHECK-LABEL: @test_tile_dpbusd
//CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
//CHECK-DAG: call x86_amx @llvm.x86.tdpbusd.internal
//CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
__tile_dpbusd(&c, a, b);
}
void test_tile_dpbuud(__tile1024i a, __tile1024i b, __tile1024i c) {
//CHECK-LABEL: @test_tile_dpbuud
//CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
//CHECK-DAG: call x86_amx @llvm.x86.tdpbuud.internal
//CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
__tile_dpbuud(&c, a, b);
}
void test_tile_stored(__tile1024i c) {
//CHECK-LABEL: @test_tile_stored
//CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
//CHECK-DAG: call void @llvm.x86.tilestored64.internal
__tile_stored(buf, STRIDE, c);
}
void test_tile_zero(__tile1024i c) {
//CHECK-LABEL: @test_tile_zero
//CHECK-DAG: call x86_amx @llvm.x86.tilezero.internal
//CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
__tile_zero(&c);
}
void test_tile_dpbf16ps(__tile1024i a, __tile1024i b, __tile1024i c) {
//CHECK-LABEL: @test_tile_dpbf16ps
//CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
//CHECK-DAG: call x86_amx @llvm.x86.tdpbf16ps.internal
//CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
__tile_dpbf16ps(&a, b, c);
}