Matt Arsenault 9bdd9dc895
AMDGPU: Mark workitem ID intrinsics with range attribute (#136196)
This avoids the need to have special handling at every use site.
Unfortunately this means we unnecessarily emit AssertZext in the DAG
(where we already directly understand the range of the intrinsic), andt
we regress in undefined cases as we don't fold out asserts on undef.
2025-04-18 12:27:38 +02:00

907 lines
30 KiB
Common Lisp

// REQUIRES: amdgpu-registered-target
// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu tahiti -emit-llvm -o - %s | FileCheck -enable-var-scope --check-prefixes=CHECK-AMDGCN,CHECK %s
// RUN: %clang_cc1 -cl-std=CL2.0 -triple spirv64-amd-amdhsa -emit-llvm -o - %s | FileCheck -enable-var-scope --check-prefixes=CHECK,CHECK-SPIRV %s
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
typedef unsigned long ulong;
typedef unsigned int uint;
typedef unsigned short ushort;
typedef half __attribute__((ext_vector_type(2))) half2;
typedef short __attribute__((ext_vector_type(2))) short2;
typedef ushort __attribute__((ext_vector_type(2))) ushort2;
typedef uint __attribute__((ext_vector_type(4))) uint4;
// CHECK-LABEL: @test_div_scale_f64
// CHECK: {{.*}}call{{.*}} { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 true)
// CHECK-DAG: [[FLAG:%.+]] = extractvalue { double, i1 } %{{.+}}, 1
// CHECK-DAG: [[VAL:%.+]] = extractvalue { double, i1 } %{{.+}}, 0
// CHECK: [[FLAGEXT:%.+]] = zext i1 [[FLAG]] to i32
// CHECK: store i32 [[FLAGEXT]]
void test_div_scale_f64(global double* out, global int* flagout, double a, double b)
{
bool flag;
*out = __builtin_amdgcn_div_scale(a, b, true, &flag);
*flagout = flag;
}
// CHECK-LABEL: @test_div_scale_f32(
// CHECK: {{.*}}call{{.*}} { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 true)
// CHECK-DAG: [[FLAG:%.+]] = extractvalue { float, i1 } %{{.+}}, 1
// CHECK-DAG: [[VAL:%.+]] = extractvalue { float, i1 } %{{.+}}, 0
// CHECK: [[FLAGEXT:%.+]] = zext i1 [[FLAG]] to i8
// CHECK: store i8 [[FLAGEXT]]
void test_div_scale_f32(global float* out, global bool* flagout, float a, float b)
{
bool flag;
*out = __builtin_amdgcn_div_scalef(a, b, true, &flag);
*flagout = flag;
}
// CHECK-LABEL: @test_div_scale_f32_global_ptr(
// CHECK: {{.*}}call{{.*}} { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 true)
// CHECK-DAG: [[FLAG:%.+]] = extractvalue { float, i1 } %{{.+}}, 1
// CHECK-DAG: [[VAL:%.+]] = extractvalue { float, i1 } %{{.+}}, 0
// CHECK: [[FLAGEXT:%.+]] = zext i1 [[FLAG]] to i8
// CHECK: store i8 [[FLAGEXT]]
void test_div_scale_f32_global_ptr(global float* out, global int* flagout, float a, float b, global bool* flag)
{
*out = __builtin_amdgcn_div_scalef(a, b, true, flag);
}
// CHECK-LABEL: @test_div_scale_f32_generic_ptr(
// CHECK: {{.*}}call{{.*}} { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 true)
// CHECK-DAG: [[FLAG:%.+]] = extractvalue { float, i1 } %{{.+}}, 1
// CHECK-DAG: [[VAL:%.+]] = extractvalue { float, i1 } %{{.+}}, 0
// CHECK: [[FLAGEXT:%.+]] = zext i1 [[FLAG]] to i8
// CHECK: store i8 [[FLAGEXT]]
void test_div_scale_f32_generic_ptr(global float* out, global int* flagout, float a, float b, global bool* flag_arg)
{
generic bool* flag = flag_arg;
*out = __builtin_amdgcn_div_scalef(a, b, true, flag);
}
// CHECK-LABEL: @test_div_fmas_f32
// CHECK: {{.*}}call{{.*}} float @llvm.amdgcn.div.fmas.f32
void test_div_fmas_f32(global float* out, float a, float b, float c, int d)
{
*out = __builtin_amdgcn_div_fmasf(a, b, c, d);
}
// CHECK-LABEL: @test_div_fmas_f64
// CHECK: {{.*}}call{{.*}} double @llvm.amdgcn.div.fmas.f64
void test_div_fmas_f64(global double* out, double a, double b, double c, int d)
{
*out = __builtin_amdgcn_div_fmas(a, b, c, d);
}
// CHECK-LABEL: @test_div_fixup_f32
// CHECK: {{.*}}call{{.*}} float @llvm.amdgcn.div.fixup.f32
void test_div_fixup_f32(global float* out, float a, float b, float c)
{
*out = __builtin_amdgcn_div_fixupf(a, b, c);
}
// CHECK-LABEL: @test_div_fixup_f64
// CHECK: {{.*}}call{{.*}} double @llvm.amdgcn.div.fixup.f64
void test_div_fixup_f64(global double* out, double a, double b, double c)
{
*out = __builtin_amdgcn_div_fixup(a, b, c);
}
// CHECK-LABEL: @test_trig_preop_f32
// CHECK: {{.*}}call{{.*}} float @llvm.amdgcn.trig.preop.f32
void test_trig_preop_f32(global float* out, float a, int b)
{
*out = __builtin_amdgcn_trig_preopf(a, b);
}
// CHECK-LABEL: @test_trig_preop_f64
// CHECK: {{.*}}call{{.*}} double @llvm.amdgcn.trig.preop.f64
void test_trig_preop_f64(global double* out, double a, int b)
{
*out = __builtin_amdgcn_trig_preop(a, b);
}
// CHECK-LABEL: @test_rcp_f32
// CHECK: {{.*}}call{{.*}} float @llvm.amdgcn.rcp.f32
void test_rcp_f32(global float* out, float a)
{
*out = __builtin_amdgcn_rcpf(a);
}
// CHECK-LABEL: @test_rcp_f64
// CHECK: {{.*}}call{{.*}} double @llvm.amdgcn.rcp.f64
void test_rcp_f64(global double* out, double a)
{
*out = __builtin_amdgcn_rcp(a);
}
// CHECK-LABEL: @test_sqrt_f32
// CHECK: {{.*}}call{{.*}} float @llvm.{{((amdgcn.){0,1})}}sqrt.f32
void test_sqrt_f32(global float* out, float a)
{
*out = __builtin_amdgcn_sqrtf(a);
}
// CHECK-LABEL: @test_sqrt_f64
// CHECK: {{.*}}call{{.*}} double @llvm.amdgcn.sqrt.f64
void test_sqrt_f64(global double* out, double a)
{
*out = __builtin_amdgcn_sqrt(a);
}
// CHECK-LABEL: @test_rsq_f32
// CHECK: {{.*}}call{{.*}} float @llvm.amdgcn.rsq.f32
void test_rsq_f32(global float* out, float a)
{
*out = __builtin_amdgcn_rsqf(a);
}
// CHECK-LABEL: @test_rsq_f64
// CHECK: {{.*}}call{{.*}} double @llvm.amdgcn.rsq.f64
void test_rsq_f64(global double* out, double a)
{
*out = __builtin_amdgcn_rsq(a);
}
// CHECK-LABEL: @test_rsq_clamp_f32
// CHECK: {{.*}}call{{.*}} float @llvm.amdgcn.rsq.clamp.f32
void test_rsq_clamp_f32(global float* out, float a)
{
*out = __builtin_amdgcn_rsq_clampf(a);
}
// CHECK-LABEL: @test_rsq_clamp_f64
// CHECK: {{.*}}call{{.*}} double @llvm.amdgcn.rsq.clamp.f64
void test_rsq_clamp_f64(global double* out, double a)
{
*out = __builtin_amdgcn_rsq_clamp(a);
}
// CHECK-LABEL: @test_sin_f32
// CHECK: {{.*}}call{{.*}} float @llvm.amdgcn.sin.f32
void test_sin_f32(global float* out, float a)
{
*out = __builtin_amdgcn_sinf(a);
}
// CHECK-LABEL: @test_cos_f32
// CHECK: {{.*}}call{{.*}} float @llvm.amdgcn.cos.f32
void test_cos_f32(global float* out, float a)
{
*out = __builtin_amdgcn_cosf(a);
}
// CHECK-LABEL: @test_log_f32
// CHECK: {{.*}}call{{.*}} float @llvm.amdgcn.log.f32
void test_log_f32(global float* out, float a)
{
*out = __builtin_amdgcn_logf(a);
}
// CHECK-LABEL: @test_exp2_f32
// CHECK: {{.*}}call{{.*}} float @llvm.amdgcn.exp2.f32
void test_exp2_f32(global float* out, float a)
{
*out = __builtin_amdgcn_exp2f(a);
}
// CHECK-LABEL: @test_log_clamp_f32
// CHECK: {{.*}}call{{.*}} float @llvm.amdgcn.log.clamp.f32
void test_log_clamp_f32(global float* out, float a)
{
*out = __builtin_amdgcn_log_clampf(a);
}
// CHECK-LABEL: @test_ldexp_f32
// CHECK: {{.*}}call{{.*}} float @llvm.ldexp.f32.i32
void test_ldexp_f32(global float* out, float a, int b)
{
*out = __builtin_amdgcn_ldexpf(a, b);
}
// CHECK-LABEL: @test_ldexp_f64
// CHECK: {{.*}}call{{.*}} double @llvm.ldexp.f64.i32
void test_ldexp_f64(global double* out, double a, int b)
{
*out = __builtin_amdgcn_ldexp(a, b);
}
// CHECK-LABEL: @test_frexp_mant_f32
// CHECK: {{.*}}call{{.*}} float @llvm.amdgcn.frexp.mant.f32
void test_frexp_mant_f32(global float* out, float a)
{
*out = __builtin_amdgcn_frexp_mantf(a);
}
// CHECK-LABEL: @test_frexp_mant_f64
// CHECK: {{.*}}call{{.*}} double @llvm.amdgcn.frexp.mant.f64
void test_frexp_mant_f64(global double* out, double a)
{
*out = __builtin_amdgcn_frexp_mant(a);
}
// CHECK-LABEL: @test_frexp_exp_f32
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.frexp.exp.i32.f32
void test_frexp_exp_f32(global int* out, float a)
{
*out = __builtin_amdgcn_frexp_expf(a);
}
// CHECK-LABEL: @test_frexp_exp_f64
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.frexp.exp.i32.f64
void test_frexp_exp_f64(global int* out, double a)
{
*out = __builtin_amdgcn_frexp_exp(a);
}
// CHECK-LABEL: @test_fract_f32
// CHECK: {{.*}}call{{.*}} float @llvm.amdgcn.fract.f32
void test_fract_f32(global int* out, float a)
{
*out = __builtin_amdgcn_fractf(a);
}
// CHECK-LABEL: @test_fract_f64
// CHECK: {{.*}}call{{.*}} double @llvm.amdgcn.fract.f64
void test_fract_f64(global int* out, double a)
{
*out = __builtin_amdgcn_fract(a);
}
// CHECK-LABEL: @test_lerp
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.lerp
void test_lerp(global int* out, int a, int b, int c)
{
*out = __builtin_amdgcn_lerp(a, b, c);
}
// CHECK-LABEL: @test_sicmp_i32
// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.icmp.i64.i32(i32 %a, i32 %b, i32 32)
void test_sicmp_i32(global ulong* out, int a, int b)
{
*out = __builtin_amdgcn_sicmp(a, b, 32);
}
// CHECK-LABEL: @test_uicmp_i32
// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.icmp.i64.i32(i32 %a, i32 %b, i32 32)
void test_uicmp_i32(global ulong* out, uint a, uint b)
{
*out = __builtin_amdgcn_uicmp(a, b, 32);
}
// CHECK-LABEL: @test_sicmp_i64
// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.icmp.i64.i64(i64 %a, i64 %b, i32 38)
void test_sicmp_i64(global ulong* out, long a, long b)
{
*out = __builtin_amdgcn_sicmpl(a, b, 39-1);
}
// CHECK-LABEL: @test_uicmp_i64
// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.icmp.i64.i64(i64 %a, i64 %b, i32 35)
void test_uicmp_i64(global ulong* out, ulong a, ulong b)
{
*out = __builtin_amdgcn_uicmpl(a, b, 30+5);
}
// CHECK-LABEL: @test_ds_swizzle
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.ds.swizzle(i32 %a, i32 32)
void test_ds_swizzle(global int* out, int a)
{
*out = __builtin_amdgcn_ds_swizzle(a, 32);
}
// CHECK-LABEL: @test_ds_permute
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.ds.permute(i32 %a, i32 %b)
void test_ds_permute(global int* out, int a, int b)
{
out[0] = __builtin_amdgcn_ds_permute(a, b);
}
// CHECK-LABEL: @test_ds_bpermute
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.ds.bpermute(i32 %a, i32 %b)
void test_ds_bpermute(global int* out, int a, int b)
{
*out = __builtin_amdgcn_ds_bpermute(a, b);
}
// CHECK-LABEL: @test_readfirstlane
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.readfirstlane.i32(i32 %a)
void test_readfirstlane(global int* out, int a)
{
*out = __builtin_amdgcn_readfirstlane(a);
}
// CHECK-LABEL: @test_readlane
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.readlane.i32(i32 %a, i32 %b)
void test_readlane(global int* out, int a, int b)
{
*out = __builtin_amdgcn_readlane(a, b);
}
// CHECK-LABEL: @test_fcmp_f32
// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.fcmp.i64.f32(float %a, float %b, i32 5)
void test_fcmp_f32(global ulong* out, float a, float b)
{
*out = __builtin_amdgcn_fcmpf(a, b, 5);
}
// CHECK-LABEL: @test_fcmp_f64
// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.fcmp.i64.f64(double %a, double %b, i32 6)
void test_fcmp_f64(global ulong* out, double a, double b)
{
*out = __builtin_amdgcn_fcmp(a, b, 3+3);
}
// CHECK-LABEL: @test_class_f32
// CHECK: {{.*}}call{{.*}} i1 @llvm.amdgcn.class.f32
void test_class_f32(global float* out, float a, int b)
{
*out = __builtin_amdgcn_classf(a, b);
}
// CHECK-LABEL: @test_class_f64
// CHECK: {{.*}}call{{.*}} i1 @llvm.amdgcn.class.f64
void test_class_f64(global double* out, double a, int b)
{
*out = __builtin_amdgcn_class(a, b);
}
// CHECK-LABEL: @test_buffer_wbinvl1
// CHECK: {{.*}}call{{.*}} void @llvm.amdgcn.buffer.wbinvl1(
void test_buffer_wbinvl1()
{
__builtin_amdgcn_buffer_wbinvl1();
}
// CHECK-LABEL: @test_s_dcache_inv
// CHECK: {{.*}}call{{.*}} void @llvm.amdgcn.s.dcache.inv(
void test_s_dcache_inv()
{
__builtin_amdgcn_s_dcache_inv();
}
// CHECK-LABEL: @test_s_waitcnt
// CHECK: {{.*}}call{{.*}} void @llvm.amdgcn.s.waitcnt(
void test_s_waitcnt()
{
__builtin_amdgcn_s_waitcnt(0);
}
// CHECK-LABEL: @test_s_sendmsg
// CHECK: {{.*}}call{{.*}} void @llvm.amdgcn.s.sendmsg(
void test_s_sendmsg()
{
__builtin_amdgcn_s_sendmsg(1, 0);
}
// CHECK-LABEL: @test_s_sendmsg_var
// CHECK: {{.*}}call{{.*}} void @llvm.amdgcn.s.sendmsg(
void test_s_sendmsg_var(int in)
{
__builtin_amdgcn_s_sendmsg(1, in);
}
// CHECK-LABEL: @test_s_sendmsghalt
// CHECK: {{.*}}call{{.*}} void @llvm.amdgcn.s.sendmsghalt(
void test_s_sendmsghalt()
{
__builtin_amdgcn_s_sendmsghalt(1, 0);
}
// CHECK-LABEL: @test_s_sendmsghalt
// CHECK: {{.*}}call{{.*}} void @llvm.amdgcn.s.sendmsghalt(
void test_s_sendmsghalt_var(int in)
{
__builtin_amdgcn_s_sendmsghalt(1, in);
}
// CHECK-LABEL: @test_s_barrier
// CHECK: {{.*}}call{{.*}} void @llvm.amdgcn.s.barrier(
void test_s_barrier()
{
__builtin_amdgcn_s_barrier();
}
// CHECK-LABEL: @test_wave_barrier
// CHECK: {{.*}}call{{.*}} void @llvm.amdgcn.wave.barrier(
void test_wave_barrier()
{
__builtin_amdgcn_wave_barrier();
}
// CHECK-LABEL: @test_sched_barrier
// CHECK: {{.*}}call{{.*}} void @llvm.amdgcn.sched.barrier(i32 0)
// CHECK: {{.*}}call{{.*}} void @llvm.amdgcn.sched.barrier(i32 1)
// CHECK: {{.*}}call{{.*}} void @llvm.amdgcn.sched.barrier(i32 4)
// CHECK: {{.*}}call{{.*}} void @llvm.amdgcn.sched.barrier(i32 15)
void test_sched_barrier()
{
__builtin_amdgcn_sched_barrier(0);
__builtin_amdgcn_sched_barrier(1);
__builtin_amdgcn_sched_barrier(4);
__builtin_amdgcn_sched_barrier(15);
}
// CHECK-LABEL: @test_sched_group_barrier
// CHECK: {{.*}}call{{.*}} void @llvm.amdgcn.sched.group.barrier(i32 0, i32 1, i32 2)
// CHECK: {{.*}}call{{.*}} void @llvm.amdgcn.sched.group.barrier(i32 1, i32 2, i32 4)
// CHECK: {{.*}}call{{.*}} void @llvm.amdgcn.sched.group.barrier(i32 4, i32 8, i32 16)
// CHECK: {{.*}}call{{.*}} void @llvm.amdgcn.sched.group.barrier(i32 15, i32 10000, i32 -1)
void test_sched_group_barrier()
{
__builtin_amdgcn_sched_group_barrier(0, 1, 2);
__builtin_amdgcn_sched_group_barrier(1, 2, 4);
__builtin_amdgcn_sched_group_barrier(4, 8, 16);
__builtin_amdgcn_sched_group_barrier(15, 10000, -1);
}
// CHECK-LABEL: @test_iglp_opt
// CHECK: {{.*}}call{{.*}} void @llvm.amdgcn.iglp.opt(i32 0)
// CHECK: {{.*}}call{{.*}} void @llvm.amdgcn.iglp.opt(i32 1)
// CHECK: {{.*}}call{{.*}} void @llvm.amdgcn.iglp.opt(i32 4)
// CHECK: {{.*}}call{{.*}} void @llvm.amdgcn.iglp.opt(i32 15)
void test_iglp_opt()
{
__builtin_amdgcn_iglp_opt(0);
__builtin_amdgcn_iglp_opt(1);
__builtin_amdgcn_iglp_opt(4);
__builtin_amdgcn_iglp_opt(15);
}
// CHECK-LABEL: @test_s_sleep
// CHECK: {{.*}}call{{.*}} void @llvm.amdgcn.s.sleep(i32 1)
// CHECK: {{.*}}call{{.*}} void @llvm.amdgcn.s.sleep(i32 15)
void test_s_sleep()
{
__builtin_amdgcn_s_sleep(1);
__builtin_amdgcn_s_sleep(15);
}
// CHECK-LABEL: @test_s_incperflevel
// CHECK: {{.*}}call{{.*}} void @llvm.amdgcn.s.incperflevel(i32 1)
// CHECK: {{.*}}call{{.*}} void @llvm.amdgcn.s.incperflevel(i32 15)
void test_s_incperflevel()
{
__builtin_amdgcn_s_incperflevel(1);
__builtin_amdgcn_s_incperflevel(15);
}
// CHECK-LABEL: @test_s_decperflevel
// CHECK: {{.*}}call{{.*}} void @llvm.amdgcn.s.decperflevel(i32 1)
// CHECK: {{.*}}call{{.*}} void @llvm.amdgcn.s.decperflevel(i32 15)
void test_s_decperflevel()
{
__builtin_amdgcn_s_decperflevel(1);
__builtin_amdgcn_s_decperflevel(15);
}
// CHECK-LABEL: @test_s_setprio
// CHECK: {{.*}}call{{.*}} void @llvm.amdgcn.s.setprio(i16 0)
// CHECK: {{.*}}call{{.*}} void @llvm.amdgcn.s.setprio(i16 3)
void test_s_setprio()
{
__builtin_amdgcn_s_setprio(0);
__builtin_amdgcn_s_setprio(3);
}
// CHECK-LABEL: @test_cubeid(
// CHECK: {{.*}}call{{.*}} float @llvm.amdgcn.cubeid(float %a, float %b, float %c)
void test_cubeid(global float* out, float a, float b, float c) {
*out = __builtin_amdgcn_cubeid(a, b, c);
}
// CHECK-LABEL: @test_cubesc(
// CHECK: {{.*}}call{{.*}} float @llvm.amdgcn.cubesc(float %a, float %b, float %c)
void test_cubesc(global float* out, float a, float b, float c) {
*out = __builtin_amdgcn_cubesc(a, b, c);
}
// CHECK-LABEL: @test_cubetc(
// CHECK: {{.*}}call{{.*}} float @llvm.amdgcn.cubetc(float %a, float %b, float %c)
void test_cubetc(global float* out, float a, float b, float c) {
*out = __builtin_amdgcn_cubetc(a, b, c);
}
// CHECK-LABEL: @test_cubema(
// CHECK: {{.*}}call{{.*}} float @llvm.amdgcn.cubema(float %a, float %b, float %c)
void test_cubema(global float* out, float a, float b, float c) {
*out = __builtin_amdgcn_cubema(a, b, c);
}
// CHECK-LABEL: @test_read_exec(
// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.ballot.i64(i1 true)
void test_read_exec(global ulong* out) {
*out = __builtin_amdgcn_read_exec();
}
// CHECK: declare i64 @llvm.amdgcn.ballot.i64(i1){{.*}} #[[$NOUNWIND_READONLY:[0-9]+]]
// CHECK-LABEL: @test_read_exec_lo(
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.ballot.i32(i1 true)
void test_read_exec_lo(global uint* out) {
*out = __builtin_amdgcn_read_exec_lo();
}
// CHECK: declare i32 @llvm.amdgcn.ballot.i32(i1){{.*}} #[[$NOUNWIND_READONLY:[0-9]+]]
// CHECK-LABEL: @test_read_exec_hi(
// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.ballot.i64(i1 true)
// CHECK: lshr i64 [[A:%.*]], 32
// CHECK: trunc nuw i64 [[B:%.*]] to i32
void test_read_exec_hi(global uint* out) {
*out = __builtin_amdgcn_read_exec_hi();
}
// CHECK-LABEL: @test_dispatch_ptr
// CHECK: {{.*}}call align 4 dereferenceable(64){{.*}} ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
#if !defined(__SPIRV__)
void test_dispatch_ptr(__constant unsigned char ** out)
#else
void test_dispatch_ptr(__attribute__((address_space(4))) unsigned char ** out)
#endif
{
*out = __builtin_amdgcn_dispatch_ptr();
}
// CHECK-LABEL: @test_queue_ptr
// CHECK: {{.*}}call{{.*}} ptr addrspace(4) @llvm.amdgcn.queue.ptr()
#if !defined(__SPIRV__)
void test_queue_ptr(__constant unsigned char ** out)
#else
void test_queue_ptr(__attribute__((address_space(4))) unsigned char ** out)
#endif
{
*out = __builtin_amdgcn_queue_ptr();
}
// CHECK-LABEL: @test_kernarg_segment_ptr
// CHECK: {{.*}}call{{.*}} ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
#if !defined(__SPIRV__)
void test_kernarg_segment_ptr(__constant unsigned char ** out)
#else
void test_kernarg_segment_ptr(__attribute__((address_space(4))) unsigned char ** out)
#endif
{
*out = __builtin_amdgcn_kernarg_segment_ptr();
}
// CHECK-LABEL: @test_implicitarg_ptr
// CHECK: {{.*}}call{{.*}} ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
#if !defined(__SPIRV__)
void test_implicitarg_ptr(__constant unsigned char ** out)
#else
void test_implicitarg_ptr(__attribute__((address_space(4))) unsigned char ** out)
#endif
{
*out = __builtin_amdgcn_implicitarg_ptr();
}
// CHECK-LABEL: @test_get_group_id(
// CHECK: tail call{{.*}} i32 @llvm.amdgcn.workgroup.id.x()
// CHECK: tail call{{.*}} i32 @llvm.amdgcn.workgroup.id.y()
// CHECK: tail call{{.*}} i32 @llvm.amdgcn.workgroup.id.z()
void test_get_group_id(int d, global int *out)
{
switch (d) {
case 0: *out = __builtin_amdgcn_workgroup_id_x(); break;
case 1: *out = __builtin_amdgcn_workgroup_id_y(); break;
case 2: *out = __builtin_amdgcn_workgroup_id_z(); break;
default: *out = 0;
}
}
// CHECK-LABEL: @test_s_getreg(
// CHECK: tail call{{.*}} i32 @llvm.amdgcn.s.getreg(i32 0)
// CHECK: tail call{{.*}} i32 @llvm.amdgcn.s.getreg(i32 1)
// CHECK: tail call{{.*}} i32 @llvm.amdgcn.s.getreg(i32 65535)
void test_s_getreg(volatile global uint *out)
{
*out = __builtin_amdgcn_s_getreg(0);
*out = __builtin_amdgcn_s_getreg(1);
*out = __builtin_amdgcn_s_getreg(65535);
}
// CHECK-LABEL: @test_get_local_id(
// CHECK: tail call{{.*}} i32 @llvm.amdgcn.workitem.id.x()
// CHECK: tail call{{.*}} i32 @llvm.amdgcn.workitem.id.y()
// CHECK: tail call{{.*}} i32 @llvm.amdgcn.workitem.id.z()
void test_get_local_id(int d, global int *out)
{
switch (d) {
case 0: *out = __builtin_amdgcn_workitem_id_x(); break;
case 1: *out = __builtin_amdgcn_workitem_id_y(); break;
case 2: *out = __builtin_amdgcn_workitem_id_z(); break;
default: *out = 0;
}
}
// CHECK: declare noundef range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.x()
// CHECK: declare noundef range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.y()
// CHECK: declare noundef range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.z()
// CHECK-LABEL: @test_get_workgroup_size(
// CHECK: {{.*}}call align 8 dereferenceable(256){{.*}} ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
// CHECK: getelementptr inbounds nuw i8, ptr addrspace(4) %{{.*}}, i64 12
// CHECK: load i16, ptr addrspace(4) %{{.*}}, align 4, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
// CHECK: getelementptr inbounds nuw i8, ptr addrspace(4) %{{.*}}, i64 14
// CHECK: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
// CHECK: getelementptr inbounds nuw i8, ptr addrspace(4) %{{.*}}, i64 16
// CHECK: load i16, ptr addrspace(4) %{{.*}}, align 8, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
void test_get_workgroup_size(int d, global int *out)
{
switch (d) {
case 0: *out = __builtin_amdgcn_workgroup_size_x() + 1; break;
case 1: *out = __builtin_amdgcn_workgroup_size_y(); break;
case 2: *out = __builtin_amdgcn_workgroup_size_z(); break;
default: *out = 0;
}
}
// CHECK-LABEL: @test_get_grid_size(
// CHECK: {{.*}}call align 4 dereferenceable(64){{.*}} ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
// CHECK: getelementptr inbounds nuw i8, ptr addrspace(4) %{{.*}}, i64 %{{.+}}
// CHECK: load i32, ptr addrspace(4) %{{.*}}, align 4, !range [[$GRID_RANGE:![0-9]+]], !invariant.load
void test_get_grid_size(int d, global int *out)
{
switch (d) {
case 0: *out = __builtin_amdgcn_grid_size_x(); break;
case 1: *out = __builtin_amdgcn_grid_size_y(); break;
case 2: *out = __builtin_amdgcn_grid_size_z(); break;
default: *out = 0;
}
}
// CHECK-LABEL: @test_fmed3_f32
// CHECK: {{.*}}call{{.*}} float @llvm.amdgcn.fmed3.f32(
void test_fmed3_f32(global float* out, float a, float b, float c)
{
*out = __builtin_amdgcn_fmed3f(a, b, c);
}
// CHECK-LABEL: @test_s_getpc
// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.s.getpc()
void test_s_getpc(global ulong* out)
{
*out = __builtin_amdgcn_s_getpc();
}
// CHECK-LABEL: @test_ds_append_lds(
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.ds.append.p3(ptr addrspace(3) %{{.+}}, i1 false)
#if !defined(__SPIRV__)
kernel void test_ds_append_lds(global int* out, local int* ptr) {
#else
kernel void test_ds_append_lds(__attribute__((address_space(1))) int* out, __attribute__((address_space(3))) int* ptr) {
#endif
*out = __builtin_amdgcn_ds_append(ptr);
}
// CHECK-LABEL: @test_ds_consume_lds(
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.ds.consume.p3(ptr addrspace(3) %{{.+}}, i1 false)
#if !defined(__SPIRV__)
kernel void test_ds_consume_lds(global int* out, local int* ptr) {
#else
kernel void test_ds_consume_lds(__attribute__((address_space(1))) int* out, __attribute__((address_space(3))) int* ptr) {
#endif
*out = __builtin_amdgcn_ds_consume(ptr);
}
// CHECK-LABEL: @test_gws_init(
// CHECK: {{.*}}call{{.*}} void @llvm.amdgcn.ds.gws.init(i32 %value, i32 %id)
kernel void test_gws_init(uint value, uint id) {
__builtin_amdgcn_ds_gws_init(value, id);
}
// CHECK-LABEL: @test_gws_barrier(
// CHECK: {{.*}}call{{.*}} void @llvm.amdgcn.ds.gws.barrier(i32 %value, i32 %id)
kernel void test_gws_barrier(uint value, uint id) {
__builtin_amdgcn_ds_gws_barrier(value, id);
}
// CHECK-LABEL: @test_gws_sema_v(
// CHECK: {{.*}}call{{.*}} void @llvm.amdgcn.ds.gws.sema.v(i32 %id)
kernel void test_gws_sema_v(uint id) {
__builtin_amdgcn_ds_gws_sema_v(id);
}
// CHECK-LABEL: @test_gws_sema_br(
// CHECK: {{.*}}call{{.*}} void @llvm.amdgcn.ds.gws.sema.br(i32 %value, i32 %id)
kernel void test_gws_sema_br(uint value, uint id) {
__builtin_amdgcn_ds_gws_sema_br(value, id);
}
// CHECK-LABEL: @test_gws_sema_p(
// CHECK: {{.*}}call{{.*}} void @llvm.amdgcn.ds.gws.sema.p(i32 %id)
kernel void test_gws_sema_p(uint id) {
__builtin_amdgcn_ds_gws_sema_p(id);
}
// CHECK-LABEL: @test_mbcnt_lo(
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.mbcnt.lo(i32 %src0, i32 %src1)
kernel void test_mbcnt_lo(global uint* out, uint src0, uint src1) {
*out = __builtin_amdgcn_mbcnt_lo(src0, src1);
}
// CHECK-LABEL: @test_mbcnt_hi(
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.mbcnt.hi(i32 %src0, i32 %src1)
kernel void test_mbcnt_hi(global uint* out, uint src0, uint src1) {
*out = __builtin_amdgcn_mbcnt_hi(src0, src1);
}
// CHECK-LABEL: @test_alignbit(
// CHECK: tail call{{.*}} i32 @llvm.fshr.i32(i32 %src0, i32 %src1, i32 %src2)
kernel void test_alignbit(global uint* out, uint src0, uint src1, uint src2) {
*out = __builtin_amdgcn_alignbit(src0, src1, src2);
}
// CHECK-LABEL: @test_alignbyte(
// CHECK: tail call{{.*}} i32 @llvm.amdgcn.alignbyte(i32 %src0, i32 %src1, i32 %src2)
kernel void test_alignbyte(global uint* out, uint src0, uint src1, uint src2) {
*out = __builtin_amdgcn_alignbyte(src0, src1, src2);
}
// CHECK-LABEL: @test_ubfe(
// CHECK: tail call{{.*}} i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 %src1, i32 %src2)
kernel void test_ubfe(global uint* out, uint src0, uint src1, uint src2) {
*out = __builtin_amdgcn_ubfe(src0, src1, src2);
}
// CHECK-LABEL: @test_sbfe(
// CHECK: tail call{{.*}} i32 @llvm.amdgcn.sbfe.i32(i32 %src0, i32 %src1, i32 %src2)
kernel void test_sbfe(global uint* out, uint src0, uint src1, uint src2) {
*out = __builtin_amdgcn_sbfe(src0, src1, src2);
}
// CHECK-LABEL: @test_cvt_pkrtz(
// CHECK: tail call{{.*}} <2 x half> @llvm.amdgcn.cvt.pkrtz(float %src0, float %src1)
kernel void test_cvt_pkrtz(global half2* out, float src0, float src1) {
*out = __builtin_amdgcn_cvt_pkrtz(src0, src1);
}
// CHECK-LABEL: @test_cvt_pknorm_i16(
// CHECK: tail call{{.*}} <2 x i16> @llvm.amdgcn.cvt.pknorm.i16(float %src0, float %src1)
kernel void test_cvt_pknorm_i16(global short2* out, float src0, float src1) {
*out = __builtin_amdgcn_cvt_pknorm_i16(src0, src1);
}
// CHECK-LABEL: @test_cvt_pknorm_u16(
// CHECK: tail call{{.*}} <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float %src0, float %src1)
kernel void test_cvt_pknorm_u16(global ushort2* out, float src0, float src1) {
*out = __builtin_amdgcn_cvt_pknorm_u16(src0, src1);
}
// CHECK-LABEL: @test_cvt_pk_i16(
// CHECK: tail call{{.*}} <2 x i16> @llvm.amdgcn.cvt.pk.i16(i32 %src0, i32 %src1)
kernel void test_cvt_pk_i16(global short2* out, int src0, int src1) {
*out = __builtin_amdgcn_cvt_pk_i16(src0, src1);
}
// CHECK-LABEL: @test_cvt_pk_u16(
// CHECK: tail call{{.*}} <2 x i16> @llvm.amdgcn.cvt.pk.u16(i32 %src0, i32 %src1)
kernel void test_cvt_pk_u16(global ushort2* out, uint src0, uint src1) {
*out = __builtin_amdgcn_cvt_pk_u16(src0, src1);
}
// CHECK-LABEL: @test_cvt_pk_u8_f32
// CHECK: tail call{{.*}} i32 @llvm.amdgcn.cvt.pk.u8.f32(float %src0, i32 %src1, i32 %src2)
kernel void test_cvt_pk_u8_f32(global uint* out, float src0, uint src1, uint src2) {
*out = __builtin_amdgcn_cvt_pk_u8_f32(src0, src1, src2);
}
// CHECK-LABEL: @test_sad_u8(
// CHECK: tail call{{.*}} i32 @llvm.amdgcn.sad.u8(i32 %src0, i32 %src1, i32 %src2)
kernel void test_sad_u8(global uint* out, uint src0, uint src1, uint src2) {
*out = __builtin_amdgcn_sad_u8(src0, src1, src2);
}
// CHECK-LABEL: test_msad_u8(
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.msad.u8(i32 %src0, i32 %src1, i32 %src2)
kernel void test_msad_u8(global uint* out, uint src0, uint src1, uint src2) {
*out = __builtin_amdgcn_msad_u8(src0, src1, src2);
}
// CHECK-LABEL: test_sad_hi_u8(
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.sad.hi.u8(i32 %src0, i32 %src1, i32 %src2)
kernel void test_sad_hi_u8(global uint* out, uint src0, uint src1, uint src2) {
*out = __builtin_amdgcn_sad_hi_u8(src0, src1, src2);
}
// CHECK-LABEL: @test_sad_u16(
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.sad.u16(i32 %src0, i32 %src1, i32 %src2)
kernel void test_sad_u16(global uint* out, uint src0, uint src1, uint src2) {
*out = __builtin_amdgcn_sad_u16(src0, src1, src2);
}
// CHECK-LABEL: @test_qsad_pk_u16_u8(
// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.qsad.pk.u16.u8(i64 %src0, i32 %src1, i64 %src2)
kernel void test_qsad_pk_u16_u8(global ulong* out, ulong src0, uint src1, ulong src2) {
*out = __builtin_amdgcn_qsad_pk_u16_u8(src0, src1, src2);
}
// CHECK-LABEL: @test_mqsad_pk_u16_u8(
// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.mqsad.pk.u16.u8(i64 %src0, i32 %src1, i64 %src2)
kernel void test_mqsad_pk_u16_u8(global ulong* out, ulong src0, uint src1, ulong src2) {
*out = __builtin_amdgcn_mqsad_pk_u16_u8(src0, src1, src2);
}
// CHECK-LABEL: test_mqsad_u32_u8(
// CHECK: {{.*}}call{{.*}} <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %src0, i32 %src1, <4 x i32> %src2)
kernel void test_mqsad_u32_u8(global uint4* out, ulong src0, uint src1, uint4 src2) {
*out = __builtin_amdgcn_mqsad_u32_u8(src0, src1, src2);
}
// CHECK-LABEL: test_s_setreg(
// CHECK: {{.*}}call{{.*}} void @llvm.amdgcn.s.setreg(i32 8193, i32 %val)
kernel void test_s_setreg(uint val) {
__builtin_amdgcn_s_setreg(8193, val);
}
// CHECK-LABEL test_atomic_inc_dec(
#if !defined(__SPIRV__)
void test_atomic_inc_dec(local uint *lptr, global uint *gptr, uint val) {
#else
void test_atomic_inc_dec(__attribute__((address_space(3))) uint *lptr, __attribute__((address_space(1))) uint *gptr, uint val) {
#endif
uint res;
// CHECK: atomicrmw uinc_wrap ptr addrspace(3) %lptr, i32 %val syncscope("workgroup") seq_cst, align 4
res = __builtin_amdgcn_atomic_inc32(lptr, val, __ATOMIC_SEQ_CST, "workgroup");
// CHECK: atomicrmw udec_wrap ptr addrspace(3) %lptr, i32 %val syncscope("workgroup") seq_cst, align 4
res = __builtin_amdgcn_atomic_dec32(lptr, val, __ATOMIC_SEQ_CST, "workgroup");
// CHECK: atomicrmw uinc_wrap ptr addrspace(1) %gptr, i32 %val syncscope("agent") seq_cst, align 4
res = __builtin_amdgcn_atomic_inc32(gptr, val, __ATOMIC_SEQ_CST, "agent");
// CHECK: atomicrmw udec_wrap ptr addrspace(1) %gptr, i32 %val seq_cst, align 4
res = __builtin_amdgcn_atomic_dec32(gptr, val, __ATOMIC_SEQ_CST, "");
// CHECK: atomicrmw volatile udec_wrap ptr addrspace(1) %gptr, i32 %val seq_cst, align 4
#if !defined(__SPIRV__)
res = __builtin_amdgcn_atomic_dec32((volatile global uint*)gptr, val, __ATOMIC_SEQ_CST, "");
#else
res = __builtin_amdgcn_atomic_dec32((volatile __attribute__((address_space(1))) uint*)gptr, val, __ATOMIC_SEQ_CST, "");
#endif
}
// CHECK-LABEL test_wavefrontsize(
unsigned test_wavefrontsize() {
// CHECK-AMDGCN: ret i32 {{[0-9]+}}
// CHECK-SPIRV: {{.*}}call{{.*}} i32 @llvm.amdgcn.wavefrontsize()
return __builtin_amdgcn_wavefrontsize();
}
// CHECK-LABEL test_flt_rounds(
unsigned test_flt_rounds() {
// CHECK: {{.*}}call{{.*}} i32 @llvm.get.rounding()
unsigned mode = __builtin_flt_rounds();
#if !defined(__SPIRV__)
// CHECK-AMDGCN: call void @llvm.set.rounding(i32 %0)
__builtin_set_flt_rounds(mode);
#endif
return mode;
}
// CHECK-LABEL test_get_fpenv(
unsigned long test_get_fpenv() {
// CHECK: {{.*}}call{{.*}} i64 @llvm.get.fpenv.i64()
return __builtin_amdgcn_get_fpenv();
}
// CHECK-LABEL test_set_fpenv(
void test_set_fpenv(unsigned long env) {
// CHECK: {{.*}}call{{.*}} void @llvm.set.fpenv.i64(i64 %[[ENV:.+]])
__builtin_amdgcn_set_fpenv(env);
}
// CHECK-DAG: [[$GRID_RANGE]] = !{i32 1, i32 0}
// CHECK-DAG: [[$WS_RANGE]] = !{i16 1, i16 1025}
// CHECK-DAG: attributes #[[$NOUNWIND_READONLY]] = { convergent mustprogress nocallback nofree nounwind willreturn memory(none) }