libclc: Partially implement nonuniform subgroup reduce functions (#188929)

For AMDGPU these are identical to the uniform case. Stub out the missing
cases with traps to avoid test failures from undefined symbols while
keeping the structure consistent.
This commit is contained in:
Matt Arsenault 2026-03-27 10:47:44 +01:00 committed by GitHub
parent 9a4c6c09a7
commit 35781a7d43
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 560 additions and 134 deletions

View File

@ -0,0 +1,29 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef __CLC_SUBGROUP_CLC_SUB_GROUP_NON_UNIFORM_REDUCE_H__
#define __CLC_SUBGROUP_CLC_SUB_GROUP_NON_UNIFORM_REDUCE_H__
#include "clc/internal/clc.h"
#define __CLC_BODY "clc/subgroup/clc_sub_group_non_uniform_reduce_decl.inc"
#include "clc/integer/gentype.inc"
#define __CLC_BODY "clc/subgroup/clc_sub_group_non_uniform_reduce_decl.inc"
#include "clc/math/gentype.inc"
// Logical reductions of a per-work-item predicate. Only int overloads exist;
// unlike the arithmetic and bitwise reductions these are not generated from a
// gentype template.
// NOTE(review): presumably each returns the logical AND/OR/XOR of
// `predicate != 0` over the participating work-items, mirroring the OpenCL
// sub_group_non_uniform_reduce_logical_* built-ins -- confirm against the
// target implementation.
_CLC_DECL _CLC_OVERLOAD int
__clc_sub_group_non_uniform_reduce_logical_and(int predicate);
_CLC_DECL _CLC_OVERLOAD int
__clc_sub_group_non_uniform_reduce_logical_or(int predicate);
_CLC_DECL _CLC_OVERLOAD int
__clc_sub_group_non_uniform_reduce_logical_xor(int predicate);
#endif // __CLC_SUBGROUP_CLC_SUB_GROUP_NON_UNIFORM_REDUCE_H__

View File

@ -0,0 +1,33 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Declaration template for the non-uniform subgroup reductions of one
// __CLC_GENTYPE. NOTE(review): presumably this is the file that the header
// names as __CLC_BODY before including the integer and math gentype.inc
// expanders -- confirm the file name.
//
// Only scalar types get declarations; nothing is emitted unless __CLC_SCALAR
// is defined.
#ifdef __CLC_SCALAR
// Arithmetic reductions, declared for every scalar gentype.
_CLC_DECL _CLC_OVERLOAD __CLC_GENTYPE
__clc_sub_group_non_uniform_reduce_add(__CLC_GENTYPE x);
_CLC_DECL _CLC_OVERLOAD __CLC_GENTYPE
__clc_sub_group_non_uniform_reduce_mul(__CLC_GENTYPE x);
_CLC_DECL _CLC_OVERLOAD __CLC_GENTYPE
__clc_sub_group_non_uniform_reduce_min(__CLC_GENTYPE x);
_CLC_DECL _CLC_OVERLOAD __CLC_GENTYPE
__clc_sub_group_non_uniform_reduce_max(__CLC_GENTYPE x);
// Bitwise reductions are skipped whenever __CLC_FPSIZE is defined.
// NOTE(review): presumably that macro is only set while expanding
// floating-point gentypes -- confirm in clc/math/gentype.inc.
#ifndef __CLC_FPSIZE
_CLC_DECL _CLC_OVERLOAD __CLC_GENTYPE
__clc_sub_group_non_uniform_reduce_and(__CLC_GENTYPE x);
_CLC_DECL _CLC_OVERLOAD __CLC_GENTYPE
__clc_sub_group_non_uniform_reduce_or(__CLC_GENTYPE x);
_CLC_DECL _CLC_OVERLOAD __CLC_GENTYPE
__clc_sub_group_non_uniform_reduce_xor(__CLC_GENTYPE x);
#endif // __CLC_FPSIZE
#endif // __CLC_SCALAR

View File

@ -29,6 +29,7 @@ libclc_configure_source_list(CLC_AMDGPU_SOURCES
mem_fence/clc_mem_fence.cl
subgroup/clc_subgroup.cl
subgroup/clc_sub_group_broadcast.cl
subgroup/clc_sub_group_non_uniform_reduce.cl
subgroup/clc_sub_group_reduce.cl
subgroup/clc_sub_group_scan.cl
synchronization/clc_sub_group_barrier.cl

View File

@ -0,0 +1,384 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "clc/subgroup/clc_sub_group_non_uniform_reduce.h"
// Add reductions for 32- and 64-bit integers.
//
// The unsigned overloads hand `x` to the AMDGPU wave-reduce add builtins; the
// literal 0 is the builtin's second operand (NOTE(review): believed to select
// the target-default reduction strategy -- confirm against the AMDGPU
// intrinsic documentation). The signed overloads reuse the unsigned ones by
// casting the operand to the unsigned type and the result back.
_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint
__clc_sub_group_non_uniform_reduce_add(uint x) {
  const uint sum = __builtin_amdgcn_wave_reduce_add_u32(x, 0);
  return sum;
}

_CLC_DEF _CLC_OVERLOAD _CLC_CONST int
__clc_sub_group_non_uniform_reduce_add(int x) {
  const uint bits = (uint)x;
  const uint sum = __clc_sub_group_non_uniform_reduce_add(bits);
  return (int)sum;
}

_CLC_DEF _CLC_OVERLOAD _CLC_CONST ulong
__clc_sub_group_non_uniform_reduce_add(ulong x) {
  const ulong sum = __builtin_amdgcn_wave_reduce_add_u64(x, 0);
  return sum;
}

_CLC_DEF _CLC_OVERLOAD _CLC_CONST long
__clc_sub_group_non_uniform_reduce_add(long x) {
  const ulong bits = (ulong)x;
  const ulong sum = __clc_sub_group_non_uniform_reduce_add(bits);
  return (long)sum;
}
// Min reductions for 32- and 64-bit integers.
//
// Signed and unsigned types each have their own AMDGPU wave-reduce builtin
// (the _u32/_u64 and _i32/_i64 variants), so no overload is expressed in terms
// of another. The literal 0 is the builtin's second operand (NOTE(review):
// believed to select the target-default reduction strategy -- confirm).
_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint
__clc_sub_group_non_uniform_reduce_min(uint x) {
  const uint lowest = __builtin_amdgcn_wave_reduce_min_u32(x, 0);
  return lowest;
}

_CLC_DEF _CLC_OVERLOAD _CLC_CONST int
__clc_sub_group_non_uniform_reduce_min(int x) {
  const int lowest = __builtin_amdgcn_wave_reduce_min_i32(x, 0);
  return lowest;
}

_CLC_DEF _CLC_OVERLOAD _CLC_CONST ulong
__clc_sub_group_non_uniform_reduce_min(ulong x) {
  const ulong lowest = __builtin_amdgcn_wave_reduce_min_u64(x, 0);
  return lowest;
}

_CLC_DEF _CLC_OVERLOAD _CLC_CONST long
__clc_sub_group_non_uniform_reduce_min(long x) {
  const long lowest = __builtin_amdgcn_wave_reduce_min_i64(x, 0);
  return lowest;
}
// Max reductions for 32- and 64-bit integers.
//
// Signed and unsigned types each have their own AMDGPU wave-reduce builtin
// (the _u32/_u64 and _i32/_i64 variants). The literal 0 is the builtin's
// second operand (NOTE(review): believed to select the target-default
// reduction strategy -- confirm).
_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint
__clc_sub_group_non_uniform_reduce_max(uint x) {
  const uint highest = __builtin_amdgcn_wave_reduce_max_u32(x, 0);
  return highest;
}

_CLC_DEF _CLC_OVERLOAD _CLC_CONST int
__clc_sub_group_non_uniform_reduce_max(int x) {
  const int highest = __builtin_amdgcn_wave_reduce_max_i32(x, 0);
  return highest;
}

_CLC_DEF _CLC_OVERLOAD _CLC_CONST ulong
__clc_sub_group_non_uniform_reduce_max(ulong x) {
  const ulong highest = __builtin_amdgcn_wave_reduce_max_u64(x, 0);
  return highest;
}

_CLC_DEF _CLC_OVERLOAD _CLC_CONST long
__clc_sub_group_non_uniform_reduce_max(long x) {
  const long highest = __builtin_amdgcn_wave_reduce_max_i64(x, 0);
  return highest;
}
// Floating-point add reductions: float and double forward to the AMDGPU
// wave-reduce fadd builtins. The literal 0 is the builtin's second operand
// (NOTE(review): believed to select the target-default reduction strategy --
// confirm).
_CLC_DEF _CLC_OVERLOAD _CLC_CONST float
__clc_sub_group_non_uniform_reduce_add(float x) {
  const float sum = __builtin_amdgcn_wave_reduce_fadd_f32(x, 0);
  return sum;
}

_CLC_DEF _CLC_OVERLOAD _CLC_CONST double
__clc_sub_group_non_uniform_reduce_add(double x) {
  const double sum = __builtin_amdgcn_wave_reduce_fadd_f64(x, 0);
  return sum;
}
// Floating-point min reductions: float and double forward to the AMDGPU
// wave-reduce fmin builtins. The literal 0 is the builtin's second operand
// (NOTE(review): believed to select the target-default reduction strategy --
// confirm).
_CLC_DEF _CLC_OVERLOAD _CLC_CONST float
__clc_sub_group_non_uniform_reduce_min(float x) {
  const float lowest = __builtin_amdgcn_wave_reduce_fmin_f32(x, 0);
  return lowest;
}

_CLC_DEF _CLC_OVERLOAD _CLC_CONST double
__clc_sub_group_non_uniform_reduce_min(double x) {
  const double lowest = __builtin_amdgcn_wave_reduce_fmin_f64(x, 0);
  return lowest;
}
// Floating-point max reductions: float and double forward to the AMDGPU
// wave-reduce fmax builtins. The literal 0 is the builtin's second operand
// (NOTE(review): believed to select the target-default reduction strategy --
// confirm).
_CLC_DEF _CLC_OVERLOAD _CLC_CONST float
__clc_sub_group_non_uniform_reduce_max(float x) {
  const float highest = __builtin_amdgcn_wave_reduce_fmax_f32(x, 0);
  return highest;
}

_CLC_DEF _CLC_OVERLOAD _CLC_CONST double
__clc_sub_group_non_uniform_reduce_max(double x) {
  const double highest = __builtin_amdgcn_wave_reduce_fmax_f64(x, 0);
  return highest;
}
// Half-precision add/min/max reductions.
//
// FIXME: There should be a direct half builtin available.
// Until then each overload widens `x` to float, reuses the float overload of
// the same reduction, and narrows the float result back to half with an
// explicit cast (matching how the other narrow-type overloads cast to their
// own return type).
_CLC_DEF _CLC_OVERLOAD _CLC_CONST half
__clc_sub_group_non_uniform_reduce_add(half x) {
  return (half)__clc_sub_group_non_uniform_reduce_add((float)x);
}

_CLC_DEF _CLC_OVERLOAD _CLC_CONST half
__clc_sub_group_non_uniform_reduce_min(half x) {
  return (half)__clc_sub_group_non_uniform_reduce_min((float)x);
}

_CLC_DEF _CLC_OVERLOAD _CLC_CONST half
__clc_sub_group_non_uniform_reduce_max(half x) {
  return (half)__clc_sub_group_non_uniform_reduce_max((float)x);
}
// Add reductions for 8- and 16-bit integers.
//
// Each overload widens `x` to the 32-bit type of the same signedness, reuses
// that overload, and casts the result back to its own return type (so the sum
// is truncated to the narrow width).
// FIXME: There should be a direct short builtin available.
_CLC_DEF _CLC_OVERLOAD _CLC_CONST uchar
__clc_sub_group_non_uniform_reduce_add(uchar x) {
  return (uchar)__clc_sub_group_non_uniform_reduce_add((uint)x);
}

_CLC_DEF _CLC_OVERLOAD _CLC_CONST char
__clc_sub_group_non_uniform_reduce_add(char x) {
  return (char)__clc_sub_group_non_uniform_reduce_add((int)x);
}

_CLC_DEF _CLC_OVERLOAD _CLC_CONST ushort
__clc_sub_group_non_uniform_reduce_add(ushort x) {
  return (ushort)__clc_sub_group_non_uniform_reduce_add((uint)x);
}

_CLC_DEF _CLC_OVERLOAD _CLC_CONST short
__clc_sub_group_non_uniform_reduce_add(short x) {
  return (short)__clc_sub_group_non_uniform_reduce_add((int)x);
}
// Min reductions for 8- and 16-bit integers.
//
// Each overload widens `x` to the 32-bit type of the same signedness, reuses
// that overload, and casts the result back to its own return type.
_CLC_DEF _CLC_OVERLOAD _CLC_CONST uchar
__clc_sub_group_non_uniform_reduce_min(uchar x) {
  return (uchar)__clc_sub_group_non_uniform_reduce_min((uint)x);
}

_CLC_DEF _CLC_OVERLOAD _CLC_CONST char
__clc_sub_group_non_uniform_reduce_min(char x) {
  return (char)__clc_sub_group_non_uniform_reduce_min((int)x);
}

_CLC_DEF _CLC_OVERLOAD _CLC_CONST ushort
__clc_sub_group_non_uniform_reduce_min(ushort x) {
  return (ushort)__clc_sub_group_non_uniform_reduce_min((uint)x);
}

_CLC_DEF _CLC_OVERLOAD _CLC_CONST short
__clc_sub_group_non_uniform_reduce_min(short x) {
  return (short)__clc_sub_group_non_uniform_reduce_min((int)x);
}
// Max reductions for 8- and 16-bit integers.
//
// Each overload widens `x` to the 32-bit type of the same signedness, reuses
// that overload, and casts the result back to its own return type.
_CLC_DEF _CLC_OVERLOAD _CLC_CONST uchar
__clc_sub_group_non_uniform_reduce_max(uchar x) {
  return (uchar)__clc_sub_group_non_uniform_reduce_max((uint)x);
}

_CLC_DEF _CLC_OVERLOAD _CLC_CONST char
__clc_sub_group_non_uniform_reduce_max(char x) {
  return (char)__clc_sub_group_non_uniform_reduce_max((int)x);
}

_CLC_DEF _CLC_OVERLOAD _CLC_CONST ushort
__clc_sub_group_non_uniform_reduce_max(ushort x) {
  return (ushort)__clc_sub_group_non_uniform_reduce_max((uint)x);
}

_CLC_DEF _CLC_OVERLOAD _CLC_CONST short
__clc_sub_group_non_uniform_reduce_max(short x) {
  return (short)__clc_sub_group_non_uniform_reduce_max((int)x);
}
// Bitwise AND reductions for 32- and 64-bit integers.
//
// The unsigned overloads forward to the AMDGPU wave-reduce and builtins (_b32
// and _b64); the literal 0 is the builtin's second operand (NOTE(review):
// believed to select the target-default reduction strategy -- confirm). The
// signed overloads reuse the unsigned ones through casts.
_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint
__clc_sub_group_non_uniform_reduce_and(uint x) {
  const uint mask = __builtin_amdgcn_wave_reduce_and_b32(x, 0);
  return mask;
}

_CLC_DEF _CLC_OVERLOAD _CLC_CONST int
__clc_sub_group_non_uniform_reduce_and(int x) {
  const uint bits = (uint)x;
  const uint mask = __clc_sub_group_non_uniform_reduce_and(bits);
  return (int)mask;
}

_CLC_DEF _CLC_OVERLOAD _CLC_CONST ulong
__clc_sub_group_non_uniform_reduce_and(ulong x) {
  const ulong mask = __builtin_amdgcn_wave_reduce_and_b64(x, 0);
  return mask;
}

_CLC_DEF _CLC_OVERLOAD _CLC_CONST long
__clc_sub_group_non_uniform_reduce_and(long x) {
  const ulong bits = (ulong)x;
  const ulong mask = __clc_sub_group_non_uniform_reduce_and(bits);
  return (long)mask;
}
// Bitwise OR reductions for 32- and 64-bit integers.
//
// The unsigned overloads forward to the AMDGPU wave-reduce or builtins (_b32
// and _b64); the literal 0 is the builtin's second operand (NOTE(review):
// believed to select the target-default reduction strategy -- confirm). The
// signed overloads reuse the unsigned ones through casts.
_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint
__clc_sub_group_non_uniform_reduce_or(uint x) {
  const uint mask = __builtin_amdgcn_wave_reduce_or_b32(x, 0);
  return mask;
}

_CLC_DEF _CLC_OVERLOAD _CLC_CONST int
__clc_sub_group_non_uniform_reduce_or(int x) {
  const uint bits = (uint)x;
  const uint mask = __clc_sub_group_non_uniform_reduce_or(bits);
  return (int)mask;
}

_CLC_DEF _CLC_OVERLOAD _CLC_CONST ulong
__clc_sub_group_non_uniform_reduce_or(ulong x) {
  const ulong mask = __builtin_amdgcn_wave_reduce_or_b64(x, 0);
  return mask;
}

_CLC_DEF _CLC_OVERLOAD _CLC_CONST long
__clc_sub_group_non_uniform_reduce_or(long x) {
  const ulong bits = (ulong)x;
  const ulong mask = __clc_sub_group_non_uniform_reduce_or(bits);
  return (long)mask;
}
// Bitwise XOR reductions for 32- and 64-bit integers.
//
// The unsigned overloads forward to the AMDGPU wave-reduce xor builtins (_b32
// and _b64); the literal 0 is the builtin's second operand (NOTE(review):
// believed to select the target-default reduction strategy -- confirm). The
// signed overloads reuse the unsigned ones through casts.
_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint
__clc_sub_group_non_uniform_reduce_xor(uint x) {
  const uint mask = __builtin_amdgcn_wave_reduce_xor_b32(x, 0);
  return mask;
}

_CLC_DEF _CLC_OVERLOAD _CLC_CONST int
__clc_sub_group_non_uniform_reduce_xor(int x) {
  const uint bits = (uint)x;
  const uint mask = __clc_sub_group_non_uniform_reduce_xor(bits);
  return (int)mask;
}

_CLC_DEF _CLC_OVERLOAD _CLC_CONST ulong
__clc_sub_group_non_uniform_reduce_xor(ulong x) {
  const ulong mask = __builtin_amdgcn_wave_reduce_xor_b64(x, 0);
  return mask;
}

_CLC_DEF _CLC_OVERLOAD _CLC_CONST long
__clc_sub_group_non_uniform_reduce_xor(long x) {
  const ulong bits = (ulong)x;
  const ulong mask = __clc_sub_group_non_uniform_reduce_xor(bits);
  return (long)mask;
}
// Bitwise AND reductions for 8- and 16-bit integers.
//
// Each overload widens `x` to the 32-bit type of the same signedness, reuses
// that overload, and casts the result back to its own return type.
// FIXME: There should be a direct short builtin available.
_CLC_DEF _CLC_OVERLOAD _CLC_CONST uchar
__clc_sub_group_non_uniform_reduce_and(uchar x) {
  return (uchar)__clc_sub_group_non_uniform_reduce_and((uint)x);
}

_CLC_DEF _CLC_OVERLOAD _CLC_CONST char
__clc_sub_group_non_uniform_reduce_and(char x) {
  return (char)__clc_sub_group_non_uniform_reduce_and((int)x);
}

_CLC_DEF _CLC_OVERLOAD _CLC_CONST ushort
__clc_sub_group_non_uniform_reduce_and(ushort x) {
  return (ushort)__clc_sub_group_non_uniform_reduce_and((uint)x);
}

_CLC_DEF _CLC_OVERLOAD _CLC_CONST short
__clc_sub_group_non_uniform_reduce_and(short x) {
  return (short)__clc_sub_group_non_uniform_reduce_and((int)x);
}
// Bitwise OR reductions for 8- and 16-bit integers.
//
// Each overload widens `x` to the 32-bit type of the same signedness, reuses
// that overload, and casts the result back to its own return type.
// FIXME: There should be a direct short builtin available.
_CLC_DEF _CLC_OVERLOAD _CLC_CONST uchar
__clc_sub_group_non_uniform_reduce_or(uchar x) {
  return (uchar)__clc_sub_group_non_uniform_reduce_or((uint)x);
}

_CLC_DEF _CLC_OVERLOAD _CLC_CONST char
__clc_sub_group_non_uniform_reduce_or(char x) {
  return (char)__clc_sub_group_non_uniform_reduce_or((int)x);
}

_CLC_DEF _CLC_OVERLOAD _CLC_CONST ushort
__clc_sub_group_non_uniform_reduce_or(ushort x) {
  return (ushort)__clc_sub_group_non_uniform_reduce_or((uint)x);
}

_CLC_DEF _CLC_OVERLOAD _CLC_CONST short
__clc_sub_group_non_uniform_reduce_or(short x) {
  return (short)__clc_sub_group_non_uniform_reduce_or((int)x);
}
// Bitwise XOR reductions for 8- and 16-bit integers.
//
// Each overload widens `x` to the 32-bit type of the same signedness, reuses
// that overload, and casts the result back to its own return type.
// FIXME: There should be a direct short builtin available.
_CLC_DEF _CLC_OVERLOAD _CLC_CONST uchar
__clc_sub_group_non_uniform_reduce_xor(uchar x) {
  return (uchar)__clc_sub_group_non_uniform_reduce_xor((uint)x);
}

_CLC_DEF _CLC_OVERLOAD _CLC_CONST char
__clc_sub_group_non_uniform_reduce_xor(char x) {
  return (char)__clc_sub_group_non_uniform_reduce_xor((int)x);
}

_CLC_DEF _CLC_OVERLOAD _CLC_CONST ushort
__clc_sub_group_non_uniform_reduce_xor(ushort x) {
  return (ushort)__clc_sub_group_non_uniform_reduce_xor((uint)x);
}

_CLC_DEF _CLC_OVERLOAD _CLC_CONST short
__clc_sub_group_non_uniform_reduce_xor(short x) {
  return (short)__clc_sub_group_non_uniform_reduce_xor((int)x);
}
_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint
__clc_sub_group_non_uniform_reduce_mul(uint x) {
(void)x;
// TODO:
__builtin_trap();
}
_CLC_DEF _CLC_OVERLOAD _CLC_CONST int
__clc_sub_group_non_uniform_reduce_mul(int x) {
return (int)__clc_sub_group_non_uniform_reduce_mul((uint)x);
}
_CLC_DEF _CLC_OVERLOAD _CLC_CONST ulong
__clc_sub_group_non_uniform_reduce_mul(ulong x) {
(void)x;
// TODO:
__builtin_trap();
}
_CLC_DEF _CLC_OVERLOAD _CLC_CONST long
__clc_sub_group_non_uniform_reduce_mul(long x) {
return (long)__clc_sub_group_non_uniform_reduce_mul((ulong)x);
}
_CLC_DEF _CLC_OVERLOAD _CLC_CONST char
__clc_sub_group_non_uniform_reduce_mul(char x) {
return (char)__clc_sub_group_non_uniform_reduce_mul((int)x);
}
_CLC_DEF _CLC_OVERLOAD _CLC_CONST uchar
__clc_sub_group_non_uniform_reduce_mul(uchar x) {
return (uchar)__clc_sub_group_non_uniform_reduce_mul((uint)x);
}
_CLC_DEF _CLC_OVERLOAD _CLC_CONST short
__clc_sub_group_non_uniform_reduce_mul(short x) {
return (short)__clc_sub_group_non_uniform_reduce_mul((int)x);
}
_CLC_DEF _CLC_OVERLOAD _CLC_CONST ushort
__clc_sub_group_non_uniform_reduce_mul(ushort x) {
return (ushort)__clc_sub_group_non_uniform_reduce_mul((uint)x);
}
_CLC_DEF _CLC_OVERLOAD _CLC_CONST int
__clc_sub_group_non_uniform_reduce_logical_and(int predicate) {
// TODO:
(void)predicate;
__builtin_trap();
}
_CLC_DEF _CLC_OVERLOAD _CLC_CONST int
__clc_sub_group_non_uniform_reduce_logical_or(int predicate) {
// TODO:
(void)predicate;
__builtin_trap();
}
_CLC_DEF _CLC_OVERLOAD _CLC_CONST int
__clc_sub_group_non_uniform_reduce_logical_xor(int predicate) {
// TODO:
(void)predicate;
__builtin_trap();
}
_CLC_DEF _CLC_OVERLOAD _CLC_CONST float
__clc_sub_group_non_uniform_reduce_mul(float x) {
(void)x;
__builtin_trap();
}
_CLC_DEF _CLC_OVERLOAD _CLC_CONST double
__clc_sub_group_non_uniform_reduce_mul(double x) {
(void)x;
__builtin_trap();
}
_CLC_DEF _CLC_OVERLOAD _CLC_CONST half
__clc_sub_group_non_uniform_reduce_mul(half x) {
(void)x;
__builtin_trap();
}

View File

@ -6,140 +6,14 @@
//
//===----------------------------------------------------------------------===//
#include "clc/subgroup/clc_sub_group_broadcast.h"
#include "clc/subgroup/clc_sub_group_non_uniform_reduce.h"
#include "clc/subgroup/clc_sub_group_reduce.h"
// Uniform add reduction for uint: hands `x` to the AMDGPU wave-reduce add
// builtin, passing the literal 0 as the builtin's second operand.
_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint __clc_sub_group_reduce_add(uint x) {
return __builtin_amdgcn_wave_reduce_add_u32(x, 0);
}
// The implementation is the same as the nonuniform case, so just call the
// nonuniform versions of every function.
// Uniform add reduction for int: casts `x` to uint, reuses the uint overload,
// and casts the sum back to int.
_CLC_DEF _CLC_OVERLOAD _CLC_CONST int __clc_sub_group_reduce_add(int x) {
return (int)__clc_sub_group_reduce_add((uint)x);
}
#define __CLC_BODY "clc_sub_group_reduce.inc"
#include "clc/integer/gentype.inc"
// Uniform add reduction for 64-bit integers: ulong uses the 64-bit wave-reduce
// add builtin (second operand 0); long reuses the ulong overload through
// casts.
_CLC_DEF _CLC_OVERLOAD _CLC_CONST ulong __clc_sub_group_reduce_add(ulong x) {
return __builtin_amdgcn_wave_reduce_add_u64(x, 0);
}
_CLC_DEF _CLC_OVERLOAD _CLC_CONST long __clc_sub_group_reduce_add(long x) {
return (long)__clc_sub_group_reduce_add((ulong)x);
}
// Uniform min reduction for 32- and 64-bit integers. Every overload calls the
// wave-reduce min builtin matching its own signedness and width (_u32, _i32,
// _u64, _i64), with the literal 0 as the second operand.
_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint __clc_sub_group_reduce_min(uint x) {
return __builtin_amdgcn_wave_reduce_min_u32(x, 0);
}
_CLC_DEF _CLC_OVERLOAD _CLC_CONST int __clc_sub_group_reduce_min(int x) {
return __builtin_amdgcn_wave_reduce_min_i32(x, 0);
}
_CLC_DEF _CLC_OVERLOAD _CLC_CONST ulong __clc_sub_group_reduce_min(ulong x) {
return __builtin_amdgcn_wave_reduce_min_u64(x, 0);
}
_CLC_DEF _CLC_OVERLOAD _CLC_CONST long __clc_sub_group_reduce_min(long x) {
return __builtin_amdgcn_wave_reduce_min_i64(x, 0);
}
// Uniform max reduction for 32- and 64-bit integers. Every overload calls the
// wave-reduce max builtin matching its own signedness and width (_u32, _i32,
// _u64, _i64), with the literal 0 as the second operand.
_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint __clc_sub_group_reduce_max(uint x) {
return __builtin_amdgcn_wave_reduce_max_u32(x, 0);
}
_CLC_DEF _CLC_OVERLOAD _CLC_CONST int __clc_sub_group_reduce_max(int x) {
return __builtin_amdgcn_wave_reduce_max_i32(x, 0);
}
_CLC_DEF _CLC_OVERLOAD _CLC_CONST ulong __clc_sub_group_reduce_max(ulong x) {
return __builtin_amdgcn_wave_reduce_max_u64(x, 0);
}
_CLC_DEF _CLC_OVERLOAD _CLC_CONST long __clc_sub_group_reduce_max(long x) {
return __builtin_amdgcn_wave_reduce_max_i64(x, 0);
}
// Uniform add/min/max reductions for float and double. Each overload calls
// the wave-reduce fadd/fmin/fmax builtin of its own width (_f32 or _f64), with
// the literal 0 as the second operand.
_CLC_DEF _CLC_OVERLOAD _CLC_CONST float __clc_sub_group_reduce_add(float x) {
return __builtin_amdgcn_wave_reduce_fadd_f32(x, 0);
}
_CLC_DEF _CLC_OVERLOAD _CLC_CONST double __clc_sub_group_reduce_add(double x) {
return __builtin_amdgcn_wave_reduce_fadd_f64(x, 0);
}
_CLC_DEF _CLC_OVERLOAD _CLC_CONST float __clc_sub_group_reduce_min(float x) {
return __builtin_amdgcn_wave_reduce_fmin_f32(x, 0);
}
_CLC_DEF _CLC_OVERLOAD _CLC_CONST double __clc_sub_group_reduce_min(double x) {
return __builtin_amdgcn_wave_reduce_fmin_f64(x, 0);
}
_CLC_DEF _CLC_OVERLOAD _CLC_CONST float __clc_sub_group_reduce_max(float x) {
return __builtin_amdgcn_wave_reduce_fmax_f32(x, 0);
}
_CLC_DEF _CLC_OVERLOAD _CLC_CONST double __clc_sub_group_reduce_max(double x) {
return __builtin_amdgcn_wave_reduce_fmax_f64(x, 0);
}
// Uniform add/min/max reductions for half. Each overload widens `x` to float
// and reuses the float overload; the float result is converted to half by the
// return statement (the explicit cast names float, not half).
_CLC_DEF _CLC_OVERLOAD _CLC_CONST half __clc_sub_group_reduce_add(half x) {
// FIXME: There should be a direct half builtin available.
return (float)__clc_sub_group_reduce_add((float)x);
}
_CLC_DEF _CLC_OVERLOAD _CLC_CONST half __clc_sub_group_reduce_min(half x) {
// FIXME: There should be a direct half builtin available.
return (float)__clc_sub_group_reduce_min((float)x);
}
_CLC_DEF _CLC_OVERLOAD _CLC_CONST half __clc_sub_group_reduce_max(half x) {
// FIXME: There should be a direct half builtin available.
return (float)__clc_sub_group_reduce_max((float)x);
}
// Uniform add reduction for 8- and 16-bit integers. Each overload widens `x`
// to the 32-bit type of the same signedness and reuses that overload; the
// result is narrowed back to the return type (the short overload's explicit
// cast names int, so the narrowing there happens in the return conversion).
_CLC_DEF _CLC_OVERLOAD _CLC_CONST uchar __clc_sub_group_reduce_add(uchar x) {
return (uchar)__clc_sub_group_reduce_add((uint)x);
}
_CLC_DEF _CLC_OVERLOAD _CLC_CONST char __clc_sub_group_reduce_add(char x) {
return (char)__clc_sub_group_reduce_add((int)x);
}
// FIXME: There should be a direct short builtin available.
_CLC_DEF _CLC_OVERLOAD _CLC_CONST ushort __clc_sub_group_reduce_add(ushort x) {
return (ushort)__clc_sub_group_reduce_add((uint)x);
}
_CLC_DEF _CLC_OVERLOAD _CLC_CONST short __clc_sub_group_reduce_add(short x) {
return (int)__clc_sub_group_reduce_add((int)x);
}
// Uniform min reduction for 8- and 16-bit integers. Each overload widens `x`
// to the 32-bit type of the same signedness and reuses that overload; the
// result is narrowed back to the return type (the short overload's explicit
// cast names int, so the narrowing there happens in the return conversion).
_CLC_DEF _CLC_OVERLOAD _CLC_CONST uchar __clc_sub_group_reduce_min(uchar x) {
return (uchar)__clc_sub_group_reduce_min((uint)x);
}
_CLC_DEF _CLC_OVERLOAD _CLC_CONST char __clc_sub_group_reduce_min(char x) {
return (char)__clc_sub_group_reduce_min((int)x);
}
_CLC_DEF _CLC_OVERLOAD _CLC_CONST ushort __clc_sub_group_reduce_min(ushort x) {
return (ushort)__clc_sub_group_reduce_min((uint)x);
}
_CLC_DEF _CLC_OVERLOAD _CLC_CONST short __clc_sub_group_reduce_min(short x) {
return (int)__clc_sub_group_reduce_min((int)x);
}
// Uniform max reduction for 8- and 16-bit integers. Each overload widens `x`
// to the 32-bit type of the same signedness and reuses that overload; the
// result is narrowed back to the return type (the short overload's explicit
// cast names int, so the narrowing there happens in the return conversion).
_CLC_DEF _CLC_OVERLOAD _CLC_CONST uchar __clc_sub_group_reduce_max(uchar x) {
return (uchar)__clc_sub_group_reduce_max((uint)x);
}
_CLC_DEF _CLC_OVERLOAD _CLC_CONST char __clc_sub_group_reduce_max(char x) {
return (char)__clc_sub_group_reduce_max((int)x);
}
_CLC_DEF _CLC_OVERLOAD _CLC_CONST ushort __clc_sub_group_reduce_max(ushort x) {
return (ushort)__clc_sub_group_reduce_max((uint)x);
}
_CLC_DEF _CLC_OVERLOAD _CLC_CONST short __clc_sub_group_reduce_max(short x) {
return (int)__clc_sub_group_reduce_max((int)x);
}
#define __CLC_BODY "clc_sub_group_reduce.inc"
#include "clc/math/gentype.inc"

View File

@ -0,0 +1,26 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Definition template for the uniform subgroup reductions of one scalar
// __CLC_GENTYPE. Each uniform reduction simply forwards to the non-uniform
// reduction of the same operation and type. Nothing is emitted unless
// __CLC_SCALAR is defined.
#ifdef __CLC_SCALAR

_CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_GENTYPE
__clc_sub_group_reduce_add(__CLC_GENTYPE x) {
  const __CLC_GENTYPE sum = __clc_sub_group_non_uniform_reduce_add(x);
  return sum;
}

_CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_GENTYPE
__clc_sub_group_reduce_min(__CLC_GENTYPE x) {
  const __CLC_GENTYPE lowest = __clc_sub_group_non_uniform_reduce_min(x);
  return lowest;
}

_CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_GENTYPE
__clc_sub_group_reduce_max(__CLC_GENTYPE x) {
  const __CLC_GENTYPE highest = __clc_sub_group_non_uniform_reduce_max(x);
  return highest;
}

#endif // __CLC_SCALAR

View File

@ -208,6 +208,7 @@ libclc_configure_source_list(OPENCL_GENERIC_SOURCES
shared/vstore.cl
subgroup/sub_group_broadcast.cl
subgroup/sub_group_reduce.cl
subgroup/sub_group_non_uniform_reduce.cl
subgroup/sub_group_scan_exclusive.cl
subgroup/sub_group_scan_inclusive.cl
subgroup/subgroup.cl

View File

@ -0,0 +1,30 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "clc/subgroup/clc_sub_group_non_uniform_reduce.h"
#define __CLC_BODY "sub_group_non_uniform_reduce.inc"
#include "clc/integer/gentype.inc"
#define __CLC_BODY "sub_group_non_uniform_reduce.inc"
#include "clc/math/gentype.inc"
// OpenCL-level logical reductions of a predicate. Each entry point is a thin
// wrapper that passes `predicate` to the __clc_ function of the same name and
// returns its result unchanged.
_CLC_DEF _CLC_OVERLOAD int
sub_group_non_uniform_reduce_logical_and(int predicate) {
  const int reduced = __clc_sub_group_non_uniform_reduce_logical_and(predicate);
  return reduced;
}

_CLC_DEF _CLC_OVERLOAD int
sub_group_non_uniform_reduce_logical_or(int predicate) {
  const int reduced = __clc_sub_group_non_uniform_reduce_logical_or(predicate);
  return reduced;
}

_CLC_DEF _CLC_OVERLOAD int
sub_group_non_uniform_reduce_logical_xor(int predicate) {
  const int reduced = __clc_sub_group_non_uniform_reduce_logical_xor(predicate);
  return reduced;
}

View File

@ -0,0 +1,48 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Definition template for the OpenCL-level non-uniform subgroup reductions of
// one scalar __CLC_GENTYPE. Every entry point forwards `x` to the __clc_
// function of the same name and returns its result unchanged. Nothing is
// emitted unless __CLC_SCALAR is defined.
#ifdef __CLC_SCALAR

// Arithmetic reductions, defined for every scalar gentype.
_CLC_OVERLOAD _CLC_DEF _CLC_CONST __CLC_GENTYPE
sub_group_non_uniform_reduce_add(__CLC_GENTYPE x) {
  const __CLC_GENTYPE reduced = __clc_sub_group_non_uniform_reduce_add(x);
  return reduced;
}

_CLC_OVERLOAD _CLC_DEF _CLC_CONST __CLC_GENTYPE
sub_group_non_uniform_reduce_mul(__CLC_GENTYPE x) {
  const __CLC_GENTYPE reduced = __clc_sub_group_non_uniform_reduce_mul(x);
  return reduced;
}

_CLC_OVERLOAD _CLC_DEF _CLC_CONST __CLC_GENTYPE
sub_group_non_uniform_reduce_min(__CLC_GENTYPE x) {
  const __CLC_GENTYPE reduced = __clc_sub_group_non_uniform_reduce_min(x);
  return reduced;
}

_CLC_OVERLOAD _CLC_DEF _CLC_CONST __CLC_GENTYPE
sub_group_non_uniform_reduce_max(__CLC_GENTYPE x) {
  const __CLC_GENTYPE reduced = __clc_sub_group_non_uniform_reduce_max(x);
  return reduced;
}

// Bitwise reductions are skipped whenever __CLC_FPSIZE is defined.
#ifndef __CLC_FPSIZE

_CLC_OVERLOAD _CLC_DEF _CLC_CONST __CLC_GENTYPE
sub_group_non_uniform_reduce_and(__CLC_GENTYPE x) {
  const __CLC_GENTYPE reduced = __clc_sub_group_non_uniform_reduce_and(x);
  return reduced;
}

_CLC_OVERLOAD _CLC_DEF _CLC_CONST __CLC_GENTYPE
sub_group_non_uniform_reduce_or(__CLC_GENTYPE x) {
  const __CLC_GENTYPE reduced = __clc_sub_group_non_uniform_reduce_or(x);
  return reduced;
}

_CLC_OVERLOAD _CLC_DEF _CLC_CONST __CLC_GENTYPE
sub_group_non_uniform_reduce_xor(__CLC_GENTYPE x) {
  const __CLC_GENTYPE reduced = __clc_sub_group_non_uniform_reduce_xor(x);
  return reduced;
}

#endif // __CLC_FPSIZE

#endif // __CLC_SCALAR