libclc: Add work group scan functions (#188829)
This commit is contained in:
parent
5c0c421481
commit
56e1510d21
23
libclc/clc/include/clc/clc_target_defines.h
Normal file
23
libclc/clc/include/clc/clc_target_defines.h
Normal file
@ -0,0 +1,23 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef __CLC_CLC_TARGET_DEFINES_H__
#define __CLC_CLC_TARGET_DEFINES_H__

// Compile-time bounds that library code can use to size fixed storage.
//
//   __CLC_MAX_WORK_GROUP_SIZE        value used as the largest work-group size
//   __CLC_MIN_NATIVE_SUB_GROUP_SIZE  value used as the smallest sub-group size
#if defined(__AMDGPU__) || defined(__NVPTX__)
#define __CLC_MAX_WORK_GROUP_SIZE 1024
#define __CLC_MIN_NATIVE_SUB_GROUP_SIZE 32
#else
// Fallback for every other target.
// NOTE(review): presumably deliberately conservative values - confirm.
#define __CLC_MAX_WORK_GROUP_SIZE 4096
#define __CLC_MIN_NATIVE_SUB_GROUP_SIZE 1
#endif

// Upper bound on the number of sub-groups inside one work-group: the largest
// work-group divided into the smallest sub-groups.
#define __CLC_MAX_NUM_SUB_GROUPS                                               \
  (__CLC_MAX_WORK_GROUP_SIZE / __CLC_MIN_NATIVE_SUB_GROUP_SIZE)

// Original spelling of __CLC_MAX_NUM_SUB_GROUPS. The quotient counts
// sub-groups per work-group, not work-groups; the name is kept so that code
// already using it keeps building.
#define __CLC_MAX_NUM_WORK_GROUPS __CLC_MAX_NUM_SUB_GROUPS

#endif // __CLC_CLC_TARGET_DEFINES_H__
|
||||
20
libclc/clc/include/clc/collective/clc_work_group_scan.h
Normal file
20
libclc/clc/include/clc/collective/clc_work_group_scan.h
Normal file
@ -0,0 +1,20 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef __CLC_COLLECTIVE_CLC_WORK_GROUP_SCAN_H__
|
||||
#define __CLC_COLLECTIVE_CLC_WORK_GROUP_SCAN_H__
|
||||
|
||||
#include "clc/internal/clc.h"
|
||||
|
||||
#define __CLC_BODY "clc/collective/clc_work_group_scan_decl.inc"
|
||||
#include "clc/integer/gentype.inc"
|
||||
|
||||
#define __CLC_BODY "clc/collective/clc_work_group_scan_decl.inc"
|
||||
#include "clc/math/gentype.inc"
|
||||
|
||||
#endif // __CLC_COLLECTIVE_CLC_WORK_GROUP_SCAN_H__
|
||||
@ -0,0 +1,25 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#if defined(__CLC_SCALAR) && \
|
||||
(defined(__CLC_FPSIZE) || __CLC_GENSIZE == 32 || __CLC_GENSIZE == 64)
|
||||
// Declarations of the work-group scan entry points for the current
// __CLC_GENTYPE. Each takes one value of that type and returns one value of
// that type; the OpenCL work_group_scan_* wrappers forward to the function of
// the matching name.
#define __CLC_WORK_GROUP_SCAN_DECL(NAME)                                       \
  _CLC_DECL _CLC_OVERLOAD __CLC_GENTYPE NAME(__CLC_GENTYPE x)

// Inclusive scans.
__CLC_WORK_GROUP_SCAN_DECL(__clc_work_group_scan_inclusive_add);
__CLC_WORK_GROUP_SCAN_DECL(__clc_work_group_scan_inclusive_min);
__CLC_WORK_GROUP_SCAN_DECL(__clc_work_group_scan_inclusive_max);

// Exclusive scans.
__CLC_WORK_GROUP_SCAN_DECL(__clc_work_group_scan_exclusive_add);
__CLC_WORK_GROUP_SCAN_DECL(__clc_work_group_scan_exclusive_min);
__CLC_WORK_GROUP_SCAN_DECL(__clc_work_group_scan_exclusive_max);

#undef __CLC_WORK_GROUP_SCAN_DECL
|
||||
|
||||
#endif
|
||||
@ -18,6 +18,7 @@ libclc_configure_source_list(CLC_GENERIC_SOURCES
|
||||
atomic/clc_atomic_store.cl
|
||||
collective/clc_work_group_any_all.cl
|
||||
collective/clc_work_group_broadcast.cl
|
||||
collective/clc_work_group_scan.cl
|
||||
collective/clc_work_group_reduce.cl
|
||||
common/clc_degrees.cl
|
||||
common/clc_radians.cl
|
||||
|
||||
38
libclc/clc/lib/generic/collective/clc_work_group_scan.cl
Normal file
38
libclc/clc/lib/generic/collective/clc_work_group_scan.cl
Normal file
@ -0,0 +1,38 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
#include "clc/collective/clc_work_group_scan.h"
|
||||
|
||||
#include "clc/clc_target_defines.h"
|
||||
|
||||
#include "clc/atomic/clc_atomic_load.h"
|
||||
#include "clc/atomic/clc_atomic_store.h"
|
||||
#include "clc/math/clc_fmax.h"
|
||||
#include "clc/math/clc_fmin.h"
|
||||
#include "clc/shared/clc_max.h"
|
||||
#include "clc/shared/clc_min.h"
|
||||
|
||||
#include "clc/subgroup/clc_sub_group_scan.h"
|
||||
#include "clc/subgroup/clc_subgroup.h"
|
||||
#include "clc/synchronization/clc_work_group_barrier.h"
|
||||
#include "clc/workitem/clc_get_num_sub_groups.h"
|
||||
#include "clc/workitem/clc_get_sub_group_id.h"
|
||||
#include "clc/workitem/clc_get_sub_group_local_id.h"
|
||||
|
||||
#pragma OPENCL EXTENSION __cl_clang_function_scope_local_variables : enable
|
||||
|
||||
// Selects the binary operation a work-group scan combines values with.
enum __CLC_WORK_GROUP_SCAN_OP {
  __CLC_WORK_GROUP_SCAN_ADD = 0, // x + y
  __CLC_WORK_GROUP_SCAN_MIN = 1, // minimum of x and y
  __CLC_WORK_GROUP_SCAN_MAX = 2  // maximum of x and y
};
|
||||
|
||||
#define __CLC_BODY "clc_work_group_scan.inc"
|
||||
#include "clc/integer/gentype.inc"
|
||||
|
||||
#define __CLC_BODY "clc_work_group_scan.inc"
|
||||
#include "clc/math/gentype.inc"
|
||||
155
libclc/clc/lib/generic/collective/clc_work_group_scan.inc
Normal file
155
libclc/clc/lib/generic/collective/clc_work_group_scan.inc
Normal file
@ -0,0 +1,155 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#if defined(__CLC_SCALAR) && \
|
||||
(defined(__CLC_FPSIZE) || __CLC_GENSIZE == 32 || __CLC_GENSIZE == 64)
|
||||
|
||||
// Dispatches to the __clc_sub_group_scan_* function that matches `opcode`
// and `inclusive`, passing `x` through and returning its result.
static _CLC_OVERLOAD _CLC_CONST __CLC_GENTYPE sub_group_scan_op(
    __CLC_GENTYPE x, enum __CLC_WORK_GROUP_SCAN_OP opcode, bool inclusive) {
  if (inclusive) {
    switch (opcode) {
    case __CLC_WORK_GROUP_SCAN_ADD:
      return __clc_sub_group_scan_inclusive_add(x);
    case __CLC_WORK_GROUP_SCAN_MIN:
      return __clc_sub_group_scan_inclusive_min(x);
    case __CLC_WORK_GROUP_SCAN_MAX:
      return __clc_sub_group_scan_inclusive_max(x);
    }
  } else {
    switch (opcode) {
    case __CLC_WORK_GROUP_SCAN_ADD:
      return __clc_sub_group_scan_exclusive_add(x);
    case __CLC_WORK_GROUP_SCAN_MIN:
      return __clc_sub_group_scan_exclusive_min(x);
    case __CLC_WORK_GROUP_SCAN_MAX:
      return __clc_sub_group_scan_exclusive_max(x);
    }
  }
}
|
||||
|
||||
// Combines two values with the binary operation selected by `opcode`:
// x + y, __clc_min(x, y) or __clc_max(x, y).
static _CLC_OVERLOAD _CLC_CONST __CLC_GENTYPE scan_op(
    __CLC_GENTYPE x, __CLC_GENTYPE y, enum __CLC_WORK_GROUP_SCAN_OP opcode) {
  if (opcode == __CLC_WORK_GROUP_SCAN_ADD)
    return x + y;
  if (opcode == __CLC_WORK_GROUP_SCAN_MIN)
    return __clc_min(x, y);
  // The only enumerator left is __CLC_WORK_GROUP_SCAN_MAX.
  return __clc_max(x, y);
}
|
||||
|
||||
// work_group_scan_identity_value(x, opcode) returns the identity element of
// the operation selected by `opcode` for the current __CLC_GENTYPE, i.e. the
// value v for which scan_op(v, y, opcode) == y. `x` is ignored; it only
// selects the overload. The caller visible in this file uses the result to
// pad lanes of sub-group 0 that have no scratch slot to read.
#ifdef __CLC_FPSIZE
static _CLC_OVERLOAD _CLC_CONST __CLC_GENTYPE work_group_scan_identity_value(
    __CLC_GENTYPE x, enum __CLC_WORK_GROUP_SCAN_OP opcode) {
  (void)x;
  switch (opcode) {
  case __CLC_WORK_GROUP_SCAN_ADD:
    return __CLC_FP_LIT(0.0);
  case __CLC_WORK_GROUP_SCAN_MIN:
    return (__CLC_GENTYPE)INFINITY;
  case __CLC_WORK_GROUP_SCAN_MAX:
    return (__CLC_GENTYPE)-INFINITY;
  }
}
#else
// Limits of the current integer __CLC_GENTYPE. They are selected by width
// because narrowing the 64-bit limits is wrong for signed 32-bit types:
// (int)LONG_MAX is -1 and (int)LONG_MIN is 0, neither of which is an identity
// for min/max.
#if defined(__CLC_GEN_S) && __CLC_GENSIZE == 32
#define __CLC_SCAN_GENTYPE_MAX INT_MAX
#define __CLC_SCAN_GENTYPE_MIN INT_MIN
#elif defined(__CLC_GEN_S) && __CLC_GENSIZE == 64
#define __CLC_SCAN_GENTYPE_MAX LONG_MAX
#define __CLC_SCAN_GENTYPE_MIN LONG_MIN
#elif __CLC_GENSIZE == 32
#define __CLC_SCAN_GENTYPE_MAX UINT_MAX
#define __CLC_SCAN_GENTYPE_MIN 0
#elif __CLC_GENSIZE == 64
#define __CLC_SCAN_GENTYPE_MAX ULONG_MAX
#define __CLC_SCAN_GENTYPE_MIN 0
#else
#error "work_group_scan_identity_value: unsupported integer width"
#endif

static _CLC_OVERLOAD _CLC_CONST __CLC_GENTYPE work_group_scan_identity_value(
    __CLC_GENTYPE x, enum __CLC_WORK_GROUP_SCAN_OP opcode) {
  (void)x;
  switch (opcode) {
  case __CLC_WORK_GROUP_SCAN_ADD:
    return (__CLC_GENTYPE)0;
  case __CLC_WORK_GROUP_SCAN_MIN:
    // min(v, y) == y for every y only when v is the largest value.
    return (__CLC_GENTYPE)__CLC_SCAN_GENTYPE_MAX;
  case __CLC_WORK_GROUP_SCAN_MAX:
    // max(v, y) == y for every y only when v is the smallest value.
    return (__CLC_GENTYPE)__CLC_SCAN_GENTYPE_MIN;
  }
}

#undef __CLC_SCAN_GENTYPE_MAX
#undef __CLC_SCAN_GENTYPE_MIN
#endif
|
||||
|
||||
// Work-group scan built from sub-group scans and a __local scratch array.
//
//   a         this work-item's input value
//   opcode    operation to scan with
//   inclusive true for an inclusive scan, false for an exclusive one
//
// Steps:
//   1. Every sub-group scans its own values. With a single sub-group that
//      result is returned directly.
//   2. The last lane of each sub-group stores the sub-group's total into
//      scratch[sub-group id].
//   3. Sub-group 0 replaces the totals with their inclusive scan.
//   4. Every sub-group except the first combines its step-1 result with the
//      scanned total of the preceding sub-group.
//
// NOTE(review): step 3 reads scratch[lane] using the lanes of sub-group 0
// only, so it appears to need the number of sub-groups to be at most the
// sub-group size - confirm this holds on every supported target.
static _CLC_OVERLOAD __CLC_GENTYPE __clc_work_group_scan_impl(
    __CLC_GENTYPE a, enum __CLC_WORK_GROUP_SCAN_OP opcode, bool inclusive) {
  const uint num_sub_groups = __clc_get_num_sub_groups();
  const __CLC_GENTYPE sub_group_result =
      sub_group_scan_op(a, opcode, inclusive);
  if (num_sub_groups == 1)
    return sub_group_result;

  __local __CLC_GENTYPE scratch[__CLC_MAX_NUM_WORK_GROUPS];
  const uint lane = __clc_get_sub_group_local_id();
  const uint sub_group = __clc_get_sub_group_id();

  // Step 2: publish each sub-group's total from its last lane.
  if (lane == __clc_get_sub_group_size() - 1u) {
    __CLC_GENTYPE total = sub_group_result;
    // An exclusive result leaves out this lane's own value; fold it back in
    // to obtain the total of the whole sub-group.
    if (!inclusive)
      total = scan_op(a, sub_group_result, opcode);
    __scoped_atomic_store_n(&scratch[sub_group], total, __ATOMIC_RELAXED,
                            __MEMORY_SCOPE_WRKGRP);
  }

  // All totals must be stored before sub-group 0 reads them.
  __clc_work_group_barrier(__MEMORY_SCOPE_WRKGRP, __CLC_MEMORY_LOCAL);

  // Step 3: sub-group 0 scans the totals in place.
  if (sub_group == 0) {
    const bool has_slot = lane < num_sub_groups;
    __CLC_GENTYPE prefix;
    if (has_slot) {
      prefix = __scoped_atomic_load_n(&scratch[lane], __ATOMIC_RELAXED,
                                      __MEMORY_SCOPE_WRKGRP);
    } else {
      // Lanes without a slot contribute the identity of the operation.
      prefix = work_group_scan_identity_value(sub_group_result, opcode);
    }
    // Called by every lane of sub-group 0, including the padded ones.
    prefix = sub_group_scan_op(prefix, opcode, /*inclusive=*/true);
    if (has_slot) {
      __scoped_atomic_store_n(&scratch[lane], prefix, __ATOMIC_RELAXED,
                              __MEMORY_SCOPE_WRKGRP);
    }
  }

  // The scanned totals must be stored before the other sub-groups read them.
  __clc_work_group_barrier(__MEMORY_SCOPE_WRKGRP, __CLC_MEMORY_LOCAL);

  // Step 4: fold in everything that precedes this sub-group.
  __CLC_GENTYPE ret = sub_group_result;
  if (sub_group != 0) {
    const __CLC_GENTYPE preceding = __scoped_atomic_load_n(
        &scratch[sub_group - 1], __ATOMIC_RELAXED, __MEMORY_SCOPE_WRKGRP);
    ret = scan_op(sub_group_result, preceding, opcode);
  }

  // Barrier after the last read of scratch and before returning.
  __clc_work_group_barrier(__MEMORY_SCOPE_WRKGRP, __CLC_MEMORY_LOCAL);
  return ret;
}
|
||||
|
||||
// Public entry points for the current __CLC_GENTYPE. Each one forwards its
// argument to __clc_work_group_scan_impl with a fixed operation and a fixed
// inclusive/exclusive flag.
#define __CLC_DEFINE_WORK_GROUP_SCAN(NAME, OPCODE, INCLUSIVE)                  \
  _CLC_DEF _CLC_OVERLOAD __CLC_GENTYPE NAME(__CLC_GENTYPE a) {                 \
    return __clc_work_group_scan_impl(a, OPCODE, INCLUSIVE);                   \
  }

// Inclusive scans.
__CLC_DEFINE_WORK_GROUP_SCAN(__clc_work_group_scan_inclusive_add,
                             __CLC_WORK_GROUP_SCAN_ADD, /*inclusive=*/true)
__CLC_DEFINE_WORK_GROUP_SCAN(__clc_work_group_scan_inclusive_min,
                             __CLC_WORK_GROUP_SCAN_MIN, /*inclusive=*/true)
__CLC_DEFINE_WORK_GROUP_SCAN(__clc_work_group_scan_inclusive_max,
                             __CLC_WORK_GROUP_SCAN_MAX, /*inclusive=*/true)

// Exclusive scans.
__CLC_DEFINE_WORK_GROUP_SCAN(__clc_work_group_scan_exclusive_add,
                             __CLC_WORK_GROUP_SCAN_ADD, /*inclusive=*/false)
__CLC_DEFINE_WORK_GROUP_SCAN(__clc_work_group_scan_exclusive_min,
                             __CLC_WORK_GROUP_SCAN_MIN, /*inclusive=*/false)
__CLC_DEFINE_WORK_GROUP_SCAN(__clc_work_group_scan_exclusive_max,
                             __CLC_WORK_GROUP_SCAN_MAX, /*inclusive=*/false)

#undef __CLC_DEFINE_WORK_GROUP_SCAN
|
||||
|
||||
#endif
|
||||
@ -46,6 +46,7 @@ libclc_configure_source_list(OPENCL_GENERIC_SOURCES
|
||||
collective/work_group_any_all.cl
|
||||
collective/work_group_broadcast.cl
|
||||
collective/work_group_reduce.cl
|
||||
collective/work_group_scan.cl
|
||||
common/degrees.cl
|
||||
common/mix.cl
|
||||
common/radians.cl
|
||||
|
||||
15
libclc/opencl/lib/generic/collective/work_group_scan.cl
Normal file
15
libclc/opencl/lib/generic/collective/work_group_scan.cl
Normal file
@ -0,0 +1,15 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "clc/collective/clc_work_group_scan.h"
|
||||
|
||||
#define __CLC_BODY "work_group_scan.inc"
|
||||
#include "clc/integer/gentype.inc"
|
||||
|
||||
#define __CLC_BODY "work_group_scan.inc"
|
||||
#include "clc/math/gentype.inc"
|
||||
41
libclc/opencl/lib/generic/collective/work_group_scan.inc
Normal file
41
libclc/opencl/lib/generic/collective/work_group_scan.inc
Normal file
@ -0,0 +1,41 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#if defined(__CLC_SCALAR) && (defined(__CLC_FPSIZE) || __CLC_GENSIZE >= 32)
|
||||
|
||||
// OpenCL work_group_scan_* built-ins for the current __CLC_GENTYPE. Each one
// forwards its argument unchanged to the __clc_ function of the same name and
// returns that function's result.
#define __CLC_FORWARD_WORK_GROUP_SCAN(NAME, IMPL)                              \
  _CLC_DEF _CLC_OVERLOAD __CLC_GENTYPE NAME(__CLC_GENTYPE a) {                 \
    return IMPL(a);                                                            \
  }

// Inclusive scans.
__CLC_FORWARD_WORK_GROUP_SCAN(work_group_scan_inclusive_add,
                              __clc_work_group_scan_inclusive_add)
__CLC_FORWARD_WORK_GROUP_SCAN(work_group_scan_inclusive_min,
                              __clc_work_group_scan_inclusive_min)
__CLC_FORWARD_WORK_GROUP_SCAN(work_group_scan_inclusive_max,
                              __clc_work_group_scan_inclusive_max)

// Exclusive scans.
__CLC_FORWARD_WORK_GROUP_SCAN(work_group_scan_exclusive_add,
                              __clc_work_group_scan_exclusive_add)
__CLC_FORWARD_WORK_GROUP_SCAN(work_group_scan_exclusive_min,
                              __clc_work_group_scan_exclusive_min)
__CLC_FORWARD_WORK_GROUP_SCAN(work_group_scan_exclusive_max,
                              __clc_work_group_scan_exclusive_max)

#undef __CLC_FORWARD_WORK_GROUP_SCAN
|
||||
|
||||
#endif
|
||||
Loading…
x
Reference in New Issue
Block a user