llvm-project/clang/test/OpenMP/target_teams_ast_print.cpp
Chuanqi Xu 20e9049509
[Serialization] Support loading template specializations lazily (#119333)
Reland https://github.com/llvm/llvm-project/pull/83237

---

(Original comments)

Currently, all the specializations of a template (including
instantiations, specializations and partial specializations) are
loaded at once if we want to instantiate another instance of the
template, find an instantiation of the template, or just
complete the redecl chain.

This basically means we need to load every specialization of the
template once the template declaration is loaded. This is bad since,
when we load a specialization, we need to load all of its template
arguments, so we end up deserializing a lot of unnecessary
declarations.

For example,

```
// M.cppm
export module M;
export template <class T>
class A {};

export class ShouldNotBeLoaded {};

export class Temp {
   A<ShouldNotBeLoaded> AS;
};

// use.cpp
import M;
A<int> a;
```

We have a specialization `A<ShouldNotBeLoaded>` in `M.cppm` and we
instantiate the template `A` in `use.cpp`. Surprisingly, we will then
deserialize `ShouldNotBeLoaded` when compiling `use.cpp`. This
patch tries to avoid that.

Given that templates are heavily used in C++, this is a pain point
for performance.

This patch adds a MultiOnDiskHashTable for specializations in the
ASTReader. Then we will only deserialize the specializations with the
same template arguments. We achieve this by using the ODRHash of the
template arguments as the key of the hash table.

To review this patch, I think `ASTReaderDecl::AddLazySpecializations`
may be a good entry point.
2024-12-11 09:40:47 +08:00

132 lines
5.3 KiB
C++

// RUN: %clang_cc1 -verify -fopenmp -ast-print %s | FileCheck %s
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp -std=c++11 -include-pch %t -verify %s -ast-print | FileCheck %s
// RUN: %clang_cc1 -verify -fopenmp-simd -ast-print %s | FileCheck %s
// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp-simd -std=c++11 -include-pch %t -verify %s -ast-print | FileCheck %s
// expected-no-diagnostics
#ifndef HEADER
#define HEADER
// Local declarations of the OpenMP allocator types and allocator handles used
// by this test; the test declares them itself instead of including a header.
// NOTE(review): presumably these mirror the declarations in <omp.h> — confirm.

// Element type of the `traits` array passed to uses_allocators() in tmain().
struct omp_alloctrait_t {};
// Type of the `my_allocator` variable named in the allocate() and
// uses_allocators() clauses in tmain().
typedef void **omp_allocator_handle_t;
// Allocator handles: declared only, with no definitions in this file.
extern const omp_allocator_handle_t omp_null_allocator;
extern const omp_allocator_handle_t omp_default_mem_alloc;
extern const omp_allocator_handle_t omp_large_cap_mem_alloc;
extern const omp_allocator_handle_t omp_const_mem_alloc;
extern const omp_allocator_handle_t omp_high_bw_mem_alloc;
extern const omp_allocator_handle_t omp_low_lat_mem_alloc;
extern const omp_allocator_handle_t omp_cgroup_mem_alloc;
extern const omp_allocator_handle_t omp_pteam_mem_alloc;
extern const omp_allocator_handle_t omp_thread_mem_alloc;
// Empty function; calls to it serve as the statement associated with several
// of the `target teams` directives in tmain() and main().
void foo() {}
// Class template with a static data member that is marked threadprivate
// inside the class body. The directives following this definition match how
// the template and its S<int> specialization are printed.
template <class T>
struct S {
// Conversion to T; returns a value-initialized T.
operator T() {return T();}
// Static member named by the threadprivate directive on the next line.
static T TS;
#pragma omp threadprivate(TS)
};
// CHECK: template <class T> struct S {
// CHECK: static T TS;
// CHECK-NEXT: #pragma omp threadprivate(S::TS)
// CHECK: };
// CHECK: template<> struct S<int> {
// CHECK: static int TS;
// CHECK-NEXT: #pragma omp threadprivate(S<int>::TS)
// CHECK-NEXT: }
// Function template exercising `#pragma omp target teams` with dependent
// types and a non-type template parameter in clause expressions.
//
// T    - type of the locals and of the argc/argv parameters.
// C    - integer constant used in the num_teams() and thread_limit() clauses.
// argc - initial value of local `b`; also listed in a private() clause.
// argv - pointer listed in the firstprivate() and allocate() clauses.
//
// Always returns 0. main() instantiates it as tmain<int, 5> and
// tmain<long, 1>. Locals c, d, e, f and g are left uninitialized; they are
// only named in clauses.
template <typename T, int C>
T tmain(T argc, T *argv) {
T b = argc, c, d, e, f, g;
static T a;
S<T> s;
omp_alloctrait_t traits[10];
omp_allocator_handle_t my_allocator;
// Directive with no clauses.
#pragma omp target teams
a=2;
// Data-sharing, reduction, num_teams/thread_limit (using C) and allocate
// clauses, written with mixed comma and whitespace separators.
#pragma omp target teams default(none), private(argc,b) firstprivate(argv) shared (d) reduction(+:c) reduction(max:e) num_teams(C) thread_limit(d*C) allocate(argv)
foo();
// allocate() with an explicit allocator, plus uses_allocators() with traits.
#pragma omp target teams allocate(my_allocator:f) reduction(^:e, f) reduction(&& : g) uses_allocators(my_allocator(traits))
foo();
// ompx_bare with three-element num_teams() and thread_limit() lists.
#pragma omp target teams ompx_bare num_teams(C, C, C) thread_limit(d*C, d*C, d*C)
foo();
return 0;
}
// CHECK: template <typename T, int C> T tmain(T argc, T *argv) {
// CHECK-NEXT: T b = argc, c, d, e, f, g;
// CHECK-NEXT: static T a;
// CHECK-NEXT: S<T> s;
// CHECK-NEXT: omp_alloctrait_t traits[10];
// CHECK-NEXT: omp_allocator_handle_t my_allocator;
// CHECK-NEXT: #pragma omp target teams{{$}}
// CHECK-NEXT: a = 2;
// CHECK-NEXT: #pragma omp target teams default(none) private(argc,b) firstprivate(argv) shared(d) reduction(+: c) reduction(max: e) num_teams(C) thread_limit(d * C) allocate(argv)
// CHECK-NEXT: foo()
// CHECK-NEXT: #pragma omp target teams allocate(my_allocator: f) reduction(^: e,f) reduction(&&: g) uses_allocators(my_allocator(traits))
// CHECK-NEXT: foo()
// CHECK: template<> int tmain<int, 5>(int argc, int *argv) {
// CHECK-NEXT: int b = argc, c, d, e, f, g;
// CHECK-NEXT: static int a;
// CHECK-NEXT: S<int> s;
// CHECK-NEXT: omp_alloctrait_t traits[10];
// CHECK-NEXT: omp_allocator_handle_t my_allocator;
// CHECK-NEXT: #pragma omp target teams
// CHECK-NEXT: a = 2;
// CHECK-NEXT: #pragma omp target teams default(none) private(argc,b) firstprivate(argv) shared(d) reduction(+: c) reduction(max: e) num_teams(5) thread_limit(d * 5) allocate(argv)
// CHECK-NEXT: foo()
// CHECK-NEXT: #pragma omp target teams allocate(my_allocator: f) reduction(^: e,f) reduction(&&: g) uses_allocators(my_allocator(traits))
// CHECK-NEXT: foo()
// CHECK: template<> long tmain<long, 1>(long argc, long *argv) {
// CHECK-NEXT: long b = argc, c, d, e, f, g;
// CHECK-NEXT: static long a;
// CHECK-NEXT: S<long> s;
// CHECK-NEXT: omp_alloctrait_t traits[10];
// CHECK-NEXT: omp_allocator_handle_t my_allocator;
// CHECK-NEXT: #pragma omp target teams
// CHECK-NEXT: a = 2;
// CHECK-NEXT: #pragma omp target teams default(none) private(argc,b) firstprivate(argv) shared(d) reduction(+: c) reduction(max: e) num_teams(1) thread_limit(d * 1) allocate(argv)
// CHECK-NEXT: foo()
// CHECK-NEXT: #pragma omp target teams allocate(my_allocator: f) reduction(^: e,f) reduction(&&: g) uses_allocators(my_allocator(traits))
// CHECK-NEXT: foo()
// CHECK-NEXT: #pragma omp target teams ompx_bare num_teams(1,1,1) thread_limit(d * 1,d * 1,d * 1)
// CHECK-NEXT: foo();
// Empty enumeration; main() declares a local variable `ee` of this type.
enum Enum { };
// Non-template driver: uses `#pragma omp target teams` with non-dependent
// operands, each directive followed by the directive text it should print as,
// then instantiates tmain<int, 5> and tmain<long, 1>.
// Returns the sum of the two tmain() results.
int main (int argc, char **argv) {
long x;
int b = argc, c, d, e, f, g;
static int a;
#pragma omp threadprivate(a)
Enum ee;
// CHECK: Enum ee;
#pragma omp target teams
// CHECK-NEXT: #pragma omp target teams
a=2;
// CHECK-NEXT: a = 2;
// ompx_bare with single-element num_teams() and thread_limit().
#pragma omp target teams ompx_bare num_teams(1) thread_limit(32)
// CHECK-NEXT: #pragma omp target teams ompx_bare num_teams(1) thread_limit(32)
a=3;
// CHECK-NEXT: a = 3;
// ompx_bare with three-element num_teams() and thread_limit() lists.
#pragma omp target teams ompx_bare num_teams(1, 2, 3) thread_limit(2, 4, 6)
// CHECK-NEXT: #pragma omp target teams ompx_bare num_teams(1,2,3) thread_limit(2,4,6)
a=4;
// CHECK-NEXT: a = 4;
// Data-sharing and reduction clauses with variable num_teams/thread_limit.
#pragma omp target teams default(none), private(argc,b) num_teams(f) firstprivate(argv) reduction(| : c, d) reduction(* : e) thread_limit(f+g)
// CHECK-NEXT: #pragma omp target teams default(none) private(argc,b) num_teams(f) firstprivate(argv) reduction(|: c,d) reduction(*: e) thread_limit(f + g)
foo();
// CHECK-NEXT: foo();
// Triggers the two tmain specializations matched by the directives above.
return tmain<int, 5>(b, &b) + tmain<long, 1>(x, &x);
}
// Explicit instantiation declarations of the static member S<T>::TS for the
// two S specializations used through tmain<int, 5> and tmain<long, 1>.
extern template int S<int>::TS;
extern template long S<long>::TS;
#endif