
when an initializer is variable (I handled the constant case in a previous patch).

This has three pieces:
1. Enhance AggValueSlot to have an 'isZeroed' bit to tell CGExprAgg that the memory being stored into has previously been memset to zero.
2. Teach CGExprAgg to not emit stores of zero to isZeroed memory.
3. Teach CodeGenFunction::EmitAggExpr to scan initializers to determine whether it is profitable to emit a memset + individual stores vs. stores for everything.

The heuristic used is that a global has to be more than 16 bytes and has to be 3/4 zero to be a candidate for this xform. The two testcases are illustrative of the scenarios this catches.

We now codegen test9 into:

  call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 400, i32 4, i1 false)
  %.array = getelementptr inbounds [100 x i32]* %Arr, i32 0, i32 0
  %tmp = load i32* %X.addr, align 4
  store i32 %tmp, i32* %.array

and test10 into:

  call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 392, i32 8, i1 false)
  %tmp = getelementptr inbounds %struct.b* %S, i32 0, i32 0
  %tmp1 = getelementptr inbounds %struct.a* %tmp, i32 0, i32 0
  %tmp2 = load i32* %X.addr, align 4
  store i32 %tmp2, i32* %tmp1, align 4
  %tmp5 = getelementptr inbounds %struct.b* %S, i32 0, i32 3
  %tmp10 = getelementptr inbounds %struct.a* %tmp5, i32 0, i32 4
  %tmp11 = load i32* %X.addr, align 4
  store i32 %tmp11, i32* %tmp10, align 4

Previously we produced 99 stores of zero for test9 and also tons for test10. This xform should substantially speed up -O0 builds when it kicks in as well as reducing code size and optimizer heartburn on insane cases.

This resolves PR279.

llvm-svn: 120692
103 lines
1.8 KiB
C
103 lines
1.8 KiB
C
// RUN: %clang_cc1 -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s
|
|
|
|
// f1 - initialises a scalar from a brace-enclosed initializer list.
// No FileCheck directive is attached to this function.
void f1() {
  // Scalars in braces.
  int a = { 1 };
}
|
|
|
|
// f2 - local array initializers in several shapes: fully and partially
// specified integer matrices, pointer arrays whose elements are addresses
// of other locals, and a char matrix built from string literals.
// No FileCheck directive is attached to this function.
void f2() {
  // Every element of the 2x2 matrix is given explicitly.
  int a[2][2] = { { 1, 2 }, { 3, 4 } };
  // Only the first two elements of the first two rows are given; the
  // remaining elements of the 3x3 matrix are implicitly zero.
  int b[3][3] = { { 1, 2 }, { 3, 4 } };
  // Pointer array initialised with addresses of elements of 'a' and 'b'.
  int *c[2] = { &a[1][1], &b[2][2] };
  // Fully specified 2x2 matrix of element addresses.
  int *d[2][2] = { {&a[1][1], &b[2][2]}, {&a[0][0], &b[1][1]} };
  // Partially specified 3x3 matrix of element addresses; the rest is null.
  int *e[3][3] = { {&a[1][1], &b[2][2]}, {&a[0][0], &b[1][1]} };
  // Each two-character literal plus its terminator exactly fills one row.
  char ext[3][3] = {".Y",".U",".V"};
}
|
|
|
|
// Function-pointer type used as the single member of struct S.
typedef void (* F)(void);
// Only declared here; the definition is not part of this file.
extern void foo(void);
struct S { F f; };

// f3 - initialises a one-element array of structs whose member is a
// function pointer, using the address of the external function 'foo'.
// No FileCheck directive is attached to this function.
void f3() {
  struct S a[1] = { { foo } };
}
|
|
|
|
// Constants
// The two directives below expect a file-scope const int to be emitted as
// a constant global, and a function-local static const int to be emitted
// as an internal constant named after its enclosing function.
// CHECK: @g3 = constant i32 10
// CHECK: @f4.g4 = internal constant i32 12
const int g3 = 10;

// f4 - returns the value of its function-local static constant (12).
int f4() {
  static const int g4 = 12;
  return g4;
}
|
|
|
|
// PR6537
// Union overlaying three named doubles (through an anonymous struct) with
// a three-element double array.
typedef union vec3 {
  struct { double x, y, z; };
  double component[3];
} vec3;

// f5 - returns a compound literal of the union in which only the anonymous
// struct's 'x' member is designated (copied from 'value'); the members that
// are not named in the initializer are implicitly zero.
vec3 f5(vec3 value) {
  return (vec3) {{
    .x = value.x
  }};
}
|
|
|
|
// rdar://problem/8154689
// f6 - initialises an array of unspecified bound with a single element:
// the address of a local variable converted to long.
// No FileCheck directive is attached to this function.
void f6() {
  int x;
  long ids[] = { (long) &x };
}
|
|
|
|
|
|
|
|
|
|
// The directive below expects the global to be emitted with the string,
// including its terminator, placed directly in the trailing array.
// CHECK: @test7 = global{{.*}}{ i32 0, [4 x i8] c"bar\00" }
// PR8217
// Struct ending in a flexible array member.
struct a7 {
  int b;
  char v[];
};

// File-scope object whose flexible array member is initialised from a
// string literal via designated initializers.
struct a7 test7 = { .b = 0, .v = "bar" };
|
|
|
|
|
|
// PR279 comment #3
// test8 - initialises a 100000-byte local buffer from the three-character
// literal "abc" and returns the byte at index X. The caller must keep X
// inside the buffer; no bounds check is performed.
// The directives in the body expect a memset call followed by individual
// byte stores for 'a' (97), 'b' (98) and 'c' (99).
char test8(int X) {
  char str[100000] = "abc"; // tail should be memset.
  return str[X];
  // CHECK: @test8(
  // CHECK: call void @llvm.memset
  // CHECK: store i8 97
  // CHECK: store i8 98
  // CHECK: store i8 99
}
|
|
|
|
// Opaque sink, declared only; the tests below pass it the address of the
// local aggregate they initialise.
void bar(void*);

// PR279
// test9 - a 100-element int array whose first element is the runtime value
// X and whose remaining 99 elements are implicitly zero.
// The directives in the body expect a memset call and then no store of an
// i32 zero before the call to bar.
// NOTE(review): declared to return int but has no return statement; no
// caller is visible in this file, so the result is never read here.
int test9(int X) {
  int Arr[100] = { X }; // Should use memset
  bar(Arr);
  // CHECK: @test9
  // CHECK: call void @llvm.memset
  // CHECK-NOT: store i32 0
  // CHECK: call void @bar
}
|
|
|
|
// Eleven int members followed by one int pointer.
struct a {
  int a, b, c, d, e, f, g, h, i, j, k, *p;
};

// Seven consecutive struct a members.
struct b {
  struct a a,b,c,d,e,f,g;
};

// test10 - a nested struct initialised through designators: two members
// receive the runtime value X, three are set to zero explicitly, and every
// other member is implicitly zero.
// The directives in the body expect a memset call and then no store of an
// i32 zero before the call to bar.
// NOTE(review): declared to return int but has no return statement; no
// caller is visible in this file, so the result is never read here.
// 'bar' is not declared inside this block; it relies on the prototype that
// appears earlier in the file.
int test10(int X) {
  struct b S = { .a.a = X, .d.e = X, .f.e = 0, .f.f = 0, .f.p = 0 };
  bar(&S);

  // CHECK: @test10
  // CHECK: call void @llvm.memset
  // CHECK-NOT: store i32 0
  // CHECK: call void @bar
}
|