Chris Lattner 27a3631bac Improve codegen for initializer lists to use memset more aggressively
when an initializer is variable (I handled the constant case in a previous
patch).  This has three pieces:

1. Enhance AggValueSlot to have a 'isZeroed' bit to tell CGExprAgg that
   the memory being stored into has previously been memset to zero.
2. Teach CGExprAgg to not emit stores of zero to isZeroed memory.
3. Teach CodeGenFunction::EmitAggExpr to scan initializers to determine
   whether it is profitable to emit a memset + individual stores vs
   stores for everything.

The heuristic used is that a global has to be more than 16 bytes and
has to be 3/4 zero to be a candidate for this xform.  The two testcases
are illustrative of the scenarios this catches.  We now codegen test9 into:

 call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 400, i32 4, i1 false)
 %.array = getelementptr inbounds [100 x i32]* %Arr, i32 0, i32 0
 %tmp = load i32* %X.addr, align 4
 store i32 %tmp, i32* %.array

and test10 into:

  call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 392, i32 8, i1 false)
  %tmp = getelementptr inbounds %struct.b* %S, i32 0, i32 0
  %tmp1 = getelementptr inbounds %struct.a* %tmp, i32 0, i32 0
  %tmp2 = load i32* %X.addr, align 4
  store i32 %tmp2, i32* %tmp1, align 4
  %tmp5 = getelementptr inbounds %struct.b* %S, i32 0, i32 3
  %tmp10 = getelementptr inbounds %struct.a* %tmp5, i32 0, i32 4
  %tmp11 = load i32* %X.addr, align 4
  store i32 %tmp11, i32* %tmp10, align 4

Previously we produced 99 stores of zero for test9 and also tons for test10.
This xform should substantially speed up -O0 builds when it kicks in as well
as reducing code size and optimizer heartburn on insane cases.  This resolves
PR279.

llvm-svn: 120692
2010-12-02 07:07:26 +00:00

103 lines
1.8 KiB
C

// RUN: %clang_cc1 -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s
// f1: initializes a scalar `int` from a brace-enclosed, single-element
// initializer list. No FileCheck directives are attached to this function in
// the visible source; presumably it only has to compile without crashing.
void f1() {
// Scalars in braces.
int a = { 1 };
}
// f2: exercises nested-brace initializers for multi-dimensional local arrays,
// including partially specified rows and address-of initializers that refer to
// elements of other locals. No FileCheck directives are attached to this
// function in the visible source.
void f2() {
// Fully specified 2x2 array.
int a[2][2] = { { 1, 2 }, { 3, 4 } };
// 3x3 array with only two elements given for two rows; every element without
// an explicit initializer is implicitly zero.
int b[3][3] = { { 1, 2 }, { 3, 4 } };
// Pointer arrays whose initializers are addresses of elements of `a` and `b`.
int *c[2] = { &a[1][1], &b[2][2] };
int *d[2][2] = { {&a[1][1], &b[2][2]}, {&a[0][0], &b[1][1]} };
// Same initializers as `d`, but in a larger 3x3 array, so the remaining
// pointers are implicitly null.
int *e[3][3] = { {&a[1][1], &b[2][2]}, {&a[0][0], &b[1][1]} };
// Each row is initialized from a two-character string literal; the literal's
// terminating NUL exactly fills the third byte of the row.
char ext[3][3] = {".Y",".U",".V"};
}
// F is a pointer to a function taking no arguments and returning void.
typedef void (* F)(void);
// foo is only declared here; its definition is not part of the visible source.
extern void foo(void);
// S wraps a single function pointer.
struct S { F f; };
// f3: initializes a one-element array of struct S with nested braces, using
// the function designator `foo` as the function-pointer initializer.
// No FileCheck directives are attached to this function in the visible source.
void f3() {
struct S a[1] = { { foo } };
}
// Constants
// The two FileCheck directives below expect the file-scope `const int g3` to
// be emitted as an IR `constant` and the function-local `static const int g4`
// to be emitted as an `internal constant` named `f4.g4`.
// CHECK: @g3 = constant i32 10
// CHECK: @f4.g4 = internal constant i32 12
const int g3 = 10;
// f4: returns the value of its function-local static constant (12).
int f4() {
static const int g4 = 12;
return g4;
}
// PR6537
// vec3 overlays an anonymous struct of three doubles (x, y, z) with a
// three-element double array.
typedef union vec3 {
struct { double x, y, z; };
double component[3];
} vec3;
// f5: returns a vec3 built from a compound literal. The outer braces
// initialize the union, the inner braces initialize its anonymous struct, and
// the designator sets only `x` (copied from `value.x`); `y` and `z` get no
// explicit initializer and are therefore zero.
// No FileCheck directives are attached to this function in the visible source.
vec3 f5(vec3 value) {
return (vec3) {{
.x = value.x
}};
}
// rdar://problem/8154689
// f6: initializes an array of unspecified size (deduced as one element) with a
// non-constant expression: the address of the local `x` cast to `long`.
// No FileCheck directives are attached to this function in the visible source.
void f6() {
int x;
long ids[] = { (long) &x };
}
// CHECK: @test7 = global{{.*}}{ i32 0, [4 x i8] c"bar\00" }
// PR8217
// a7 ends in a flexible array member `v`.
struct a7 {
int b;
char v[];
};
// test7 statically initializes the flexible array member from a string literal
// (NOTE(review): this relies on a GNU extension -- confirm). The FileCheck
// directive at the top of this block expects the emitted global to contain the
// i32 zero followed by the four bytes "bar\0".
struct a7 test7 = { .b = 0, .v = "bar" };
// PR279 comment #3
// test8: initializes a 100000-byte local char array from the short string
// literal "abc" and returns the byte at index X. X is not bounds-checked here,
// so callers must pass an index inside the array.
// The FileCheck directives at the end of the body expect a call to
// llvm.memset followed by individual byte stores of 97, 98 and 99
// ('a', 'b', 'c').
char test8(int X) {
char str[100000] = "abc"; // tail should be memset.
return str[X];
// CHECK: @test8(
// CHECK: call void @llvm.memset
// CHECK: store i8 97
// CHECK: store i8 98
// CHECK: store i8 99
}
// bar is only declared here; its definition is not part of the visible source.
// The tests below pass their initialized aggregates to it.
void bar(void*);
// PR279
// test9: initializes a 100-element int array whose first element is the
// runtime value X; the other 99 elements are implicitly zero.
// The FileCheck directives at the end of the body expect a call to
// llvm.memset and forbid any `store i32 0` between that memset and the call
// to bar.
// NOTE(review): declared to return int but has no return statement, so the
// returned value must not be used by a caller.
int test9(int X) {
int Arr[100] = { X }; // Should use memset
bar(Arr);
// CHECK: @test9
// CHECK: call void @llvm.memset
// CHECK-NOT: store i32 0
// CHECK: call void @bar
}
// struct a: eleven int fields followed by one int pointer.
struct a {
int a, b, c, d, e, f, g, h, i, j, k, *p;
};
// struct b: seven consecutive struct a members.
struct b {
struct a a,b,c,d,e,f,g;
};
// test10: initializes a struct b with nested designators. Two fields
// (.a.a and .d.e) receive the runtime value X, three fields of .f are
// explicitly set to zero, and everything else is implicitly zero.
// The FileCheck directives at the end of the body expect a call to
// llvm.memset and forbid any `store i32 0` between that memset and the call
// to bar, so the explicit zero initializers must not produce their own stores.
// NOTE(review): declared to return int but has no return statement, so the
// returned value must not be used by a caller.
int test10(int X) {
struct b S = { .a.a = X, .d.e = X, .f.e = 0, .f.f = 0, .f.p = 0 };
bar(&S);
// CHECK: @test10
// CHECK: call void @llvm.memset
// CHECK-NOT: store i32 0
// CHECK: call void @bar
}