This broke the Chromium build. Consider the following code:
float ScaleSumSamples_C(const float* src, float* dst, float scale, int width) {
// Reproducer for the build break: for each of `width` samples, stores the
// source sample multiplied by `scale` into dst and accumulates the square of
// the source sample. Returns the accumulated sum of squares.
float fsum = 0.f;
int i;
#if defined(__clang__)
// Only a vectorization width is requested here (no explicit vectorize(enable));
// this is the pragma that triggers the warning / assertion quoted below.
#pragma clang loop vectorize_width(4)
#endif
for (i = 0; i < width; ++i) {
float v = *src++;
// Sum of squares uses the unscaled sample.
fsum += v * v;
*dst++ = v * scale;
}
return fsum;
}
Compiling at -Oz, Clang now warns:
$ clang++ -target x86_64 -Oz -c /tmp/a.cc
/tmp/a.cc:1:7: warning: loop not vectorized: the optimizer was unable to
perform the requested transformation; the transformation might be disabled or
specified as part of an unsupported transformation ordering
[-Wpass-failed=transform-warning]
This suggests that the pragma is not actually forcing vectorization strongly enough.
At -Os it asserts instead:
$ build.release/bin/clang++ -target x86_64 -Os -c /tmp/a.cc
clang-10: /work/llvm.monorepo/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp:2734: void
llvm::InnerLoopVectorizer::emitMemRuntimeChecks(llvm::Loop*, llvm::BasicBlock*): Assertion `
!BB->getParent()->hasOptSize() && "Cannot emit memory checks when optimizing for size"' failed.
Of course, neither of these is what the developer expected from the pragma.
> Specifying the vectorization width was supposed to implicitly enable
> vectorization, except that it wasn't really doing this. It was only
> setting the vectorize.width metadata, but not vectorize.enable.
>
> This should fix PR27643.
>
> Differential Revision: https://reviews.llvm.org/D66290
llvm-svn: 372225
77 lines
2.3 KiB
C++
77 lines
2.3 KiB
C++
// RUN: %clang_cc1 -triple x86_64-apple-darwin -std=c++11 -emit-llvm -o - %s | FileCheck %s
|
|
|
|
// test0: vectorize(enable) on its own. The loop branch is expected to carry
// !llvm.loop metadata (captured as LOOP0), which the metadata checks at the end
// of the file resolve to a single llvm.loop.vectorize.enable = true node.
void test0(int *List, int Length) {
|
|
// CHECK-LABEL: @{{.*}}test0{{.*}}(
|
|
// CHECK: br label {{.*}}, !llvm.loop ![[LOOP0:.*]]
|
|
|
|
#pragma clang loop vectorize(enable)
|
|
for (int i = 0; i < Length; i++)
|
|
List[i] = i * 2;
|
|
}
|
|
|
|
// test1: vectorize(enable) combined with vectorize_predicate(enable). The loop
// metadata (LOOP1) is expected to list the predicate.enable = true node
// followed by the same vectorize.enable = true node used by test0.
void test1(int *List, int Length) {
|
|
// CHECK-LABEL: @{{.*}}test1{{.*}}(
|
|
// CHECK: br label {{.*}}, !llvm.loop ![[LOOP1:.*]]
|
|
|
|
#pragma clang loop vectorize(enable) vectorize_predicate(enable)
|
|
for (int i = 0; i < Length; i++)
|
|
List[i] = i * 2;
|
|
}
|
|
|
|
// test2: vectorize(enable) combined with vectorize_predicate(disable). The loop
// metadata (LOOP2) is expected to list a predicate.enable = false node
// followed by the shared vectorize.enable = true node.
void test2(int *List, int Length) {
|
|
// CHECK-LABEL: @{{.*}}test2{{.*}}(
|
|
// CHECK: br label {{.*}}, !llvm.loop ![[LOOP2:.*]]
|
|
|
|
#pragma clang loop vectorize(enable) vectorize_predicate(disable)
|
|
for (int i = 0; i < Length; i++)
|
|
List[i] = i * 2;
|
|
}
|
|
|
|
// vectorize_predicate(enable) implies vectorize(enable): no vectorize clause is
// written on this loop, yet the expected metadata (LOOP3) references the same
// predicate.enable = true and vectorize.enable = true nodes as test1.
|
|
void test3(int *List, int Length) {
|
|
// CHECK-LABEL: @{{.*}}test3{{.*}}(
|
|
// CHECK: br label {{.*}}, !llvm.loop ![[LOOP3:.*]]
|
|
|
|
#pragma clang loop vectorize_predicate(enable)
|
|
for (int i = 0; i < Length; i++)
|
|
List[i] = i * 2;
|
|
}
|
|
|
|
// Check that disabling vectorization means a vectorization width of 1, and
|
|
// also that vectorize_predicate isn't enabled: the expected metadata (LOOP4)
// contains only an llvm.loop.vectorize.width = 1 node.
|
|
void test4(int *List, int Length) {
|
|
// CHECK-LABEL: @{{.*}}test4{{.*}}(
|
|
// CHECK: br label {{.*}}, !llvm.loop ![[LOOP4:.*]]
|
|
|
|
#pragma clang loop vectorize(disable)
|
|
for (int i = 0; i < Length; i++)
|
|
List[i] = i * 2;
|
|
}
|
|
|
|
// Check that vectorize and vectorize_predicate are both disabled when
// vectorize(disable) is combined with vectorize_predicate(enable): the expected
// metadata (LOOP5) is the same single width = 1 node as for test4, with no
// predicate.enable entry.
|
|
void test5(int *List, int Length) {
|
|
// CHECK-LABEL: @{{.*}}test5{{.*}}(
|
|
// CHECK: br label {{.*}}, !llvm.loop ![[LOOP5:.*]]
|
|
|
|
#pragma clang loop vectorize(disable) vectorize_predicate(enable)
|
|
for (int i = 0; i < Length; i++)
|
|
List[i] = i * 2;
|
|
}
|
|
|
|
|
|
// CHECK: ![[LOOP0]] = distinct !{![[LOOP0]], !3}
|
|
// CHECK-NEXT: !3 = !{!"llvm.loop.vectorize.enable", i1 true}
|
|
|
|
// CHECK-NEXT: ![[LOOP1]] = distinct !{![[LOOP1]], !5, !3}
|
|
// CHECK-NEXT: !5 = !{!"llvm.loop.vectorize.predicate.enable", i1 true}
|
|
|
|
// CHECK-NEXT: ![[LOOP2]] = distinct !{![[LOOP2]], !7, !3}
|
|
// CHECK-NEXT: !7 = !{!"llvm.loop.vectorize.predicate.enable", i1 false}
|
|
|
|
// CHECK-NEXT: ![[LOOP3]] = distinct !{![[LOOP3]], !5, !3}
|
|
|
|
// CHECK-NEXT: ![[LOOP4]] = distinct !{![[LOOP4]], !10}
|
|
// CHECK-NEXT: !10 = !{!"llvm.loop.vectorize.width", i32 1}
|
|
|
|
// CHECK-NEXT: ![[LOOP5]] = distinct !{![[LOOP5]], !10}
|