From f55a5cf014d283d88b0caccb9f74260bdf3cd7d1 Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Wed, 25 Feb 2026 06:56:00 -0500 Subject: [PATCH] [OpenMP] Only generate call to __kmpc_global_thread_num when needed (#182669) This patch is a small optimization to only generate a call to __kmpc_global_thread_num if the result is actually used. --- clang/test/CIR/CodeGenOpenMP/omp-llvmir.c | 1 - clang/test/OpenMP/cancel_codegen.cpp | 367 +- .../irbuilder_nested_openmp_parallel_empty.c | 13 +- .../OpenMP/irbuilder_nested_parallel_for.c | 3406 ++++++++--------- clang/test/OpenMP/nested_loop_codegen.cpp | 804 ++-- clang/test/OpenMP/parallel_codegen.cpp | 498 +-- clang/test/OpenMP/taskgroup_codegen.cpp | 33 +- .../parallel-private-reduction-worstcase.f90 | 18 +- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 4 +- .../OpenMP/parallel_region_merging.ll | 163 +- .../openmp-dist_schedule_with_wsloop.mlir | 32 +- mlir/test/Target/LLVMIR/openmp-llvm.mlir | 3 +- .../LLVMIR/openmp-outline-infinite-loop.mlir | 1 - .../openmp-parallel-reduction-cleanup.mlir | 1 - .../openmp-parallel-reduction-multiblock.mlir | 1 - .../openmp-reduction-array-sections.mlir | 14 +- .../Target/LLVMIR/openmp-reduction-byref.mlir | 1 - .../LLVMIR/openmp-reduction-init-arg.mlir | 1 - 18 files changed, 2680 insertions(+), 2681 deletions(-) diff --git a/clang/test/CIR/CodeGenOpenMP/omp-llvmir.c b/clang/test/CIR/CodeGenOpenMP/omp-llvmir.c index d32753ae4475..518152a4db01 100644 --- a/clang/test/CIR/CodeGenOpenMP/omp-llvmir.c +++ b/clang/test/CIR/CodeGenOpenMP/omp-llvmir.c @@ -46,7 +46,6 @@ // LLVM: br label %[[ENTRY:.*]] // LLVM: [[ENTRY]]: -// LLVM: %[[THREAD_NUM:.*]] = call i32 @__kmpc_global_thread_num(ptr @1) // LLVM: br label %[[OMP_PARALLEL:.*]] // LLVM: [[OMP_PARALLEL]]: diff --git a/clang/test/OpenMP/cancel_codegen.cpp b/clang/test/OpenMP/cancel_codegen.cpp index 600aae211087..acd2b9ce3414 100644 --- a/clang/test/OpenMP/cancel_codegen.cpp +++ b/clang/test/OpenMP/cancel_codegen.cpp @@ -310,8 +310,8 @@ for (int i = 0; i < argc; ++i) { // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 // CHECK1-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8, !nonnull [[META3:![0-9]+]], !align [[META4:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8, !nonnull [[META3]], !align [[META5:![0-9]+]] // CHECK1-NEXT: [[TMP2:%.*]] = load float, ptr @flag, align 4 // CHECK1-NEXT: [[TOBOOL:%.*]] = fcmp une float [[TMP2]], 0.000000e+00 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] @@ -381,29 +381,29 @@ for (int i = 0; i < argc; ++i) { // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META5:![0-9]+]]) // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]]) -// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias [[META14:![0-9]+]] -// CHECK1-NEXT: store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I]], align 8, !noalias [[META14]] -// CHECK1-NEXT: store ptr null, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias [[META14]] -// CHECK1-NEXT: store ptr null, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias [[META14]] -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, !noalias [[META14]] -// CHECK1-NEXT: store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias [[META14]] -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias [[META14]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias [[META14]] +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META11:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META13:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META15:![0-9]+]]) +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias [[META17:![0-9]+]] +// CHECK1-NEXT: store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I]], align 8, !noalias [[META17]] +// CHECK1-NEXT: store ptr null, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias [[META17]] +// CHECK1-NEXT: store ptr null, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias [[META17]] +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, !noalias [[META17]] +// CHECK1-NEXT: store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias [[META17]] +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias [[META17]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias [[META17]] // CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[TMP9]], i32 4) // CHECK1-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 // CHECK1-NEXT: br i1 [[TMP11]], label [[DOTCANCEL_EXIT_I:%.*]], label [[DOTCANCEL_CONTINUE_I:%.*]] // CHECK1: .cancel.exit.i: -// CHECK1-NEXT: store i32 1, ptr [[CLEANUP_DEST_SLOT_I]], align 4, !noalias [[META14]] +// CHECK1-NEXT: store i32 1, ptr [[CLEANUP_DEST_SLOT_I]], align 4, !noalias [[META17]] // CHECK1-NEXT: br label [[DOTOMP_OUTLINED__EXIT:%.*]] // CHECK1: .cancel.continue.i: -// CHECK1-NEXT: store i32 0, ptr [[CLEANUP_DEST_SLOT_I]], align 4, !noalias [[META14]] +// CHECK1-NEXT: store i32 0, ptr [[CLEANUP_DEST_SLOT_I]], align 4, !noalias [[META17]] // CHECK1-NEXT: br label [[DOTOMP_OUTLINED__EXIT]] // CHECK1: .omp_outlined..exit: -// CHECK1-NEXT: [[CLEANUP_DEST_I:%.*]] = load i32, ptr [[CLEANUP_DEST_SLOT_I]], align 4, !noalias [[META14]] +// CHECK1-NEXT: [[CLEANUP_DEST_I:%.*]] = load i32, ptr [[CLEANUP_DEST_SLOT_I]], align 4, !noalias [[META17]] // CHECK1-NEXT: ret i32 0 // // @@ -561,8 +561,8 @@ for (int i = 0; i < argc; ++i) { // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 // CHECK1-NEXT: store ptr [[R]], ptr [[R_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[R_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8, !nonnull [[META3]], !align [[META5]] +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[R_ADDR]], align 8, !nonnull [[META3]], !align [[META5]] // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 @@ -700,42 +700,41 @@ for (int i = 0; i < argc; ++i) { // CHECK3-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[P_LASTITER28:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[P_LOWERBOUND29:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[P_UPPERBOUND30:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[P_STRIDE31:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[P_LASTITER32:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[P_LOWERBOUND33:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[P_UPPERBOUND34:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[P_STRIDE35:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR_34:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_38:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I36:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I40:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK3-NEXT: [[R:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK3-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 // CHECK3-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK3-NEXT: br label [[OMP_PARALLEL:%.*]] // CHECK3: omp_parallel: // CHECK3-NEXT: [[GEP_ARGC_ADDR:%.*]] = getelementptr { ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 0 // CHECK3-NEXT: store ptr [[ARGC_ADDR]], ptr [[GEP_ARGC_ADDR]], align 8 // CHECK3-NEXT: [[GEP_ARGV_ADDR:%.*]] = getelementptr { ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 1 // CHECK3-NEXT: store ptr [[ARGV_ADDR]], ptr [[GEP_ARGV_ADDR]], align 8 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @main..omp_par, ptr [[STRUCTARG]]) -// CHECK3-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @main..omp_par, ptr [[STRUCTARG]]) +// CHECK3-NEXT: br label [[OMP_PAR_EXIT:%.*]] // CHECK3: omp.par.exit: // CHECK3-NEXT: br label [[OMP_SECTION_LOOP_PREHEADER:%.*]] // CHECK3: omp_section_loop.preheader: // CHECK3-NEXT: store i32 0, ptr [[P_LOWERBOUND]], align 4 // CHECK3-NEXT: store i32 0, ptr [[P_UPPERBOUND]], align 4 // CHECK3-NEXT: store i32 1, ptr [[P_STRIDE]], align 4 -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK3-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM11]], i32 34, ptr [[P_LASTITER]], ptr [[P_LOWERBOUND]], ptr [[P_UPPERBOUND]], ptr [[P_STRIDE]], i32 1, i32 0) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM13:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK3-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM13]], i32 34, ptr [[P_LASTITER]], ptr [[P_LOWERBOUND]], ptr [[P_UPPERBOUND]], ptr [[P_STRIDE]], i32 1, i32 0) // CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[P_LOWERBOUND]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[P_UPPERBOUND]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = sub i32 [[TMP1]], [[TMP0]] @@ -755,8 +754,8 @@ for (int i = 0; i < argc; ++i) { // CHECK3-NEXT: i32 0, label [[OMP_SECTION_LOOP_BODY_CASE:%.*]] // CHECK3-NEXT: ] // CHECK3: omp_section_loop.body.case: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK3-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM10]], i32 3) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK3-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM11]], i32 3) // CHECK3-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 // CHECK3-NEXT: br i1 [[TMP8]], label [[OMP_SECTION_LOOP_BODY_CASE_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE_CNCL:%.*]] // CHECK3: omp_section_loop.body.case.split: @@ -769,93 +768,95 @@ for (int i = 0; i < argc; ++i) { // CHECK3-NEXT: [[OMP_SECTION_LOOP_NEXT]] = add nuw i32 [[OMP_SECTION_LOOP_IV]], 1 // CHECK3-NEXT: br label [[OMP_SECTION_LOOP_HEADER]] // CHECK3: omp_section_loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM11]]) -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM13]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM14:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM14]]) // CHECK3-NEXT: br label [[OMP_SECTION_LOOP_AFTER:%.*]] // CHECK3: omp_section_loop.after: -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_PREHEADER16:%.*]] -// CHECK3: omp_section_loop.preheader16: -// CHECK3-NEXT: store i32 0, ptr [[P_LOWERBOUND29]], align 4 -// CHECK3-NEXT: store i32 1, ptr [[P_UPPERBOUND30]], align 4 -// CHECK3-NEXT: store i32 1, ptr [[P_STRIDE31]], align 4 -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM32:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK3-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM32]], i32 34, ptr [[P_LASTITER28]], ptr [[P_LOWERBOUND29]], ptr [[P_UPPERBOUND30]], ptr [[P_STRIDE31]], i32 1, i32 0) -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[P_LOWERBOUND29]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[P_UPPERBOUND30]], align 4 +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_PREHEADER15:%.*]] +// CHECK3: omp_section_loop.preheader15: +// CHECK3-NEXT: store i32 0, ptr [[P_LOWERBOUND33]], align 4 +// CHECK3-NEXT: store i32 1, ptr [[P_UPPERBOUND34]], align 4 +// CHECK3-NEXT: store i32 1, ptr [[P_STRIDE35]], align 4 +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM36:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK3-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM36]], i32 34, ptr [[P_LASTITER32]], ptr [[P_LOWERBOUND33]], ptr [[P_UPPERBOUND34]], ptr [[P_STRIDE35]], i32 1, i32 0) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[P_LOWERBOUND33]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[P_UPPERBOUND34]], align 4 // CHECK3-NEXT: [[TMP11:%.*]] = sub i32 [[TMP10]], [[TMP9]] // CHECK3-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], 1 -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_HEADER17:%.*]] -// CHECK3: omp_section_loop.header17: -// CHECK3-NEXT: [[OMP_SECTION_LOOP_IV20:%.*]] = phi i32 [ 0, [[OMP_SECTION_LOOP_PREHEADER16]] ], [ [[OMP_SECTION_LOOP_NEXT22:%.*]], [[OMP_SECTION_LOOP_INC17:%.*]] ] -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_COND18:%.*]] -// CHECK3: omp_section_loop.cond18: -// CHECK3-NEXT: [[OMP_SECTION_LOOP_CMP21:%.*]] = icmp ult i32 [[OMP_SECTION_LOOP_IV20]], [[TMP12]] -// CHECK3-NEXT: br i1 [[OMP_SECTION_LOOP_CMP21]], label [[OMP_SECTION_LOOP_BODY19:%.*]], label [[OMP_SECTION_LOOP_EXIT21:%.*]] -// CHECK3: omp_section_loop.body19: -// CHECK3-NEXT: [[TMP13:%.*]] = add i32 [[OMP_SECTION_LOOP_IV20]], [[TMP9]] +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_HEADER16:%.*]] +// CHECK3: omp_section_loop.header16: +// CHECK3-NEXT: [[OMP_SECTION_LOOP_IV22:%.*]] = phi i32 [ 0, [[OMP_SECTION_LOOP_PREHEADER15]] ], [ [[OMP_SECTION_LOOP_NEXT24:%.*]], [[OMP_SECTION_LOOP_INC19:%.*]] ] +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_COND17:%.*]] +// CHECK3: omp_section_loop.cond17: +// CHECK3-NEXT: [[OMP_SECTION_LOOP_CMP23:%.*]] = icmp ult i32 [[OMP_SECTION_LOOP_IV22]], [[TMP12]] +// CHECK3-NEXT: br i1 [[OMP_SECTION_LOOP_CMP23]], label [[OMP_SECTION_LOOP_BODY18:%.*]], label [[OMP_SECTION_LOOP_EXIT20:%.*]] +// CHECK3: omp_section_loop.body18: +// CHECK3-NEXT: [[TMP13:%.*]] = add i32 [[OMP_SECTION_LOOP_IV22]], [[TMP9]] // CHECK3-NEXT: [[TMP14:%.*]] = mul i32 [[TMP13]], 1 // CHECK3-NEXT: [[TMP15:%.*]] = add i32 [[TMP14]], 0 -// CHECK3-NEXT: switch i32 [[TMP15]], label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER:%.*]] [ -// CHECK3-NEXT: i32 0, label [[OMP_SECTION_LOOP_BODY_CASE26:%.*]] -// CHECK3-NEXT: i32 1, label [[OMP_SECTION_LOOP_BODY_CASE29:%.*]] +// CHECK3-NEXT: switch i32 [[TMP15]], label [[OMP_SECTION_LOOP_BODY18_SECTIONS_AFTER:%.*]] [ +// CHECK3-NEXT: i32 0, label [[OMP_SECTION_LOOP_BODY_CASE25:%.*]] +// CHECK3-NEXT: i32 1, label [[OMP_SECTION_LOOP_BODY_CASE28:%.*]] // CHECK3-NEXT: ] -// CHECK3: omp_section_loop.body.case26: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM24:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK3-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM24]], i32 3) +// CHECK3: omp_section_loop.body.case25: +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM26:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK3-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM26]], i32 3) // CHECK3-NEXT: [[TMP17:%.*]] = icmp eq i32 [[TMP16]], 0 -// CHECK3-NEXT: br i1 [[TMP17]], label [[OMP_SECTION_LOOP_BODY_CASE26_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE26_CNCL:%.*]] -// CHECK3: omp_section_loop.body.case26.split: -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE26_SECTION_AFTER:%.*]] -// CHECK3: omp_section_loop.body.case26.section.after: -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER]] -// CHECK3: omp_section_loop.body.case29: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM27:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK3-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM27]], i32 3) +// CHECK3-NEXT: br i1 [[TMP17]], label [[OMP_SECTION_LOOP_BODY_CASE25_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE25_CNCL:%.*]] +// CHECK3: omp_section_loop.body.case25.split: +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE25_SECTION_AFTER:%.*]] +// CHECK3: omp_section_loop.body.case25.section.after: +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY18_SECTIONS_AFTER]] +// CHECK3: omp_section_loop.body.case28: +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM30:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK3-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM30]], i32 3) // CHECK3-NEXT: [[TMP19:%.*]] = icmp eq i32 [[TMP18]], 0 -// CHECK3-NEXT: br i1 [[TMP19]], label [[OMP_SECTION_LOOP_BODY_CASE29_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE29_CNCL:%.*]] -// CHECK3: omp_section_loop.body.case29.split: -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE25_SECTION_AFTER29:%.*]] -// CHECK3: omp_section_loop.body.case29.section.after30: -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE29_SECTION_AFTER:%.*]] -// CHECK3: omp_section_loop.body.case29.section.after: -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY19_SECTIONS_AFTER:.*]] -// CHECK3: omp_section_loop.body19.sections.after: -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_INC20:.*]] -// CHECK3: omp_section_loop.inc20: -// CHECK3-NEXT: [[OMP_SECTION_LOOP_NEXT22]] = add nuw i32 [[OMP_SECTION_LOOP_IV20]], 1 -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_HEADER17]] -// CHECK3: omp_section_loop.exit21: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM32]]) -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM33:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM33]]) -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_AFTER22:%.*]] -// CHECK3: omp_section_loop.after22: +// CHECK3-NEXT: br i1 [[TMP19]], label [[OMP_SECTION_LOOP_BODY_CASE28_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE28_CNCL:%.*]] +// CHECK3: omp_section_loop.body.case28.split: +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE28_SECTION_AFTER29:%.*]] +// CHECK3: omp_section_loop.body.case28.section.after29: +// CHECK3-NEXT: br label [[OMP_REGION_FINALIZE:%.*]] +// CHECK3: omp_region.finalize: +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE28_SECTION_AFTER:%.*]] +// CHECK3: omp_section_loop.body.case28.section.after: +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY18_SECTIONS_AFTER]] +// CHECK3: omp_section_loop.body18.sections.after: +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_INC19]] +// CHECK3: omp_section_loop.inc19: +// CHECK3-NEXT: [[OMP_SECTION_LOOP_NEXT24]] = add nuw i32 [[OMP_SECTION_LOOP_IV22]], 1 +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_HEADER16]] +// CHECK3: omp_section_loop.exit20: +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM36]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM37:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM37]]) +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_AFTER21:%.*]] +// CHECK3: omp_section_loop.after21: // CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP21]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK3-NEXT: [[SUB35:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK3-NEXT: store i32 [[SUB35]], ptr [[DOTCAPTURE_EXPR_34]], align 4 +// CHECK3-NEXT: [[SUB39:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK3-NEXT: store i32 [[SUB39]], ptr [[DOTCAPTURE_EXPR_38]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 // CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP22]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_34]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_38]], align 4 // CHECK3-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM37:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB6:[0-9]+]]) -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB4:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM37]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM41:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB6:[0-9]+]]) +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB4:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM41]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_34]], align 4 -// CHECK3-NEXT: [[CMP38:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] -// CHECK3-NEXT: br i1 [[CMP38]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_38]], align 4 +// CHECK3-NEXT: [[CMP42:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] +// CHECK3-NEXT: br i1 [[CMP42]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_34]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_38]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: // CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 @@ -869,29 +870,29 @@ for (int i = 0; i < argc; ++i) { // CHECK3: omp.inner.for.cond: // CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP39:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] -// CHECK3-NEXT: br i1 [[CMP39]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[CMP43:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK3-NEXT: br i1 [[CMP43]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: // CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 -// CHECK3-NEXT: [[ADD40:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD40]], ptr [[I36]], align 4 +// CHECK3-NEXT: [[ADD44:%.*]] = add nsw i32 0, [[MUL]] +// CHECK3-NEXT: store i32 [[ADD44]], ptr [[I40]], align 4 // CHECK3-NEXT: [[TMP32:%.*]] = load float, ptr @flag, align 4 -// CHECK3-NEXT: [[TOBOOL41:%.*]] = fcmp une float [[TMP32]], 0.000000e+00 -// CHECK3-NEXT: br i1 [[TOBOOL41]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK3-NEXT: [[TOBOOL45:%.*]] = fcmp une float [[TMP32]], 0.000000e+00 +// CHECK3-NEXT: br i1 [[TOBOOL45]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM42:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB8:[0-9]+]]) -// CHECK3-NEXT: [[TMP33:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM42]], i32 2) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM46:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB8:[0-9]+]]) +// CHECK3-NEXT: [[TMP33:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM46]], i32 2) // CHECK3-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 // CHECK3-NEXT: br i1 [[TMP34]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK3: .cancel.exit: // CHECK3-NEXT: br label [[CANCEL_EXIT:%.*]] // CHECK3: omp_section_loop.body.case.cncl: -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_EXIT:.*]] -// CHECK3: omp_section_loop.body.case26.cncl: -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_EXIT18:.*]] -// CHECK3: omp_section_loop.body.case29.cncl: -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_EXIT21:.*]] +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_EXIT]] +// CHECK3: omp_section_loop.body.case25.cncl: +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_EXIT20]] +// CHECK3: omp_section_loop.body.case28.cncl: +// CHECK3-NEXT: br label [[OMP_REGION_FINALIZE]] // CHECK3: .cancel.continue: // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: @@ -902,29 +903,29 @@ for (int i = 0; i < argc; ++i) { // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: // CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD43:%.*]] = add nsw i32 [[TMP35]], 1 -// CHECK3-NEXT: store i32 [[ADD43]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD47:%.*]] = add nsw i32 [[TMP35]], 1 +// CHECK3-NEXT: store i32 [[ADD47]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM45:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB10:[0-9]+]]) -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM45]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM49:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB10:[0-9]+]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM49]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: cancel.exit: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM44:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB10]]) -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM44]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM48:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB10]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM48]]) // CHECK3-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK3: omp.precond.end: // CHECK3-NEXT: br label [[CANCEL_CONT]] // CHECK3: cancel.cont: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM46:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM46]]) -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM47:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB14:[0-9]+]]) -// CHECK3-NEXT: [[TMP36:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM47]], i32 1, i64 40, i64 1, ptr @.omp_task_entry.) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM50:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM50]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM51:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB14:[0-9]+]]) +// CHECK3-NEXT: [[TMP36:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM51]], i32 1, i64 40, i64 1, ptr @.omp_task_entry.) // CHECK3-NEXT: [[TMP37:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP36]], i32 0, i32 0 -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM48:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB14]]) -// CHECK3-NEXT: [[TMP38:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM48]], ptr [[TMP36]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM52:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB14]]) +// CHECK3-NEXT: [[TMP38:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM52]], ptr [[TMP36]]) // CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @main.omp_outlined) // CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @main.omp_outlined.1) // CHECK3-NEXT: store i32 0, ptr [[R]], align 4 @@ -937,9 +938,9 @@ for (int i = 0; i < argc; ++i) { // CHECK3-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: omp.par.entry: // CHECK3-NEXT: [[GEP_ARGC_ADDR:%.*]] = getelementptr { ptr, ptr }, ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[LOADGEP_ARGC_ADDR:%.*]] = load ptr, ptr [[GEP_ARGC_ADDR]], align 8 +// CHECK3-NEXT: [[LOADGEP_ARGC_ADDR:%.*]] = load ptr, ptr [[GEP_ARGC_ADDR]], align 8, !align [[META3:![0-9]+]] // CHECK3-NEXT: [[GEP_ARGV_ADDR:%.*]] = getelementptr { ptr, ptr }, ptr [[TMP0]], i32 0, i32 1 -// CHECK3-NEXT: [[LOADGEP_ARGV_ADDR:%.*]] = load ptr, ptr [[GEP_ARGV_ADDR]], align 8 +// CHECK3-NEXT: [[LOADGEP_ARGV_ADDR:%.*]] = load ptr, ptr [[GEP_ARGV_ADDR]], align 8, !align [[META4:![0-9]+]] // CHECK3-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL]], align 4 @@ -948,57 +949,57 @@ for (int i = 0; i < argc; ++i) { // CHECK3: omp.par.region: // CHECK3-NEXT: [[TMP2:%.*]] = load float, ptr @flag, align 4 // CHECK3-NEXT: [[TOBOOL:%.*]] = fcmp une float [[TMP2]], 0.000000e+00 -// CHECK3-NEXT: br i1 [[TOBOOL]], label [[TMP14:%.*]], label [[TMP3:%.*]] +// CHECK3-NEXT: br i1 [[TOBOOL]], label [[TMP16:%.*]], label [[TMP3:%.*]] // CHECK3: 3: -// CHECK3-NEXT: %[[GTN:.*]] = call i32 @__kmpc_global_thread_num(ptr @1) -// CHECK3-NEXT: %[[CANCEL_POINT:.*]] = call i32 @__kmpc_cancellationpoint(ptr @1, i32 %[[GTN]], i32 1) -// CHECK3-NEXT: %[[COND:.*]] = icmp eq i32 %[[CANCEL_POINT]], 0 -// CHECK3-NEXT: br i1 %[[COND]], label %[[SPLIT:.*]], label %[[CNCL:.*]] +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK3-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_cancellationpoint(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0 +// CHECK3-NEXT: br i1 [[TMP5]], label [[DOTSPLIT:%.*]], label [[DOTCNCL:%.*]] // CHECK3: .cncl: -// CHECK3-NEXT: br label %[[FINI:.*]] +// CHECK3-NEXT: br label [[DOTFINI:%.*]] // CHECK3: .fini: -// CHECK3-NEXT: br label %[[EXIT_STUB:omp.par.exit.exitStub]] +// CHECK3-NEXT: br label [[OMP_PAR_EXIT_EXITSTUB:%.*]] // CHECK3: .split: // CHECK3-NEXT: br label [[TMP6:%.*]] // CHECK3: 6: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[LOADGEP_ARGC_ADDR]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = trunc i32 [[TMP5]] to i8 -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[LOADGEP_ARGV_ADDR]], align 8 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP6]], i64 0 -// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 -// CHECK3-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i64 0 -// CHECK3-NEXT: store i8 [[CONV]], ptr [[ARRAYIDX3]], align 1 -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK3-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_cancel_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) -// CHECK3-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0 -// CHECK3-NEXT: br i1 [[TMP9]], label [[DOTCONT:%.*]], label [[DOTCNCL5:%.*]] -// CHECK3: .cncl7: -// CHECK3-NEXT: br label %[[FINI]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[LOADGEP_ARGC_ADDR]], align 4 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i32 [[TMP7]] to i8 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[LOADGEP_ARGV_ADDR]], align 8 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP8]], i64 0 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 +// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i64 0 +// CHECK3-NEXT: store i8 [[CONV]], ptr [[ARRAYIDX4]], align 1 +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM5:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK3-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_cancel_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM5]]) +// CHECK3-NEXT: [[TMP11:%.*]] = icmp eq i32 [[TMP10]], 0 +// CHECK3-NEXT: br i1 [[TMP11]], label [[DOTCONT:%.*]], label [[DOTCNCL6:%.*]] +// CHECK3: .cncl6: +// CHECK3-NEXT: br label [[DOTFINI]] // CHECK3: .cont: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[LOADGEP_ARGC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[LOADGEP_ARGV_ADDR]], align 8 -// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds ptr, ptr [[TMP11]], i64 0 -// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[ARRAYIDX6]], align 8 -// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i64 0 -// CHECK3-NEXT: [[TMP13:%.*]] = load i8, ptr [[ARRAYIDX7]], align 1 -// CHECK3-NEXT: [[CONV8:%.*]] = sext i8 [[TMP13]] to i32 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV8]], [[TMP10]] -// CHECK3-NEXT: [[CONV9:%.*]] = trunc i32 [[ADD]] to i8 -// CHECK3-NEXT: store i8 [[CONV9]], ptr [[ARRAYIDX7]], align 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[LOADGEP_ARGC_ADDR]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[LOADGEP_ARGV_ADDR]], align 8 +// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds ptr, ptr [[TMP13]], i64 0 +// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[ARRAYIDX7]], align 8 +// CHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i64 0 +// CHECK3-NEXT: [[TMP15:%.*]] = load i8, ptr [[ARRAYIDX8]], align 1 +// CHECK3-NEXT: [[CONV9:%.*]] = sext i8 [[TMP15]] to i32 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV9]], [[TMP12]] +// CHECK3-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD]] to i8 +// CHECK3-NEXT: store i8 [[CONV10]], ptr [[ARRAYIDX8]], align 1 // CHECK3-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]] // CHECK3: omp.par.region.parallel.after: // CHECK3-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] // CHECK3: omp.par.pre_finalize: -// CHECK3-NEXT: br label %[[FINI]] +// CHECK3-NEXT: br label [[DOTFINI]] // CHECK3: 16: // CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 1) -// CHECK3-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 0 -// CHECK3-NEXT: br i1 [[TMP16]], label [[DOTSPLIT:%.*]], label [[DOTCNCL:%.*]] -// CHECK3: .cncl4: -// CHECK3-NEXT: br label %[[FINI]] -// CHECK3: .split3: -// CHECK3-NEXT: br label {{.+}} +// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 1) +// CHECK3-NEXT: [[TMP18:%.*]] = icmp eq i32 [[TMP17]], 0 +// CHECK3-NEXT: br i1 [[TMP18]], label [[DOTSPLIT2:%.*]], label [[DOTCNCL3:%.*]] +// CHECK3: .cncl3: +// CHECK3-NEXT: br label [[DOTFINI]] +// CHECK3: .split2: +// CHECK3-NEXT: br label [[TMP6]] // CHECK3: omp.par.exit.exitStub: // CHECK3-NEXT: ret void // @@ -1023,29 +1024,29 @@ for (int i = 0; i < argc; ++i) { // CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META5:![0-9]+]]) // CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) // CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias [[META12:![0-9]+]] -// CHECK3-NEXT: store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I]], align 8, !noalias [[META12]] -// CHECK3-NEXT: store ptr null, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias [[META12]] -// CHECK3-NEXT: store ptr null, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias [[META12]] -// CHECK3-NEXT: store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, !noalias [[META12]] -// CHECK3-NEXT: store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias [[META12]] -// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias [[META12]] +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]]) +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias [[META14:![0-9]+]] +// CHECK3-NEXT: store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I]], align 8, !noalias [[META14]] +// CHECK3-NEXT: store ptr null, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias [[META14]] +// CHECK3-NEXT: store ptr null, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias [[META14]] +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, !noalias [[META14]] +// CHECK3-NEXT: store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias [[META14]] +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias [[META14]] // CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM_I:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB12:[0-9]+]]) // CHECK3-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM_I]], i32 4) // CHECK3-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 // CHECK3-NEXT: br i1 [[TMP10]], label [[DOTCANCEL_EXIT_I:%.*]], label [[DOTCANCEL_CONTINUE_I:%.*]] // CHECK3: .cancel.exit.i: -// CHECK3-NEXT: store i32 1, ptr [[CLEANUP_DEST_SLOT_I]], align 4, !noalias [[META12]] +// CHECK3-NEXT: store i32 1, ptr [[CLEANUP_DEST_SLOT_I]], align 4, !noalias [[META14]] // CHECK3-NEXT: br label [[DOTOMP_OUTLINED__EXIT:%.*]] // CHECK3: .cancel.continue.i: -// CHECK3-NEXT: store i32 0, ptr [[CLEANUP_DEST_SLOT_I]], align 4, !noalias [[META12]] +// CHECK3-NEXT: store i32 0, ptr [[CLEANUP_DEST_SLOT_I]], align 4, !noalias [[META14]] // CHECK3-NEXT: br label [[DOTOMP_OUTLINED__EXIT]] // CHECK3: .omp_outlined..exit: -// CHECK3-NEXT: [[CLEANUP_DEST_I:%.*]] = load i32, ptr [[CLEANUP_DEST_SLOT_I]], align 4, !noalias [[META12]] +// CHECK3-NEXT: [[CLEANUP_DEST_I:%.*]] = load i32, ptr [[CLEANUP_DEST_SLOT_I]], align 4, !noalias [[META14]] // CHECK3-NEXT: ret i32 0 // // @@ -1092,7 +1093,9 @@ for (int i = 0; i < argc; ++i) { // CHECK3: .omp.sections.case.split: // CHECK3-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK3: .omp.sections.case.cncl: -// CHECK3-NEXT: br label [[FINI:%.*]] +// CHECK3-NEXT: br label [[DOTFINI:%.*]] +// CHECK3: .fini: +// CHECK3-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK3: .omp.sections.exit: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: @@ -1103,7 +1106,7 @@ for (int i = 0; i < argc; ++i) { // CHECK3: omp.inner.for.end: // CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB19:[0-9]+]]) // CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) -// CHECK3-NEXT: br label [[CANCEL_CONT:.*]] +// CHECK3-NEXT: br label [[CANCEL_CONT]] // CHECK3: cancel.cont: // CHECK3-NEXT: ret void // CHECK3: cancel.exit: @@ -1156,7 +1159,7 @@ for (int i = 0; i < argc; ++i) { // CHECK3: .omp.sections.case.split: // CHECK3-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK3: .omp.sections.case.cncl: -// CHECK3-NEXT: br label [[DOTFINI:.%*]] +// CHECK3-NEXT: br label [[DOTFINI:%.*]] // CHECK3: .fini: // CHECK3-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK3: .omp.sections.case2: @@ -1167,11 +1170,11 @@ for (int i = 0; i < argc; ++i) { // CHECK3: .omp.sections.case2.split: // CHECK3-NEXT: br label [[DOTOMP_SECTIONS_CASE2_SECTION_AFTER:%.*]] // CHECK3: .omp.sections.case2.section.after: -// CHECK3-NEXT: br label [[OMP_REGION_FINALIZE:.*]] +// CHECK3-NEXT: br label [[OMP_REGION_FINALIZE:%.*]] // CHECK3: omp_region.finalize: -// CHECK3-NEXT: br label [[OMP_SECTIONS_EXIT:.*]] +// CHECK3-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK3: .omp.sections.case2.cncl: -// CHECK3-NEXT: br label [[FINI:.*]] +// CHECK3-NEXT: br label [[OMP_REGION_FINALIZE]] // CHECK3: .omp.sections.exit: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: @@ -1180,14 +1183,14 @@ for (int i = 0; i < argc; ++i) { // CHECK3-NEXT: store i32 [[INC]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM5:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB23:[0-9]+]]) -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM5]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB23:[0-9]+]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM6]]) // CHECK3-NEXT: br label [[CANCEL_CONT]] // CHECK3: cancel.cont: // CHECK3-NEXT: ret void // CHECK3: cancel.exit: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB23]]) -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM5:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB23]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM5]]) // CHECK3-NEXT: br label [[CANCEL_CONT]] // // @@ -1214,8 +1217,8 @@ for (int i = 0; i < argc; ++i) { // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 // CHECK3-NEXT: store ptr [[R]], ptr [[R_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[R_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8, !nonnull [[META17:![0-9]+]], !align [[META3]] +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[R_ADDR]], align 8, !nonnull [[META17]], !align [[META3]] // CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 diff --git a/clang/test/OpenMP/irbuilder_nested_openmp_parallel_empty.c b/clang/test/OpenMP/irbuilder_nested_openmp_parallel_empty.c index 96962f71c709..c45cd2498b71 100644 --- a/clang/test/OpenMP/irbuilder_nested_openmp_parallel_empty.c +++ b/clang/test/OpenMP/irbuilder_nested_openmp_parallel_empty.c @@ -12,14 +12,11 @@ // ALL-LABEL: @_Z17nested_parallel_0v( // ALL-NEXT: entry: -// ALL-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // ALL-NEXT: br label [[OMP_PARALLEL:%.*]] // ALL: omp_parallel: -// ALL-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @_Z17nested_parallel_0v..omp_par.1) +// ALL-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:.*]], i32 0, ptr @_Z17nested_parallel_0v..omp_par.1) // ALL-NEXT: br label [[OMP_PAR_EXIT:%.*]] -// ALL: omp.par.exit7: -// ALL-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] -// ALL: omp.par.exit.exitStub: +// ALL: omp.par.exit: // ALL-NEXT: ret void // void nested_parallel_0(void) { @@ -40,7 +37,6 @@ void nested_parallel_0(void) { // ALL-NEXT: store ptr [[R:%.*]], ptr [[R_ADDR]], align 8 // ALL-NEXT: store i32 [[A:%.*]], ptr [[A_ADDR]], align 4 // ALL-NEXT: store double [[B:%.*]], ptr [[B_ADDR]], align 8 -// ALL-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // ALL-NEXT: br label [[OMP_PARALLEL:%.*]] // ALL: omp_parallel: // ALL-NEXT: [[GEP_A_ADDR15:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG14]], i32 0, i32 0 @@ -49,7 +45,7 @@ void nested_parallel_0(void) { // ALL-NEXT: store ptr [[B_ADDR]], ptr [[GEP_B_ADDR16]], align 8 // ALL-NEXT: [[GEP_R_ADDR17:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG14]], i32 0, i32 2 // ALL-NEXT: store ptr [[R_ADDR]], ptr [[GEP_R_ADDR17]], align 8 -// ALL-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z17nested_parallel_1Pfid..omp_par.2, ptr [[STRUCTARG14]]) +// ALL-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:.*]], i32 1, ptr @_Z17nested_parallel_1Pfid..omp_par.2, ptr [[STRUCTARG14]]) // ALL-NEXT: br label [[OMP_PAR_EXIT:%.*]] // ALL: omp.par.exit: // ALL-NEXT: ret void @@ -73,7 +69,6 @@ void nested_parallel_1(float *r, int a, double b) { // ALL-NEXT: store ptr [[R:%.*]], ptr [[R_ADDR]], align 8 // ALL-NEXT: store i32 [[A:%.*]], ptr [[A_ADDR]], align 4 // ALL-NEXT: store double [[B:%.*]], ptr [[B_ADDR]], align 8 -// ALL-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // ALL-NEXT: br label [[OMP_PARALLEL:%.*]] // ALL: omp_parallel: // ALL-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 0 @@ -82,7 +77,7 @@ void nested_parallel_1(float *r, int a, double b) { // ALL-NEXT: store ptr [[B_ADDR]], ptr [[GEP_B_ADDR]], align 8 // ALL-NEXT: [[GEP_R_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 2 // ALL-NEXT: store ptr [[R_ADDR]], ptr [[GEP_R_ADDR]], align 8 -// ALL-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z17nested_parallel_2Pfid..omp_par.5, ptr [[STRUCTARG]]) +// ALL-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:.*]], i32 1, ptr @_Z17nested_parallel_2Pfid..omp_par.5, ptr [[STRUCTARG]]) // ALL-NEXT: br label [[OMP_PAR_EXIT:%.*]] // ALL: omp.par.exit: // ALL-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 diff --git a/clang/test/OpenMP/irbuilder_nested_parallel_for.c b/clang/test/OpenMP/irbuilder_nested_parallel_for.c index 56cf9644de5e..cb38d7383644 100644 --- a/clang/test/OpenMP/irbuilder_nested_parallel_for.c +++ b/clang/test/OpenMP/irbuilder_nested_parallel_for.c @@ -74,10 +74,9 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-LABEL: define {{[^@]+}}@_Z14parallel_for_0v // CHECK-SAME: () #[[ATTR0:[0-9]+]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] // CHECK: omp_parallel: -// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @_Z14parallel_for_0v..omp_par) +// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 0, ptr @_Z14parallel_for_0v..omp_par) // CHECK-NEXT: br label [[OMP_PAR_EXIT:%.*]] // CHECK: omp.par.exit: // CHECK-NEXT: ret void @@ -114,8 +113,8 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[DOTCOUNT]], 1 // CHECK-NEXT: store i32 [[TMP4]], ptr [[P_UPPERBOUND]], align 4 // CHECK-NEXT: store i32 1, ptr [[P_STRIDE]], align 4 -// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM2]], i32 34, ptr [[P_LASTITER]], ptr [[P_LOWERBOUND]], ptr [[P_UPPERBOUND]], ptr [[P_STRIDE]], i32 1, i32 0) +// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, ptr [[P_LASTITER]], ptr [[P_LOWERBOUND]], ptr [[P_UPPERBOUND]], ptr [[P_STRIDE]], i32 1, i32 0) // CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[P_LOWERBOUND]], align 4 // CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[P_UPPERBOUND]], align 4 // CHECK-NEXT: [[TMP7:%.*]] = sub i32 [[TMP6]], [[TMP5]] @@ -128,16 +127,18 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-NEXT: [[OMP_LOOP_CMP:%.*]] = icmp ult i32 [[OMP_LOOP_IV]], [[TMP8]] // CHECK-NEXT: br i1 [[OMP_LOOP_CMP]], label [[OMP_LOOP_BODY:%.*]], label [[OMP_LOOP_EXIT:%.*]] // CHECK: omp_loop.exit: -// CHECK-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) +// CHECK-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) // CHECK-NEXT: br label [[OMP_LOOP_AFTER:%.*]] // CHECK: omp_loop.after: // CHECK-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]] // CHECK: omp.par.region.parallel.after: // CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] // CHECK: omp.par.pre_finalize: -// CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +// CHECK-NEXT: br label [[DOTFINI:%.*]] +// CHECK: .fini: +// CHECK-NEXT: br label [[OMP_PAR_EXIT_EXITSTUB:%.*]] // CHECK: omp_loop.body: // CHECK-NEXT: [[TMP9:%.*]] = add i32 [[OMP_LOOP_IV]], [[TMP5]] // CHECK-NEXT: call void @__captured_stmt.1(ptr [[I]], i32 [[TMP9]], ptr [[AGG_CAPTURED1]]) @@ -150,7 +151,7 @@ void parallel_for_2(float *r, int a, double b) { // // // CHECK-LABEL: define {{[^@]+}}@__captured_stmt -// CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[DISTANCE_ADDR:%.*]] = alloca ptr, align 8 // CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 @@ -161,7 +162,7 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !nonnull [[META3:![0-9]+]], !align [[META4:![0-9]+]] // CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 // CHECK-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4 // CHECK-NEXT: store i32 100, ptr [[DOTSTOP]], align 4 @@ -184,13 +185,13 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-NEXT: br label [[COND_END]] // CHECK: cond.end: // CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ] -// CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8 +// CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !nonnull [[META3]], !align [[META4]] // CHECK-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4 // CHECK-NEXT: ret void // // // CHECK-LABEL: define {{[^@]+}}@__captured_stmt.1 -// CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { +// CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[LOOPVAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK-NEXT: [[LOGICAL_ADDR:%.*]] = alloca i32, align 4 @@ -204,7 +205,7 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4 // CHECK-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]] // CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]] -// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !nonnull [[META3]], !align [[META4]] // CHECK-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4 // CHECK-NEXT: ret void // @@ -212,23 +213,22 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-LABEL: define {{[^@]+}}@_Z14parallel_for_1Pfid // CHECK-SAME: (ptr noundef [[R:%.*]], i32 noundef [[A:%.*]], double noundef [[B:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[STRUCTARG17:%.*]] = alloca { ptr, ptr, ptr }, align 8 +// CHECK-NEXT: [[STRUCTARG:%.*]] = alloca { ptr, ptr, ptr }, align 8 // CHECK-NEXT: [[R_ADDR:%.*]] = alloca ptr, align 8 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[B_ADDR:%.*]] = alloca double, align 8 // CHECK-NEXT: store ptr [[R]], ptr [[R_ADDR]], align 8 // CHECK-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK-NEXT: store double [[B]], ptr [[B_ADDR]], align 8 -// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] // CHECK: omp_parallel: -// CHECK-NEXT: [[GEP_A_ADDR18:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG17]], i32 0, i32 0 -// CHECK-NEXT: store ptr [[A_ADDR]], ptr [[GEP_A_ADDR18]], align 8 -// CHECK-NEXT: [[GEP_B_ADDR19:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG17]], i32 0, i32 1 -// CHECK-NEXT: store ptr [[B_ADDR]], ptr [[GEP_B_ADDR19]], align 8 -// CHECK-NEXT: [[GEP_R_ADDR20:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG17]], i32 0, i32 2 -// CHECK-NEXT: store ptr [[R_ADDR]], ptr [[GEP_R_ADDR20]], align 8 -// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z14parallel_for_1Pfid..omp_par.4, ptr [[STRUCTARG17]]) +// CHECK-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[A_ADDR]], ptr [[GEP_A_ADDR]], align 8 +// CHECK-NEXT: [[GEP_B_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 1 +// CHECK-NEXT: store ptr [[B_ADDR]], ptr [[GEP_B_ADDR]], align 8 +// CHECK-NEXT: [[GEP_R_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 2 +// CHECK-NEXT: store ptr [[R_ADDR]], ptr [[GEP_R_ADDR]], align 8 +// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z14parallel_for_1Pfid..omp_par.4, ptr [[STRUCTARG]]) // CHECK-NEXT: br label [[OMP_PAR_EXIT:%.*]] // CHECK: omp.par.exit: // CHECK-NEXT: ret void @@ -238,11 +238,11 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] { // CHECK-NEXT: omp.par.entry: // CHECK-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 0 -// CHECK-NEXT: [[LOADGEP_A_ADDR:%.*]] = load ptr, ptr [[GEP_A_ADDR]], align 8 +// CHECK-NEXT: [[LOADGEP_A_ADDR:%.*]] = load ptr, ptr [[GEP_A_ADDR]], align 8, !align [[META4]] // CHECK-NEXT: [[GEP_B_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 1 -// CHECK-NEXT: [[LOADGEP_B_ADDR:%.*]] = load ptr, ptr [[GEP_B_ADDR]], align 8 +// CHECK-NEXT: [[LOADGEP_B_ADDR:%.*]] = load ptr, ptr [[GEP_B_ADDR]], align 8, !align [[META7:![0-9]+]] // CHECK-NEXT: [[GEP_R_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 2 -// CHECK-NEXT: [[LOADGEP_R_ADDR:%.*]] = load ptr, ptr [[GEP_R_ADDR]], align 8 +// CHECK-NEXT: [[LOADGEP_R_ADDR:%.*]] = load ptr, ptr [[GEP_R_ADDR]], align 8, !align [[META7]] // CHECK-NEXT: [[STRUCTARG:%.*]] = alloca { ptr, ptr, ptr }, align 8 // CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR]], align 4 @@ -250,7 +250,6 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-NEXT: [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4 // CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] // CHECK: omp.par.region: -// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] // CHECK: omp_parallel: // CHECK-NEXT: [[GEP_A_ADDR1:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 0 @@ -260,44 +259,46 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-NEXT: [[GEP_R_ADDR3:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 2 // CHECK-NEXT: store ptr [[LOADGEP_R_ADDR]], ptr [[GEP_R_ADDR3]], align 8 // CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z14parallel_for_1Pfid..omp_par, ptr [[STRUCTARG]]) -// CHECK-NEXT: br label [[OMP_PAR_EXIT:%.*]] -// CHECK: omp.par.exit7: +// CHECK-NEXT: br label [[OMP_PAR_EXIT6:%.*]] +// CHECK: omp.par.exit6: // CHECK-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]] // CHECK: omp.par.region.parallel.after: // CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] // CHECK: omp.par.pre_finalize: -// CHECK-NEXT: br label [[OMP_PAR_EXIT16_EXITSTUB:%.*]] +// CHECK-NEXT: br label [[DOTFINI14:%.*]] +// CHECK: .fini14: +// CHECK-NEXT: br label [[OMP_PAR_EXIT_EXITSTUB:%.*]] // CHECK: omp.par.exit.exitStub: // CHECK-NEXT: ret void // // // CHECK-LABEL: define {{[^@]+}}@_Z14parallel_for_1Pfid..omp_par -// CHECK-SAME: (ptr noalias [[TID_ADDR2:%.*]], ptr noalias [[ZERO_ADDR3:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] { -// CHECK-NEXT: omp.par.entry4: +// CHECK-SAME: (ptr noalias [[TID_ADDR1:%.*]], ptr noalias [[ZERO_ADDR2:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] { +// CHECK-NEXT: omp.par.entry3: // CHECK-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 0 -// CHECK-NEXT: [[LOADGEP_A_ADDR:%.*]] = load ptr, ptr [[GEP_A_ADDR]], align 8 +// CHECK-NEXT: [[LOADGEP_A_ADDR:%.*]] = load ptr, ptr [[GEP_A_ADDR]], align 8, !align [[META4]] // CHECK-NEXT: [[GEP_B_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 1 -// CHECK-NEXT: [[LOADGEP_B_ADDR:%.*]] = load ptr, ptr [[GEP_B_ADDR]], align 8 +// CHECK-NEXT: [[LOADGEP_B_ADDR:%.*]] = load ptr, ptr [[GEP_B_ADDR]], align 8, !align [[META7]] // CHECK-NEXT: [[GEP_R_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 2 -// CHECK-NEXT: [[LOADGEP_R_ADDR:%.*]] = load ptr, ptr [[GEP_R_ADDR]], align 8 +// CHECK-NEXT: [[LOADGEP_R_ADDR:%.*]] = load ptr, ptr [[GEP_R_ADDR]], align 8, !align [[META7]] // CHECK-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[TID_ADDR_LOCAL8:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR2]], align 4 -// CHECK-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL8]], align 4 -// CHECK-NEXT: [[TID9:%.*]] = load i32, ptr [[TID_ADDR_LOCAL8]], align 4 +// CHECK-NEXT: [[TID_ADDR_LOCAL7:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR1]], align 4 +// CHECK-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL7]], align 4 +// CHECK-NEXT: [[TID8:%.*]] = load i32, ptr [[TID_ADDR_LOCAL7]], align 4 // CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 -// CHECK-NEXT: [[AGG_CAPTURED12:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 +// CHECK-NEXT: [[AGG_CAPTURED11:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK-NEXT: [[DOTCOUNT_ADDR:%.*]] = alloca i32, align 4 -// CHECK-NEXT: br label [[OMP_PAR_REGION5:%.*]] -// CHECK: omp.par.region5: +// CHECK-NEXT: br label [[OMP_PAR_REGION4:%.*]] +// CHECK: omp.par.region4: // CHECK-NEXT: store i32 0, ptr [[I]], align 4 // CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 0 // CHECK-NEXT: store ptr [[I]], ptr [[TMP2]], align 8 -// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_2]], ptr [[AGG_CAPTURED12]], i32 0, i32 0 +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_2]], ptr [[AGG_CAPTURED11]], i32 0, i32 0 // CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4 // CHECK-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 // CHECK-NEXT: call void @__captured_stmt.2(ptr [[DOTCOUNT_ADDR]], ptr [[AGG_CAPTURED]]) @@ -308,8 +309,8 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-NEXT: [[TMP5:%.*]] = sub i32 [[DOTCOUNT]], 1 // CHECK-NEXT: store i32 [[TMP5]], ptr [[P_UPPERBOUND]], align 4 // CHECK-NEXT: store i32 1, ptr [[P_STRIDE]], align 4 -// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM14:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM14]], i32 34, ptr [[P_LASTITER]], ptr [[P_LOWERBOUND]], ptr [[P_UPPERBOUND]], ptr [[P_STRIDE]], i32 1, i32 0) +// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, ptr [[P_LASTITER]], ptr [[P_LOWERBOUND]], ptr [[P_UPPERBOUND]], ptr [[P_STRIDE]], i32 1, i32 0) // CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[P_LOWERBOUND]], align 4 // CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[P_UPPERBOUND]], align 4 // CHECK-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], [[TMP6]] @@ -322,36 +323,38 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-NEXT: [[OMP_LOOP_CMP:%.*]] = icmp ult i32 [[OMP_LOOP_IV]], [[TMP9]] // CHECK-NEXT: br i1 [[OMP_LOOP_CMP]], label [[OMP_LOOP_BODY:%.*]], label [[OMP_LOOP_EXIT:%.*]] // CHECK: omp_loop.exit: -// CHECK-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM14]]) -// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM15:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM15]]) +// CHECK-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM13:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM13]]) // CHECK-NEXT: br label [[OMP_LOOP_AFTER:%.*]] // CHECK: omp_loop.after: -// CHECK-NEXT: br label [[OMP_PAR_REGION5_PARALLEL_AFTER:%.*]] -// CHECK: omp.par.region5.parallel.after: -// CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE6:%.*]] -// CHECK: omp.par.pre_finalize6: -// CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +// CHECK-NEXT: br label [[OMP_PAR_REGION4_PARALLEL_AFTER:%.*]] +// CHECK: omp.par.region4.parallel.after: +// CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE5:%.*]] +// CHECK: omp.par.pre_finalize5: +// CHECK-NEXT: br label [[DOTFINI:%.*]] +// CHECK: .fini: +// CHECK-NEXT: br label [[OMP_PAR_EXIT6_EXITSTUB:%.*]] // CHECK: omp_loop.body: // CHECK-NEXT: [[TMP10:%.*]] = add i32 [[OMP_LOOP_IV]], [[TMP6]] -// CHECK-NEXT: call void @__captured_stmt.3(ptr [[I]], i32 [[TMP10]], ptr [[AGG_CAPTURED12]]) +// CHECK-NEXT: call void @__captured_stmt.3(ptr [[I]], i32 [[TMP10]], ptr [[AGG_CAPTURED11]]) // CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4 // CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP11]] to double // CHECK-NEXT: [[TMP12:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8 // CHECK-NEXT: [[ADD:%.*]] = fadd double [[CONV]], [[TMP12]] -// CHECK-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD]] to float +// CHECK-NEXT: [[CONV12:%.*]] = fptrunc double [[ADD]] to float // CHECK-NEXT: [[TMP13:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8 -// CHECK-NEXT: store float [[CONV13]], ptr [[TMP13]], align 4 +// CHECK-NEXT: store float [[CONV12]], ptr [[TMP13]], align 4 // CHECK-NEXT: br label [[OMP_LOOP_INC]] // CHECK: omp_loop.inc: // CHECK-NEXT: [[OMP_LOOP_NEXT]] = add nuw i32 [[OMP_LOOP_IV]], 1 // CHECK-NEXT: br label [[OMP_LOOP_HEADER]] -// CHECK: omp.par.exit7.exitStub: +// CHECK: omp.par.exit6.exitStub: // CHECK-NEXT: ret void // // // CHECK-LABEL: define {{[^@]+}}@__captured_stmt.2 -// CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { +// CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[DISTANCE_ADDR:%.*]] = alloca ptr, align 8 // CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 @@ -362,7 +365,7 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !nonnull [[META3]], !align [[META4]] // CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 // CHECK-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4 // CHECK-NEXT: store i32 100, ptr [[DOTSTOP]], align 4 @@ -385,13 +388,13 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-NEXT: br label [[COND_END]] // CHECK: cond.end: // CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ] -// CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8 +// CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !nonnull [[META3]], !align [[META4]] // CHECK-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4 // CHECK-NEXT: ret void // // // CHECK-LABEL: define {{[^@]+}}@__captured_stmt.3 -// CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { +// CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[LOOPVAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK-NEXT: [[LOGICAL_ADDR:%.*]] = alloca i32, align 4 @@ -405,7 +408,7 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4 // CHECK-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]] // CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]] -// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !nonnull [[META3]], !align [[META4]] // CHECK-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4 // CHECK-NEXT: ret void // @@ -417,18 +420,17 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-NEXT: [[R_ADDR:%.*]] = alloca ptr, align 8 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[B_ADDR:%.*]] = alloca double, align 8 -// CHECK-NEXT: [[I185:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[AGG_CAPTURED186:%.*]] = alloca [[STRUCT_ANON_17:%.*]], align 8 -// CHECK-NEXT: [[AGG_CAPTURED187:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 4 -// CHECK-NEXT: [[DOTCOUNT_ADDR188:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_LASTITER203:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_LOWERBOUND204:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_UPPERBOUND205:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_STRIDE206:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[I181:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[AGG_CAPTURED182:%.*]] = alloca [[STRUCT_ANON_17:%.*]], align 8 +// CHECK-NEXT: [[AGG_CAPTURED183:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 4 +// CHECK-NEXT: [[DOTCOUNT_ADDR184:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_LASTITER199:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_LOWERBOUND200:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_UPPERBOUND201:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_STRIDE202:%.*]] = alloca i32, align 4 // CHECK-NEXT: store ptr [[R]], ptr [[R_ADDR]], align 8 // CHECK-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK-NEXT: store double [[B]], ptr [[B_ADDR]], align 8 -// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] // CHECK: omp_parallel: // CHECK-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 0 @@ -438,55 +440,55 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-NEXT: [[GEP_R_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 2 // CHECK-NEXT: store ptr [[R_ADDR]], ptr [[GEP_R_ADDR]], align 8 // CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z14parallel_for_2Pfid..omp_par.23, ptr [[STRUCTARG]]) -// CHECK-NEXT: br label [[OMP_PAR_EXIT184:%.*]] +// CHECK-NEXT: br label [[OMP_PAR_EXIT:%.*]] // CHECK: omp.par.exit: -// CHECK-NEXT: store i32 0, ptr [[I185]], align 4 -// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_17]], ptr [[AGG_CAPTURED186]], i32 0, i32 0 -// CHECK-NEXT: store ptr [[I185]], ptr [[TMP0]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_18]], ptr [[AGG_CAPTURED187]], i32 0, i32 0 -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I185]], align 4 +// CHECK-NEXT: store i32 0, ptr [[I181]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_17]], ptr [[AGG_CAPTURED182]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[I181]], ptr [[TMP0]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_18]], ptr [[AGG_CAPTURED183]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I181]], align 4 // CHECK-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 -// CHECK-NEXT: call void @__captured_stmt.19(ptr [[DOTCOUNT_ADDR188]], ptr [[AGG_CAPTURED186]]) -// CHECK-NEXT: [[DOTCOUNT189:%.*]] = load i32, ptr [[DOTCOUNT_ADDR188]], align 4 -// CHECK-NEXT: br label [[OMP_LOOP_PREHEADER190:%.*]] -// CHECK: omp_loop.preheader190: -// CHECK-NEXT: store i32 0, ptr [[P_LOWERBOUND204]], align 4 -// CHECK-NEXT: [[TMP3:%.*]] = sub i32 [[DOTCOUNT189]], 1 -// CHECK-NEXT: store i32 [[TMP3]], ptr [[P_UPPERBOUND205]], align 4 -// CHECK-NEXT: store i32 1, ptr [[P_STRIDE206]], align 4 -// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM207:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM207]], i32 34, ptr [[P_LASTITER203]], ptr [[P_LOWERBOUND204]], ptr [[P_UPPERBOUND205]], ptr [[P_STRIDE206]], i32 1, i32 0) -// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[P_LOWERBOUND204]], align 4 -// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[P_UPPERBOUND205]], align 4 +// CHECK-NEXT: call void @__captured_stmt.19(ptr [[DOTCOUNT_ADDR184]], ptr [[AGG_CAPTURED182]]) +// CHECK-NEXT: [[DOTCOUNT185:%.*]] = load i32, ptr [[DOTCOUNT_ADDR184]], align 4 +// CHECK-NEXT: br label [[OMP_LOOP_PREHEADER186:%.*]] +// CHECK: omp_loop.preheader186: +// CHECK-NEXT: store i32 0, ptr [[P_LOWERBOUND200]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = sub i32 [[DOTCOUNT185]], 1 +// CHECK-NEXT: store i32 [[TMP3]], ptr [[P_UPPERBOUND201]], align 4 +// CHECK-NEXT: store i32 1, ptr [[P_STRIDE202]], align 4 +// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM203:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM203]], i32 34, ptr [[P_LASTITER199]], ptr [[P_LOWERBOUND200]], ptr [[P_UPPERBOUND201]], ptr [[P_STRIDE202]], i32 1, i32 0) +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[P_LOWERBOUND200]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[P_UPPERBOUND201]], align 4 // CHECK-NEXT: [[TMP6:%.*]] = sub i32 [[TMP5]], [[TMP4]] // CHECK-NEXT: [[TMP7:%.*]] = add i32 [[TMP6]], 1 -// CHECK-NEXT: br label [[OMP_LOOP_HEADER191:%.*]] -// CHECK: omp_loop.header191: -// CHECK-NEXT: [[OMP_LOOP_IV197:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER190]] ], [ [[OMP_LOOP_NEXT199:%.*]], [[OMP_LOOP_INC194:%.*]] ] -// CHECK-NEXT: br label [[OMP_LOOP_COND192:%.*]] -// CHECK: omp_loop.cond192: -// CHECK-NEXT: [[OMP_LOOP_CMP198:%.*]] = icmp ult i32 [[OMP_LOOP_IV197]], [[TMP7]] -// CHECK-NEXT: br i1 [[OMP_LOOP_CMP198]], label [[OMP_LOOP_BODY193:%.*]], label [[OMP_LOOP_EXIT195:%.*]] -// CHECK: omp_loop.body193: -// CHECK-NEXT: [[TMP8:%.*]] = add i32 [[OMP_LOOP_IV197]], [[TMP4]] -// CHECK-NEXT: call void @__captured_stmt.20(ptr [[I185]], i32 [[TMP8]], ptr [[AGG_CAPTURED187]]) +// CHECK-NEXT: br label [[OMP_LOOP_HEADER187:%.*]] +// CHECK: omp_loop.header187: +// CHECK-NEXT: [[OMP_LOOP_IV193:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER186]] ], [ [[OMP_LOOP_NEXT195:%.*]], [[OMP_LOOP_INC190:%.*]] ] +// CHECK-NEXT: br label [[OMP_LOOP_COND188:%.*]] +// CHECK: omp_loop.cond188: +// CHECK-NEXT: [[OMP_LOOP_CMP194:%.*]] = icmp ult i32 [[OMP_LOOP_IV193]], [[TMP7]] +// CHECK-NEXT: br i1 [[OMP_LOOP_CMP194]], label [[OMP_LOOP_BODY189:%.*]], label [[OMP_LOOP_EXIT191:%.*]] +// CHECK: omp_loop.body189: +// CHECK-NEXT: [[TMP8:%.*]] = add i32 [[OMP_LOOP_IV193]], [[TMP4]] +// CHECK-NEXT: call void @__captured_stmt.20(ptr [[I181]], i32 [[TMP8]], ptr [[AGG_CAPTURED183]]) // CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK-NEXT: [[CONV200:%.*]] = sitofp i32 [[TMP9]] to double +// CHECK-NEXT: [[CONV196:%.*]] = sitofp i32 [[TMP9]] to double // CHECK-NEXT: [[TMP10:%.*]] = load double, ptr [[B_ADDR]], align 8 -// CHECK-NEXT: [[ADD201:%.*]] = fadd double [[CONV200]], [[TMP10]] -// CHECK-NEXT: [[CONV202:%.*]] = fptrunc double [[ADD201]] to float +// CHECK-NEXT: [[ADD197:%.*]] = fadd double [[CONV196]], [[TMP10]] +// CHECK-NEXT: [[CONV198:%.*]] = fptrunc double [[ADD197]] to float // CHECK-NEXT: [[TMP11:%.*]] = load ptr, ptr [[R_ADDR]], align 8 -// CHECK-NEXT: store float [[CONV202]], ptr [[TMP11]], align 4 -// CHECK-NEXT: br label [[OMP_LOOP_INC194]] -// CHECK: omp_loop.inc194: -// CHECK-NEXT: [[OMP_LOOP_NEXT199]] = add nuw i32 [[OMP_LOOP_IV197]], 1 -// CHECK-NEXT: br label [[OMP_LOOP_HEADER191]] -// CHECK: omp_loop.exit195: -// CHECK-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM207]]) -// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM208:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM208]]) -// CHECK-NEXT: br label [[OMP_LOOP_AFTER196:%.*]] -// CHECK: omp_loop.after196: +// CHECK-NEXT: store float [[CONV198]], ptr [[TMP11]], align 4 +// CHECK-NEXT: br label [[OMP_LOOP_INC190]] +// CHECK: omp_loop.inc190: +// CHECK-NEXT: [[OMP_LOOP_NEXT195]] = add nuw i32 [[OMP_LOOP_IV193]], 1 +// CHECK-NEXT: br label [[OMP_LOOP_HEADER187]] +// CHECK: omp_loop.exit191: +// CHECK-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM203]]) +// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM204:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM204]]) +// CHECK-NEXT: br label [[OMP_LOOP_AFTER192:%.*]] +// CHECK: omp_loop.after192: // CHECK-NEXT: ret void // // @@ -494,16 +496,16 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] { // CHECK-NEXT: omp.par.entry: // CHECK-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 0 -// CHECK-NEXT: [[LOADGEP_A_ADDR:%.*]] = load ptr, ptr [[GEP_A_ADDR]], align 8 +// CHECK-NEXT: [[LOADGEP_A_ADDR:%.*]] = load ptr, ptr [[GEP_A_ADDR]], align 8, !align [[META4]] // CHECK-NEXT: [[GEP_B_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 1 -// CHECK-NEXT: [[LOADGEP_B_ADDR:%.*]] = load ptr, ptr [[GEP_B_ADDR]], align 8 +// CHECK-NEXT: [[LOADGEP_B_ADDR:%.*]] = load ptr, ptr [[GEP_B_ADDR]], align 8, !align [[META7]] // CHECK-NEXT: [[GEP_R_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 2 -// CHECK-NEXT: [[LOADGEP_R_ADDR:%.*]] = load ptr, ptr [[GEP_R_ADDR]], align 8 -// CHECK-NEXT: [[STRUCTARG214:%.*]] = alloca { ptr, ptr, ptr }, align 8 -// CHECK-NEXT: [[P_LASTITER178:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_LOWERBOUND179:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_UPPERBOUND180:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_STRIDE181:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[LOADGEP_R_ADDR:%.*]] = load ptr, ptr [[GEP_R_ADDR]], align 8, !align [[META7]] +// CHECK-NEXT: [[STRUCTARG:%.*]] = alloca { ptr, ptr, ptr }, align 8 +// CHECK-NEXT: [[P_LASTITER174:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_LOWERBOUND175:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_UPPERBOUND176:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_STRIDE177:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 @@ -516,10 +518,10 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK-NEXT: [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK-NEXT: [[DOTCOUNT_ADDR:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[I160:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[AGG_CAPTURED161:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 8 -// CHECK-NEXT: [[AGG_CAPTURED162:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 4 -// CHECK-NEXT: [[DOTCOUNT_ADDR163:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[I156:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[AGG_CAPTURED157:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 8 +// CHECK-NEXT: [[AGG_CAPTURED158:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 4 +// CHECK-NEXT: [[DOTCOUNT_ADDR159:%.*]] = alloca i32, align 4 // CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] // CHECK: omp.par.region: // CHECK-NEXT: store i32 0, ptr [[I]], align 4 @@ -536,8 +538,8 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-NEXT: [[TMP5:%.*]] = sub i32 [[DOTCOUNT]], 1 // CHECK-NEXT: store i32 [[TMP5]], ptr [[P_UPPERBOUND]], align 4 // CHECK-NEXT: store i32 1, ptr [[P_STRIDE]], align 4 -// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]], i32 34, ptr [[P_LASTITER]], ptr [[P_LOWERBOUND]], ptr [[P_UPPERBOUND]], ptr [[P_STRIDE]], i32 1, i32 0) +// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, ptr [[P_LASTITER]], ptr [[P_LOWERBOUND]], ptr [[P_UPPERBOUND]], ptr [[P_STRIDE]], i32 1, i32 0) // CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[P_LOWERBOUND]], align 4 // CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[P_UPPERBOUND]], align 4 // CHECK-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], [[TMP6]] @@ -550,75 +552,76 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-NEXT: [[OMP_LOOP_CMP:%.*]] = icmp ult i32 [[OMP_LOOP_IV]], [[TMP9]] // CHECK-NEXT: br i1 [[OMP_LOOP_CMP]], label [[OMP_LOOP_BODY:%.*]], label [[OMP_LOOP_EXIT:%.*]] // CHECK: omp_loop.exit: -// CHECK-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) -// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) +// CHECK-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) // CHECK-NEXT: br label [[OMP_LOOP_AFTER:%.*]] // CHECK: omp_loop.after: -// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM5:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] // CHECK: omp_parallel: -// CHECK-NEXT: [[GEP_A_ADDR215:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG214]], i32 0, i32 0 -// CHECK-NEXT: store ptr [[LOADGEP_A_ADDR]], ptr [[GEP_A_ADDR215]], align 8 -// CHECK-NEXT: [[GEP_B_ADDR216:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG214]], i32 0, i32 1 -// CHECK-NEXT: store ptr [[LOADGEP_B_ADDR]], ptr [[GEP_B_ADDR216]], align 8 -// CHECK-NEXT: [[GEP_R_ADDR217:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG214]], i32 0, i32 2 -// CHECK-NEXT: store ptr [[LOADGEP_R_ADDR]], ptr [[GEP_R_ADDR217]], align 8 -// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z14parallel_for_2Pfid..omp_par.22, ptr [[STRUCTARG214]]) -// CHECK-NEXT: br label [[OMP_PAR_EXIT159:%.*]] -// CHECK: omp.par.exit11: -// CHECK-NEXT: store i32 0, ptr [[I160]], align 4 -// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_15]], ptr [[AGG_CAPTURED161]], i32 0, i32 0 -// CHECK-NEXT: store ptr [[I160]], ptr [[TMP10]], align 8 -// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_16]], ptr [[AGG_CAPTURED162]], i32 0, i32 0 -// CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[I160]], align 4 +// CHECK-NEXT: [[GEP_A_ADDR1:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[LOADGEP_A_ADDR]], ptr [[GEP_A_ADDR1]], align 8 +// CHECK-NEXT: [[GEP_B_ADDR2:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 1 +// CHECK-NEXT: store ptr [[LOADGEP_B_ADDR]], ptr [[GEP_B_ADDR2]], align 8 +// CHECK-NEXT: [[GEP_R_ADDR3:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 2 +// CHECK-NEXT: store ptr [[LOADGEP_R_ADDR]], ptr [[GEP_R_ADDR3]], align 8 +// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z14parallel_for_2Pfid..omp_par.22, ptr [[STRUCTARG]]) +// CHECK-NEXT: br label [[OMP_PAR_EXIT9:%.*]] +// CHECK: omp.par.exit9: +// CHECK-NEXT: store i32 0, ptr [[I156]], align 4 +// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_15]], ptr [[AGG_CAPTURED157]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[I156]], ptr [[TMP10]], align 8 +// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_16]], ptr [[AGG_CAPTURED158]], i32 0, i32 0 +// CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[I156]], align 4 // CHECK-NEXT: store i32 [[TMP12]], ptr [[TMP11]], align 4 -// CHECK-NEXT: call void @__captured_stmt.17(ptr [[DOTCOUNT_ADDR163]], ptr [[AGG_CAPTURED161]]) -// CHECK-NEXT: [[DOTCOUNT164:%.*]] = load i32, ptr [[DOTCOUNT_ADDR163]], align 4 -// CHECK-NEXT: br label [[OMP_LOOP_PREHEADER165:%.*]] -// CHECK: omp_loop.preheader165: -// CHECK-NEXT: store i32 0, ptr [[P_LOWERBOUND179]], align 4 -// CHECK-NEXT: [[TMP13:%.*]] = sub i32 [[DOTCOUNT164]], 1 -// CHECK-NEXT: store i32 [[TMP13]], ptr [[P_UPPERBOUND180]], align 4 -// CHECK-NEXT: store i32 1, ptr [[P_STRIDE181]], align 4 -// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM182:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM182]], i32 34, ptr [[P_LASTITER178]], ptr [[P_LOWERBOUND179]], ptr [[P_UPPERBOUND180]], ptr [[P_STRIDE181]], i32 1, i32 0) -// CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[P_LOWERBOUND179]], align 4 -// CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[P_UPPERBOUND180]], align 4 +// CHECK-NEXT: call void @__captured_stmt.17(ptr [[DOTCOUNT_ADDR159]], ptr [[AGG_CAPTURED157]]) +// CHECK-NEXT: [[DOTCOUNT160:%.*]] = load i32, ptr [[DOTCOUNT_ADDR159]], align 4 +// CHECK-NEXT: br label [[OMP_LOOP_PREHEADER161:%.*]] +// CHECK: omp_loop.preheader161: +// CHECK-NEXT: store i32 0, ptr [[P_LOWERBOUND175]], align 4 +// CHECK-NEXT: [[TMP13:%.*]] = sub i32 [[DOTCOUNT160]], 1 +// CHECK-NEXT: store i32 [[TMP13]], ptr [[P_UPPERBOUND176]], align 4 +// CHECK-NEXT: store i32 1, ptr [[P_STRIDE177]], align 4 +// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM178:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM178]], i32 34, ptr [[P_LASTITER174]], ptr [[P_LOWERBOUND175]], ptr [[P_UPPERBOUND176]], ptr [[P_STRIDE177]], i32 1, i32 0) +// CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[P_LOWERBOUND175]], align 4 +// CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[P_UPPERBOUND176]], align 4 // CHECK-NEXT: [[TMP16:%.*]] = sub i32 [[TMP15]], [[TMP14]] // CHECK-NEXT: [[TMP17:%.*]] = add i32 [[TMP16]], 1 -// CHECK-NEXT: br label [[OMP_LOOP_HEADER166:%.*]] -// CHECK: omp_loop.header166: -// CHECK-NEXT: [[OMP_LOOP_IV172:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER165]] ], [ [[OMP_LOOP_NEXT174:%.*]], [[OMP_LOOP_INC169:%.*]] ] -// CHECK-NEXT: br label [[OMP_LOOP_COND167:%.*]] -// CHECK: omp_loop.cond167: -// CHECK-NEXT: [[OMP_LOOP_CMP173:%.*]] = icmp ult i32 [[OMP_LOOP_IV172]], [[TMP17]] -// CHECK-NEXT: br i1 [[OMP_LOOP_CMP173]], label [[OMP_LOOP_BODY168:%.*]], label [[OMP_LOOP_EXIT170:%.*]] -// CHECK: omp_loop.exit170: -// CHECK-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM182]]) -// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM183:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM183]]) -// CHECK-NEXT: br label [[OMP_LOOP_AFTER171:%.*]] -// CHECK: omp_loop.after171: +// CHECK-NEXT: br label [[OMP_LOOP_HEADER162:%.*]] +// CHECK: omp_loop.header162: +// CHECK-NEXT: [[OMP_LOOP_IV168:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER161]] ], [ [[OMP_LOOP_NEXT170:%.*]], [[OMP_LOOP_INC165:%.*]] ] +// CHECK-NEXT: br label [[OMP_LOOP_COND163:%.*]] +// CHECK: omp_loop.cond163: +// CHECK-NEXT: [[OMP_LOOP_CMP169:%.*]] = icmp ult i32 [[OMP_LOOP_IV168]], [[TMP17]] +// CHECK-NEXT: br i1 [[OMP_LOOP_CMP169]], label [[OMP_LOOP_BODY164:%.*]], label [[OMP_LOOP_EXIT166:%.*]] +// CHECK: omp_loop.exit166: +// CHECK-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM178]]) +// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM179:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM179]]) +// CHECK-NEXT: br label [[OMP_LOOP_AFTER167:%.*]] +// CHECK: omp_loop.after167: // CHECK-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]] // CHECK: omp.par.region.parallel.after: // CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] // CHECK: omp.par.pre_finalize: -// CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT184_EXITSTUB:%.*]] -// CHECK: omp_loop.body168: -// CHECK-NEXT: [[TMP18:%.*]] = add i32 [[OMP_LOOP_IV172]], [[TMP14]] -// CHECK-NEXT: call void @__captured_stmt.18(ptr [[I160]], i32 [[TMP18]], ptr [[AGG_CAPTURED162]]) +// CHECK-NEXT: br label [[DOTFINI180:%.*]] +// CHECK: .fini180: +// CHECK-NEXT: br label [[OMP_PAR_EXIT_EXITSTUB:%.*]] +// CHECK: omp_loop.body164: +// CHECK-NEXT: [[TMP18:%.*]] = add i32 [[OMP_LOOP_IV168]], [[TMP14]] +// CHECK-NEXT: call void @__captured_stmt.18(ptr [[I156]], i32 [[TMP18]], ptr [[AGG_CAPTURED158]]) // CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4 -// CHECK-NEXT: [[CONV175:%.*]] = sitofp i32 [[TMP19]] to double +// CHECK-NEXT: [[CONV171:%.*]] = sitofp i32 [[TMP19]] to double // CHECK-NEXT: [[TMP20:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8 -// CHECK-NEXT: [[ADD176:%.*]] = fadd double [[CONV175]], [[TMP20]] -// CHECK-NEXT: [[CONV177:%.*]] = fptrunc double [[ADD176]] to float +// CHECK-NEXT: [[ADD172:%.*]] = fadd double [[CONV171]], [[TMP20]] +// CHECK-NEXT: [[CONV173:%.*]] = fptrunc double [[ADD172]] to float // CHECK-NEXT: [[TMP21:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8 -// CHECK-NEXT: store float [[CONV177]], ptr [[TMP21]], align 4 -// CHECK-NEXT: br label [[OMP_LOOP_INC169]] -// CHECK: omp_loop.inc169: -// CHECK-NEXT: [[OMP_LOOP_NEXT174]] = add nuw i32 [[OMP_LOOP_IV172]], 1 -// CHECK-NEXT: br label [[OMP_LOOP_HEADER166]] +// CHECK-NEXT: store float [[CONV173]], ptr [[TMP21]], align 4 +// CHECK-NEXT: br label [[OMP_LOOP_INC165]] +// CHECK: omp_loop.inc165: +// CHECK-NEXT: [[OMP_LOOP_NEXT170]] = add nuw i32 [[OMP_LOOP_IV168]], 1 +// CHECK-NEXT: br label [[OMP_LOOP_HEADER162]] // CHECK: omp_loop.body: // CHECK-NEXT: [[TMP22:%.*]] = add i32 [[OMP_LOOP_IV]], [[TMP6]] // CHECK-NEXT: call void @__captured_stmt.6(ptr [[I]], i32 [[TMP22]], ptr [[AGG_CAPTURED1]]) @@ -638,80 +641,79 @@ void parallel_for_2(float *r, int a, double b) { // // // CHECK-LABEL: define {{[^@]+}}@_Z14parallel_for_2Pfid..omp_par.22 -// CHECK-SAME: (ptr noalias [[TID_ADDR6:%.*]], ptr noalias [[ZERO_ADDR7:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] { -// CHECK-NEXT: omp.par.entry8: +// CHECK-SAME: (ptr noalias [[TID_ADDR4:%.*]], ptr noalias [[ZERO_ADDR5:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] { +// CHECK-NEXT: omp.par.entry6: // CHECK-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 0 -// CHECK-NEXT: [[LOADGEP_A_ADDR:%.*]] = load ptr, ptr [[GEP_A_ADDR]], align 8 +// CHECK-NEXT: [[LOADGEP_A_ADDR:%.*]] = load ptr, ptr [[GEP_A_ADDR]], align 8, !align [[META4]] // CHECK-NEXT: [[GEP_B_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 1 -// CHECK-NEXT: [[LOADGEP_B_ADDR:%.*]] = load ptr, ptr [[GEP_B_ADDR]], align 8 +// CHECK-NEXT: [[LOADGEP_B_ADDR:%.*]] = load ptr, ptr [[GEP_B_ADDR]], align 8, !align [[META7]] // CHECK-NEXT: [[GEP_R_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 2 -// CHECK-NEXT: [[LOADGEP_R_ADDR:%.*]] = load ptr, ptr [[GEP_R_ADDR]], align 8 -// CHECK-NEXT: [[STRUCTARG209:%.*]] = alloca { ptr, ptr, ptr }, align 8 +// CHECK-NEXT: [[LOADGEP_R_ADDR:%.*]] = load ptr, ptr [[GEP_R_ADDR]], align 8, !align [[META7]] +// CHECK-NEXT: [[STRUCTARG205:%.*]] = alloca { ptr, ptr, ptr }, align 8 // CHECK-NEXT: [[STRUCTARG:%.*]] = alloca { ptr, ptr, ptr }, align 8 -// CHECK-NEXT: [[P_LASTITER153:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_LOWERBOUND154:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_UPPERBOUND155:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_STRIDE156:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_LASTITER93:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_LOWERBOUND94:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_UPPERBOUND95:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_STRIDE96:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_LASTITER34:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_LOWERBOUND35:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_UPPERBOUND36:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_STRIDE37:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[TID_ADDR_LOCAL12:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR6]], align 4 -// CHECK-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL12]], align 4 -// CHECK-NEXT: [[TID13:%.*]] = load i32, ptr [[TID_ADDR_LOCAL12]], align 4 -// CHECK-NEXT: [[I16:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[AGG_CAPTURED17:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 -// CHECK-NEXT: [[AGG_CAPTURED18:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 -// CHECK-NEXT: [[DOTCOUNT_ADDR19:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[I75:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[AGG_CAPTURED76:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 8 -// CHECK-NEXT: [[AGG_CAPTURED77:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 4 -// CHECK-NEXT: [[DOTCOUNT_ADDR78:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[I135:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[AGG_CAPTURED136:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 8 -// CHECK-NEXT: [[AGG_CAPTURED137:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 4 -// CHECK-NEXT: [[DOTCOUNT_ADDR138:%.*]] = alloca i32, align 4 -// CHECK-NEXT: br label [[OMP_PAR_REGION9:%.*]] -// CHECK: omp.par.region9: -// CHECK-NEXT: store i32 0, ptr [[I16]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_5]], ptr [[AGG_CAPTURED17]], i32 0, i32 0 -// CHECK-NEXT: store ptr [[I16]], ptr [[TMP2]], align 8 -// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_6]], ptr [[AGG_CAPTURED18]], i32 0, i32 0 -// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I16]], align 4 +// CHECK-NEXT: [[P_LASTITER149:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_LOWERBOUND150:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_UPPERBOUND151:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_STRIDE152:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_LASTITER90:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_LOWERBOUND91:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_UPPERBOUND92:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_STRIDE93:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_LASTITER32:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_LOWERBOUND33:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_UPPERBOUND34:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_STRIDE35:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[TID_ADDR_LOCAL10:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR4]], align 4 +// CHECK-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL10]], align 4 +// CHECK-NEXT: [[TID11:%.*]] = load i32, ptr [[TID_ADDR_LOCAL10]], align 4 +// CHECK-NEXT: [[I14:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[AGG_CAPTURED15:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 +// CHECK-NEXT: [[AGG_CAPTURED16:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 +// CHECK-NEXT: [[DOTCOUNT_ADDR17:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[I72:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[AGG_CAPTURED73:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 8 +// CHECK-NEXT: [[AGG_CAPTURED74:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 4 +// CHECK-NEXT: [[DOTCOUNT_ADDR75:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[I131:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[AGG_CAPTURED132:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 8 +// CHECK-NEXT: [[AGG_CAPTURED133:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 4 +// CHECK-NEXT: [[DOTCOUNT_ADDR134:%.*]] = alloca i32, align 4 +// CHECK-NEXT: br label [[OMP_PAR_REGION7:%.*]] +// CHECK: omp.par.region7: +// CHECK-NEXT: store i32 0, ptr [[I14]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_5]], ptr [[AGG_CAPTURED15]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[I14]], ptr [[TMP2]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_6]], ptr [[AGG_CAPTURED16]], i32 0, i32 0 +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I14]], align 4 // CHECK-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 -// CHECK-NEXT: call void @__captured_stmt.7(ptr [[DOTCOUNT_ADDR19]], ptr [[AGG_CAPTURED17]]) -// CHECK-NEXT: [[DOTCOUNT20:%.*]] = load i32, ptr [[DOTCOUNT_ADDR19]], align 4 -// CHECK-NEXT: br label [[OMP_LOOP_PREHEADER21:%.*]] -// CHECK: omp_loop.preheader21: -// CHECK-NEXT: store i32 0, ptr [[P_LOWERBOUND35]], align 4 -// CHECK-NEXT: [[TMP5:%.*]] = sub i32 [[DOTCOUNT20]], 1 -// CHECK-NEXT: store i32 [[TMP5]], ptr [[P_UPPERBOUND36]], align 4 -// CHECK-NEXT: store i32 1, ptr [[P_STRIDE37]], align 4 -// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM38:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM38]], i32 34, ptr [[P_LASTITER34]], ptr [[P_LOWERBOUND35]], ptr [[P_UPPERBOUND36]], ptr [[P_STRIDE37]], i32 1, i32 0) -// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[P_LOWERBOUND35]], align 4 -// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[P_UPPERBOUND36]], align 4 +// CHECK-NEXT: call void @__captured_stmt.7(ptr [[DOTCOUNT_ADDR17]], ptr [[AGG_CAPTURED15]]) +// CHECK-NEXT: [[DOTCOUNT18:%.*]] = load i32, ptr [[DOTCOUNT_ADDR17]], align 4 +// CHECK-NEXT: br label [[OMP_LOOP_PREHEADER19:%.*]] +// CHECK: omp_loop.preheader19: +// CHECK-NEXT: store i32 0, ptr [[P_LOWERBOUND33]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = sub i32 [[DOTCOUNT18]], 1 +// CHECK-NEXT: store i32 [[TMP5]], ptr [[P_UPPERBOUND34]], align 4 +// CHECK-NEXT: store i32 1, ptr [[P_STRIDE35]], align 4 +// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM36:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM36]], i32 34, ptr [[P_LASTITER32]], ptr [[P_LOWERBOUND33]], ptr [[P_UPPERBOUND34]], ptr [[P_STRIDE35]], i32 1, i32 0) +// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[P_LOWERBOUND33]], align 4 +// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[P_UPPERBOUND34]], align 4 // CHECK-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], [[TMP6]] // CHECK-NEXT: [[TMP9:%.*]] = add i32 [[TMP8]], 1 -// CHECK-NEXT: br label [[OMP_LOOP_HEADER22:%.*]] -// CHECK: omp_loop.header22: -// CHECK-NEXT: [[OMP_LOOP_IV28:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER21]] ], [ [[OMP_LOOP_NEXT30:%.*]], [[OMP_LOOP_INC25:%.*]] ] -// CHECK-NEXT: br label [[OMP_LOOP_COND23:%.*]] -// CHECK: omp_loop.cond23: -// CHECK-NEXT: [[OMP_LOOP_CMP29:%.*]] = icmp ult i32 [[OMP_LOOP_IV28]], [[TMP9]] -// CHECK-NEXT: br i1 [[OMP_LOOP_CMP29]], label [[OMP_LOOP_BODY24:%.*]], label [[OMP_LOOP_EXIT26:%.*]] -// CHECK: omp_loop.exit26: -// CHECK-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM38]]) -// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM39:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM39]]) -// CHECK-NEXT: br label [[OMP_LOOP_AFTER27:%.*]] -// CHECK: omp_loop.after27: -// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM40:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-NEXT: br label [[OMP_LOOP_HEADER20:%.*]] +// CHECK: omp_loop.header20: +// CHECK-NEXT: [[OMP_LOOP_IV26:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER19]] ], [ [[OMP_LOOP_NEXT28:%.*]], [[OMP_LOOP_INC23:%.*]] ] +// CHECK-NEXT: br label [[OMP_LOOP_COND21:%.*]] +// CHECK: omp_loop.cond21: +// CHECK-NEXT: [[OMP_LOOP_CMP27:%.*]] = icmp ult i32 [[OMP_LOOP_IV26]], [[TMP9]] +// CHECK-NEXT: br i1 [[OMP_LOOP_CMP27]], label [[OMP_LOOP_BODY22:%.*]], label [[OMP_LOOP_EXIT24:%.*]] +// CHECK: omp_loop.exit24: +// CHECK-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM36]]) +// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM37:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM37]]) +// CHECK-NEXT: br label [[OMP_LOOP_AFTER25:%.*]] +// CHECK: omp_loop.after25: // CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] // CHECK: omp_parallel: // CHECK-NEXT: [[GEP_A_ADDR1:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 0 @@ -721,299 +723,302 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-NEXT: [[GEP_R_ADDR3:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 2 // CHECK-NEXT: store ptr [[LOADGEP_R_ADDR]], ptr [[GEP_R_ADDR3]], align 8 // CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z14parallel_for_2Pfid..omp_par, ptr [[STRUCTARG]]) -// CHECK-NEXT: br label [[OMP_PAR_EXIT:%.*]] -// CHECK: omp.par.exit46: -// CHECK-NEXT: store i32 0, ptr [[I75]], align 4 -// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_9]], ptr [[AGG_CAPTURED76]], i32 0, i32 0 -// CHECK-NEXT: store ptr [[I75]], ptr [[TMP10]], align 8 -// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_10]], ptr [[AGG_CAPTURED77]], i32 0, i32 0 -// CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[I75]], align 4 +// CHECK-NEXT: br label [[OMP_PAR_EXIT43:%.*]] +// CHECK: omp.par.exit43: +// CHECK-NEXT: store i32 0, ptr [[I72]], align 4 +// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_9]], ptr [[AGG_CAPTURED73]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[I72]], ptr [[TMP10]], align 8 +// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_10]], ptr [[AGG_CAPTURED74]], i32 0, i32 0 +// CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[I72]], align 4 // CHECK-NEXT: store i32 [[TMP12]], ptr [[TMP11]], align 4 -// CHECK-NEXT: call void @__captured_stmt.11(ptr [[DOTCOUNT_ADDR78]], ptr [[AGG_CAPTURED76]]) -// CHECK-NEXT: [[DOTCOUNT79:%.*]] = load i32, ptr [[DOTCOUNT_ADDR78]], align 4 -// CHECK-NEXT: br label [[OMP_LOOP_PREHEADER80:%.*]] -// CHECK: omp_loop.preheader80: -// CHECK-NEXT: store i32 0, ptr [[P_LOWERBOUND94]], align 4 -// CHECK-NEXT: [[TMP13:%.*]] = sub i32 [[DOTCOUNT79]], 1 -// CHECK-NEXT: store i32 [[TMP13]], ptr [[P_UPPERBOUND95]], align 4 -// CHECK-NEXT: store i32 1, ptr [[P_STRIDE96]], align 4 -// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM97:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM97]], i32 34, ptr [[P_LASTITER93]], ptr [[P_LOWERBOUND94]], ptr [[P_UPPERBOUND95]], ptr [[P_STRIDE96]], i32 1, i32 0) -// CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[P_LOWERBOUND94]], align 4 -// CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[P_UPPERBOUND95]], align 4 +// CHECK-NEXT: call void @__captured_stmt.11(ptr [[DOTCOUNT_ADDR75]], ptr [[AGG_CAPTURED73]]) +// CHECK-NEXT: [[DOTCOUNT76:%.*]] = load i32, ptr [[DOTCOUNT_ADDR75]], align 4 +// CHECK-NEXT: br label [[OMP_LOOP_PREHEADER77:%.*]] +// CHECK: omp_loop.preheader77: +// CHECK-NEXT: store i32 0, ptr [[P_LOWERBOUND91]], align 4 +// CHECK-NEXT: [[TMP13:%.*]] = sub i32 [[DOTCOUNT76]], 1 +// CHECK-NEXT: store i32 [[TMP13]], ptr [[P_UPPERBOUND92]], align 4 +// CHECK-NEXT: store i32 1, ptr [[P_STRIDE93]], align 4 +// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM94:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM94]], i32 34, ptr [[P_LASTITER90]], ptr [[P_LOWERBOUND91]], ptr [[P_UPPERBOUND92]], ptr [[P_STRIDE93]], i32 1, i32 0) +// CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[P_LOWERBOUND91]], align 4 +// CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[P_UPPERBOUND92]], align 4 // CHECK-NEXT: [[TMP16:%.*]] = sub i32 [[TMP15]], [[TMP14]] // CHECK-NEXT: [[TMP17:%.*]] = add i32 [[TMP16]], 1 -// CHECK-NEXT: br label [[OMP_LOOP_HEADER81:%.*]] -// CHECK: omp_loop.header81: -// CHECK-NEXT: [[OMP_LOOP_IV87:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER80]] ], [ [[OMP_LOOP_NEXT89:%.*]], [[OMP_LOOP_INC84:%.*]] ] -// CHECK-NEXT: br label [[OMP_LOOP_COND82:%.*]] -// CHECK: omp_loop.cond82: -// CHECK-NEXT: [[OMP_LOOP_CMP88:%.*]] = icmp ult i32 [[OMP_LOOP_IV87]], [[TMP17]] -// CHECK-NEXT: br i1 [[OMP_LOOP_CMP88]], label [[OMP_LOOP_BODY83:%.*]], label [[OMP_LOOP_EXIT85:%.*]] -// CHECK: omp_loop.exit85: -// CHECK-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM97]]) -// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM98:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM98]]) -// CHECK-NEXT: br label [[OMP_LOOP_AFTER86:%.*]] -// CHECK: omp_loop.after86: -// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM99:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK-NEXT: br label [[OMP_PARALLEL213:%.*]] -// CHECK: omp_parallel213: -// CHECK-NEXT: [[GEP_A_ADDR210:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG209]], i32 0, i32 0 -// CHECK-NEXT: store ptr [[LOADGEP_A_ADDR]], ptr [[GEP_A_ADDR210]], align 8 -// CHECK-NEXT: [[GEP_B_ADDR211:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG209]], i32 0, i32 1 -// CHECK-NEXT: store ptr [[LOADGEP_B_ADDR]], ptr [[GEP_B_ADDR211]], align 8 -// CHECK-NEXT: [[GEP_R_ADDR212:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG209]], i32 0, i32 2 -// CHECK-NEXT: store ptr [[LOADGEP_R_ADDR]], ptr [[GEP_R_ADDR212]], align 8 -// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z14parallel_for_2Pfid..omp_par.21, ptr [[STRUCTARG209]]) -// CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT134:%.*]] -// CHECK: omp.par.exit105: -// CHECK-NEXT: store i32 0, ptr [[I135]], align 4 -// CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_13]], ptr [[AGG_CAPTURED136]], i32 0, i32 0 -// CHECK-NEXT: store ptr [[I135]], ptr [[TMP18]], align 8 -// CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_14]], ptr [[AGG_CAPTURED137]], i32 0, i32 0 -// CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[I135]], align 4 +// CHECK-NEXT: br label [[OMP_LOOP_HEADER78:%.*]] +// CHECK: omp_loop.header78: +// CHECK-NEXT: [[OMP_LOOP_IV84:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER77]] ], [ [[OMP_LOOP_NEXT86:%.*]], [[OMP_LOOP_INC81:%.*]] ] +// CHECK-NEXT: br label [[OMP_LOOP_COND79:%.*]] +// CHECK: omp_loop.cond79: +// CHECK-NEXT: [[OMP_LOOP_CMP85:%.*]] = icmp ult i32 [[OMP_LOOP_IV84]], [[TMP17]] +// CHECK-NEXT: br i1 [[OMP_LOOP_CMP85]], label [[OMP_LOOP_BODY80:%.*]], label [[OMP_LOOP_EXIT82:%.*]] +// CHECK: omp_loop.exit82: +// CHECK-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM94]]) +// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM95:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM95]]) +// CHECK-NEXT: br label [[OMP_LOOP_AFTER83:%.*]] +// CHECK: omp_loop.after83: +// CHECK-NEXT: br label [[OMP_PARALLEL209:%.*]] +// CHECK: omp_parallel209: +// CHECK-NEXT: [[GEP_A_ADDR206:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG205]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[LOADGEP_A_ADDR]], ptr [[GEP_A_ADDR206]], align 8 +// CHECK-NEXT: [[GEP_B_ADDR207:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG205]], i32 0, i32 1 +// CHECK-NEXT: store ptr [[LOADGEP_B_ADDR]], ptr [[GEP_B_ADDR207]], align 8 +// CHECK-NEXT: [[GEP_R_ADDR208:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG205]], i32 0, i32 2 +// CHECK-NEXT: store ptr [[LOADGEP_R_ADDR]], ptr [[GEP_R_ADDR208]], align 8 +// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z14parallel_for_2Pfid..omp_par.21, ptr [[STRUCTARG205]]) +// CHECK-NEXT: br label [[OMP_PAR_EXIT101:%.*]] +// CHECK: omp.par.exit101: +// CHECK-NEXT: store i32 0, ptr [[I131]], align 4 +// CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_13]], ptr [[AGG_CAPTURED132]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[I131]], ptr [[TMP18]], align 8 +// CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_14]], ptr [[AGG_CAPTURED133]], i32 0, i32 0 +// CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[I131]], align 4 // CHECK-NEXT: store i32 [[TMP20]], ptr [[TMP19]], align 4 -// CHECK-NEXT: call void @__captured_stmt.15(ptr [[DOTCOUNT_ADDR138]], ptr [[AGG_CAPTURED136]]) -// CHECK-NEXT: [[DOTCOUNT139:%.*]] = load i32, ptr [[DOTCOUNT_ADDR138]], align 4 -// CHECK-NEXT: br label [[OMP_LOOP_PREHEADER140:%.*]] -// CHECK: omp_loop.preheader140: -// CHECK-NEXT: store i32 0, ptr [[P_LOWERBOUND154]], align 4 -// CHECK-NEXT: [[TMP21:%.*]] = sub i32 [[DOTCOUNT139]], 1 -// CHECK-NEXT: store i32 [[TMP21]], ptr [[P_UPPERBOUND155]], align 4 -// CHECK-NEXT: store i32 1, ptr [[P_STRIDE156]], align 4 -// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM157:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM157]], i32 34, ptr [[P_LASTITER153]], ptr [[P_LOWERBOUND154]], ptr [[P_UPPERBOUND155]], ptr [[P_STRIDE156]], i32 1, i32 0) -// CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[P_LOWERBOUND154]], align 4 -// CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[P_UPPERBOUND155]], align 4 +// CHECK-NEXT: call void @__captured_stmt.15(ptr [[DOTCOUNT_ADDR134]], ptr [[AGG_CAPTURED132]]) +// CHECK-NEXT: [[DOTCOUNT135:%.*]] = load i32, ptr [[DOTCOUNT_ADDR134]], align 4 +// CHECK-NEXT: br label [[OMP_LOOP_PREHEADER136:%.*]] +// CHECK: omp_loop.preheader136: +// CHECK-NEXT: store i32 0, ptr [[P_LOWERBOUND150]], align 4 +// CHECK-NEXT: [[TMP21:%.*]] = sub i32 [[DOTCOUNT135]], 1 +// CHECK-NEXT: store i32 [[TMP21]], ptr [[P_UPPERBOUND151]], align 4 +// CHECK-NEXT: store i32 1, ptr [[P_STRIDE152]], align 4 +// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM153:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM153]], i32 34, ptr [[P_LASTITER149]], ptr [[P_LOWERBOUND150]], ptr [[P_UPPERBOUND151]], ptr [[P_STRIDE152]], i32 1, i32 0) +// CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[P_LOWERBOUND150]], align 4 +// CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[P_UPPERBOUND151]], align 4 // CHECK-NEXT: [[TMP24:%.*]] = sub i32 [[TMP23]], [[TMP22]] // CHECK-NEXT: [[TMP25:%.*]] = add i32 [[TMP24]], 1 -// CHECK-NEXT: br label [[OMP_LOOP_HEADER141:%.*]] -// CHECK: omp_loop.header141: -// CHECK-NEXT: [[OMP_LOOP_IV147:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER140]] ], [ [[OMP_LOOP_NEXT149:%.*]], [[OMP_LOOP_INC144:%.*]] ] -// CHECK-NEXT: br label [[OMP_LOOP_COND142:%.*]] -// CHECK: omp_loop.cond142: -// CHECK-NEXT: [[OMP_LOOP_CMP148:%.*]] = icmp ult i32 [[OMP_LOOP_IV147]], [[TMP25]] -// CHECK-NEXT: br i1 [[OMP_LOOP_CMP148]], label [[OMP_LOOP_BODY143:%.*]], label [[OMP_LOOP_EXIT145:%.*]] -// CHECK: omp_loop.exit145: -// CHECK-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM157]]) -// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM158:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM158]]) -// CHECK-NEXT: br label [[OMP_LOOP_AFTER146:%.*]] -// CHECK: omp_loop.after146: -// CHECK-NEXT: br label [[OMP_PAR_REGION9_PARALLEL_AFTER:%.*]] -// CHECK: omp.par.region9.parallel.after: -// CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE10:%.*]] -// CHECK: omp.par.pre_finalize10: -// CHECK-NEXT: br label [[FINI159:%.*]] -// CHECK: .fini159: -// CHECK-NEXT: br label [[OMP_PAR_EXIT11_EXITSTUB:%.*]] -// CHECK: omp_loop.body143: -// CHECK-NEXT: [[TMP26:%.*]] = add i32 [[OMP_LOOP_IV147]], [[TMP22]] -// CHECK-NEXT: call void @__captured_stmt.16(ptr [[I135]], i32 [[TMP26]], ptr [[AGG_CAPTURED137]]) +// CHECK-NEXT: br label [[OMP_LOOP_HEADER137:%.*]] +// CHECK: omp_loop.header137: +// CHECK-NEXT: [[OMP_LOOP_IV143:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER136]] ], [ [[OMP_LOOP_NEXT145:%.*]], [[OMP_LOOP_INC140:%.*]] ] +// CHECK-NEXT: br label [[OMP_LOOP_COND138:%.*]] +// CHECK: omp_loop.cond138: +// CHECK-NEXT: [[OMP_LOOP_CMP144:%.*]] = icmp ult i32 [[OMP_LOOP_IV143]], [[TMP25]] +// CHECK-NEXT: br i1 [[OMP_LOOP_CMP144]], label [[OMP_LOOP_BODY139:%.*]], label [[OMP_LOOP_EXIT141:%.*]] +// CHECK: omp_loop.exit141: +// CHECK-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM153]]) +// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM154:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM154]]) +// CHECK-NEXT: br label [[OMP_LOOP_AFTER142:%.*]] +// CHECK: omp_loop.after142: +// CHECK-NEXT: br label [[OMP_PAR_REGION7_PARALLEL_AFTER:%.*]] +// CHECK: omp.par.region7.parallel.after: +// CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE8:%.*]] +// CHECK: omp.par.pre_finalize8: +// CHECK-NEXT: br label [[DOTFINI155:%.*]] +// CHECK: .fini155: +// CHECK-NEXT: br label [[OMP_PAR_EXIT9_EXITSTUB:%.*]] +// CHECK: omp_loop.body139: +// CHECK-NEXT: [[TMP26:%.*]] = add i32 [[OMP_LOOP_IV143]], [[TMP22]] +// CHECK-NEXT: call void @__captured_stmt.16(ptr [[I131]], i32 [[TMP26]], ptr [[AGG_CAPTURED133]]) // CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4 -// CHECK-NEXT: [[CONV150:%.*]] = sitofp i32 [[TMP27]] to double +// CHECK-NEXT: [[CONV146:%.*]] = sitofp i32 [[TMP27]] to double // CHECK-NEXT: [[TMP28:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8 -// CHECK-NEXT: [[ADD151:%.*]] = fadd double [[CONV150]], [[TMP28]] -// CHECK-NEXT: [[CONV152:%.*]] = fptrunc double [[ADD151]] to float +// CHECK-NEXT: [[ADD147:%.*]] = fadd double [[CONV146]], [[TMP28]] +// CHECK-NEXT: [[CONV148:%.*]] = fptrunc double [[ADD147]] to float // CHECK-NEXT: [[TMP29:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8 -// CHECK-NEXT: store float [[CONV152]], ptr [[TMP29]], align 4 -// CHECK-NEXT: br label [[OMP_LOOP_INC144]] -// CHECK: omp_loop.inc144: -// CHECK-NEXT: [[OMP_LOOP_NEXT149]] = add nuw i32 [[OMP_LOOP_IV147]], 1 -// CHECK-NEXT: br label [[OMP_LOOP_HEADER141]] -// CHECK: omp_loop.body83: -// CHECK-NEXT: [[TMP30:%.*]] = add i32 [[OMP_LOOP_IV87]], [[TMP14]] -// CHECK-NEXT: call void @__captured_stmt.12(ptr [[I75]], i32 [[TMP30]], ptr [[AGG_CAPTURED77]]) +// CHECK-NEXT: store float [[CONV148]], ptr [[TMP29]], align 4 +// CHECK-NEXT: br label [[OMP_LOOP_INC140]] +// CHECK: omp_loop.inc140: +// CHECK-NEXT: [[OMP_LOOP_NEXT145]] = add nuw i32 [[OMP_LOOP_IV143]], 1 +// CHECK-NEXT: br label [[OMP_LOOP_HEADER137]] +// CHECK: omp_loop.body80: +// CHECK-NEXT: [[TMP30:%.*]] = add i32 [[OMP_LOOP_IV84]], [[TMP14]] +// CHECK-NEXT: call void @__captured_stmt.12(ptr [[I72]], i32 [[TMP30]], ptr [[AGG_CAPTURED74]]) // CHECK-NEXT: [[TMP31:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4 -// CHECK-NEXT: [[CONV90:%.*]] = sitofp i32 [[TMP31]] to double +// CHECK-NEXT: [[CONV87:%.*]] = sitofp i32 [[TMP31]] to double // CHECK-NEXT: [[TMP32:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8 -// CHECK-NEXT: [[ADD91:%.*]] = fadd double [[CONV90]], [[TMP32]] -// CHECK-NEXT: [[CONV92:%.*]] = fptrunc double [[ADD91]] to float +// CHECK-NEXT: [[ADD88:%.*]] = fadd double [[CONV87]], [[TMP32]] +// CHECK-NEXT: [[CONV89:%.*]] = fptrunc double [[ADD88]] to float // CHECK-NEXT: [[TMP33:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8 -// CHECK-NEXT: store float [[CONV92]], ptr [[TMP33]], align 4 -// CHECK-NEXT: br label [[OMP_LOOP_INC84]] -// CHECK: omp_loop.inc84: -// CHECK-NEXT: [[OMP_LOOP_NEXT89]] = add nuw i32 [[OMP_LOOP_IV87]], 1 -// CHECK-NEXT: br label [[OMP_LOOP_HEADER81]] -// CHECK: omp_loop.body24: -// CHECK-NEXT: [[TMP34:%.*]] = add i32 [[OMP_LOOP_IV28]], [[TMP6]] -// CHECK-NEXT: call void @__captured_stmt.8(ptr [[I16]], i32 [[TMP34]], ptr [[AGG_CAPTURED18]]) +// CHECK-NEXT: store float [[CONV89]], ptr [[TMP33]], align 4 +// CHECK-NEXT: br label [[OMP_LOOP_INC81]] +// CHECK: omp_loop.inc81: +// CHECK-NEXT: [[OMP_LOOP_NEXT86]] = add nuw i32 [[OMP_LOOP_IV84]], 1 +// CHECK-NEXT: br label [[OMP_LOOP_HEADER78]] +// CHECK: omp_loop.body22: +// CHECK-NEXT: [[TMP34:%.*]] = add i32 [[OMP_LOOP_IV26]], [[TMP6]] +// CHECK-NEXT: call void @__captured_stmt.8(ptr [[I14]], i32 [[TMP34]], ptr [[AGG_CAPTURED16]]) // CHECK-NEXT: [[TMP35:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4 -// CHECK-NEXT: [[CONV31:%.*]] = sitofp i32 [[TMP35]] to double +// CHECK-NEXT: [[CONV29:%.*]] = sitofp i32 [[TMP35]] to double // CHECK-NEXT: [[TMP36:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8 -// CHECK-NEXT: [[ADD32:%.*]] = fadd double [[CONV31]], [[TMP36]] -// CHECK-NEXT: [[CONV33:%.*]] = fptrunc double [[ADD32]] to float +// CHECK-NEXT: [[ADD30:%.*]] = fadd double [[CONV29]], [[TMP36]] +// CHECK-NEXT: [[CONV31:%.*]] = fptrunc double [[ADD30]] to float // CHECK-NEXT: [[TMP37:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8 -// CHECK-NEXT: store float [[CONV33]], ptr [[TMP37]], align 4 -// CHECK-NEXT: br label [[OMP_LOOP_INC25]] -// CHECK: omp_loop.inc25: -// CHECK-NEXT: [[OMP_LOOP_NEXT30]] = add nuw i32 [[OMP_LOOP_IV28]], 1 -// CHECK-NEXT: br label [[OMP_LOOP_HEADER22]] -// CHECK: omp.par.exit11.exitStub: +// CHECK-NEXT: store float [[CONV31]], ptr [[TMP37]], align 4 +// CHECK-NEXT: br label [[OMP_LOOP_INC23]] +// CHECK: omp_loop.inc23: +// CHECK-NEXT: [[OMP_LOOP_NEXT28]] = add nuw i32 [[OMP_LOOP_IV26]], 1 +// CHECK-NEXT: br label [[OMP_LOOP_HEADER20]] +// CHECK: omp.par.exit9.exitStub: // CHECK-NEXT: ret void // // // CHECK-LABEL: define {{[^@]+}}@_Z14parallel_for_2Pfid..omp_par.21 -// CHECK-SAME: (ptr noalias [[TID_ADDR100:%.*]], ptr noalias [[ZERO_ADDR101:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] { -// CHECK-NEXT: omp.par.entry102: +// CHECK-SAME: (ptr noalias [[TID_ADDR96:%.*]], ptr noalias [[ZERO_ADDR97:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] { +// CHECK-NEXT: omp.par.entry98: // CHECK-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 0 -// CHECK-NEXT: [[LOADGEP_A_ADDR:%.*]] = load ptr, ptr [[GEP_A_ADDR]], align 8 +// CHECK-NEXT: [[LOADGEP_A_ADDR:%.*]] = load ptr, ptr [[GEP_A_ADDR]], align 8, !align [[META4]] // CHECK-NEXT: [[GEP_B_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 1 -// CHECK-NEXT: [[LOADGEP_B_ADDR:%.*]] = load ptr, ptr [[GEP_B_ADDR]], align 8 +// CHECK-NEXT: [[LOADGEP_B_ADDR:%.*]] = load ptr, ptr [[GEP_B_ADDR]], align 8, !align [[META7]] // CHECK-NEXT: [[GEP_R_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 2 -// CHECK-NEXT: [[LOADGEP_R_ADDR:%.*]] = load ptr, ptr [[GEP_R_ADDR]], align 8 -// CHECK-NEXT: [[P_LASTITER128:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_LOWERBOUND129:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_UPPERBOUND130:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_STRIDE131:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[TID_ADDR_LOCAL106:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR100]], align 4 -// CHECK-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL106]], align 4 -// CHECK-NEXT: [[TID107:%.*]] = load i32, ptr [[TID_ADDR_LOCAL106]], align 4 -// CHECK-NEXT: [[I110:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[AGG_CAPTURED111:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 8 -// CHECK-NEXT: [[AGG_CAPTURED112:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 4 -// CHECK-NEXT: [[DOTCOUNT_ADDR113:%.*]] = alloca i32, align 4 -// CHECK-NEXT: br label [[OMP_PAR_REGION103:%.*]] -// CHECK: omp.par.region103: -// CHECK-NEXT: store i32 0, ptr [[I110]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_11]], ptr [[AGG_CAPTURED111]], i32 0, i32 0 -// CHECK-NEXT: store ptr [[I110]], ptr [[TMP2]], align 8 -// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_12]], ptr [[AGG_CAPTURED112]], i32 0, i32 0 -// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I110]], align 4 +// CHECK-NEXT: [[LOADGEP_R_ADDR:%.*]] = load ptr, ptr [[GEP_R_ADDR]], align 8, !align [[META7]] +// CHECK-NEXT: [[P_LASTITER124:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_LOWERBOUND125:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_UPPERBOUND126:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_STRIDE127:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[TID_ADDR_LOCAL102:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR96]], align 4 +// CHECK-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL102]], align 4 +// CHECK-NEXT: [[TID103:%.*]] = load i32, ptr [[TID_ADDR_LOCAL102]], align 4 +// CHECK-NEXT: [[I106:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[AGG_CAPTURED107:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 8 +// CHECK-NEXT: [[AGG_CAPTURED108:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 4 +// CHECK-NEXT: [[DOTCOUNT_ADDR109:%.*]] = alloca i32, align 4 +// CHECK-NEXT: br label [[OMP_PAR_REGION99:%.*]] +// CHECK: omp.par.region99: +// CHECK-NEXT: store i32 0, ptr [[I106]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_11]], ptr [[AGG_CAPTURED107]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[I106]], ptr [[TMP2]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_12]], ptr [[AGG_CAPTURED108]], i32 0, i32 0 +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I106]], align 4 // CHECK-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 -// CHECK-NEXT: call void @__captured_stmt.13(ptr [[DOTCOUNT_ADDR113]], ptr [[AGG_CAPTURED111]]) -// CHECK-NEXT: [[DOTCOUNT114:%.*]] = load i32, ptr [[DOTCOUNT_ADDR113]], align 4 -// CHECK-NEXT: br label [[OMP_LOOP_PREHEADER115:%.*]] -// CHECK: omp_loop.preheader115: -// CHECK-NEXT: store i32 0, ptr [[P_LOWERBOUND129]], align 4 -// CHECK-NEXT: [[TMP5:%.*]] = sub i32 [[DOTCOUNT114]], 1 -// CHECK-NEXT: store i32 [[TMP5]], ptr [[P_UPPERBOUND130]], align 4 -// CHECK-NEXT: store i32 1, ptr [[P_STRIDE131]], align 4 -// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM132:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM132]], i32 34, ptr [[P_LASTITER128]], ptr [[P_LOWERBOUND129]], ptr [[P_UPPERBOUND130]], ptr [[P_STRIDE131]], i32 1, i32 0) -// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[P_LOWERBOUND129]], align 4 -// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[P_UPPERBOUND130]], align 4 +// CHECK-NEXT: call void @__captured_stmt.13(ptr [[DOTCOUNT_ADDR109]], ptr [[AGG_CAPTURED107]]) +// CHECK-NEXT: [[DOTCOUNT110:%.*]] = load i32, ptr [[DOTCOUNT_ADDR109]], align 4 +// CHECK-NEXT: br label [[OMP_LOOP_PREHEADER111:%.*]] +// CHECK: omp_loop.preheader111: +// CHECK-NEXT: store i32 0, ptr [[P_LOWERBOUND125]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = sub i32 [[DOTCOUNT110]], 1 +// CHECK-NEXT: store i32 [[TMP5]], ptr [[P_UPPERBOUND126]], align 4 +// CHECK-NEXT: store i32 1, ptr [[P_STRIDE127]], align 4 +// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM128:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM128]], i32 34, ptr [[P_LASTITER124]], ptr [[P_LOWERBOUND125]], ptr [[P_UPPERBOUND126]], ptr [[P_STRIDE127]], i32 1, i32 0) +// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[P_LOWERBOUND125]], align 4 +// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[P_UPPERBOUND126]], align 4 // CHECK-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], [[TMP6]] // CHECK-NEXT: [[TMP9:%.*]] = add i32 [[TMP8]], 1 -// CHECK-NEXT: br label [[OMP_LOOP_HEADER116:%.*]] -// CHECK: omp_loop.header116: -// CHECK-NEXT: [[OMP_LOOP_IV122:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER115]] ], [ [[OMP_LOOP_NEXT124:%.*]], [[OMP_LOOP_INC119:%.*]] ] -// CHECK-NEXT: br label [[OMP_LOOP_COND117:%.*]] -// CHECK: omp_loop.cond117: -// CHECK-NEXT: [[OMP_LOOP_CMP123:%.*]] = icmp ult i32 [[OMP_LOOP_IV122]], [[TMP9]] -// CHECK-NEXT: br i1 [[OMP_LOOP_CMP123]], label [[OMP_LOOP_BODY118:%.*]], label [[OMP_LOOP_EXIT120:%.*]] -// CHECK: omp_loop.exit120: -// CHECK-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM132]]) -// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM133:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM133]]) -// CHECK-NEXT: br label [[OMP_LOOP_AFTER121:%.*]] -// CHECK: omp_loop.after121: -// CHECK-NEXT: br label [[OMP_PAR_REGION103_PARALLEL_AFTER:%.*]] -// CHECK: omp.par.region103.parallel.after: -// CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE104:%.*]] -// CHECK: omp.par.pre_finalize104: -// CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT134_EXITSTUB:%.*]] -// CHECK: omp_loop.body118: -// CHECK-NEXT: [[TMP10:%.*]] = add i32 [[OMP_LOOP_IV122]], [[TMP6]] -// CHECK-NEXT: call void @__captured_stmt.14(ptr [[I110]], i32 [[TMP10]], ptr [[AGG_CAPTURED112]]) +// CHECK-NEXT: br label [[OMP_LOOP_HEADER112:%.*]] +// CHECK: omp_loop.header112: +// CHECK-NEXT: [[OMP_LOOP_IV118:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER111]] ], [ [[OMP_LOOP_NEXT120:%.*]], [[OMP_LOOP_INC115:%.*]] ] +// CHECK-NEXT: br label [[OMP_LOOP_COND113:%.*]] +// CHECK: omp_loop.cond113: +// CHECK-NEXT: [[OMP_LOOP_CMP119:%.*]] = icmp ult i32 [[OMP_LOOP_IV118]], [[TMP9]] +// CHECK-NEXT: br i1 [[OMP_LOOP_CMP119]], label [[OMP_LOOP_BODY114:%.*]], label [[OMP_LOOP_EXIT116:%.*]] +// CHECK: omp_loop.exit116: +// CHECK-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM128]]) +// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM129:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM129]]) +// CHECK-NEXT: br label [[OMP_LOOP_AFTER117:%.*]] +// CHECK: omp_loop.after117: +// CHECK-NEXT: br label [[OMP_PAR_REGION99_PARALLEL_AFTER:%.*]] +// CHECK: omp.par.region99.parallel.after: +// CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE100:%.*]] +// CHECK: omp.par.pre_finalize100: +// CHECK-NEXT: br label [[DOTFINI130:%.*]] +// CHECK: .fini130: +// CHECK-NEXT: br label [[OMP_PAR_EXIT101_EXITSTUB:%.*]] +// CHECK: omp_loop.body114: +// CHECK-NEXT: [[TMP10:%.*]] = add i32 [[OMP_LOOP_IV118]], [[TMP6]] +// CHECK-NEXT: call void @__captured_stmt.14(ptr [[I106]], i32 [[TMP10]], ptr [[AGG_CAPTURED108]]) // CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4 -// CHECK-NEXT: [[CONV125:%.*]] = sitofp i32 [[TMP11]] to double +// CHECK-NEXT: [[CONV121:%.*]] = sitofp i32 [[TMP11]] to double // CHECK-NEXT: [[TMP12:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8 -// CHECK-NEXT: [[ADD126:%.*]] = fadd double [[CONV125]], [[TMP12]] -// CHECK-NEXT: [[CONV127:%.*]] = fptrunc double [[ADD126]] to float +// CHECK-NEXT: [[ADD122:%.*]] = fadd double [[CONV121]], [[TMP12]] +// CHECK-NEXT: [[CONV123:%.*]] = fptrunc double [[ADD122]] to float // CHECK-NEXT: [[TMP13:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8 -// CHECK-NEXT: store float [[CONV127]], ptr [[TMP13]], align 4 -// CHECK-NEXT: br label [[OMP_LOOP_INC119]] -// CHECK: omp_loop.inc119: -// CHECK-NEXT: [[OMP_LOOP_NEXT124]] = add nuw i32 [[OMP_LOOP_IV122]], 1 -// CHECK-NEXT: br label [[OMP_LOOP_HEADER116]] -// CHECK: omp.par.exit105.exitStub: +// CHECK-NEXT: store float [[CONV123]], ptr [[TMP13]], align 4 +// CHECK-NEXT: br label [[OMP_LOOP_INC115]] +// CHECK: omp_loop.inc115: +// CHECK-NEXT: [[OMP_LOOP_NEXT120]] = add nuw i32 [[OMP_LOOP_IV118]], 1 +// CHECK-NEXT: br label [[OMP_LOOP_HEADER112]] +// CHECK: omp.par.exit101.exitStub: // CHECK-NEXT: ret void // // // CHECK-LABEL: define {{[^@]+}}@_Z14parallel_for_2Pfid..omp_par -// CHECK-SAME: (ptr noalias [[TID_ADDR41:%.*]], ptr noalias [[ZERO_ADDR42:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] { -// CHECK-NEXT: omp.par.entry43: +// CHECK-SAME: (ptr noalias [[TID_ADDR38:%.*]], ptr noalias [[ZERO_ADDR39:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] { +// CHECK-NEXT: omp.par.entry40: // CHECK-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 0 -// CHECK-NEXT: [[LOADGEP_A_ADDR:%.*]] = load ptr, ptr [[GEP_A_ADDR]], align 8 +// CHECK-NEXT: [[LOADGEP_A_ADDR:%.*]] = load ptr, ptr [[GEP_A_ADDR]], align 8, !align [[META4]] // CHECK-NEXT: [[GEP_B_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 1 -// CHECK-NEXT: [[LOADGEP_B_ADDR:%.*]] = load ptr, ptr [[GEP_B_ADDR]], align 8 +// CHECK-NEXT: [[LOADGEP_B_ADDR:%.*]] = load ptr, ptr [[GEP_B_ADDR]], align 8, !align [[META7]] // CHECK-NEXT: [[GEP_R_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 2 -// CHECK-NEXT: [[LOADGEP_R_ADDR:%.*]] = load ptr, ptr [[GEP_R_ADDR]], align 8 -// CHECK-NEXT: [[P_LASTITER69:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_LOWERBOUND70:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_UPPERBOUND71:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_STRIDE72:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[TID_ADDR_LOCAL47:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR41]], align 4 -// CHECK-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL47]], align 4 -// CHECK-NEXT: [[TID48:%.*]] = load i32, ptr [[TID_ADDR_LOCAL47]], align 4 -// CHECK-NEXT: [[I51:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[AGG_CAPTURED52:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 -// CHECK-NEXT: [[AGG_CAPTURED53:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 -// CHECK-NEXT: [[DOTCOUNT_ADDR54:%.*]] = alloca i32, align 4 -// CHECK-NEXT: br label [[OMP_PAR_REGION44:%.*]] -// CHECK: omp.par.region44: -// CHECK-NEXT: store i32 0, ptr [[I51]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_7]], ptr [[AGG_CAPTURED52]], i32 0, i32 0 -// CHECK-NEXT: store ptr [[I51]], ptr [[TMP2]], align 8 -// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_8]], ptr [[AGG_CAPTURED53]], i32 0, i32 0 -// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I51]], align 4 +// CHECK-NEXT: [[LOADGEP_R_ADDR:%.*]] = load ptr, ptr [[GEP_R_ADDR]], align 8, !align [[META7]] +// CHECK-NEXT: [[P_LASTITER66:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_LOWERBOUND67:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_UPPERBOUND68:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_STRIDE69:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[TID_ADDR_LOCAL44:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR38]], align 4 +// CHECK-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL44]], align 4 +// CHECK-NEXT: [[TID45:%.*]] = load i32, ptr [[TID_ADDR_LOCAL44]], align 4 +// CHECK-NEXT: [[I48:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[AGG_CAPTURED49:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 +// CHECK-NEXT: [[AGG_CAPTURED50:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 +// CHECK-NEXT: [[DOTCOUNT_ADDR51:%.*]] = alloca i32, align 4 +// CHECK-NEXT: br label [[OMP_PAR_REGION41:%.*]] +// CHECK: omp.par.region41: +// CHECK-NEXT: store i32 0, ptr [[I48]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_7]], ptr [[AGG_CAPTURED49]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[I48]], ptr [[TMP2]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_8]], ptr [[AGG_CAPTURED50]], i32 0, i32 0 +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I48]], align 4 // CHECK-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 -// CHECK-NEXT: call void @__captured_stmt.9(ptr [[DOTCOUNT_ADDR54]], ptr [[AGG_CAPTURED52]]) -// CHECK-NEXT: [[DOTCOUNT55:%.*]] = load i32, ptr [[DOTCOUNT_ADDR54]], align 4 -// CHECK-NEXT: br label [[OMP_LOOP_PREHEADER56:%.*]] -// CHECK: omp_loop.preheader56: -// CHECK-NEXT: store i32 0, ptr [[P_LOWERBOUND70]], align 4 -// CHECK-NEXT: [[TMP5:%.*]] = sub i32 [[DOTCOUNT55]], 1 -// CHECK-NEXT: store i32 [[TMP5]], ptr [[P_UPPERBOUND71]], align 4 -// CHECK-NEXT: store i32 1, ptr [[P_STRIDE72]], align 4 -// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM73:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM73]], i32 34, ptr [[P_LASTITER69]], ptr [[P_LOWERBOUND70]], ptr [[P_UPPERBOUND71]], ptr [[P_STRIDE72]], i32 1, i32 0) -// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[P_LOWERBOUND70]], align 4 -// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[P_UPPERBOUND71]], align 4 +// CHECK-NEXT: call void @__captured_stmt.9(ptr [[DOTCOUNT_ADDR51]], ptr [[AGG_CAPTURED49]]) +// CHECK-NEXT: [[DOTCOUNT52:%.*]] = load i32, ptr [[DOTCOUNT_ADDR51]], align 4 +// CHECK-NEXT: br label [[OMP_LOOP_PREHEADER53:%.*]] +// CHECK: omp_loop.preheader53: +// CHECK-NEXT: store i32 0, ptr [[P_LOWERBOUND67]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = sub i32 [[DOTCOUNT52]], 1 +// CHECK-NEXT: store i32 [[TMP5]], ptr [[P_UPPERBOUND68]], align 4 +// CHECK-NEXT: store i32 1, ptr [[P_STRIDE69]], align 4 +// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM70:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM70]], i32 34, ptr [[P_LASTITER66]], ptr [[P_LOWERBOUND67]], ptr [[P_UPPERBOUND68]], ptr [[P_STRIDE69]], i32 1, i32 0) +// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[P_LOWERBOUND67]], align 4 +// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[P_UPPERBOUND68]], align 4 // CHECK-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], [[TMP6]] // CHECK-NEXT: [[TMP9:%.*]] = add i32 [[TMP8]], 1 -// CHECK-NEXT: br label [[OMP_LOOP_HEADER57:%.*]] -// CHECK: omp_loop.header57: -// CHECK-NEXT: [[OMP_LOOP_IV63:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER56]] ], [ [[OMP_LOOP_NEXT65:%.*]], [[OMP_LOOP_INC60:%.*]] ] -// CHECK-NEXT: br label [[OMP_LOOP_COND58:%.*]] -// CHECK: omp_loop.cond58: -// CHECK-NEXT: [[OMP_LOOP_CMP64:%.*]] = icmp ult i32 [[OMP_LOOP_IV63]], [[TMP9]] -// CHECK-NEXT: br i1 [[OMP_LOOP_CMP64]], label [[OMP_LOOP_BODY59:%.*]], label [[OMP_LOOP_EXIT61:%.*]] -// CHECK: omp_loop.exit61: -// CHECK-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM73]]) -// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM74:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM74]]) -// CHECK-NEXT: br label [[OMP_LOOP_AFTER62:%.*]] -// CHECK: omp_loop.after62: -// CHECK-NEXT: br label [[OMP_PAR_REGION44_PARALLEL_AFTER:%.*]] -// CHECK: omp.par.region44.parallel.after: -// CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE45:%.*]] -// CHECK: omp.par.pre_finalize45: -// CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] -// CHECK: omp_loop.body59: -// CHECK-NEXT: [[TMP10:%.*]] = add i32 [[OMP_LOOP_IV63]], [[TMP6]] -// CHECK-NEXT: call void @__captured_stmt.10(ptr [[I51]], i32 [[TMP10]], ptr [[AGG_CAPTURED53]]) +// CHECK-NEXT: br label [[OMP_LOOP_HEADER54:%.*]] +// CHECK: omp_loop.header54: +// CHECK-NEXT: [[OMP_LOOP_IV60:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER53]] ], [ [[OMP_LOOP_NEXT62:%.*]], [[OMP_LOOP_INC57:%.*]] ] +// CHECK-NEXT: br label [[OMP_LOOP_COND55:%.*]] +// CHECK: omp_loop.cond55: +// CHECK-NEXT: [[OMP_LOOP_CMP61:%.*]] = icmp ult i32 [[OMP_LOOP_IV60]], [[TMP9]] +// CHECK-NEXT: br i1 [[OMP_LOOP_CMP61]], label [[OMP_LOOP_BODY56:%.*]], label [[OMP_LOOP_EXIT58:%.*]] +// CHECK: omp_loop.exit58: +// CHECK-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM70]]) +// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM71:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM71]]) +// CHECK-NEXT: br label [[OMP_LOOP_AFTER59:%.*]] +// CHECK: omp_loop.after59: +// CHECK-NEXT: br label [[OMP_PAR_REGION41_PARALLEL_AFTER:%.*]] +// CHECK: omp.par.region41.parallel.after: +// CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE42:%.*]] +// CHECK: omp.par.pre_finalize42: +// CHECK-NEXT: br label [[DOTFINI:%.*]] +// CHECK: .fini: +// CHECK-NEXT: br label [[OMP_PAR_EXIT43_EXITSTUB:%.*]] +// CHECK: omp_loop.body56: +// CHECK-NEXT: [[TMP10:%.*]] = add i32 [[OMP_LOOP_IV60]], [[TMP6]] +// CHECK-NEXT: call void @__captured_stmt.10(ptr [[I48]], i32 [[TMP10]], ptr [[AGG_CAPTURED50]]) // CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4 -// CHECK-NEXT: [[CONV66:%.*]] = sitofp i32 [[TMP11]] to double +// CHECK-NEXT: [[CONV63:%.*]] = sitofp i32 [[TMP11]] to double // CHECK-NEXT: [[TMP12:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8 -// CHECK-NEXT: [[ADD67:%.*]] = fadd double [[CONV66]], [[TMP12]] -// CHECK-NEXT: [[CONV68:%.*]] = fptrunc double [[ADD67]] to float +// CHECK-NEXT: [[ADD64:%.*]] = fadd double [[CONV63]], [[TMP12]] +// CHECK-NEXT: [[CONV65:%.*]] = fptrunc double [[ADD64]] to float // CHECK-NEXT: [[TMP13:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8 -// CHECK-NEXT: store float [[CONV68]], ptr [[TMP13]], align 4 -// CHECK-NEXT: br label [[OMP_LOOP_INC60]] -// CHECK: omp_loop.inc60: -// CHECK-NEXT: [[OMP_LOOP_NEXT65]] = add nuw i32 [[OMP_LOOP_IV63]], 1 -// CHECK-NEXT: br label [[OMP_LOOP_HEADER57]] -// CHECK: omp.par.exit46.exitStub: +// CHECK-NEXT: store float [[CONV65]], ptr [[TMP13]], align 4 +// CHECK-NEXT: br label [[OMP_LOOP_INC57]] +// CHECK: omp_loop.inc57: +// CHECK-NEXT: [[OMP_LOOP_NEXT62]] = add nuw i32 [[OMP_LOOP_IV60]], 1 +// CHECK-NEXT: br label [[OMP_LOOP_HEADER54]] +// CHECK: omp.par.exit43.exitStub: // CHECK-NEXT: ret void // // // CHECK-LABEL: define {{[^@]+}}@__captured_stmt.5 -// CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { +// CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[DISTANCE_ADDR:%.*]] = alloca ptr, align 8 // CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 @@ -1024,7 +1029,7 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !nonnull [[META3]], !align [[META4]] // CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 // CHECK-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4 // CHECK-NEXT: store i32 100, ptr [[DOTSTOP]], align 4 @@ -1047,13 +1052,13 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-NEXT: br label [[COND_END]] // CHECK: cond.end: // CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ] -// CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8 +// CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !nonnull [[META3]], !align [[META4]] // CHECK-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4 // CHECK-NEXT: ret void // // // CHECK-LABEL: define {{[^@]+}}@__captured_stmt.6 -// CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { +// CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[LOOPVAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK-NEXT: [[LOGICAL_ADDR:%.*]] = alloca i32, align 4 @@ -1067,13 +1072,13 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4 // CHECK-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]] // CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]] -// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !nonnull [[META3]], !align [[META4]] // CHECK-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4 // CHECK-NEXT: ret void // // // CHECK-LABEL: define {{[^@]+}}@__captured_stmt.7 -// CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { +// CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[DISTANCE_ADDR:%.*]] = alloca ptr, align 8 // CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 @@ -1084,7 +1089,7 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !nonnull [[META3]], !align [[META4]] // CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 // CHECK-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4 // CHECK-NEXT: store i32 100, ptr [[DOTSTOP]], align 4 @@ -1107,13 +1112,13 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-NEXT: br label [[COND_END]] // CHECK: cond.end: // CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ] -// CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8 +// CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !nonnull [[META3]], !align [[META4]] // CHECK-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4 // CHECK-NEXT: ret void // // // CHECK-LABEL: define {{[^@]+}}@__captured_stmt.8 -// CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { +// CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[LOOPVAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK-NEXT: [[LOGICAL_ADDR:%.*]] = alloca i32, align 4 @@ -1127,13 +1132,13 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4 // CHECK-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]] // CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]] -// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !nonnull [[META3]], !align [[META4]] // CHECK-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4 // CHECK-NEXT: ret void // // // CHECK-LABEL: define {{[^@]+}}@__captured_stmt.9 -// CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { +// CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[DISTANCE_ADDR:%.*]] = alloca ptr, align 8 // CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 @@ -1144,7 +1149,7 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !nonnull [[META3]], !align [[META4]] // CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 // CHECK-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4 // CHECK-NEXT: store i32 100, ptr [[DOTSTOP]], align 4 @@ -1167,13 +1172,13 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-NEXT: br label [[COND_END]] // CHECK: cond.end: // CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ] -// CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8 +// CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !nonnull [[META3]], !align [[META4]] // CHECK-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4 // CHECK-NEXT: ret void // // // CHECK-LABEL: define {{[^@]+}}@__captured_stmt.10 -// CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { +// CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[LOOPVAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK-NEXT: [[LOGICAL_ADDR:%.*]] = alloca i32, align 4 @@ -1187,13 +1192,13 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4 // CHECK-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]] // CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]] -// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !nonnull [[META3]], !align [[META4]] // CHECK-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4 // CHECK-NEXT: ret void // // // CHECK-LABEL: define {{[^@]+}}@__captured_stmt.11 -// CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { +// CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[DISTANCE_ADDR:%.*]] = alloca ptr, align 8 // CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 @@ -1204,7 +1209,7 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_9:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !nonnull [[META3]], !align [[META4]] // CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 // CHECK-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4 // CHECK-NEXT: store i32 100, ptr [[DOTSTOP]], align 4 @@ -1227,13 +1232,13 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-NEXT: br label [[COND_END]] // CHECK: cond.end: // CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ] -// CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8 +// CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !nonnull [[META3]], !align [[META4]] // CHECK-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4 // CHECK-NEXT: ret void // // // CHECK-LABEL: define {{[^@]+}}@__captured_stmt.12 -// CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { +// CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[LOOPVAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK-NEXT: [[LOGICAL_ADDR:%.*]] = alloca i32, align 4 @@ -1247,13 +1252,13 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4 // CHECK-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]] // CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]] -// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !nonnull [[META3]], !align [[META4]] // CHECK-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4 // CHECK-NEXT: ret void // // // CHECK-LABEL: define {{[^@]+}}@__captured_stmt.13 -// CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { +// CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[DISTANCE_ADDR:%.*]] = alloca ptr, align 8 // CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 @@ -1264,7 +1269,7 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_11:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !nonnull [[META3]], !align [[META4]] // CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 // CHECK-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4 // CHECK-NEXT: store i32 100, ptr [[DOTSTOP]], align 4 @@ -1287,13 +1292,13 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-NEXT: br label [[COND_END]] // CHECK: cond.end: // CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ] -// CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8 +// CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !nonnull [[META3]], !align [[META4]] // CHECK-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4 // CHECK-NEXT: ret void // // // CHECK-LABEL: define {{[^@]+}}@__captured_stmt.14 -// CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { +// CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[LOOPVAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK-NEXT: [[LOGICAL_ADDR:%.*]] = alloca i32, align 4 @@ -1307,13 +1312,13 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4 // CHECK-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]] // CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]] -// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !nonnull [[META3]], !align [[META4]] // CHECK-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4 // CHECK-NEXT: ret void // // // CHECK-LABEL: define {{[^@]+}}@__captured_stmt.15 -// CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { +// CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[DISTANCE_ADDR:%.*]] = alloca ptr, align 8 // CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 @@ -1324,7 +1329,7 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_13:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !nonnull [[META3]], !align [[META4]] // CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 // CHECK-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4 // CHECK-NEXT: store i32 100, ptr [[DOTSTOP]], align 4 @@ -1347,13 +1352,13 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-NEXT: br label [[COND_END]] // CHECK: cond.end: // CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ] -// CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8 +// CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !nonnull [[META3]], !align [[META4]] // CHECK-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4 // CHECK-NEXT: ret void // // // CHECK-LABEL: define {{[^@]+}}@__captured_stmt.16 -// CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { +// CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[LOOPVAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK-NEXT: [[LOGICAL_ADDR:%.*]] = alloca i32, align 4 @@ -1367,13 +1372,13 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4 // CHECK-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]] // CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]] -// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !nonnull [[META3]], !align [[META4]] // CHECK-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4 // CHECK-NEXT: ret void // // // CHECK-LABEL: define {{[^@]+}}@__captured_stmt.17 -// CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { +// CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[DISTANCE_ADDR:%.*]] = alloca ptr, align 8 // CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 @@ -1384,7 +1389,7 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_15:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !nonnull [[META3]], !align [[META4]] // CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 // CHECK-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4 // CHECK-NEXT: store i32 100, ptr [[DOTSTOP]], align 4 @@ -1407,13 +1412,13 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-NEXT: br label [[COND_END]] // CHECK: cond.end: // CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ] -// CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8 +// CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !nonnull [[META3]], !align [[META4]] // CHECK-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4 // CHECK-NEXT: ret void // // // CHECK-LABEL: define {{[^@]+}}@__captured_stmt.18 -// CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { +// CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[LOOPVAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK-NEXT: [[LOGICAL_ADDR:%.*]] = alloca i32, align 4 @@ -1427,13 +1432,13 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4 // CHECK-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]] // CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]] -// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !nonnull [[META3]], !align [[META4]] // CHECK-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4 // CHECK-NEXT: ret void // // // CHECK-LABEL: define {{[^@]+}}@__captured_stmt.19 -// CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { +// CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[DISTANCE_ADDR:%.*]] = alloca ptr, align 8 // CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 @@ -1444,7 +1449,7 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_17:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !nonnull [[META3]], !align [[META4]] // CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 // CHECK-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4 // CHECK-NEXT: store i32 100, ptr [[DOTSTOP]], align 4 @@ -1467,13 +1472,13 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-NEXT: br label [[COND_END]] // CHECK: cond.end: // CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ] -// CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8 +// CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !nonnull [[META3]], !align [[META4]] // CHECK-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4 // CHECK-NEXT: ret void // // // CHECK-LABEL: define {{[^@]+}}@__captured_stmt.20 -// CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { +// CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[LOOPVAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK-NEXT: [[LOGICAL_ADDR:%.*]] = alloca i32, align 4 @@ -1487,7 +1492,7 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4 // CHECK-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]] // CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]] -// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !nonnull [[META3]], !align [[META4]] // CHECK-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4 // CHECK-NEXT: ret void // @@ -1495,17 +1500,16 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG-LABEL: define {{[^@]+}}@_Z14parallel_for_0v // CHECK-DEBUG-SAME: () #[[ATTR0:[0-9]+]] !dbg [[DBG8:![0-9]+]] { // CHECK-DEBUG-NEXT: entry: -// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]), !dbg [[DBG13:![0-9]+]] // CHECK-DEBUG-NEXT: br label [[OMP_PARALLEL:%.*]] // CHECK-DEBUG: omp_parallel: -// CHECK-DEBUG-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @_Z14parallel_for_0v..omp_par), !dbg [[DBG14:![0-9]+]] -// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +// CHECK-DEBUG-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 0, ptr @_Z14parallel_for_0v..omp_par), !dbg [[DBG13:![0-9]+]] +// CHECK-DEBUG-NEXT: br label [[OMP_PAR_EXIT:%.*]] // CHECK-DEBUG: omp.par.exit: -// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG18:![0-9]+]] +// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG17:![0-9]+]] // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@_Z14parallel_for_0v..omp_par -// CHECK-DEBUG-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]]) #[[ATTR1:[0-9]+]] !dbg [[DBG19:![0-9]+]] { +// CHECK-DEBUG-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]]) #[[ATTR1:[0-9]+]] !dbg [[DBG18:![0-9]+]] { // CHECK-DEBUG-NEXT: omp.par.entry: // CHECK-DEBUG-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4 @@ -1521,60 +1525,60 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG-NEXT: [[DOTCOUNT_ADDR:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION:%.*]] // CHECK-DEBUG: omp.par.region: -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[I]], [[META21:![0-9]+]], !DIExpression(), [[META26:![0-9]+]]) -// CHECK-DEBUG-NEXT: store i32 0, ptr [[I]], align 4, !dbg [[META26]] -// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0, !dbg [[DBG27:![0-9]+]] -// CHECK-DEBUG-NEXT: store ptr [[I]], ptr [[TMP1]], align 8, !dbg [[DBG27]] -// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED1]], i32 0, i32 0, !dbg [[DBG27]] -// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG28:![0-9]+]] -// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4, !dbg [[DBG27]] -// CHECK-DEBUG-NEXT: call void @__captured_stmt(ptr [[DOTCOUNT_ADDR]], ptr [[AGG_CAPTURED]]), !dbg [[DBG27]] -// CHECK-DEBUG-NEXT: [[DOTCOUNT:%.*]] = load i32, ptr [[DOTCOUNT_ADDR]], align 4, !dbg [[DBG27]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER:%.*]], !dbg [[DBG27]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[I]], [[META20:![0-9]+]], !DIExpression(), [[META25:![0-9]+]]) +// CHECK-DEBUG-NEXT: store i32 0, ptr [[I]], align 4, !dbg [[META25]] +// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0, !dbg [[DBG26:![0-9]+]] +// CHECK-DEBUG-NEXT: store ptr [[I]], ptr [[TMP1]], align 8, !dbg [[DBG26]] +// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED1]], i32 0, i32 0, !dbg [[DBG26]] +// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG27:![0-9]+]] +// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4, !dbg [[DBG26]] +// CHECK-DEBUG-NEXT: call void @__captured_stmt(ptr [[DOTCOUNT_ADDR]], ptr [[AGG_CAPTURED]]), !dbg [[DBG26]] +// CHECK-DEBUG-NEXT: [[DOTCOUNT:%.*]] = load i32, ptr [[DOTCOUNT_ADDR]], align 4, !dbg [[DBG26]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER:%.*]], !dbg [[DBG26]] // CHECK-DEBUG: omp_loop.preheader: -// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND]], align 4, !dbg [[DBG27]] -// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = sub i32 [[DOTCOUNT]], 1, !dbg [[DBG27]] -// CHECK-DEBUG-NEXT: store i32 [[TMP4]], ptr [[P_UPPERBOUND]], align 4, !dbg [[DBG27]] -// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE]], align 4, !dbg [[DBG27]] -// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3:[0-9]+]]), !dbg [[DBG27]] -// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]], i32 34, ptr [[P_LASTITER]], ptr [[P_LOWERBOUND]], ptr [[P_UPPERBOUND]], ptr [[P_STRIDE]], i32 1, i32 0), !dbg [[DBG27]] -// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[P_LOWERBOUND]], align 4, !dbg [[DBG27]] -// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[P_UPPERBOUND]], align 4, !dbg [[DBG27]] -// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = sub i32 [[TMP6]], [[TMP5]], !dbg [[DBG27]] -// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = add i32 [[TMP7]], 1, !dbg [[DBG27]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER:%.*]], !dbg [[DBG27]] +// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND]], align 4, !dbg [[DBG26]] +// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = sub i32 [[DOTCOUNT]], 1, !dbg [[DBG26]] +// CHECK-DEBUG-NEXT: store i32 [[TMP4]], ptr [[P_UPPERBOUND]], align 4, !dbg [[DBG26]] +// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE]], align 4, !dbg [[DBG26]] +// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3:[0-9]+]]), !dbg [[DBG26]] +// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, ptr [[P_LASTITER]], ptr [[P_LOWERBOUND]], ptr [[P_UPPERBOUND]], ptr [[P_STRIDE]], i32 1, i32 0), !dbg [[DBG26]] +// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[P_LOWERBOUND]], align 4, !dbg [[DBG26]] +// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[P_UPPERBOUND]], align 4, !dbg [[DBG26]] +// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = sub i32 [[TMP6]], [[TMP5]], !dbg [[DBG26]] +// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = add i32 [[TMP7]], 1, !dbg [[DBG26]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER:%.*]], !dbg [[DBG26]] // CHECK-DEBUG: omp_loop.header: -// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER]] ], [ [[OMP_LOOP_NEXT:%.*]], [[OMP_LOOP_INC:%.*]] ], !dbg [[DBG27]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND:%.*]], !dbg [[DBG27]] +// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER]] ], [ [[OMP_LOOP_NEXT:%.*]], [[OMP_LOOP_INC:%.*]] ], !dbg [[DBG26]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND:%.*]], !dbg [[DBG26]] // CHECK-DEBUG: omp_loop.cond: -// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP:%.*]] = icmp ult i32 [[OMP_LOOP_IV]], [[TMP8]], !dbg [[DBG27]] -// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP]], label [[OMP_LOOP_BODY:%.*]], label [[OMP_LOOP_EXIT:%.*]], !dbg [[DBG27]] +// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP:%.*]] = icmp ult i32 [[OMP_LOOP_IV]], [[TMP8]], !dbg [[DBG26]] +// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP]], label [[OMP_LOOP_BODY:%.*]], label [[OMP_LOOP_EXIT:%.*]], !dbg [[DBG26]] // CHECK-DEBUG: omp_loop.exit: -// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]]), !dbg [[DBG27]] -// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3]]), !dbg [[DBG29:![0-9]+]] -// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM3]]), !dbg [[DBG29]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER:%.*]], !dbg [[DBG27]] +// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM]]), !dbg [[DBG26]] +// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3]]), !dbg [[DBG28:![0-9]+]] +// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM2]]), !dbg [[DBG28]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER:%.*]], !dbg [[DBG26]] // CHECK-DEBUG: omp_loop.after: -// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]], !dbg [[DBG30:![0-9]+]] +// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]], !dbg [[DBG29:![0-9]+]] // CHECK-DEBUG: omp.par.region.parallel.after: // CHECK-DEBUG-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] // CHECK-DEBUG: omp.par.pre_finalize: -// CHECK-DEBUG-NEXT: br label [[FINI:.*]] +// CHECK-DEBUG-NEXT: br label [[DOTFINI:%.*]] // CHECK-DEBUG: .fini: -// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]], !dbg [[DBG30]] +// CHECK-DEBUG-NEXT: br label [[OMP_PAR_EXIT_EXITSTUB:%.*]], !dbg [[DBG29]] // CHECK-DEBUG: omp_loop.body: -// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = add i32 [[OMP_LOOP_IV]], [[TMP5]], !dbg [[DBG29]] -// CHECK-DEBUG-NEXT: call void @__captured_stmt.1(ptr [[I]], i32 [[TMP9]], ptr [[AGG_CAPTURED1]]), !dbg [[DBG27]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC]], !dbg [[DBG27]] +// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = add i32 [[OMP_LOOP_IV]], [[TMP5]], !dbg [[DBG28]] +// CHECK-DEBUG-NEXT: call void @__captured_stmt.1(ptr [[I]], i32 [[TMP9]], ptr [[AGG_CAPTURED1]]), !dbg [[DBG26]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC]], !dbg [[DBG26]] // CHECK-DEBUG: omp_loop.inc: -// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT]] = add nuw i32 [[OMP_LOOP_IV]], 1, !dbg [[DBG27]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER]], !dbg [[DBG27]] +// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT]] = add nuw i32 [[OMP_LOOP_IV]], 1, !dbg [[DBG26]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER]], !dbg [[DBG26]] // CHECK-DEBUG: omp.par.exit.exitStub: // CHECK-DEBUG-NEXT: ret void // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@__captured_stmt -// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] !dbg [[DBG31:![0-9]+]] { +// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] !dbg [[DBG30:![0-9]+]] { // CHECK-DEBUG-NEXT: entry: // CHECK-DEBUG-NEXT: [[DISTANCE_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 @@ -1582,15 +1586,15 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG-NEXT: [[DOTSTOP:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[DOTSTEP:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: store ptr [[DISTANCE]], ptr [[DISTANCE_ADDR]], align 8 -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DISTANCE_ADDR]], [[META39:![0-9]+]], !DIExpression(), [[META40:![0-9]+]]) +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DISTANCE_ADDR]], [[META38:![0-9]+]], !DIExpression(), [[META39:![0-9]+]]) // CHECK-DEBUG-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META41:![0-9]+]], !DIExpression(), [[META40]]) +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META40:![0-9]+]], !DIExpression(), [[META39]]) // CHECK-DEBUG-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTART]], [[META42:![0-9]+]], !DIExpression(), [[META44:![0-9]+]]) -// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG45:![0-9]+]] -// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG45]] -// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG45]] -// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[META44]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTART]], [[META41:![0-9]+]], !DIExpression(), [[META43:![0-9]+]]) +// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG44:![0-9]+]] +// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG44]], !nonnull [[META12:![0-9]+]], !align [[META46:![0-9]+]] +// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG44]] +// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[META43]] // CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTOP]], [[META47:![0-9]+]], !DIExpression(), [[META48:![0-9]+]]) // CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[META48]] // CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTEP]], [[META49:![0-9]+]], !DIExpression(), [[META48]]) @@ -1613,13 +1617,13 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[META48]] // CHECK-DEBUG: cond.end: // CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[META48]] -// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[META48]] +// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[META48]], !nonnull [[META12]], !align [[META46]] // CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[META48]] // CHECK-DEBUG-NEXT: ret void, !dbg [[DBG50:![0-9]+]] // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@__captured_stmt.1 -// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG52:![0-9]+]] { +// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] !dbg [[DBG52:![0-9]+]] { // CHECK-DEBUG-NEXT: entry: // CHECK-DEBUG-NEXT: [[LOOPVAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: [[LOGICAL_ADDR:%.*]] = alloca i32, align 4 @@ -1636,7 +1640,7 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG66:![0-9]+]] // CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG66]] // CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG66]] -// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG66]] +// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG66]], !nonnull [[META12]], !align [[META46]] // CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[META61]] // CHECK-DEBUG-NEXT: ret void, !dbg [[DBG64]] // @@ -1644,7 +1648,7 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG-LABEL: define {{[^@]+}}@_Z14parallel_for_1Pfid // CHECK-DEBUG-SAME: (ptr noundef [[R:%.*]], i32 noundef [[A:%.*]], double noundef [[B:%.*]]) #[[ATTR0]] !dbg [[DBG69:![0-9]+]] { // CHECK-DEBUG-NEXT: entry: -// CHECK-DEBUG-NEXT: [[STRUCTARG17:%.*]] = alloca { ptr, ptr, ptr }, align 8 +// CHECK-DEBUG-NEXT: [[STRUCTARG:%.*]] = alloca { ptr, ptr, ptr }, align 8 // CHECK-DEBUG-NEXT: [[R_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[B_ADDR:%.*]] = alloca double, align 8 @@ -1654,238 +1658,6 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META77:![0-9]+]], !DIExpression(), [[META78:![0-9]+]]) // CHECK-DEBUG-NEXT: store double [[B]], ptr [[B_ADDR]], align 8 // CHECK-DEBUG-NEXT: #dbg_declare(ptr [[B_ADDR]], [[META79:![0-9]+]], !DIExpression(), [[META80:![0-9]+]]) -// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB6:[0-9]+]]), !dbg [[DBG81:![0-9]+]] -// CHECK-DEBUG-NEXT: br label [[OMP_PARALLEL:%.*]] -// CHECK-DEBUG: omp_parallel: -// CHECK-DEBUG-NEXT: [[GEP_A_ADDR18:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG17]], i32 0, i32 0 -// CHECK-DEBUG-NEXT: store ptr [[A_ADDR]], ptr [[GEP_A_ADDR18]], align 8 -// CHECK-DEBUG-NEXT: [[GEP_B_ADDR19:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG17]], i32 0, i32 1 -// CHECK-DEBUG-NEXT: store ptr [[B_ADDR]], ptr [[GEP_B_ADDR19]], align 8 -// CHECK-DEBUG-NEXT: [[GEP_R_ADDR20:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG17]], i32 0, i32 2 -// CHECK-DEBUG-NEXT: store ptr [[R_ADDR]], ptr [[GEP_R_ADDR20]], align 8 -// CHECK-DEBUG-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB6]], i32 1, ptr @_Z14parallel_for_1Pfid..omp_par.4, ptr [[STRUCTARG17]]), !dbg [[DBG82:![0-9]+]] -// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT16:%.*]] -// CHECK-DEBUG: omp.par.exit: -// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG84:![0-9]+]] -// -// -// CHECK-DEBUG-LABEL: define {{[^@]+}}@_Z14parallel_for_1Pfid..omp_par.4 -// CHECK-DEBUG-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] !dbg [[DBG85:![0-9]+]] { -// CHECK-DEBUG-NEXT: omp.par.entry: -// CHECK-DEBUG-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 0 -// CHECK-DEBUG-NEXT: [[LOADGEP_A_ADDR:%.*]] = load ptr, ptr [[GEP_A_ADDR]], align 8 -// CHECK-DEBUG-NEXT: [[GEP_B_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 1 -// CHECK-DEBUG-NEXT: [[LOADGEP_B_ADDR:%.*]] = load ptr, ptr [[GEP_B_ADDR]], align 8 -// CHECK-DEBUG-NEXT: [[GEP_R_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 2 -// CHECK-DEBUG-NEXT: [[LOADGEP_R_ADDR:%.*]] = load ptr, ptr [[GEP_R_ADDR]], align 8 -// CHECK-DEBUG-NEXT: [[STRUCTARG:%.*]] = alloca { ptr, ptr, ptr }, align 8 -// CHECK-DEBUG-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR]], align 4 -// CHECK-DEBUG-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL]], align 4 -// CHECK-DEBUG-NEXT: [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4 -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOADGEP_A_ADDR]], [[META88:![0-9]+]], !DIExpression(), [[META89:![0-9]+]]) -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOADGEP_B_ADDR]], [[META90:![0-9]+]], !DIExpression(), [[META91:![0-9]+]]) -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOADGEP_R_ADDR]], [[META92:![0-9]+]], !DIExpression(), [[META93:![0-9]+]]) -// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION:%.*]] -// CHECK-DEBUG: omp.par.region: -// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB8:[0-9]+]]), !dbg [[DBG86:![0-9]+]] -// CHECK-DEBUG-NEXT: br label [[OMP_PARALLEL:%.*]] -// CHECK-DEBUG: omp_parallel: -// CHECK-DEBUG-NEXT: [[GEP_A_ADDR1:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 0 -// CHECK-DEBUG-NEXT: store ptr [[LOADGEP_A_ADDR]], ptr [[GEP_A_ADDR1]], align 8 -// CHECK-DEBUG-NEXT: [[GEP_B_ADDR2:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 1 -// CHECK-DEBUG-NEXT: store ptr [[LOADGEP_B_ADDR]], ptr [[GEP_B_ADDR2]], align 8 -// CHECK-DEBUG-NEXT: [[GEP_R_ADDR3:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 2 -// CHECK-DEBUG-NEXT: store ptr [[LOADGEP_R_ADDR]], ptr [[GEP_R_ADDR3]], align 8 -// CHECK-DEBUG-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB8]], i32 1, ptr @_Z14parallel_for_1Pfid..omp_par, ptr [[STRUCTARG]]), !dbg [[DBG88:![0-9]+]] -// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] -// CHECK-DEBUG: omp.par.exit7: -// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]], !dbg [[DBG92:![0-9]+]] -// CHECK-DEBUG: omp.par.region.parallel.after: -// CHECK-DEBUG-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] -// CHECK-DEBUG: omp.par.pre_finalize: -// CHECK-DEBUG-NEXT: br label [[FINI16:%.*]] -// CHECK-DEBUG: .fini16: -// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT16_EXITSTUB:%.*]], !dbg [[DBG92]] -// CHECK-DEBUG: omp.par.exit.exitStub: -// CHECK-DEBUG-NEXT: ret void -// -// -// CHECK-DEBUG-LABEL: define {{[^@]+}}@_Z14parallel_for_1Pfid..omp_par -// CHECK-DEBUG-SAME: (ptr noalias [[TID_ADDR2:%.*]], ptr noalias [[ZERO_ADDR3:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] !dbg [[DBG93:![0-9]+]] { -// CHECK-DEBUG-NEXT: omp.par.entry4: -// CHECK-DEBUG-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 0 -// CHECK-DEBUG-NEXT: [[LOADGEP_A_ADDR:%.*]] = load ptr, ptr [[GEP_A_ADDR]], align 8 -// CHECK-DEBUG-NEXT: [[GEP_B_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 1 -// CHECK-DEBUG-NEXT: [[LOADGEP_B_ADDR:%.*]] = load ptr, ptr [[GEP_B_ADDR]], align 8 -// CHECK-DEBUG-NEXT: [[GEP_R_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 2 -// CHECK-DEBUG-NEXT: [[LOADGEP_R_ADDR:%.*]] = load ptr, ptr [[GEP_R_ADDR]], align 8 -// CHECK-DEBUG-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[TID_ADDR_LOCAL8:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR2]], align 4 -// CHECK-DEBUG-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL8]], align 4 -// CHECK-DEBUG-NEXT: [[TID9:%.*]] = load i32, ptr [[TID_ADDR_LOCAL8]], align 4 -// CHECK-DEBUG-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 -// CHECK-DEBUG-NEXT: [[AGG_CAPTURED12:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 -// CHECK-DEBUG-NEXT: [[DOTCOUNT_ADDR:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOADGEP_A_ADDR]], [[META102:![0-9]+]], !DIExpression(), [[META103:![0-9]+]]) -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOADGEP_B_ADDR]], [[META104:![0-9]+]], !DIExpression(), [[META105:![0-9]+]]) -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOADGEP_R_ADDR]], [[META106:![0-9]+]], !DIExpression(), [[META107:![0-9]+]]) -// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION5:%.*]] -// CHECK-DEBUG: omp.par.region5: -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[I]], [[META94:![0-9]+]], !DIExpression(), [[META99:![0-9]+]]) -// CHECK-DEBUG-NEXT: store i32 0, ptr [[I]], align 4, !dbg [[META99]] -// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 0, !dbg [[DBG100:![0-9]+]] -// CHECK-DEBUG-NEXT: store ptr [[I]], ptr [[TMP2]], align 8, !dbg [[DBG100]] -// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_2]], ptr [[AGG_CAPTURED12]], i32 0, i32 0, !dbg [[DBG100]] -// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG101:![0-9]+]] -// CHECK-DEBUG-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4, !dbg [[DBG100]] -// CHECK-DEBUG-NEXT: call void @__captured_stmt.2(ptr [[DOTCOUNT_ADDR]], ptr [[AGG_CAPTURED]]), !dbg [[DBG100]] -// CHECK-DEBUG-NEXT: [[DOTCOUNT:%.*]] = load i32, ptr [[DOTCOUNT_ADDR]], align 4, !dbg [[DBG100]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER:%.*]], !dbg [[DBG100]] -// CHECK-DEBUG: omp_loop.preheader: -// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND]], align 4, !dbg [[DBG100]] -// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = sub i32 [[DOTCOUNT]], 1, !dbg [[DBG100]] -// CHECK-DEBUG-NEXT: store i32 [[TMP5]], ptr [[P_UPPERBOUND]], align 4, !dbg [[DBG100]] -// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE]], align 4, !dbg [[DBG100]] -// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM14:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB10:[0-9]+]]), !dbg [[DBG100]] -// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB10]], i32 [[OMP_GLOBAL_THREAD_NUM14]], i32 34, ptr [[P_LASTITER]], ptr [[P_LOWERBOUND]], ptr [[P_UPPERBOUND]], ptr [[P_STRIDE]], i32 1, i32 0), !dbg [[DBG100]] -// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[P_LOWERBOUND]], align 4, !dbg [[DBG100]] -// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[P_UPPERBOUND]], align 4, !dbg [[DBG100]] -// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], [[TMP6]], !dbg [[DBG100]] -// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = add i32 [[TMP8]], 1, !dbg [[DBG100]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER:%.*]], !dbg [[DBG100]] -// CHECK-DEBUG: omp_loop.header: -// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER]] ], [ [[OMP_LOOP_NEXT:%.*]], [[OMP_LOOP_INC:%.*]] ], !dbg [[DBG100]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND:%.*]], !dbg [[DBG100]] -// CHECK-DEBUG: omp_loop.cond: -// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP:%.*]] = icmp ult i32 [[OMP_LOOP_IV]], [[TMP9]], !dbg [[DBG100]] -// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP]], label [[OMP_LOOP_BODY:%.*]], label [[OMP_LOOP_EXIT:%.*]], !dbg [[DBG100]] -// CHECK-DEBUG: omp_loop.exit: -// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB10]], i32 [[OMP_GLOBAL_THREAD_NUM14]]), !dbg [[DBG100]] -// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM15:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB10]]), !dbg [[DBG102:![0-9]+]] -// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB11:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM15]]), !dbg [[DBG102]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER:%.*]], !dbg [[DBG100]] -// CHECK-DEBUG: omp_loop.after: -// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION5_PARALLEL_AFTER:%.*]], !dbg [[DBG103:![0-9]+]] -// CHECK-DEBUG: omp.par.region5.parallel.after: -// CHECK-DEBUG-NEXT: br label [[OMP_PAR_PRE_FINALIZE6:%.*]] -// CHECK-DEBUG: omp.par.pre_finalize6: -// CHECK-DEBUG-NEXT: br label [[FINI:%.*]] -// CHECK-DEBUG: .fini: -// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]], !dbg [[DBG103]] -// CHECK-DEBUG: omp_loop.body: -// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = add i32 [[OMP_LOOP_IV]], [[TMP6]], !dbg [[DBG102]] -// CHECK-DEBUG-NEXT: call void @__captured_stmt.3(ptr [[I]], i32 [[TMP10]], ptr [[AGG_CAPTURED12]]), !dbg [[DBG100]] -// CHECK-DEBUG-NEXT: [[TMP11:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG104:![0-9]+]] -// CHECK-DEBUG-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP11]] to double, !dbg [[DBG104]] -// CHECK-DEBUG-NEXT: [[TMP12:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG106:![0-9]+]] -// CHECK-DEBUG-NEXT: [[ADD:%.*]] = fadd double [[CONV]], [[TMP12]], !dbg [[DBG107:![0-9]+]] -// CHECK-DEBUG-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD]] to float, !dbg [[DBG104]] -// CHECK-DEBUG-NEXT: [[TMP13:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG108:![0-9]+]] -// CHECK-DEBUG-NEXT: store float [[CONV13]], ptr [[TMP13]], align 4, !dbg [[DBG109:![0-9]+]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC]], !dbg [[DBG100]] -// CHECK-DEBUG: omp_loop.inc: -// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT]] = add nuw i32 [[OMP_LOOP_IV]], 1, !dbg [[DBG100]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER]], !dbg [[DBG100]] -// CHECK-DEBUG: omp.par.exit7.exitStub: -// CHECK-DEBUG-NEXT: ret void -// -// -// CHECK-DEBUG-LABEL: define {{[^@]+}}@__captured_stmt.2 -// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG110:![0-9]+]] { -// CHECK-DEBUG-NEXT: entry: -// CHECK-DEBUG-NEXT: [[DISTANCE_ADDR:%.*]] = alloca ptr, align 8 -// CHECK-DEBUG-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 -// CHECK-DEBUG-NEXT: [[DOTSTART:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[DOTSTOP:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[DOTSTEP:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: store ptr [[DISTANCE]], ptr [[DISTANCE_ADDR]], align 8 -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DISTANCE_ADDR]], [[META111:![0-9]+]], !DIExpression(), [[META112:![0-9]+]]) -// CHECK-DEBUG-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META113:![0-9]+]], !DIExpression(), [[META112]]) -// CHECK-DEBUG-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTART]], [[META114:![0-9]+]], !DIExpression(), [[META116:![0-9]+]]) -// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG117:![0-9]+]] -// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG117]] -// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG117]] -// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[META116]] -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTOP]], [[META119:![0-9]+]], !DIExpression(), [[META120:![0-9]+]]) -// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[META120]] -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTEP]], [[META121:![0-9]+]], !DIExpression(), [[META120]]) -// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[META120]] -// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META120]] -// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META120]] -// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[META120]] -// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[META120]] -// CHECK-DEBUG: cond.true: -// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META120]] -// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META120]] -// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[META120]] -// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META120]] -// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[META120]] -// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[META120]] -// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META120]] -// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[META120]] -// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[META120]] -// CHECK-DEBUG: cond.false: -// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[META120]] -// CHECK-DEBUG: cond.end: -// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[META120]] -// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[META120]] -// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[META120]] -// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG122:![0-9]+]] -// -// -// CHECK-DEBUG-LABEL: define {{[^@]+}}@__captured_stmt.3 -// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG124:![0-9]+]] { -// CHECK-DEBUG-NEXT: entry: -// CHECK-DEBUG-NEXT: [[LOOPVAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK-DEBUG-NEXT: [[LOGICAL_ADDR:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 -// CHECK-DEBUG-NEXT: store ptr [[LOOPVAR]], ptr [[LOOPVAR_ADDR]], align 8 -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOOPVAR_ADDR]], [[META125:![0-9]+]], !DIExpression(), [[META126:![0-9]+]]) -// CHECK-DEBUG-NEXT: store i32 [[LOGICAL]], ptr [[LOGICAL_ADDR]], align 4 -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOGICAL_ADDR]], [[META127:![0-9]+]], !DIExpression(), [[META126]]) -// CHECK-DEBUG-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META128:![0-9]+]], !DIExpression(), [[META126]]) -// CHECK-DEBUG-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG129:![0-9]+]] -// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG129]] -// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG131:![0-9]+]] -// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG131]] -// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG131]] -// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG131]] -// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[META126]] -// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG129]] -// -// -// CHECK-DEBUG-LABEL: define {{[^@]+}}@_Z14parallel_for_2Pfid -// CHECK-DEBUG-SAME: (ptr noundef [[R:%.*]], i32 noundef [[A:%.*]], double noundef [[B:%.*]]) #[[ATTR0]] !dbg [[DBG132:![0-9]+]] { -// CHECK-DEBUG-NEXT: entry: -// CHECK-DEBUG-NEXT: [[STRUCTARG:%.*]] = alloca { ptr, ptr, ptr }, align 8 -// CHECK-DEBUG-NEXT: [[R_ADDR:%.*]] = alloca ptr, align 8 -// CHECK-DEBUG-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[B_ADDR:%.*]] = alloca double, align 8 -// CHECK-DEBUG-NEXT: [[I185:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[AGG_CAPTURED186:%.*]] = alloca [[STRUCT_ANON_17:%.*]], align 8 -// CHECK-DEBUG-NEXT: [[AGG_CAPTURED187:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 4 -// CHECK-DEBUG-NEXT: [[DOTCOUNT_ADDR188:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_LASTITER203:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_LOWERBOUND204:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_UPPERBOUND205:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_STRIDE206:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: store ptr [[R]], ptr [[R_ADDR]], align 8 -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[R_ADDR]], [[META133:![0-9]+]], !DIExpression(), [[META134:![0-9]+]]) -// CHECK-DEBUG-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META135:![0-9]+]], !DIExpression(), [[META136:![0-9]+]]) -// CHECK-DEBUG-NEXT: store double [[B]], ptr [[B_ADDR]], align 8 -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[B_ADDR]], [[META137:![0-9]+]], !DIExpression(), [[META138:![0-9]+]]) -// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB13:[0-9]+]]), !dbg [[DBG139:![0-9]+]] // CHECK-DEBUG-NEXT: br label [[OMP_PARALLEL:%.*]] // CHECK-DEBUG: omp_parallel: // CHECK-DEBUG-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 0 @@ -1894,74 +1666,303 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG-NEXT: store ptr [[B_ADDR]], ptr [[GEP_B_ADDR]], align 8 // CHECK-DEBUG-NEXT: [[GEP_R_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 2 // CHECK-DEBUG-NEXT: store ptr [[R_ADDR]], ptr [[GEP_R_ADDR]], align 8 -// CHECK-DEBUG-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB13]], i32 1, ptr @_Z14parallel_for_2Pfid..omp_par.23, ptr [[STRUCTARG]]), !dbg [[DBG140:![0-9]+]] -// CHECK-DEBUG-NEXT: br label [[OMP_PAR_EXIT184:%.*]] +// CHECK-DEBUG-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB6:[0-9]+]], i32 1, ptr @_Z14parallel_for_1Pfid..omp_par.4, ptr [[STRUCTARG]]), !dbg [[DBG81:![0-9]+]] +// CHECK-DEBUG-NEXT: br label [[OMP_PAR_EXIT:%.*]] // CHECK-DEBUG: omp.par.exit: -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[I185]], [[META144:![0-9]+]], !DIExpression(), [[META147:![0-9]+]]) -// CHECK-DEBUG-NEXT: store i32 0, ptr [[I185]], align 4, !dbg [[META147]] -// CHECK-DEBUG-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_17]], ptr [[AGG_CAPTURED186]], i32 0, i32 0, !dbg [[DBG148:![0-9]+]] -// CHECK-DEBUG-NEXT: store ptr [[I185]], ptr [[TMP0]], align 8, !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_18]], ptr [[AGG_CAPTURED187]], i32 0, i32 0, !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[I185]], align 4, !dbg [[DBG149:![0-9]+]] -// CHECK-DEBUG-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4, !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: call void @__captured_stmt.19(ptr [[DOTCOUNT_ADDR188]], ptr [[AGG_CAPTURED186]]), !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: [[DOTCOUNT189:%.*]] = load i32, ptr [[DOTCOUNT_ADDR188]], align 4, !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER190:%.*]], !dbg [[DBG148]] -// CHECK-DEBUG: omp_loop.preheader190: -// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND204]], align 4, !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = sub i32 [[DOTCOUNT189]], 1, !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[P_UPPERBOUND205]], align 4, !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE206]], align 4, !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM207:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB42:[0-9]+]]), !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB42]], i32 [[OMP_GLOBAL_THREAD_NUM207]], i32 34, ptr [[P_LASTITER203]], ptr [[P_LOWERBOUND204]], ptr [[P_UPPERBOUND205]], ptr [[P_STRIDE206]], i32 1, i32 0), !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[P_LOWERBOUND204]], align 4, !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[P_UPPERBOUND205]], align 4, !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = sub i32 [[TMP5]], [[TMP4]], !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = add i32 [[TMP6]], 1, !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER191:%.*]], !dbg [[DBG148]] -// CHECK-DEBUG: omp_loop.header191: -// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV197:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER190]] ], [ [[OMP_LOOP_NEXT199:%.*]], [[OMP_LOOP_INC194:%.*]] ], !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND192:%.*]], !dbg [[DBG148]] -// CHECK-DEBUG: omp_loop.cond192: -// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP198:%.*]] = icmp ult i32 [[OMP_LOOP_IV197]], [[TMP7]], !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP198]], label [[OMP_LOOP_BODY193:%.*]], label [[OMP_LOOP_EXIT195:%.*]], !dbg [[DBG148]] -// CHECK-DEBUG: omp_loop.body193: -// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = add i32 [[OMP_LOOP_IV197]], [[TMP4]], !dbg [[DBG150:![0-9]+]] -// CHECK-DEBUG-NEXT: call void @__captured_stmt.20(ptr [[I185]], i32 [[TMP8]], ptr [[AGG_CAPTURED187]]), !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG151:![0-9]+]] -// CHECK-DEBUG-NEXT: [[CONV200:%.*]] = sitofp i32 [[TMP9]] to double, !dbg [[DBG151]] -// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load double, ptr [[B_ADDR]], align 8, !dbg [[DBG150]] -// CHECK-DEBUG-NEXT: [[ADD201:%.*]] = fadd double [[CONV200]], [[TMP10]], !dbg [[DBG152:![0-9]+]] -// CHECK-DEBUG-NEXT: [[CONV202:%.*]] = fptrunc double [[ADD201]] to float, !dbg [[DBG151]] -// CHECK-DEBUG-NEXT: [[TMP11:%.*]] = load ptr, ptr [[R_ADDR]], align 8, !dbg [[DBG153:![0-9]+]] -// CHECK-DEBUG-NEXT: store float [[CONV202]], ptr [[TMP11]], align 4, !dbg [[DBG154:![0-9]+]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC194]], !dbg [[DBG148]] -// CHECK-DEBUG: omp_loop.inc194: -// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT199]] = add nuw i32 [[OMP_LOOP_IV197]], 1, !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER191]], !dbg [[DBG148]] -// CHECK-DEBUG: omp_loop.exit195: -// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB42]], i32 [[OMP_GLOBAL_THREAD_NUM207]]), !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM208:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB42]]), !dbg [[DBG150]] -// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB43:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM208]]), !dbg [[DBG150]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER196:%.*]], !dbg [[DBG148]] -// CHECK-DEBUG: omp_loop.after196: -// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG155:![0-9]+]] +// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG86:![0-9]+]] +// +// +// CHECK-DEBUG-LABEL: define {{[^@]+}}@_Z14parallel_for_1Pfid..omp_par.4 +// CHECK-DEBUG-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] !dbg [[DBG87:![0-9]+]] { +// CHECK-DEBUG-NEXT: omp.par.entry: +// CHECK-DEBUG-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 0 +// CHECK-DEBUG-NEXT: [[LOADGEP_A_ADDR:%.*]] = load ptr, ptr [[GEP_A_ADDR]], align 8, !align [[META46]] +// CHECK-DEBUG-NEXT: [[GEP_B_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 1 +// CHECK-DEBUG-NEXT: [[LOADGEP_B_ADDR:%.*]] = load ptr, ptr [[GEP_B_ADDR]], align 8, !align [[META88:![0-9]+]] +// CHECK-DEBUG-NEXT: [[GEP_R_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 2 +// CHECK-DEBUG-NEXT: [[LOADGEP_R_ADDR:%.*]] = load ptr, ptr [[GEP_R_ADDR]], align 8, !align [[META88]] +// CHECK-DEBUG-NEXT: [[STRUCTARG:%.*]] = alloca { ptr, ptr, ptr }, align 8 +// CHECK-DEBUG-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR]], align 4 +// CHECK-DEBUG-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL]], align 4 +// CHECK-DEBUG-NEXT: [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4 +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOADGEP_A_ADDR]], [[META89:![0-9]+]], !DIExpression(), [[META90:![0-9]+]]) +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOADGEP_B_ADDR]], [[META91:![0-9]+]], !DIExpression(), [[META92:![0-9]+]]) +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOADGEP_R_ADDR]], [[META93:![0-9]+]], !DIExpression(), [[META94:![0-9]+]]) +// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION:%.*]] +// CHECK-DEBUG: omp.par.region: +// CHECK-DEBUG-NEXT: br label [[OMP_PARALLEL:%.*]] +// CHECK-DEBUG: omp_parallel: +// CHECK-DEBUG-NEXT: [[GEP_A_ADDR1:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 0 +// CHECK-DEBUG-NEXT: store ptr [[LOADGEP_A_ADDR]], ptr [[GEP_A_ADDR1]], align 8 +// CHECK-DEBUG-NEXT: [[GEP_B_ADDR2:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 1 +// CHECK-DEBUG-NEXT: store ptr [[LOADGEP_B_ADDR]], ptr [[GEP_B_ADDR2]], align 8 +// CHECK-DEBUG-NEXT: [[GEP_R_ADDR3:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 2 +// CHECK-DEBUG-NEXT: store ptr [[LOADGEP_R_ADDR]], ptr [[GEP_R_ADDR3]], align 8 +// CHECK-DEBUG-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB8:[0-9]+]], i32 1, ptr @_Z14parallel_for_1Pfid..omp_par, ptr [[STRUCTARG]]), !dbg [[DBG95:![0-9]+]] +// CHECK-DEBUG-NEXT: br label [[OMP_PAR_EXIT6:%.*]] +// CHECK-DEBUG: omp.par.exit6: +// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]], !dbg [[DBG100:![0-9]+]] +// CHECK-DEBUG: omp.par.region.parallel.after: +// CHECK-DEBUG-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +// CHECK-DEBUG: omp.par.pre_finalize: +// CHECK-DEBUG-NEXT: br label [[DOTFINI14:%.*]] +// CHECK-DEBUG: .fini14: +// CHECK-DEBUG-NEXT: br label [[OMP_PAR_EXIT_EXITSTUB:%.*]], !dbg [[DBG100]] +// CHECK-DEBUG: omp.par.exit.exitStub: +// CHECK-DEBUG-NEXT: ret void +// +// +// CHECK-DEBUG-LABEL: define {{[^@]+}}@_Z14parallel_for_1Pfid..omp_par +// CHECK-DEBUG-SAME: (ptr noalias [[TID_ADDR1:%.*]], ptr noalias [[ZERO_ADDR2:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] !dbg [[DBG101:![0-9]+]] { +// CHECK-DEBUG-NEXT: omp.par.entry3: +// CHECK-DEBUG-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 0 +// CHECK-DEBUG-NEXT: [[LOADGEP_A_ADDR:%.*]] = load ptr, ptr [[GEP_A_ADDR]], align 8, !align [[META46]] +// CHECK-DEBUG-NEXT: [[GEP_B_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 1 +// CHECK-DEBUG-NEXT: [[LOADGEP_B_ADDR:%.*]] = load ptr, ptr [[GEP_B_ADDR]], align 8, !align [[META88]] +// CHECK-DEBUG-NEXT: [[GEP_R_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 2 +// CHECK-DEBUG-NEXT: [[LOADGEP_R_ADDR:%.*]] = load ptr, ptr [[GEP_R_ADDR]], align 8, !align [[META88]] +// CHECK-DEBUG-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[TID_ADDR_LOCAL7:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR1]], align 4 +// CHECK-DEBUG-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL7]], align 4 +// CHECK-DEBUG-NEXT: [[TID8:%.*]] = load i32, ptr [[TID_ADDR_LOCAL7]], align 4 +// CHECK-DEBUG-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 +// CHECK-DEBUG-NEXT: [[AGG_CAPTURED11:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 +// CHECK-DEBUG-NEXT: [[DOTCOUNT_ADDR:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOADGEP_A_ADDR]], [[META102:![0-9]+]], !DIExpression(), [[META103:![0-9]+]]) +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOADGEP_B_ADDR]], [[META104:![0-9]+]], !DIExpression(), [[META105:![0-9]+]]) +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOADGEP_R_ADDR]], [[META106:![0-9]+]], !DIExpression(), [[META107:![0-9]+]]) +// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION4:%.*]] +// CHECK-DEBUG: omp.par.region4: +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[I]], [[META108:![0-9]+]], !DIExpression(), [[META113:![0-9]+]]) +// CHECK-DEBUG-NEXT: store i32 0, ptr [[I]], align 4, !dbg [[META113]] +// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 0, !dbg [[DBG114:![0-9]+]] +// CHECK-DEBUG-NEXT: store ptr [[I]], ptr [[TMP2]], align 8, !dbg [[DBG114]] +// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_2]], ptr [[AGG_CAPTURED11]], i32 0, i32 0, !dbg [[DBG114]] +// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG115:![0-9]+]] +// CHECK-DEBUG-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4, !dbg [[DBG114]] +// CHECK-DEBUG-NEXT: call void @__captured_stmt.2(ptr [[DOTCOUNT_ADDR]], ptr [[AGG_CAPTURED]]), !dbg [[DBG114]] +// CHECK-DEBUG-NEXT: [[DOTCOUNT:%.*]] = load i32, ptr [[DOTCOUNT_ADDR]], align 4, !dbg [[DBG114]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER:%.*]], !dbg [[DBG114]] +// CHECK-DEBUG: omp_loop.preheader: +// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND]], align 4, !dbg [[DBG114]] +// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = sub i32 [[DOTCOUNT]], 1, !dbg [[DBG114]] +// CHECK-DEBUG-NEXT: store i32 [[TMP5]], ptr [[P_UPPERBOUND]], align 4, !dbg [[DBG114]] +// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE]], align 4, !dbg [[DBG114]] +// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB10:[0-9]+]]), !dbg [[DBG114]] +// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB10]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, ptr [[P_LASTITER]], ptr [[P_LOWERBOUND]], ptr [[P_UPPERBOUND]], ptr [[P_STRIDE]], i32 1, i32 0), !dbg [[DBG114]] +// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[P_LOWERBOUND]], align 4, !dbg [[DBG114]] +// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[P_UPPERBOUND]], align 4, !dbg [[DBG114]] +// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], [[TMP6]], !dbg [[DBG114]] +// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = add i32 [[TMP8]], 1, !dbg [[DBG114]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER:%.*]], !dbg [[DBG114]] +// CHECK-DEBUG: omp_loop.header: +// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER]] ], [ [[OMP_LOOP_NEXT:%.*]], [[OMP_LOOP_INC:%.*]] ], !dbg [[DBG114]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND:%.*]], !dbg [[DBG114]] +// CHECK-DEBUG: omp_loop.cond: +// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP:%.*]] = icmp ult i32 [[OMP_LOOP_IV]], [[TMP9]], !dbg [[DBG114]] +// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP]], label [[OMP_LOOP_BODY:%.*]], label [[OMP_LOOP_EXIT:%.*]], !dbg [[DBG114]] +// CHECK-DEBUG: omp_loop.exit: +// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB10]], i32 [[OMP_GLOBAL_THREAD_NUM]]), !dbg [[DBG114]] +// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM13:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB10]]), !dbg [[DBG116:![0-9]+]] +// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB11:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM13]]), !dbg [[DBG116]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER:%.*]], !dbg [[DBG114]] +// CHECK-DEBUG: omp_loop.after: +// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION4_PARALLEL_AFTER:%.*]], !dbg [[DBG117:![0-9]+]] +// CHECK-DEBUG: omp.par.region4.parallel.after: +// CHECK-DEBUG-NEXT: br label [[OMP_PAR_PRE_FINALIZE5:%.*]] +// CHECK-DEBUG: omp.par.pre_finalize5: +// CHECK-DEBUG-NEXT: br label [[DOTFINI:%.*]] +// CHECK-DEBUG: .fini: +// CHECK-DEBUG-NEXT: br label [[OMP_PAR_EXIT6_EXITSTUB:%.*]], !dbg [[DBG117]] +// CHECK-DEBUG: omp_loop.body: +// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = add i32 [[OMP_LOOP_IV]], [[TMP6]], !dbg [[DBG116]] +// CHECK-DEBUG-NEXT: call void @__captured_stmt.3(ptr [[I]], i32 [[TMP10]], ptr [[AGG_CAPTURED11]]), !dbg [[DBG114]] +// CHECK-DEBUG-NEXT: [[TMP11:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG118:![0-9]+]] +// CHECK-DEBUG-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP11]] to double, !dbg [[DBG118]] +// CHECK-DEBUG-NEXT: [[TMP12:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG120:![0-9]+]] +// CHECK-DEBUG-NEXT: [[ADD:%.*]] = fadd double [[CONV]], [[TMP12]], !dbg [[DBG121:![0-9]+]] +// CHECK-DEBUG-NEXT: [[CONV12:%.*]] = fptrunc double [[ADD]] to float, !dbg [[DBG118]] +// CHECK-DEBUG-NEXT: [[TMP13:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG122:![0-9]+]] +// CHECK-DEBUG-NEXT: store float [[CONV12]], ptr [[TMP13]], align 4, !dbg [[DBG123:![0-9]+]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC]], !dbg [[DBG114]] +// CHECK-DEBUG: omp_loop.inc: +// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT]] = add nuw i32 [[OMP_LOOP_IV]], 1, !dbg [[DBG114]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER]], !dbg [[DBG114]] +// CHECK-DEBUG: omp.par.exit6.exitStub: +// CHECK-DEBUG-NEXT: ret void +// +// +// CHECK-DEBUG-LABEL: define {{[^@]+}}@__captured_stmt.2 +// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] !dbg [[DBG124:![0-9]+]] { +// CHECK-DEBUG-NEXT: entry: +// CHECK-DEBUG-NEXT: [[DISTANCE_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-DEBUG-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-DEBUG-NEXT: [[DOTSTART:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[DOTSTOP:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[DOTSTEP:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: store ptr [[DISTANCE]], ptr [[DISTANCE_ADDR]], align 8 +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DISTANCE_ADDR]], [[META125:![0-9]+]], !DIExpression(), [[META126:![0-9]+]]) +// CHECK-DEBUG-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META127:![0-9]+]], !DIExpression(), [[META126]]) +// CHECK-DEBUG-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTART]], [[META128:![0-9]+]], !DIExpression(), [[META130:![0-9]+]]) +// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG131:![0-9]+]] +// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG131]], !nonnull [[META12]], !align [[META46]] +// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG131]] +// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[META130]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTOP]], [[META133:![0-9]+]], !DIExpression(), [[META134:![0-9]+]]) +// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[META134]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTEP]], [[META135:![0-9]+]], !DIExpression(), [[META134]]) +// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[META134]] +// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META134]] +// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META134]] +// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[META134]] +// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[META134]] +// CHECK-DEBUG: cond.true: +// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META134]] +// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META134]] +// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[META134]] +// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META134]] +// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[META134]] +// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[META134]] +// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META134]] +// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[META134]] +// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[META134]] +// CHECK-DEBUG: cond.false: +// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[META134]] +// CHECK-DEBUG: cond.end: +// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[META134]] +// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[META134]], !nonnull [[META12]], !align [[META46]] +// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[META134]] +// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG136:![0-9]+]] +// +// +// CHECK-DEBUG-LABEL: define {{[^@]+}}@__captured_stmt.3 +// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] !dbg [[DBG138:![0-9]+]] { +// CHECK-DEBUG-NEXT: entry: +// CHECK-DEBUG-NEXT: [[LOOPVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-DEBUG-NEXT: [[LOGICAL_ADDR:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-DEBUG-NEXT: store ptr [[LOOPVAR]], ptr [[LOOPVAR_ADDR]], align 8 +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOOPVAR_ADDR]], [[META139:![0-9]+]], !DIExpression(), [[META140:![0-9]+]]) +// CHECK-DEBUG-NEXT: store i32 [[LOGICAL]], ptr [[LOGICAL_ADDR]], align 4 +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOGICAL_ADDR]], [[META141:![0-9]+]], !DIExpression(), [[META140]]) +// CHECK-DEBUG-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META142:![0-9]+]], !DIExpression(), [[META140]]) +// CHECK-DEBUG-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG143:![0-9]+]] +// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG143]] +// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG145:![0-9]+]] +// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG145]] +// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG145]] +// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG145]], !nonnull [[META12]], !align [[META46]] +// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[META140]] +// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG143]] +// +// +// CHECK-DEBUG-LABEL: define {{[^@]+}}@_Z14parallel_for_2Pfid +// CHECK-DEBUG-SAME: (ptr noundef [[R:%.*]], i32 noundef [[A:%.*]], double noundef [[B:%.*]]) #[[ATTR0]] !dbg [[DBG146:![0-9]+]] { +// CHECK-DEBUG-NEXT: entry: +// CHECK-DEBUG-NEXT: [[STRUCTARG:%.*]] = alloca { ptr, ptr, ptr }, align 8 +// CHECK-DEBUG-NEXT: [[R_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-DEBUG-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[B_ADDR:%.*]] = alloca double, align 8 +// CHECK-DEBUG-NEXT: [[I181:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[AGG_CAPTURED182:%.*]] = alloca [[STRUCT_ANON_17:%.*]], align 8 +// CHECK-DEBUG-NEXT: [[AGG_CAPTURED183:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 4 +// CHECK-DEBUG-NEXT: [[DOTCOUNT_ADDR184:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_LASTITER199:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_LOWERBOUND200:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_UPPERBOUND201:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_STRIDE202:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: store ptr [[R]], ptr [[R_ADDR]], align 8 +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[R_ADDR]], [[META147:![0-9]+]], !DIExpression(), [[META148:![0-9]+]]) +// CHECK-DEBUG-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META149:![0-9]+]], !DIExpression(), [[META150:![0-9]+]]) +// CHECK-DEBUG-NEXT: store double [[B]], ptr [[B_ADDR]], align 8 +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[B_ADDR]], [[META151:![0-9]+]], !DIExpression(), [[META152:![0-9]+]]) +// CHECK-DEBUG-NEXT: br label [[OMP_PARALLEL:%.*]] +// CHECK-DEBUG: omp_parallel: +// CHECK-DEBUG-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 0 +// CHECK-DEBUG-NEXT: store ptr [[A_ADDR]], ptr [[GEP_A_ADDR]], align 8 +// CHECK-DEBUG-NEXT: [[GEP_B_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 1 +// CHECK-DEBUG-NEXT: store ptr [[B_ADDR]], ptr [[GEP_B_ADDR]], align 8 +// CHECK-DEBUG-NEXT: [[GEP_R_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 2 +// CHECK-DEBUG-NEXT: store ptr [[R_ADDR]], ptr [[GEP_R_ADDR]], align 8 +// CHECK-DEBUG-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB13:[0-9]+]], i32 1, ptr @_Z14parallel_for_2Pfid..omp_par.23, ptr [[STRUCTARG]]), !dbg [[DBG153:![0-9]+]] +// CHECK-DEBUG-NEXT: br label [[OMP_PAR_EXIT:%.*]] +// CHECK-DEBUG: omp.par.exit: +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[I181]], [[META157:![0-9]+]], !DIExpression(), [[META160:![0-9]+]]) +// CHECK-DEBUG-NEXT: store i32 0, ptr [[I181]], align 4, !dbg [[META160]] +// CHECK-DEBUG-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_17]], ptr [[AGG_CAPTURED182]], i32 0, i32 0, !dbg [[DBG161:![0-9]+]] +// CHECK-DEBUG-NEXT: store ptr [[I181]], ptr [[TMP0]], align 8, !dbg [[DBG161]] +// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_18]], ptr [[AGG_CAPTURED183]], i32 0, i32 0, !dbg [[DBG161]] +// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[I181]], align 4, !dbg [[DBG162:![0-9]+]] +// CHECK-DEBUG-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4, !dbg [[DBG161]] +// CHECK-DEBUG-NEXT: call void @__captured_stmt.19(ptr [[DOTCOUNT_ADDR184]], ptr [[AGG_CAPTURED182]]), !dbg [[DBG161]] +// CHECK-DEBUG-NEXT: [[DOTCOUNT185:%.*]] = load i32, ptr [[DOTCOUNT_ADDR184]], align 4, !dbg [[DBG161]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER186:%.*]], !dbg [[DBG161]] +// CHECK-DEBUG: omp_loop.preheader186: +// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND200]], align 4, !dbg [[DBG161]] +// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = sub i32 [[DOTCOUNT185]], 1, !dbg [[DBG161]] +// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[P_UPPERBOUND201]], align 4, !dbg [[DBG161]] +// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE202]], align 4, !dbg [[DBG161]] +// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM203:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB42:[0-9]+]]), !dbg [[DBG161]] +// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB42]], i32 [[OMP_GLOBAL_THREAD_NUM203]], i32 34, ptr [[P_LASTITER199]], ptr [[P_LOWERBOUND200]], ptr [[P_UPPERBOUND201]], ptr [[P_STRIDE202]], i32 1, i32 0), !dbg [[DBG161]] +// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[P_LOWERBOUND200]], align 4, !dbg [[DBG161]] +// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[P_UPPERBOUND201]], align 4, !dbg [[DBG161]] +// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = sub i32 [[TMP5]], [[TMP4]], !dbg [[DBG161]] +// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = add i32 [[TMP6]], 1, !dbg [[DBG161]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER187:%.*]], !dbg [[DBG161]] +// CHECK-DEBUG: omp_loop.header187: +// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV193:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER186]] ], [ [[OMP_LOOP_NEXT195:%.*]], [[OMP_LOOP_INC190:%.*]] ], !dbg [[DBG161]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND188:%.*]], !dbg [[DBG161]] +// CHECK-DEBUG: omp_loop.cond188: +// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP194:%.*]] = icmp ult i32 [[OMP_LOOP_IV193]], [[TMP7]], !dbg [[DBG161]] +// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP194]], label [[OMP_LOOP_BODY189:%.*]], label [[OMP_LOOP_EXIT191:%.*]], !dbg [[DBG161]] +// CHECK-DEBUG: omp_loop.body189: +// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = add i32 [[OMP_LOOP_IV193]], [[TMP4]], !dbg [[DBG163:![0-9]+]] +// CHECK-DEBUG-NEXT: call void @__captured_stmt.20(ptr [[I181]], i32 [[TMP8]], ptr [[AGG_CAPTURED183]]), !dbg [[DBG161]] +// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG164:![0-9]+]] +// CHECK-DEBUG-NEXT: [[CONV196:%.*]] = sitofp i32 [[TMP9]] to double, !dbg [[DBG164]] +// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load double, ptr [[B_ADDR]], align 8, !dbg [[DBG163]] +// CHECK-DEBUG-NEXT: [[ADD197:%.*]] = fadd double [[CONV196]], [[TMP10]], !dbg [[DBG165:![0-9]+]] +// CHECK-DEBUG-NEXT: [[CONV198:%.*]] = fptrunc double [[ADD197]] to float, !dbg [[DBG164]] +// CHECK-DEBUG-NEXT: [[TMP11:%.*]] = load ptr, ptr [[R_ADDR]], align 8, !dbg [[DBG166:![0-9]+]] +// CHECK-DEBUG-NEXT: store float [[CONV198]], ptr [[TMP11]], align 4, !dbg [[DBG167:![0-9]+]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC190]], !dbg [[DBG161]] +// CHECK-DEBUG: omp_loop.inc190: +// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT195]] = add nuw i32 [[OMP_LOOP_IV193]], 1, !dbg [[DBG161]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER187]], !dbg [[DBG161]] +// CHECK-DEBUG: omp_loop.exit191: +// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB42]], i32 [[OMP_GLOBAL_THREAD_NUM203]]), !dbg [[DBG161]] +// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM204:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB42]]), !dbg [[DBG163]] +// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB43:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM204]]), !dbg [[DBG163]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER192:%.*]], !dbg [[DBG161]] +// CHECK-DEBUG: omp_loop.after192: +// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG168:![0-9]+]] // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@_Z14parallel_for_2Pfid..omp_par.23 -// CHECK-DEBUG-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] !dbg [[DBG156:![0-9]+]] { +// CHECK-DEBUG-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] !dbg [[DBG169:![0-9]+]] { // CHECK-DEBUG-NEXT: omp.par.entry: // CHECK-DEBUG-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 0 -// CHECK-DEBUG-NEXT: [[LOADGEP_A_ADDR:%.*]] = load ptr, ptr [[GEP_A_ADDR]], align 8 +// CHECK-DEBUG-NEXT: [[LOADGEP_A_ADDR:%.*]] = load ptr, ptr [[GEP_A_ADDR]], align 8, !align [[META46]] // CHECK-DEBUG-NEXT: [[GEP_B_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 1 -// CHECK-DEBUG-NEXT: [[LOADGEP_B_ADDR:%.*]] = load ptr, ptr [[GEP_B_ADDR]], align 8 +// CHECK-DEBUG-NEXT: [[LOADGEP_B_ADDR:%.*]] = load ptr, ptr [[GEP_B_ADDR]], align 8, !align [[META88]] // CHECK-DEBUG-NEXT: [[GEP_R_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 2 -// CHECK-DEBUG-NEXT: [[LOADGEP_R_ADDR:%.*]] = load ptr, ptr [[GEP_R_ADDR]], align 8 -// CHECK-DEBUG-NEXT: [[STRUCTARG214:%.*]] = alloca { ptr, ptr, ptr }, align 8 -// CHECK-DEBUG-NEXT: [[P_LASTITER178:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_LOWERBOUND179:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_UPPERBOUND180:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_STRIDE181:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[LOADGEP_R_ADDR:%.*]] = load ptr, ptr [[GEP_R_ADDR]], align 8, !align [[META88]] +// CHECK-DEBUG-NEXT: [[STRUCTARG:%.*]] = alloca { ptr, ptr, ptr }, align 8 +// CHECK-DEBUG-NEXT: [[P_LASTITER174:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_LOWERBOUND175:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_UPPERBOUND176:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_STRIDE177:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 @@ -1974,213 +1975,49 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK-DEBUG-NEXT: [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK-DEBUG-NEXT: [[DOTCOUNT_ADDR:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[I160:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[AGG_CAPTURED161:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 8 -// CHECK-DEBUG-NEXT: [[AGG_CAPTURED162:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 4 -// CHECK-DEBUG-NEXT: [[DOTCOUNT_ADDR163:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOADGEP_A_ADDR]], [[META171:![0-9]+]], !DIExpression(), [[META172:![0-9]+]]) -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOADGEP_B_ADDR]], [[META173:![0-9]+]], !DIExpression(), [[META174:![0-9]+]]) -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOADGEP_R_ADDR]], [[META175:![0-9]+]], !DIExpression(), [[META176:![0-9]+]]) +// CHECK-DEBUG-NEXT: [[I156:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[AGG_CAPTURED157:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 8 +// CHECK-DEBUG-NEXT: [[AGG_CAPTURED158:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 4 +// CHECK-DEBUG-NEXT: [[DOTCOUNT_ADDR159:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOADGEP_A_ADDR]], [[META170:![0-9]+]], !DIExpression(), [[META171:![0-9]+]]) +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOADGEP_B_ADDR]], [[META172:![0-9]+]], !DIExpression(), [[META173:![0-9]+]]) +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOADGEP_R_ADDR]], [[META174:![0-9]+]], !DIExpression(), [[META175:![0-9]+]]) // CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION:%.*]] // CHECK-DEBUG: omp.par.region: -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[I]], [[META157:![0-9]+]], !DIExpression(), [[META161:![0-9]+]]) -// CHECK-DEBUG-NEXT: store i32 0, ptr [[I]], align 4, !dbg [[META161]] -// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_3]], ptr [[AGG_CAPTURED]], i32 0, i32 0, !dbg [[DBG162:![0-9]+]] -// CHECK-DEBUG-NEXT: store ptr [[I]], ptr [[TMP2]], align 8, !dbg [[DBG162]] -// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_4]], ptr [[AGG_CAPTURED1]], i32 0, i32 0, !dbg [[DBG162]] -// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG163:![0-9]+]] -// CHECK-DEBUG-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4, !dbg [[DBG162]] -// CHECK-DEBUG-NEXT: call void @__captured_stmt.5(ptr [[DOTCOUNT_ADDR]], ptr [[AGG_CAPTURED]]), !dbg [[DBG162]] -// CHECK-DEBUG-NEXT: [[DOTCOUNT:%.*]] = load i32, ptr [[DOTCOUNT_ADDR]], align 4, !dbg [[DBG162]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER:%.*]], !dbg [[DBG162]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[I]], [[META176:![0-9]+]], !DIExpression(), [[META180:![0-9]+]]) +// CHECK-DEBUG-NEXT: store i32 0, ptr [[I]], align 4, !dbg [[META180]] +// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_3]], ptr [[AGG_CAPTURED]], i32 0, i32 0, !dbg [[DBG181:![0-9]+]] +// CHECK-DEBUG-NEXT: store ptr [[I]], ptr [[TMP2]], align 8, !dbg [[DBG181]] +// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_4]], ptr [[AGG_CAPTURED1]], i32 0, i32 0, !dbg [[DBG181]] +// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG182:![0-9]+]] +// CHECK-DEBUG-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4, !dbg [[DBG181]] +// CHECK-DEBUG-NEXT: call void @__captured_stmt.5(ptr [[DOTCOUNT_ADDR]], ptr [[AGG_CAPTURED]]), !dbg [[DBG181]] +// CHECK-DEBUG-NEXT: [[DOTCOUNT:%.*]] = load i32, ptr [[DOTCOUNT_ADDR]], align 4, !dbg [[DBG181]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER:%.*]], !dbg [[DBG181]] // CHECK-DEBUG: omp_loop.preheader: -// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND]], align 4, !dbg [[DBG162]] -// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = sub i32 [[DOTCOUNT]], 1, !dbg [[DBG162]] -// CHECK-DEBUG-NEXT: store i32 [[TMP5]], ptr [[P_UPPERBOUND]], align 4, !dbg [[DBG162]] -// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE]], align 4, !dbg [[DBG162]] -// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB15:[0-9]+]]), !dbg [[DBG162]] -// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM3]], i32 34, ptr [[P_LASTITER]], ptr [[P_LOWERBOUND]], ptr [[P_UPPERBOUND]], ptr [[P_STRIDE]], i32 1, i32 0), !dbg [[DBG162]] -// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[P_LOWERBOUND]], align 4, !dbg [[DBG162]] -// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[P_UPPERBOUND]], align 4, !dbg [[DBG162]] -// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], [[TMP6]], !dbg [[DBG162]] -// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = add i32 [[TMP8]], 1, !dbg [[DBG162]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER:%.*]], !dbg [[DBG162]] +// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND]], align 4, !dbg [[DBG181]] +// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = sub i32 [[DOTCOUNT]], 1, !dbg [[DBG181]] +// CHECK-DEBUG-NEXT: store i32 [[TMP5]], ptr [[P_UPPERBOUND]], align 4, !dbg [[DBG181]] +// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE]], align 4, !dbg [[DBG181]] +// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB15:[0-9]+]]), !dbg [[DBG181]] +// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, ptr [[P_LASTITER]], ptr [[P_LOWERBOUND]], ptr [[P_UPPERBOUND]], ptr [[P_STRIDE]], i32 1, i32 0), !dbg [[DBG181]] +// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[P_LOWERBOUND]], align 4, !dbg [[DBG181]] +// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[P_UPPERBOUND]], align 4, !dbg [[DBG181]] +// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], [[TMP6]], !dbg [[DBG181]] +// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = add i32 [[TMP8]], 1, !dbg [[DBG181]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER:%.*]], !dbg [[DBG181]] // CHECK-DEBUG: omp_loop.header: -// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER]] ], [ [[OMP_LOOP_NEXT:%.*]], [[OMP_LOOP_INC:%.*]] ], !dbg [[DBG162]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND:%.*]], !dbg [[DBG162]] +// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER]] ], [ [[OMP_LOOP_NEXT:%.*]], [[OMP_LOOP_INC:%.*]] ], !dbg [[DBG181]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND:%.*]], !dbg [[DBG181]] // CHECK-DEBUG: omp_loop.cond: -// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP:%.*]] = icmp ult i32 [[OMP_LOOP_IV]], [[TMP9]], !dbg [[DBG162]] -// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP]], label [[OMP_LOOP_BODY:%.*]], label [[OMP_LOOP_EXIT:%.*]], !dbg [[DBG162]] +// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP:%.*]] = icmp ult i32 [[OMP_LOOP_IV]], [[TMP9]], !dbg [[DBG181]] +// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP]], label [[OMP_LOOP_BODY:%.*]], label [[OMP_LOOP_EXIT:%.*]], !dbg [[DBG181]] // CHECK-DEBUG: omp_loop.exit: -// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM3]]), !dbg [[DBG162]] -// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB15]]), !dbg [[DBG164:![0-9]+]] -// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB16:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM4]]), !dbg [[DBG164]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER:%.*]], !dbg [[DBG162]] +// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM]]), !dbg [[DBG181]] +// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB15]]), !dbg [[DBG183:![0-9]+]] +// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB16:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM3]]), !dbg [[DBG183]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER:%.*]], !dbg [[DBG181]] // CHECK-DEBUG: omp_loop.after: -// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM5:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB18:[0-9]+]]), !dbg [[DBG165:![0-9]+]] -// CHECK-DEBUG-NEXT: br label [[OMP_PARALLEL:%.*]] -// CHECK-DEBUG: omp_parallel: -// CHECK-DEBUG-NEXT: [[GEP_A_ADDR215:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG214]], i32 0, i32 0 -// CHECK-DEBUG-NEXT: store ptr [[LOADGEP_A_ADDR]], ptr [[GEP_A_ADDR215]], align 8 -// CHECK-DEBUG-NEXT: [[GEP_B_ADDR216:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG214]], i32 0, i32 1 -// CHECK-DEBUG-NEXT: store ptr [[LOADGEP_B_ADDR]], ptr [[GEP_B_ADDR216]], align 8 -// CHECK-DEBUG-NEXT: [[GEP_R_ADDR217:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG214]], i32 0, i32 2 -// CHECK-DEBUG-NEXT: store ptr [[LOADGEP_R_ADDR]], ptr [[GEP_R_ADDR217]], align 8 -// CHECK-DEBUG-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB18]], i32 1, ptr @_Z14parallel_for_2Pfid..omp_par.22, ptr [[STRUCTARG214]]), !dbg [[DBG166:![0-9]+]] -// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT159:%.*]] -// CHECK-DEBUG: omp.par.exit11: -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[I160]], [[META170:![0-9]+]], !DIExpression(), [[META173:![0-9]+]]) -// CHECK-DEBUG-NEXT: store i32 0, ptr [[I160]], align 4, !dbg [[META173]] -// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_15]], ptr [[AGG_CAPTURED161]], i32 0, i32 0, !dbg [[DBG174:![0-9]+]] -// CHECK-DEBUG-NEXT: store ptr [[I160]], ptr [[TMP10]], align 8, !dbg [[DBG174]] -// CHECK-DEBUG-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_16]], ptr [[AGG_CAPTURED162]], i32 0, i32 0, !dbg [[DBG174]] -// CHECK-DEBUG-NEXT: [[TMP12:%.*]] = load i32, ptr [[I160]], align 4, !dbg [[DBG175:![0-9]+]] -// CHECK-DEBUG-NEXT: store i32 [[TMP12]], ptr [[TMP11]], align 4, !dbg [[DBG174]] -// CHECK-DEBUG-NEXT: call void @__captured_stmt.17(ptr [[DOTCOUNT_ADDR163]], ptr [[AGG_CAPTURED161]]), !dbg [[DBG174]] -// CHECK-DEBUG-NEXT: [[DOTCOUNT164:%.*]] = load i32, ptr [[DOTCOUNT_ADDR163]], align 4, !dbg [[DBG174]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER165:%.*]], !dbg [[DBG174]] -// CHECK-DEBUG: omp_loop.preheader165: -// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND179]], align 4, !dbg [[DBG174]] -// CHECK-DEBUG-NEXT: [[TMP13:%.*]] = sub i32 [[DOTCOUNT164]], 1, !dbg [[DBG174]] -// CHECK-DEBUG-NEXT: store i32 [[TMP13]], ptr [[P_UPPERBOUND180]], align 4, !dbg [[DBG174]] -// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE181]], align 4, !dbg [[DBG174]] -// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM182:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB39:[0-9]+]]), !dbg [[DBG174]] -// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB39]], i32 [[OMP_GLOBAL_THREAD_NUM182]], i32 34, ptr [[P_LASTITER178]], ptr [[P_LOWERBOUND179]], ptr [[P_UPPERBOUND180]], ptr [[P_STRIDE181]], i32 1, i32 0), !dbg [[DBG174]] -// CHECK-DEBUG-NEXT: [[TMP14:%.*]] = load i32, ptr [[P_LOWERBOUND179]], align 4, !dbg [[DBG174]] -// CHECK-DEBUG-NEXT: [[TMP15:%.*]] = load i32, ptr [[P_UPPERBOUND180]], align 4, !dbg [[DBG174]] -// CHECK-DEBUG-NEXT: [[TMP16:%.*]] = sub i32 [[TMP15]], [[TMP14]], !dbg [[DBG174]] -// CHECK-DEBUG-NEXT: [[TMP17:%.*]] = add i32 [[TMP16]], 1, !dbg [[DBG174]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER166:%.*]], !dbg [[DBG174]] -// CHECK-DEBUG: omp_loop.header166: -// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV172:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER165]] ], [ [[OMP_LOOP_NEXT174:%.*]], [[OMP_LOOP_INC169:%.*]] ], !dbg [[DBG174]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND167:%.*]], !dbg [[DBG174]] -// CHECK-DEBUG: omp_loop.cond167: -// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP173:%.*]] = icmp ult i32 [[OMP_LOOP_IV172]], [[TMP17]], !dbg [[DBG174]] -// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP173]], label [[OMP_LOOP_BODY168:%.*]], label [[OMP_LOOP_EXIT170:%.*]], !dbg [[DBG174]] -// CHECK-DEBUG: omp_loop.exit170: -// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB39]], i32 [[OMP_GLOBAL_THREAD_NUM182]]), !dbg [[DBG174]] -// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM183:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB39]]), !dbg [[DBG176:![0-9]+]] -// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB40:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM183]]), !dbg [[DBG176]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER171:%.*]], !dbg [[DBG174]] -// CHECK-DEBUG: omp_loop.after171: -// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]], !dbg [[DBG177:![0-9]+]] -// CHECK-DEBUG: omp.par.region.parallel.after: -// CHECK-DEBUG-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] -// CHECK-DEBUG: omp.par.pre_finalize: -// CHECK-DEBUG-NEXT: br label [[FINI184:%.*]] -// CHECK-DEBUG: .fini184: -// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT184_EXITSTUB:%.*]], !dbg [[DBG177]] -// CHECK-DEBUG: omp_loop.body168: -// CHECK-DEBUG-NEXT: [[TMP18:%.*]] = add i32 [[OMP_LOOP_IV172]], [[TMP14]], !dbg [[DBG176]] -// CHECK-DEBUG-NEXT: call void @__captured_stmt.18(ptr [[I160]], i32 [[TMP18]], ptr [[AGG_CAPTURED162]]), !dbg [[DBG174]] -// CHECK-DEBUG-NEXT: [[TMP19:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG178:![0-9]+]] -// CHECK-DEBUG-NEXT: [[CONV175:%.*]] = sitofp i32 [[TMP19]] to double, !dbg [[DBG178]] -// CHECK-DEBUG-NEXT: [[TMP20:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG176]] -// CHECK-DEBUG-NEXT: [[ADD176:%.*]] = fadd double [[CONV175]], [[TMP20]], !dbg [[DBG179:![0-9]+]] -// CHECK-DEBUG-NEXT: [[CONV177:%.*]] = fptrunc double [[ADD176]] to float, !dbg [[DBG178]] -// CHECK-DEBUG-NEXT: [[TMP21:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG180:![0-9]+]] -// CHECK-DEBUG-NEXT: store float [[CONV177]], ptr [[TMP21]], align 4, !dbg [[DBG181:![0-9]+]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC169]], !dbg [[DBG174]] -// CHECK-DEBUG: omp_loop.inc169: -// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT174]] = add nuw i32 [[OMP_LOOP_IV172]], 1, !dbg [[DBG174]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER166]], !dbg [[DBG174]] -// CHECK-DEBUG: omp_loop.body: -// CHECK-DEBUG-NEXT: [[TMP22:%.*]] = add i32 [[OMP_LOOP_IV]], [[TMP6]], !dbg [[DBG164]] -// CHECK-DEBUG-NEXT: call void @__captured_stmt.6(ptr [[I]], i32 [[TMP22]], ptr [[AGG_CAPTURED1]]), !dbg [[DBG162]] -// CHECK-DEBUG-NEXT: [[TMP23:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG182:![0-9]+]] -// CHECK-DEBUG-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP23]] to double, !dbg [[DBG182]] -// CHECK-DEBUG-NEXT: [[TMP24:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG164]] -// CHECK-DEBUG-NEXT: [[ADD:%.*]] = fadd double [[CONV]], [[TMP24]], !dbg [[DBG183:![0-9]+]] -// CHECK-DEBUG-NEXT: [[CONV2:%.*]] = fptrunc double [[ADD]] to float, !dbg [[DBG182]] -// CHECK-DEBUG-NEXT: [[TMP25:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG184:![0-9]+]] -// CHECK-DEBUG-NEXT: store float [[CONV2]], ptr [[TMP25]], align 4, !dbg [[DBG185:![0-9]+]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC]], !dbg [[DBG162]] -// CHECK-DEBUG: omp_loop.inc: -// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT]] = add nuw i32 [[OMP_LOOP_IV]], 1, !dbg [[DBG162]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER]], !dbg [[DBG162]] -// CHECK-DEBUG: omp.par.exit.exitStub: -// CHECK-DEBUG-NEXT: ret void -// -// -// CHECK-DEBUG-LABEL: define {{[^@]+}}@_Z14parallel_for_2Pfid..omp_par.22 -// CHECK-DEBUG-SAME: (ptr noalias [[TID_ADDR6:%.*]], ptr noalias [[ZERO_ADDR7:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] !dbg [[DBG186:![0-9]+]] { -// CHECK-DEBUG-NEXT: omp.par.entry8: -// CHECK-DEBUG-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 0 -// CHECK-DEBUG-NEXT: [[LOADGEP_A_ADDR:%.*]] = load ptr, ptr [[GEP_A_ADDR]], align 8 -// CHECK-DEBUG-NEXT: [[GEP_B_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 1 -// CHECK-DEBUG-NEXT: [[LOADGEP_B_ADDR:%.*]] = load ptr, ptr [[GEP_B_ADDR]], align 8 -// CHECK-DEBUG-NEXT: [[GEP_R_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 2 -// CHECK-DEBUG-NEXT: [[LOADGEP_R_ADDR:%.*]] = load ptr, ptr [[GEP_R_ADDR]], align 8 -// CHECK-DEBUG-NEXT: [[STRUCTARG209:%.*]] = alloca { ptr, ptr, ptr }, align 8 -// CHECK-DEBUG-NEXT: [[STRUCTARG:%.*]] = alloca { ptr, ptr, ptr }, align 8 -// CHECK-DEBUG-NEXT: [[P_LASTITER153:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_LOWERBOUND154:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_UPPERBOUND155:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_STRIDE156:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_LASTITER93:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_LOWERBOUND94:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_UPPERBOUND95:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_STRIDE96:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_LASTITER34:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_LOWERBOUND35:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_UPPERBOUND36:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_STRIDE37:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[TID_ADDR_LOCAL12:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR6]], align 4 -// CHECK-DEBUG-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL12]], align 4 -// CHECK-DEBUG-NEXT: [[TID13:%.*]] = load i32, ptr [[TID_ADDR_LOCAL12]], align 4 -// CHECK-DEBUG-NEXT: [[I16:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[AGG_CAPTURED17:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 -// CHECK-DEBUG-NEXT: [[AGG_CAPTURED18:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 -// CHECK-DEBUG-NEXT: [[DOTCOUNT_ADDR19:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[I75:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[AGG_CAPTURED76:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 8 -// CHECK-DEBUG-NEXT: [[AGG_CAPTURED77:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 4 -// CHECK-DEBUG-NEXT: [[DOTCOUNT_ADDR78:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[I135:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[AGG_CAPTURED136:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 8 -// CHECK-DEBUG-NEXT: [[AGG_CAPTURED137:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 4 -// CHECK-DEBUG-NEXT: [[DOTCOUNT_ADDR138:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOADGEP_A_ADDR]], [[META207:![0-9]+]], !DIExpression(), [[META208:![0-9]+]]) -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOADGEP_B_ADDR]], [[META209:![0-9]+]], !DIExpression(), [[META210:![0-9]+]]) -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOADGEP_R_ADDR]], [[META211:![0-9]+]], !DIExpression(), [[META212:![0-9]+]]) -// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION9:%.*]] -// CHECK-DEBUG: omp.par.region9: -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[I16]], [[META187:![0-9]+]], !DIExpression(), [[META192:![0-9]+]]) -// CHECK-DEBUG-NEXT: store i32 0, ptr [[I16]], align 4, !dbg [[META192]] -// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_5]], ptr [[AGG_CAPTURED17]], i32 0, i32 0, !dbg [[DBG193:![0-9]+]] -// CHECK-DEBUG-NEXT: store ptr [[I16]], ptr [[TMP2]], align 8, !dbg [[DBG193]] -// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_6]], ptr [[AGG_CAPTURED18]], i32 0, i32 0, !dbg [[DBG193]] -// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[I16]], align 4, !dbg [[DBG194:![0-9]+]] -// CHECK-DEBUG-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4, !dbg [[DBG193]] -// CHECK-DEBUG-NEXT: call void @__captured_stmt.7(ptr [[DOTCOUNT_ADDR19]], ptr [[AGG_CAPTURED17]]), !dbg [[DBG193]] -// CHECK-DEBUG-NEXT: [[DOTCOUNT20:%.*]] = load i32, ptr [[DOTCOUNT_ADDR19]], align 4, !dbg [[DBG193]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER21:%.*]], !dbg [[DBG193]] -// CHECK-DEBUG: omp_loop.preheader21: -// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND35]], align 4, !dbg [[DBG193]] -// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = sub i32 [[DOTCOUNT20]], 1, !dbg [[DBG193]] -// CHECK-DEBUG-NEXT: store i32 [[TMP5]], ptr [[P_UPPERBOUND36]], align 4, !dbg [[DBG193]] -// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE37]], align 4, !dbg [[DBG193]] -// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM38:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB20:[0-9]+]]), !dbg [[DBG193]] -// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB20]], i32 [[OMP_GLOBAL_THREAD_NUM38]], i32 34, ptr [[P_LASTITER34]], ptr [[P_LOWERBOUND35]], ptr [[P_UPPERBOUND36]], ptr [[P_STRIDE37]], i32 1, i32 0), !dbg [[DBG193]] -// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[P_LOWERBOUND35]], align 4, !dbg [[DBG193]] -// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[P_UPPERBOUND36]], align 4, !dbg [[DBG193]] -// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], [[TMP6]], !dbg [[DBG193]] -// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = add i32 [[TMP8]], 1, !dbg [[DBG193]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER22:%.*]], !dbg [[DBG193]] -// CHECK-DEBUG: omp_loop.header22: -// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV28:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER21]] ], [ [[OMP_LOOP_NEXT30:%.*]], [[OMP_LOOP_INC25:%.*]] ], !dbg [[DBG193]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND23:%.*]], !dbg [[DBG193]] -// CHECK-DEBUG: omp_loop.cond23: -// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP29:%.*]] = icmp ult i32 [[OMP_LOOP_IV28]], [[TMP9]], !dbg [[DBG193]] -// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP29]], label [[OMP_LOOP_BODY24:%.*]], label [[OMP_LOOP_EXIT26:%.*]], !dbg [[DBG193]] -// CHECK-DEBUG: omp_loop.exit26: -// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB20]], i32 [[OMP_GLOBAL_THREAD_NUM38]]), !dbg [[DBG193]] -// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM39:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB20]]), !dbg [[DBG195:![0-9]+]] -// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB21:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM39]]), !dbg [[DBG195]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER27:%.*]], !dbg [[DBG193]] -// CHECK-DEBUG: omp_loop.after27: -// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM40:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB23:[0-9]+]]), !dbg [[DBG196:![0-9]+]] // CHECK-DEBUG-NEXT: br label [[OMP_PARALLEL:%.*]] // CHECK-DEBUG: omp_parallel: // CHECK-DEBUG-NEXT: [[GEP_A_ADDR1:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 0 @@ -2189,312 +2026,475 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG-NEXT: store ptr [[LOADGEP_B_ADDR]], ptr [[GEP_B_ADDR2]], align 8 // CHECK-DEBUG-NEXT: [[GEP_R_ADDR3:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 2 // CHECK-DEBUG-NEXT: store ptr [[LOADGEP_R_ADDR]], ptr [[GEP_R_ADDR3]], align 8 -// CHECK-DEBUG-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB23]], i32 1, ptr @_Z14parallel_for_2Pfid..omp_par, ptr [[STRUCTARG]]), !dbg [[DBG197:![0-9]+]] -// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] -// CHECK-DEBUG: omp.par.exit46: -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[I75]], [[META201:![0-9]+]], !DIExpression(), [[META204:![0-9]+]]) -// CHECK-DEBUG-NEXT: store i32 0, ptr [[I75]], align 4, !dbg [[META204]] -// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_9]], ptr [[AGG_CAPTURED76]], i32 0, i32 0, !dbg [[DBG205:![0-9]+]] -// CHECK-DEBUG-NEXT: store ptr [[I75]], ptr [[TMP10]], align 8, !dbg [[DBG205]] -// CHECK-DEBUG-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_10]], ptr [[AGG_CAPTURED77]], i32 0, i32 0, !dbg [[DBG205]] -// CHECK-DEBUG-NEXT: [[TMP12:%.*]] = load i32, ptr [[I75]], align 4, !dbg [[DBG206:![0-9]+]] -// CHECK-DEBUG-NEXT: store i32 [[TMP12]], ptr [[TMP11]], align 4, !dbg [[DBG205]] -// CHECK-DEBUG-NEXT: call void @__captured_stmt.11(ptr [[DOTCOUNT_ADDR78]], ptr [[AGG_CAPTURED76]]), !dbg [[DBG205]] -// CHECK-DEBUG-NEXT: [[DOTCOUNT79:%.*]] = load i32, ptr [[DOTCOUNT_ADDR78]], align 4, !dbg [[DBG205]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER80:%.*]], !dbg [[DBG205]] -// CHECK-DEBUG: omp_loop.preheader80: -// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND94]], align 4, !dbg [[DBG205]] -// CHECK-DEBUG-NEXT: [[TMP13:%.*]] = sub i32 [[DOTCOUNT79]], 1, !dbg [[DBG205]] -// CHECK-DEBUG-NEXT: store i32 [[TMP13]], ptr [[P_UPPERBOUND95]], align 4, !dbg [[DBG205]] -// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE96]], align 4, !dbg [[DBG205]] -// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM97:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB28:[0-9]+]]), !dbg [[DBG205]] -// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB28]], i32 [[OMP_GLOBAL_THREAD_NUM97]], i32 34, ptr [[P_LASTITER93]], ptr [[P_LOWERBOUND94]], ptr [[P_UPPERBOUND95]], ptr [[P_STRIDE96]], i32 1, i32 0), !dbg [[DBG205]] -// CHECK-DEBUG-NEXT: [[TMP14:%.*]] = load i32, ptr [[P_LOWERBOUND94]], align 4, !dbg [[DBG205]] -// CHECK-DEBUG-NEXT: [[TMP15:%.*]] = load i32, ptr [[P_UPPERBOUND95]], align 4, !dbg [[DBG205]] -// CHECK-DEBUG-NEXT: [[TMP16:%.*]] = sub i32 [[TMP15]], [[TMP14]], !dbg [[DBG205]] -// CHECK-DEBUG-NEXT: [[TMP17:%.*]] = add i32 [[TMP16]], 1, !dbg [[DBG205]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER81:%.*]], !dbg [[DBG205]] -// CHECK-DEBUG: omp_loop.header81: -// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV87:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER80]] ], [ [[OMP_LOOP_NEXT89:%.*]], [[OMP_LOOP_INC84:%.*]] ], !dbg [[DBG205]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND82:%.*]], !dbg [[DBG205]] -// CHECK-DEBUG: omp_loop.cond82: -// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP88:%.*]] = icmp ult i32 [[OMP_LOOP_IV87]], [[TMP17]], !dbg [[DBG205]] -// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP88]], label [[OMP_LOOP_BODY83:%.*]], label [[OMP_LOOP_EXIT85:%.*]], !dbg [[DBG205]] -// CHECK-DEBUG: omp_loop.exit85: -// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB28]], i32 [[OMP_GLOBAL_THREAD_NUM97]]), !dbg [[DBG205]] -// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM98:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB28]]), !dbg [[DBG207:![0-9]+]] -// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB29:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM98]]), !dbg [[DBG207]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER86:%.*]], !dbg [[DBG205]] -// CHECK-DEBUG: omp_loop.after86: -// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM99:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB31:[0-9]+]]), !dbg [[DBG208:![0-9]+]] -// CHECK-DEBUG-NEXT: br label [[OMP_PARALLEL213:%.*]] -// CHECK-DEBUG: omp_parallel213: -// CHECK-DEBUG-NEXT: [[GEP_A_ADDR210:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG209]], i32 0, i32 0 -// CHECK-DEBUG-NEXT: store ptr [[LOADGEP_A_ADDR]], ptr [[GEP_A_ADDR210]], align 8 -// CHECK-DEBUG-NEXT: [[GEP_B_ADDR211:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG209]], i32 0, i32 1 -// CHECK-DEBUG-NEXT: store ptr [[LOADGEP_B_ADDR]], ptr [[GEP_B_ADDR211]], align 8 -// CHECK-DEBUG-NEXT: [[GEP_R_ADDR212:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG209]], i32 0, i32 2 -// CHECK-DEBUG-NEXT: store ptr [[LOADGEP_R_ADDR]], ptr [[GEP_R_ADDR212]], align 8 -// CHECK-DEBUG-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB31]], i32 1, ptr @_Z14parallel_for_2Pfid..omp_par.21, ptr [[STRUCTARG209]]), !dbg [[DBG209:![0-9]+]] -// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT134:%.*]] -// CHECK-DEBUG: omp.par.exit105: -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[I135]], [[META213:![0-9]+]], !DIExpression(), [[META216:![0-9]+]]) -// CHECK-DEBUG-NEXT: store i32 0, ptr [[I135]], align 4, !dbg [[META216]] -// CHECK-DEBUG-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_13]], ptr [[AGG_CAPTURED136]], i32 0, i32 0, !dbg [[DBG217:![0-9]+]] -// CHECK-DEBUG-NEXT: store ptr [[I135]], ptr [[TMP18]], align 8, !dbg [[DBG217]] -// CHECK-DEBUG-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_14]], ptr [[AGG_CAPTURED137]], i32 0, i32 0, !dbg [[DBG217]] -// CHECK-DEBUG-NEXT: [[TMP20:%.*]] = load i32, ptr [[I135]], align 4, !dbg [[DBG218:![0-9]+]] -// CHECK-DEBUG-NEXT: store i32 [[TMP20]], ptr [[TMP19]], align 4, !dbg [[DBG217]] -// CHECK-DEBUG-NEXT: call void @__captured_stmt.15(ptr [[DOTCOUNT_ADDR138]], ptr [[AGG_CAPTURED136]]), !dbg [[DBG217]] -// CHECK-DEBUG-NEXT: [[DOTCOUNT139:%.*]] = load i32, ptr [[DOTCOUNT_ADDR138]], align 4, !dbg [[DBG217]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER140:%.*]], !dbg [[DBG217]] -// CHECK-DEBUG: omp_loop.preheader140: -// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND154]], align 4, !dbg [[DBG217]] -// CHECK-DEBUG-NEXT: [[TMP21:%.*]] = sub i32 [[DOTCOUNT139]], 1, !dbg [[DBG217]] -// CHECK-DEBUG-NEXT: store i32 [[TMP21]], ptr [[P_UPPERBOUND155]], align 4, !dbg [[DBG217]] -// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE156]], align 4, !dbg [[DBG217]] -// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM157:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB36:[0-9]+]]), !dbg [[DBG217]] -// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB36]], i32 [[OMP_GLOBAL_THREAD_NUM157]], i32 34, ptr [[P_LASTITER153]], ptr [[P_LOWERBOUND154]], ptr [[P_UPPERBOUND155]], ptr [[P_STRIDE156]], i32 1, i32 0), !dbg [[DBG217]] -// CHECK-DEBUG-NEXT: [[TMP22:%.*]] = load i32, ptr [[P_LOWERBOUND154]], align 4, !dbg [[DBG217]] -// CHECK-DEBUG-NEXT: [[TMP23:%.*]] = load i32, ptr [[P_UPPERBOUND155]], align 4, !dbg [[DBG217]] -// CHECK-DEBUG-NEXT: [[TMP24:%.*]] = sub i32 [[TMP23]], [[TMP22]], !dbg [[DBG217]] -// CHECK-DEBUG-NEXT: [[TMP25:%.*]] = add i32 [[TMP24]], 1, !dbg [[DBG217]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER141:%.*]], !dbg [[DBG217]] -// CHECK-DEBUG: omp_loop.header141: -// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV147:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER140]] ], [ [[OMP_LOOP_NEXT149:%.*]], [[OMP_LOOP_INC144:%.*]] ], !dbg [[DBG217]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND142:%.*]], !dbg [[DBG217]] -// CHECK-DEBUG: omp_loop.cond142: -// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP148:%.*]] = icmp ult i32 [[OMP_LOOP_IV147]], [[TMP25]], !dbg [[DBG217]] -// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP148]], label [[OMP_LOOP_BODY143:%.*]], label [[OMP_LOOP_EXIT145:%.*]], !dbg [[DBG217]] -// CHECK-DEBUG: omp_loop.exit145: -// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB36]], i32 [[OMP_GLOBAL_THREAD_NUM157]]), !dbg [[DBG217]] -// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM158:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB36]]), !dbg [[DBG219:![0-9]+]] -// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB37:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM158]]), !dbg [[DBG219]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER146:%.*]], !dbg [[DBG217]] -// CHECK-DEBUG: omp_loop.after146: -// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION9_PARALLEL_AFTER:%.*]], !dbg [[DBG220:![0-9]+]] -// CHECK-DEBUG: omp.par.region9.parallel.after: -// CHECK-DEBUG-NEXT: br label [[OMP_PAR_PRE_FINALIZE10:%.*]] -// CHECK-DEBUG: omp.par.pre_finalize10: -// CHECK-DEBUG-NEXT: br label [[FINI159:%.*]] -// CHECK-DEBUG: .fini159: -// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT159_EXITSTUB:%.*]], !dbg [[DBG220]] -// CHECK-DEBUG: omp_loop.body143: -// CHECK-DEBUG-NEXT: [[TMP26:%.*]] = add i32 [[OMP_LOOP_IV147]], [[TMP22]], !dbg [[DBG219]] -// CHECK-DEBUG-NEXT: call void @__captured_stmt.16(ptr [[I135]], i32 [[TMP26]], ptr [[AGG_CAPTURED137]]), !dbg [[DBG217]] -// CHECK-DEBUG-NEXT: [[TMP27:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG221:![0-9]+]] -// CHECK-DEBUG-NEXT: [[CONV150:%.*]] = sitofp i32 [[TMP27]] to double, !dbg [[DBG221]] -// CHECK-DEBUG-NEXT: [[TMP28:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG219]] -// CHECK-DEBUG-NEXT: [[ADD151:%.*]] = fadd double [[CONV150]], [[TMP28]], !dbg [[DBG222:![0-9]+]] -// CHECK-DEBUG-NEXT: [[CONV152:%.*]] = fptrunc double [[ADD151]] to float, !dbg [[DBG221]] -// CHECK-DEBUG-NEXT: [[TMP29:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG223:![0-9]+]] -// CHECK-DEBUG-NEXT: store float [[CONV152]], ptr [[TMP29]], align 4, !dbg [[DBG224:![0-9]+]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC144]], !dbg [[DBG217]] -// CHECK-DEBUG: omp_loop.inc144: -// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT149]] = add nuw i32 [[OMP_LOOP_IV147]], 1, !dbg [[DBG217]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER141]], !dbg [[DBG217]] -// CHECK-DEBUG: omp_loop.body83: -// CHECK-DEBUG-NEXT: [[TMP30:%.*]] = add i32 [[OMP_LOOP_IV87]], [[TMP14]], !dbg [[DBG207]] -// CHECK-DEBUG-NEXT: call void @__captured_stmt.12(ptr [[I75]], i32 [[TMP30]], ptr [[AGG_CAPTURED77]]), !dbg [[DBG205]] -// CHECK-DEBUG-NEXT: [[TMP31:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG225:![0-9]+]] -// CHECK-DEBUG-NEXT: [[CONV90:%.*]] = sitofp i32 [[TMP31]] to double, !dbg [[DBG225]] -// CHECK-DEBUG-NEXT: [[TMP32:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG207]] -// CHECK-DEBUG-NEXT: [[ADD91:%.*]] = fadd double [[CONV90]], [[TMP32]], !dbg [[DBG226:![0-9]+]] -// CHECK-DEBUG-NEXT: [[CONV92:%.*]] = fptrunc double [[ADD91]] to float, !dbg [[DBG225]] -// CHECK-DEBUG-NEXT: [[TMP33:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG227:![0-9]+]] -// CHECK-DEBUG-NEXT: store float [[CONV92]], ptr [[TMP33]], align 4, !dbg [[DBG228:![0-9]+]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC84]], !dbg [[DBG205]] -// CHECK-DEBUG: omp_loop.inc84: -// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT89]] = add nuw i32 [[OMP_LOOP_IV87]], 1, !dbg [[DBG205]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER81]], !dbg [[DBG205]] -// CHECK-DEBUG: omp_loop.body24: -// CHECK-DEBUG-NEXT: [[TMP34:%.*]] = add i32 [[OMP_LOOP_IV28]], [[TMP6]], !dbg [[DBG195]] -// CHECK-DEBUG-NEXT: call void @__captured_stmt.8(ptr [[I16]], i32 [[TMP34]], ptr [[AGG_CAPTURED18]]), !dbg [[DBG193]] -// CHECK-DEBUG-NEXT: [[TMP35:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG229:![0-9]+]] -// CHECK-DEBUG-NEXT: [[CONV31:%.*]] = sitofp i32 [[TMP35]] to double, !dbg [[DBG229]] -// CHECK-DEBUG-NEXT: [[TMP36:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG195]] -// CHECK-DEBUG-NEXT: [[ADD32:%.*]] = fadd double [[CONV31]], [[TMP36]], !dbg [[DBG230:![0-9]+]] -// CHECK-DEBUG-NEXT: [[CONV33:%.*]] = fptrunc double [[ADD32]] to float, !dbg [[DBG229]] -// CHECK-DEBUG-NEXT: [[TMP37:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG231:![0-9]+]] -// CHECK-DEBUG-NEXT: store float [[CONV33]], ptr [[TMP37]], align 4, !dbg [[DBG232:![0-9]+]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC25]], !dbg [[DBG193]] -// CHECK-DEBUG: omp_loop.inc25: -// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT30]] = add nuw i32 [[OMP_LOOP_IV28]], 1, !dbg [[DBG193]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER22]], !dbg [[DBG193]] -// CHECK-DEBUG: omp.par.exit11.exitStub: +// CHECK-DEBUG-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB18:[0-9]+]], i32 1, ptr @_Z14parallel_for_2Pfid..omp_par.22, ptr [[STRUCTARG]]), !dbg [[DBG184:![0-9]+]] +// CHECK-DEBUG-NEXT: br label [[OMP_PAR_EXIT9:%.*]] +// CHECK-DEBUG: omp.par.exit9: +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[I156]], [[META188:![0-9]+]], !DIExpression(), [[META191:![0-9]+]]) +// CHECK-DEBUG-NEXT: store i32 0, ptr [[I156]], align 4, !dbg [[META191]] +// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_15]], ptr [[AGG_CAPTURED157]], i32 0, i32 0, !dbg [[DBG192:![0-9]+]] +// CHECK-DEBUG-NEXT: store ptr [[I156]], ptr [[TMP10]], align 8, !dbg [[DBG192]] +// CHECK-DEBUG-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_16]], ptr [[AGG_CAPTURED158]], i32 0, i32 0, !dbg [[DBG192]] +// CHECK-DEBUG-NEXT: [[TMP12:%.*]] = load i32, ptr [[I156]], align 4, !dbg [[DBG193:![0-9]+]] +// CHECK-DEBUG-NEXT: store i32 [[TMP12]], ptr [[TMP11]], align 4, !dbg [[DBG192]] +// CHECK-DEBUG-NEXT: call void @__captured_stmt.17(ptr [[DOTCOUNT_ADDR159]], ptr [[AGG_CAPTURED157]]), !dbg [[DBG192]] +// CHECK-DEBUG-NEXT: [[DOTCOUNT160:%.*]] = load i32, ptr [[DOTCOUNT_ADDR159]], align 4, !dbg [[DBG192]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER161:%.*]], !dbg [[DBG192]] +// CHECK-DEBUG: omp_loop.preheader161: +// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND175]], align 4, !dbg [[DBG192]] +// CHECK-DEBUG-NEXT: [[TMP13:%.*]] = sub i32 [[DOTCOUNT160]], 1, !dbg [[DBG192]] +// CHECK-DEBUG-NEXT: store i32 [[TMP13]], ptr [[P_UPPERBOUND176]], align 4, !dbg [[DBG192]] +// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE177]], align 4, !dbg [[DBG192]] +// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM178:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB39:[0-9]+]]), !dbg [[DBG192]] +// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB39]], i32 [[OMP_GLOBAL_THREAD_NUM178]], i32 34, ptr [[P_LASTITER174]], ptr [[P_LOWERBOUND175]], ptr [[P_UPPERBOUND176]], ptr [[P_STRIDE177]], i32 1, i32 0), !dbg [[DBG192]] +// CHECK-DEBUG-NEXT: [[TMP14:%.*]] = load i32, ptr [[P_LOWERBOUND175]], align 4, !dbg [[DBG192]] +// CHECK-DEBUG-NEXT: [[TMP15:%.*]] = load i32, ptr [[P_UPPERBOUND176]], align 4, !dbg [[DBG192]] +// CHECK-DEBUG-NEXT: [[TMP16:%.*]] = sub i32 [[TMP15]], [[TMP14]], !dbg [[DBG192]] +// CHECK-DEBUG-NEXT: [[TMP17:%.*]] = add i32 [[TMP16]], 1, !dbg [[DBG192]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER162:%.*]], !dbg [[DBG192]] +// CHECK-DEBUG: omp_loop.header162: +// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV168:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER161]] ], [ [[OMP_LOOP_NEXT170:%.*]], [[OMP_LOOP_INC165:%.*]] ], !dbg [[DBG192]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND163:%.*]], !dbg [[DBG192]] +// CHECK-DEBUG: omp_loop.cond163: +// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP169:%.*]] = icmp ult i32 [[OMP_LOOP_IV168]], [[TMP17]], !dbg [[DBG192]] +// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP169]], label [[OMP_LOOP_BODY164:%.*]], label [[OMP_LOOP_EXIT166:%.*]], !dbg [[DBG192]] +// CHECK-DEBUG: omp_loop.exit166: +// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB39]], i32 [[OMP_GLOBAL_THREAD_NUM178]]), !dbg [[DBG192]] +// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM179:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB39]]), !dbg [[DBG194:![0-9]+]] +// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB40:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM179]]), !dbg [[DBG194]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER167:%.*]], !dbg [[DBG192]] +// CHECK-DEBUG: omp_loop.after167: +// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]], !dbg [[DBG195:![0-9]+]] +// CHECK-DEBUG: omp.par.region.parallel.after: +// CHECK-DEBUG-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +// CHECK-DEBUG: omp.par.pre_finalize: +// CHECK-DEBUG-NEXT: br label [[DOTFINI180:%.*]] +// CHECK-DEBUG: .fini180: +// CHECK-DEBUG-NEXT: br label [[OMP_PAR_EXIT_EXITSTUB:%.*]], !dbg [[DBG195]] +// CHECK-DEBUG: omp_loop.body164: +// CHECK-DEBUG-NEXT: [[TMP18:%.*]] = add i32 [[OMP_LOOP_IV168]], [[TMP14]], !dbg [[DBG194]] +// CHECK-DEBUG-NEXT: call void @__captured_stmt.18(ptr [[I156]], i32 [[TMP18]], ptr [[AGG_CAPTURED158]]), !dbg [[DBG192]] +// CHECK-DEBUG-NEXT: [[TMP19:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG196:![0-9]+]] +// CHECK-DEBUG-NEXT: [[CONV171:%.*]] = sitofp i32 [[TMP19]] to double, !dbg [[DBG196]] +// CHECK-DEBUG-NEXT: [[TMP20:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG194]] +// CHECK-DEBUG-NEXT: [[ADD172:%.*]] = fadd double [[CONV171]], [[TMP20]], !dbg [[DBG197:![0-9]+]] +// CHECK-DEBUG-NEXT: [[CONV173:%.*]] = fptrunc double [[ADD172]] to float, !dbg [[DBG196]] +// CHECK-DEBUG-NEXT: [[TMP21:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG198:![0-9]+]] +// CHECK-DEBUG-NEXT: store float [[CONV173]], ptr [[TMP21]], align 4, !dbg [[DBG199:![0-9]+]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC165]], !dbg [[DBG192]] +// CHECK-DEBUG: omp_loop.inc165: +// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT170]] = add nuw i32 [[OMP_LOOP_IV168]], 1, !dbg [[DBG192]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER162]], !dbg [[DBG192]] +// CHECK-DEBUG: omp_loop.body: +// CHECK-DEBUG-NEXT: [[TMP22:%.*]] = add i32 [[OMP_LOOP_IV]], [[TMP6]], !dbg [[DBG183]] +// CHECK-DEBUG-NEXT: call void @__captured_stmt.6(ptr [[I]], i32 [[TMP22]], ptr [[AGG_CAPTURED1]]), !dbg [[DBG181]] +// CHECK-DEBUG-NEXT: [[TMP23:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG200:![0-9]+]] +// CHECK-DEBUG-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP23]] to double, !dbg [[DBG200]] +// CHECK-DEBUG-NEXT: [[TMP24:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG183]] +// CHECK-DEBUG-NEXT: [[ADD:%.*]] = fadd double [[CONV]], [[TMP24]], !dbg [[DBG201:![0-9]+]] +// CHECK-DEBUG-NEXT: [[CONV2:%.*]] = fptrunc double [[ADD]] to float, !dbg [[DBG200]] +// CHECK-DEBUG-NEXT: [[TMP25:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG202:![0-9]+]] +// CHECK-DEBUG-NEXT: store float [[CONV2]], ptr [[TMP25]], align 4, !dbg [[DBG203:![0-9]+]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC]], !dbg [[DBG181]] +// CHECK-DEBUG: omp_loop.inc: +// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT]] = add nuw i32 [[OMP_LOOP_IV]], 1, !dbg [[DBG181]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER]], !dbg [[DBG181]] +// CHECK-DEBUG: omp.par.exit.exitStub: +// CHECK-DEBUG-NEXT: ret void +// +// +// CHECK-DEBUG-LABEL: define {{[^@]+}}@_Z14parallel_for_2Pfid..omp_par.22 +// CHECK-DEBUG-SAME: (ptr noalias [[TID_ADDR4:%.*]], ptr noalias [[ZERO_ADDR5:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] !dbg [[DBG204:![0-9]+]] { +// CHECK-DEBUG-NEXT: omp.par.entry6: +// CHECK-DEBUG-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 0 +// CHECK-DEBUG-NEXT: [[LOADGEP_A_ADDR:%.*]] = load ptr, ptr [[GEP_A_ADDR]], align 8, !align [[META46]] +// CHECK-DEBUG-NEXT: [[GEP_B_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 1 +// CHECK-DEBUG-NEXT: [[LOADGEP_B_ADDR:%.*]] = load ptr, ptr [[GEP_B_ADDR]], align 8, !align [[META88]] +// CHECK-DEBUG-NEXT: [[GEP_R_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 2 +// CHECK-DEBUG-NEXT: [[LOADGEP_R_ADDR:%.*]] = load ptr, ptr [[GEP_R_ADDR]], align 8, !align [[META88]] +// CHECK-DEBUG-NEXT: [[STRUCTARG205:%.*]] = alloca { ptr, ptr, ptr }, align 8 +// CHECK-DEBUG-NEXT: [[STRUCTARG:%.*]] = alloca { ptr, ptr, ptr }, align 8 +// CHECK-DEBUG-NEXT: [[P_LASTITER149:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_LOWERBOUND150:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_UPPERBOUND151:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_STRIDE152:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_LASTITER90:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_LOWERBOUND91:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_UPPERBOUND92:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_STRIDE93:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_LASTITER32:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_LOWERBOUND33:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_UPPERBOUND34:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_STRIDE35:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[TID_ADDR_LOCAL10:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR4]], align 4 +// CHECK-DEBUG-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL10]], align 4 +// CHECK-DEBUG-NEXT: [[TID11:%.*]] = load i32, ptr [[TID_ADDR_LOCAL10]], align 4 +// CHECK-DEBUG-NEXT: [[I14:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[AGG_CAPTURED15:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 +// CHECK-DEBUG-NEXT: [[AGG_CAPTURED16:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 +// CHECK-DEBUG-NEXT: [[DOTCOUNT_ADDR17:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[I72:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[AGG_CAPTURED73:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 8 +// CHECK-DEBUG-NEXT: [[AGG_CAPTURED74:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 4 +// CHECK-DEBUG-NEXT: [[DOTCOUNT_ADDR75:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[I131:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[AGG_CAPTURED132:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 8 +// CHECK-DEBUG-NEXT: [[AGG_CAPTURED133:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 4 +// CHECK-DEBUG-NEXT: [[DOTCOUNT_ADDR134:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOADGEP_A_ADDR]], [[META205:![0-9]+]], !DIExpression(), [[META206:![0-9]+]]) +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOADGEP_B_ADDR]], [[META207:![0-9]+]], !DIExpression(), [[META208:![0-9]+]]) +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOADGEP_R_ADDR]], [[META209:![0-9]+]], !DIExpression(), [[META210:![0-9]+]]) +// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION7:%.*]] +// CHECK-DEBUG: omp.par.region7: +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[I14]], [[META211:![0-9]+]], !DIExpression(), [[META216:![0-9]+]]) +// CHECK-DEBUG-NEXT: store i32 0, ptr [[I14]], align 4, !dbg [[META216]] +// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_5]], ptr [[AGG_CAPTURED15]], i32 0, i32 0, !dbg [[DBG217:![0-9]+]] +// CHECK-DEBUG-NEXT: store ptr [[I14]], ptr [[TMP2]], align 8, !dbg [[DBG217]] +// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_6]], ptr [[AGG_CAPTURED16]], i32 0, i32 0, !dbg [[DBG217]] +// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[I14]], align 4, !dbg [[DBG218:![0-9]+]] +// CHECK-DEBUG-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4, !dbg [[DBG217]] +// CHECK-DEBUG-NEXT: call void @__captured_stmt.7(ptr [[DOTCOUNT_ADDR17]], ptr [[AGG_CAPTURED15]]), !dbg [[DBG217]] +// CHECK-DEBUG-NEXT: [[DOTCOUNT18:%.*]] = load i32, ptr [[DOTCOUNT_ADDR17]], align 4, !dbg [[DBG217]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER19:%.*]], !dbg [[DBG217]] +// CHECK-DEBUG: omp_loop.preheader19: +// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND33]], align 4, !dbg [[DBG217]] +// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = sub i32 [[DOTCOUNT18]], 1, !dbg [[DBG217]] +// CHECK-DEBUG-NEXT: store i32 [[TMP5]], ptr [[P_UPPERBOUND34]], align 4, !dbg [[DBG217]] +// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE35]], align 4, !dbg [[DBG217]] +// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM36:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB20:[0-9]+]]), !dbg [[DBG217]] +// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB20]], i32 [[OMP_GLOBAL_THREAD_NUM36]], i32 34, ptr [[P_LASTITER32]], ptr [[P_LOWERBOUND33]], ptr [[P_UPPERBOUND34]], ptr [[P_STRIDE35]], i32 1, i32 0), !dbg [[DBG217]] +// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[P_LOWERBOUND33]], align 4, !dbg [[DBG217]] +// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[P_UPPERBOUND34]], align 4, !dbg [[DBG217]] +// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], [[TMP6]], !dbg [[DBG217]] +// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = add i32 [[TMP8]], 1, !dbg [[DBG217]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER20:%.*]], !dbg [[DBG217]] +// CHECK-DEBUG: omp_loop.header20: +// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV26:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER19]] ], [ [[OMP_LOOP_NEXT28:%.*]], [[OMP_LOOP_INC23:%.*]] ], !dbg [[DBG217]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND21:%.*]], !dbg [[DBG217]] +// CHECK-DEBUG: omp_loop.cond21: +// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP27:%.*]] = icmp ult i32 [[OMP_LOOP_IV26]], [[TMP9]], !dbg [[DBG217]] +// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP27]], label [[OMP_LOOP_BODY22:%.*]], label [[OMP_LOOP_EXIT24:%.*]], !dbg [[DBG217]] +// CHECK-DEBUG: omp_loop.exit24: +// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB20]], i32 [[OMP_GLOBAL_THREAD_NUM36]]), !dbg [[DBG217]] +// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM37:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB20]]), !dbg [[DBG219:![0-9]+]] +// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB21:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM37]]), !dbg [[DBG219]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER25:%.*]], !dbg [[DBG217]] +// CHECK-DEBUG: omp_loop.after25: +// CHECK-DEBUG-NEXT: br label [[OMP_PARALLEL:%.*]] +// CHECK-DEBUG: omp_parallel: +// CHECK-DEBUG-NEXT: [[GEP_A_ADDR1:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 0 +// CHECK-DEBUG-NEXT: store ptr [[LOADGEP_A_ADDR]], ptr [[GEP_A_ADDR1]], align 8 +// CHECK-DEBUG-NEXT: [[GEP_B_ADDR2:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 1 +// CHECK-DEBUG-NEXT: store ptr [[LOADGEP_B_ADDR]], ptr [[GEP_B_ADDR2]], align 8 +// CHECK-DEBUG-NEXT: [[GEP_R_ADDR3:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 2 +// CHECK-DEBUG-NEXT: store ptr [[LOADGEP_R_ADDR]], ptr [[GEP_R_ADDR3]], align 8 +// CHECK-DEBUG-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB23:[0-9]+]], i32 1, ptr @_Z14parallel_for_2Pfid..omp_par, ptr [[STRUCTARG]]), !dbg [[DBG220:![0-9]+]] +// CHECK-DEBUG-NEXT: br label [[OMP_PAR_EXIT43:%.*]] +// CHECK-DEBUG: omp.par.exit43: +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[I72]], [[META224:![0-9]+]], !DIExpression(), [[META227:![0-9]+]]) +// CHECK-DEBUG-NEXT: store i32 0, ptr [[I72]], align 4, !dbg [[META227]] +// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_9]], ptr [[AGG_CAPTURED73]], i32 0, i32 0, !dbg [[DBG228:![0-9]+]] +// CHECK-DEBUG-NEXT: store ptr [[I72]], ptr [[TMP10]], align 8, !dbg [[DBG228]] +// CHECK-DEBUG-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_10]], ptr [[AGG_CAPTURED74]], i32 0, i32 0, !dbg [[DBG228]] +// CHECK-DEBUG-NEXT: [[TMP12:%.*]] = load i32, ptr [[I72]], align 4, !dbg [[DBG229:![0-9]+]] +// CHECK-DEBUG-NEXT: store i32 [[TMP12]], ptr [[TMP11]], align 4, !dbg [[DBG228]] +// CHECK-DEBUG-NEXT: call void @__captured_stmt.11(ptr [[DOTCOUNT_ADDR75]], ptr [[AGG_CAPTURED73]]), !dbg [[DBG228]] +// CHECK-DEBUG-NEXT: [[DOTCOUNT76:%.*]] = load i32, ptr [[DOTCOUNT_ADDR75]], align 4, !dbg [[DBG228]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER77:%.*]], !dbg [[DBG228]] +// CHECK-DEBUG: omp_loop.preheader77: +// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND91]], align 4, !dbg [[DBG228]] +// CHECK-DEBUG-NEXT: [[TMP13:%.*]] = sub i32 [[DOTCOUNT76]], 1, !dbg [[DBG228]] +// CHECK-DEBUG-NEXT: store i32 [[TMP13]], ptr [[P_UPPERBOUND92]], align 4, !dbg [[DBG228]] +// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE93]], align 4, !dbg [[DBG228]] +// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM94:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB28:[0-9]+]]), !dbg [[DBG228]] +// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB28]], i32 [[OMP_GLOBAL_THREAD_NUM94]], i32 34, ptr [[P_LASTITER90]], ptr [[P_LOWERBOUND91]], ptr [[P_UPPERBOUND92]], ptr [[P_STRIDE93]], i32 1, i32 0), !dbg [[DBG228]] +// CHECK-DEBUG-NEXT: [[TMP14:%.*]] = load i32, ptr [[P_LOWERBOUND91]], align 4, !dbg [[DBG228]] +// CHECK-DEBUG-NEXT: [[TMP15:%.*]] = load i32, ptr [[P_UPPERBOUND92]], align 4, !dbg [[DBG228]] +// CHECK-DEBUG-NEXT: [[TMP16:%.*]] = sub i32 [[TMP15]], [[TMP14]], !dbg [[DBG228]] +// CHECK-DEBUG-NEXT: [[TMP17:%.*]] = add i32 [[TMP16]], 1, !dbg [[DBG228]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER78:%.*]], !dbg [[DBG228]] +// CHECK-DEBUG: omp_loop.header78: +// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV84:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER77]] ], [ [[OMP_LOOP_NEXT86:%.*]], [[OMP_LOOP_INC81:%.*]] ], !dbg [[DBG228]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND79:%.*]], !dbg [[DBG228]] +// CHECK-DEBUG: omp_loop.cond79: +// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP85:%.*]] = icmp ult i32 [[OMP_LOOP_IV84]], [[TMP17]], !dbg [[DBG228]] +// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP85]], label [[OMP_LOOP_BODY80:%.*]], label [[OMP_LOOP_EXIT82:%.*]], !dbg [[DBG228]] +// CHECK-DEBUG: omp_loop.exit82: +// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB28]], i32 [[OMP_GLOBAL_THREAD_NUM94]]), !dbg [[DBG228]] +// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM95:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB28]]), !dbg [[DBG230:![0-9]+]] +// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB29:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM95]]), !dbg [[DBG230]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER83:%.*]], !dbg [[DBG228]] +// CHECK-DEBUG: omp_loop.after83: +// CHECK-DEBUG-NEXT: br label [[OMP_PARALLEL209:%.*]] +// CHECK-DEBUG: omp_parallel209: +// CHECK-DEBUG-NEXT: [[GEP_A_ADDR206:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG205]], i32 0, i32 0 +// CHECK-DEBUG-NEXT: store ptr [[LOADGEP_A_ADDR]], ptr [[GEP_A_ADDR206]], align 8 +// CHECK-DEBUG-NEXT: [[GEP_B_ADDR207:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG205]], i32 0, i32 1 +// CHECK-DEBUG-NEXT: store ptr [[LOADGEP_B_ADDR]], ptr [[GEP_B_ADDR207]], align 8 +// CHECK-DEBUG-NEXT: [[GEP_R_ADDR208:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG205]], i32 0, i32 2 +// CHECK-DEBUG-NEXT: store ptr [[LOADGEP_R_ADDR]], ptr [[GEP_R_ADDR208]], align 8 +// CHECK-DEBUG-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB31:[0-9]+]], i32 1, ptr @_Z14parallel_for_2Pfid..omp_par.21, ptr [[STRUCTARG205]]), !dbg [[DBG231:![0-9]+]] +// CHECK-DEBUG-NEXT: br label [[OMP_PAR_EXIT101:%.*]] +// CHECK-DEBUG: omp.par.exit101: +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[I131]], [[META235:![0-9]+]], !DIExpression(), [[META238:![0-9]+]]) +// CHECK-DEBUG-NEXT: store i32 0, ptr [[I131]], align 4, !dbg [[META238]] +// CHECK-DEBUG-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_13]], ptr [[AGG_CAPTURED132]], i32 0, i32 0, !dbg [[DBG239:![0-9]+]] +// CHECK-DEBUG-NEXT: store ptr [[I131]], ptr [[TMP18]], align 8, !dbg [[DBG239]] +// CHECK-DEBUG-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_14]], ptr [[AGG_CAPTURED133]], i32 0, i32 0, !dbg [[DBG239]] +// CHECK-DEBUG-NEXT: [[TMP20:%.*]] = load i32, ptr [[I131]], align 4, !dbg [[DBG240:![0-9]+]] +// CHECK-DEBUG-NEXT: store i32 [[TMP20]], ptr [[TMP19]], align 4, !dbg [[DBG239]] +// CHECK-DEBUG-NEXT: call void @__captured_stmt.15(ptr [[DOTCOUNT_ADDR134]], ptr [[AGG_CAPTURED132]]), !dbg [[DBG239]] +// CHECK-DEBUG-NEXT: [[DOTCOUNT135:%.*]] = load i32, ptr [[DOTCOUNT_ADDR134]], align 4, !dbg [[DBG239]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER136:%.*]], !dbg [[DBG239]] +// CHECK-DEBUG: omp_loop.preheader136: +// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND150]], align 4, !dbg [[DBG239]] +// CHECK-DEBUG-NEXT: [[TMP21:%.*]] = sub i32 [[DOTCOUNT135]], 1, !dbg [[DBG239]] +// CHECK-DEBUG-NEXT: store i32 [[TMP21]], ptr [[P_UPPERBOUND151]], align 4, !dbg [[DBG239]] +// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE152]], align 4, !dbg [[DBG239]] +// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM153:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB36:[0-9]+]]), !dbg [[DBG239]] +// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB36]], i32 [[OMP_GLOBAL_THREAD_NUM153]], i32 34, ptr [[P_LASTITER149]], ptr [[P_LOWERBOUND150]], ptr [[P_UPPERBOUND151]], ptr [[P_STRIDE152]], i32 1, i32 0), !dbg [[DBG239]] +// CHECK-DEBUG-NEXT: [[TMP22:%.*]] = load i32, ptr [[P_LOWERBOUND150]], align 4, !dbg [[DBG239]] +// CHECK-DEBUG-NEXT: [[TMP23:%.*]] = load i32, ptr [[P_UPPERBOUND151]], align 4, !dbg [[DBG239]] +// CHECK-DEBUG-NEXT: [[TMP24:%.*]] = sub i32 [[TMP23]], [[TMP22]], !dbg [[DBG239]] +// CHECK-DEBUG-NEXT: [[TMP25:%.*]] = add i32 [[TMP24]], 1, !dbg [[DBG239]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER137:%.*]], !dbg [[DBG239]] +// CHECK-DEBUG: omp_loop.header137: +// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV143:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER136]] ], [ [[OMP_LOOP_NEXT145:%.*]], [[OMP_LOOP_INC140:%.*]] ], !dbg [[DBG239]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND138:%.*]], !dbg [[DBG239]] +// CHECK-DEBUG: omp_loop.cond138: +// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP144:%.*]] = icmp ult i32 [[OMP_LOOP_IV143]], [[TMP25]], !dbg [[DBG239]] +// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP144]], label [[OMP_LOOP_BODY139:%.*]], label [[OMP_LOOP_EXIT141:%.*]], !dbg [[DBG239]] +// CHECK-DEBUG: omp_loop.exit141: +// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB36]], i32 [[OMP_GLOBAL_THREAD_NUM153]]), !dbg [[DBG239]] +// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM154:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB36]]), !dbg [[DBG241:![0-9]+]] +// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB37:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM154]]), !dbg [[DBG241]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER142:%.*]], !dbg [[DBG239]] +// CHECK-DEBUG: omp_loop.after142: +// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION7_PARALLEL_AFTER:%.*]], !dbg [[DBG242:![0-9]+]] +// CHECK-DEBUG: omp.par.region7.parallel.after: +// CHECK-DEBUG-NEXT: br label [[OMP_PAR_PRE_FINALIZE8:%.*]] +// CHECK-DEBUG: omp.par.pre_finalize8: +// CHECK-DEBUG-NEXT: br label [[DOTFINI155:%.*]] +// CHECK-DEBUG: .fini155: +// CHECK-DEBUG-NEXT: br label [[OMP_PAR_EXIT9_EXITSTUB:%.*]], !dbg [[DBG242]] +// CHECK-DEBUG: omp_loop.body139: +// CHECK-DEBUG-NEXT: [[TMP26:%.*]] = add i32 [[OMP_LOOP_IV143]], [[TMP22]], !dbg [[DBG241]] +// CHECK-DEBUG-NEXT: call void @__captured_stmt.16(ptr [[I131]], i32 [[TMP26]], ptr [[AGG_CAPTURED133]]), !dbg [[DBG239]] +// CHECK-DEBUG-NEXT: [[TMP27:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG243:![0-9]+]] +// CHECK-DEBUG-NEXT: [[CONV146:%.*]] = sitofp i32 [[TMP27]] to double, !dbg [[DBG243]] +// CHECK-DEBUG-NEXT: [[TMP28:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG241]] +// CHECK-DEBUG-NEXT: [[ADD147:%.*]] = fadd double [[CONV146]], [[TMP28]], !dbg [[DBG244:![0-9]+]] +// CHECK-DEBUG-NEXT: [[CONV148:%.*]] = fptrunc double [[ADD147]] to float, !dbg [[DBG243]] +// CHECK-DEBUG-NEXT: [[TMP29:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG245:![0-9]+]] +// CHECK-DEBUG-NEXT: store float [[CONV148]], ptr [[TMP29]], align 4, !dbg [[DBG246:![0-9]+]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC140]], !dbg [[DBG239]] +// CHECK-DEBUG: omp_loop.inc140: +// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT145]] = add nuw i32 [[OMP_LOOP_IV143]], 1, !dbg [[DBG239]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER137]], !dbg [[DBG239]] +// CHECK-DEBUG: omp_loop.body80: +// CHECK-DEBUG-NEXT: [[TMP30:%.*]] = add i32 [[OMP_LOOP_IV84]], [[TMP14]], !dbg [[DBG230]] +// CHECK-DEBUG-NEXT: call void @__captured_stmt.12(ptr [[I72]], i32 [[TMP30]], ptr [[AGG_CAPTURED74]]), !dbg [[DBG228]] +// CHECK-DEBUG-NEXT: [[TMP31:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG247:![0-9]+]] +// CHECK-DEBUG-NEXT: [[CONV87:%.*]] = sitofp i32 [[TMP31]] to double, !dbg [[DBG247]] +// CHECK-DEBUG-NEXT: [[TMP32:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG230]] +// CHECK-DEBUG-NEXT: [[ADD88:%.*]] = fadd double [[CONV87]], [[TMP32]], !dbg [[DBG248:![0-9]+]] +// CHECK-DEBUG-NEXT: [[CONV89:%.*]] = fptrunc double [[ADD88]] to float, !dbg [[DBG247]] +// CHECK-DEBUG-NEXT: [[TMP33:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG249:![0-9]+]] +// CHECK-DEBUG-NEXT: store float [[CONV89]], ptr [[TMP33]], align 4, !dbg [[DBG250:![0-9]+]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC81]], !dbg [[DBG228]] +// CHECK-DEBUG: omp_loop.inc81: +// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT86]] = add nuw i32 [[OMP_LOOP_IV84]], 1, !dbg [[DBG228]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER78]], !dbg [[DBG228]] +// CHECK-DEBUG: omp_loop.body22: +// CHECK-DEBUG-NEXT: [[TMP34:%.*]] = add i32 [[OMP_LOOP_IV26]], [[TMP6]], !dbg [[DBG219]] +// CHECK-DEBUG-NEXT: call void @__captured_stmt.8(ptr [[I14]], i32 [[TMP34]], ptr [[AGG_CAPTURED16]]), !dbg [[DBG217]] +// CHECK-DEBUG-NEXT: [[TMP35:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG251:![0-9]+]] +// CHECK-DEBUG-NEXT: [[CONV29:%.*]] = sitofp i32 [[TMP35]] to double, !dbg [[DBG251]] +// CHECK-DEBUG-NEXT: [[TMP36:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG219]] +// CHECK-DEBUG-NEXT: [[ADD30:%.*]] = fadd double [[CONV29]], [[TMP36]], !dbg [[DBG252:![0-9]+]] +// CHECK-DEBUG-NEXT: [[CONV31:%.*]] = fptrunc double [[ADD30]] to float, !dbg [[DBG251]] +// CHECK-DEBUG-NEXT: [[TMP37:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG253:![0-9]+]] +// CHECK-DEBUG-NEXT: store float [[CONV31]], ptr [[TMP37]], align 4, !dbg [[DBG254:![0-9]+]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC23]], !dbg [[DBG217]] +// CHECK-DEBUG: omp_loop.inc23: +// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT28]] = add nuw i32 [[OMP_LOOP_IV26]], 1, !dbg [[DBG217]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER20]], !dbg [[DBG217]] +// CHECK-DEBUG: omp.par.exit9.exitStub: // CHECK-DEBUG-NEXT: ret void // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@_Z14parallel_for_2Pfid..omp_par.21 -// CHECK-DEBUG-SAME: (ptr noalias [[TID_ADDR100:%.*]], ptr noalias [[ZERO_ADDR101:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] !dbg [[DBG233:![0-9]+]] { -// CHECK-DEBUG-NEXT: omp.par.entry102: +// CHECK-DEBUG-SAME: (ptr noalias [[TID_ADDR96:%.*]], ptr noalias [[ZERO_ADDR97:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] !dbg [[DBG255:![0-9]+]] { +// CHECK-DEBUG-NEXT: omp.par.entry98: // CHECK-DEBUG-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 0 -// CHECK-DEBUG-NEXT: [[LOADGEP_A_ADDR:%.*]] = load ptr, ptr [[GEP_A_ADDR]], align 8 +// CHECK-DEBUG-NEXT: [[LOADGEP_A_ADDR:%.*]] = load ptr, ptr [[GEP_A_ADDR]], align 8, !align [[META46]] // CHECK-DEBUG-NEXT: [[GEP_B_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 1 -// CHECK-DEBUG-NEXT: [[LOADGEP_B_ADDR:%.*]] = load ptr, ptr [[GEP_B_ADDR]], align 8 +// CHECK-DEBUG-NEXT: [[LOADGEP_B_ADDR:%.*]] = load ptr, ptr [[GEP_B_ADDR]], align 8, !align [[META88]] // CHECK-DEBUG-NEXT: [[GEP_R_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 2 -// CHECK-DEBUG-NEXT: [[LOADGEP_R_ADDR:%.*]] = load ptr, ptr [[GEP_R_ADDR]], align 8 -// CHECK-DEBUG-NEXT: [[P_LASTITER128:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_LOWERBOUND129:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_UPPERBOUND130:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_STRIDE131:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[TID_ADDR_LOCAL106:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR100]], align 4 -// CHECK-DEBUG-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL106]], align 4 -// CHECK-DEBUG-NEXT: [[TID107:%.*]] = load i32, ptr [[TID_ADDR_LOCAL106]], align 4 -// CHECK-DEBUG-NEXT: [[I110:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[AGG_CAPTURED111:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 8 -// CHECK-DEBUG-NEXT: [[AGG_CAPTURED112:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 4 -// CHECK-DEBUG-NEXT: [[DOTCOUNT_ADDR113:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOADGEP_A_ADDR]], [[META260:![0-9]+]], !DIExpression(), [[META261:![0-9]+]]) -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOADGEP_B_ADDR]], [[META262:![0-9]+]], !DIExpression(), [[META263:![0-9]+]]) -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOADGEP_R_ADDR]], [[META264:![0-9]+]], !DIExpression(), [[META265:![0-9]+]]) -// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION103:%.*]] -// CHECK-DEBUG: omp.par.region103: -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[I110]], [[META234:![0-9]+]], !DIExpression(), [[META240:![0-9]+]]) -// CHECK-DEBUG-NEXT: store i32 0, ptr [[I110]], align 4, !dbg [[META240]] -// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_11]], ptr [[AGG_CAPTURED111]], i32 0, i32 0, !dbg [[DBG241:![0-9]+]] -// CHECK-DEBUG-NEXT: store ptr [[I110]], ptr [[TMP2]], align 8, !dbg [[DBG241]] -// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_12]], ptr [[AGG_CAPTURED112]], i32 0, i32 0, !dbg [[DBG241]] -// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[I110]], align 4, !dbg [[DBG242:![0-9]+]] -// CHECK-DEBUG-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4, !dbg [[DBG241]] -// CHECK-DEBUG-NEXT: call void @__captured_stmt.13(ptr [[DOTCOUNT_ADDR113]], ptr [[AGG_CAPTURED111]]), !dbg [[DBG241]] -// CHECK-DEBUG-NEXT: [[DOTCOUNT114:%.*]] = load i32, ptr [[DOTCOUNT_ADDR113]], align 4, !dbg [[DBG241]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER115:%.*]], !dbg [[DBG241]] -// CHECK-DEBUG: omp_loop.preheader115: -// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND129]], align 4, !dbg [[DBG241]] -// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = sub i32 [[DOTCOUNT114]], 1, !dbg [[DBG241]] -// CHECK-DEBUG-NEXT: store i32 [[TMP5]], ptr [[P_UPPERBOUND130]], align 4, !dbg [[DBG241]] -// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE131]], align 4, !dbg [[DBG241]] -// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM132:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB33:[0-9]+]]), !dbg [[DBG241]] -// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB33]], i32 [[OMP_GLOBAL_THREAD_NUM132]], i32 34, ptr [[P_LASTITER128]], ptr [[P_LOWERBOUND129]], ptr [[P_UPPERBOUND130]], ptr [[P_STRIDE131]], i32 1, i32 0), !dbg [[DBG241]] -// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[P_LOWERBOUND129]], align 4, !dbg [[DBG241]] -// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[P_UPPERBOUND130]], align 4, !dbg [[DBG241]] -// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], [[TMP6]], !dbg [[DBG241]] -// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = add i32 [[TMP8]], 1, !dbg [[DBG241]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER116:%.*]], !dbg [[DBG241]] -// CHECK-DEBUG: omp_loop.header116: -// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV122:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER115]] ], [ [[OMP_LOOP_NEXT124:%.*]], [[OMP_LOOP_INC119:%.*]] ], !dbg [[DBG241]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND117:%.*]], !dbg [[DBG241]] -// CHECK-DEBUG: omp_loop.cond117: -// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP123:%.*]] = icmp ult i32 [[OMP_LOOP_IV122]], [[TMP9]], !dbg [[DBG241]] -// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP123]], label [[OMP_LOOP_BODY118:%.*]], label [[OMP_LOOP_EXIT120:%.*]], !dbg [[DBG241]] -// CHECK-DEBUG: omp_loop.exit120: -// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB33]], i32 [[OMP_GLOBAL_THREAD_NUM132]]), !dbg [[DBG241]] -// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM133:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB33]]), !dbg [[DBG243:![0-9]+]] -// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB34:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM133]]), !dbg [[DBG243]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER121:%.*]], !dbg [[DBG241]] -// CHECK-DEBUG: omp_loop.after121: -// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION103_PARALLEL_AFTER:%.*]], !dbg [[DBG244:![0-9]+]] -// CHECK-DEBUG: omp.par.region103.parallel.after: -// CHECK-DEBUG-NEXT: br label [[FINI134:%.*]] -// CHECK-DEBUG: .fini134: -// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT134_EXITSTUB:%.*]], !dbg [[DBG244]] -// CHECK-DEBUG: omp_loop.body118: -// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = add i32 [[OMP_LOOP_IV122]], [[TMP6]], !dbg [[DBG243]] -// CHECK-DEBUG-NEXT: call void @__captured_stmt.14(ptr [[I110]], i32 [[TMP10]], ptr [[AGG_CAPTURED112]]), !dbg [[DBG241]] -// CHECK-DEBUG-NEXT: [[TMP11:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG245:![0-9]+]] -// CHECK-DEBUG-NEXT: [[CONV125:%.*]] = sitofp i32 [[TMP11]] to double, !dbg [[DBG245]] -// CHECK-DEBUG-NEXT: [[TMP12:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG243]] -// CHECK-DEBUG-NEXT: [[ADD126:%.*]] = fadd double [[CONV125]], [[TMP12]], !dbg [[DBG246:![0-9]+]] -// CHECK-DEBUG-NEXT: [[CONV127:%.*]] = fptrunc double [[ADD126]] to float, !dbg [[DBG245]] -// CHECK-DEBUG-NEXT: [[TMP13:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG247:![0-9]+]] -// CHECK-DEBUG-NEXT: store float [[CONV127]], ptr [[TMP13]], align 4, !dbg [[DBG248:![0-9]+]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC119]], !dbg [[DBG241]] -// CHECK-DEBUG: omp_loop.inc119: -// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT124]] = add nuw i32 [[OMP_LOOP_IV122]], 1, !dbg [[DBG241]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER116]], !dbg [[DBG241]] -// CHECK-DEBUG: omp.par.exit105.exitStub: +// CHECK-DEBUG-NEXT: [[LOADGEP_R_ADDR:%.*]] = load ptr, ptr [[GEP_R_ADDR]], align 8, !align [[META88]] +// CHECK-DEBUG-NEXT: [[P_LASTITER124:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_LOWERBOUND125:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_UPPERBOUND126:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_STRIDE127:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[TID_ADDR_LOCAL102:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR96]], align 4 +// CHECK-DEBUG-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL102]], align 4 +// CHECK-DEBUG-NEXT: [[TID103:%.*]] = load i32, ptr [[TID_ADDR_LOCAL102]], align 4 +// CHECK-DEBUG-NEXT: [[I106:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[AGG_CAPTURED107:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 8 +// CHECK-DEBUG-NEXT: [[AGG_CAPTURED108:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 4 +// CHECK-DEBUG-NEXT: [[DOTCOUNT_ADDR109:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOADGEP_A_ADDR]], [[META256:![0-9]+]], !DIExpression(), [[META257:![0-9]+]]) +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOADGEP_B_ADDR]], [[META258:![0-9]+]], !DIExpression(), [[META259:![0-9]+]]) +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOADGEP_R_ADDR]], [[META260:![0-9]+]], !DIExpression(), [[META261:![0-9]+]]) +// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION99:%.*]] +// CHECK-DEBUG: omp.par.region99: +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[I106]], [[META262:![0-9]+]], !DIExpression(), [[META268:![0-9]+]]) +// CHECK-DEBUG-NEXT: store i32 0, ptr [[I106]], align 4, !dbg [[META268]] +// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_11]], ptr [[AGG_CAPTURED107]], i32 0, i32 0, !dbg [[DBG269:![0-9]+]] +// CHECK-DEBUG-NEXT: store ptr [[I106]], ptr [[TMP2]], align 8, !dbg [[DBG269]] +// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_12]], ptr [[AGG_CAPTURED108]], i32 0, i32 0, !dbg [[DBG269]] +// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[I106]], align 4, !dbg [[DBG270:![0-9]+]] +// CHECK-DEBUG-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4, !dbg [[DBG269]] +// CHECK-DEBUG-NEXT: call void @__captured_stmt.13(ptr [[DOTCOUNT_ADDR109]], ptr [[AGG_CAPTURED107]]), !dbg [[DBG269]] +// CHECK-DEBUG-NEXT: [[DOTCOUNT110:%.*]] = load i32, ptr [[DOTCOUNT_ADDR109]], align 4, !dbg [[DBG269]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER111:%.*]], !dbg [[DBG269]] +// CHECK-DEBUG: omp_loop.preheader111: +// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND125]], align 4, !dbg [[DBG269]] +// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = sub i32 [[DOTCOUNT110]], 1, !dbg [[DBG269]] +// CHECK-DEBUG-NEXT: store i32 [[TMP5]], ptr [[P_UPPERBOUND126]], align 4, !dbg [[DBG269]] +// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE127]], align 4, !dbg [[DBG269]] +// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM128:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB33:[0-9]+]]), !dbg [[DBG269]] +// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB33]], i32 [[OMP_GLOBAL_THREAD_NUM128]], i32 34, ptr [[P_LASTITER124]], ptr [[P_LOWERBOUND125]], ptr [[P_UPPERBOUND126]], ptr [[P_STRIDE127]], i32 1, i32 0), !dbg [[DBG269]] +// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[P_LOWERBOUND125]], align 4, !dbg [[DBG269]] +// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[P_UPPERBOUND126]], align 4, !dbg [[DBG269]] +// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], [[TMP6]], !dbg [[DBG269]] +// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = add i32 [[TMP8]], 1, !dbg [[DBG269]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER112:%.*]], !dbg [[DBG269]] +// CHECK-DEBUG: omp_loop.header112: +// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV118:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER111]] ], [ [[OMP_LOOP_NEXT120:%.*]], [[OMP_LOOP_INC115:%.*]] ], !dbg [[DBG269]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND113:%.*]], !dbg [[DBG269]] +// CHECK-DEBUG: omp_loop.cond113: +// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP119:%.*]] = icmp ult i32 [[OMP_LOOP_IV118]], [[TMP9]], !dbg [[DBG269]] +// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP119]], label [[OMP_LOOP_BODY114:%.*]], label [[OMP_LOOP_EXIT116:%.*]], !dbg [[DBG269]] +// CHECK-DEBUG: omp_loop.exit116: +// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB33]], i32 [[OMP_GLOBAL_THREAD_NUM128]]), !dbg [[DBG269]] +// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM129:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB33]]), !dbg [[DBG271:![0-9]+]] +// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB34:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM129]]), !dbg [[DBG271]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER117:%.*]], !dbg [[DBG269]] +// CHECK-DEBUG: omp_loop.after117: +// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION99_PARALLEL_AFTER:%.*]], !dbg [[DBG272:![0-9]+]] +// CHECK-DEBUG: omp.par.region99.parallel.after: +// CHECK-DEBUG-NEXT: br label [[OMP_PAR_PRE_FINALIZE100:%.*]] +// CHECK-DEBUG: omp.par.pre_finalize100: +// CHECK-DEBUG-NEXT: br label [[DOTFINI130:%.*]] +// CHECK-DEBUG: .fini130: +// CHECK-DEBUG-NEXT: br label [[OMP_PAR_EXIT101_EXITSTUB:%.*]], !dbg [[DBG272]] +// CHECK-DEBUG: omp_loop.body114: +// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = add i32 [[OMP_LOOP_IV118]], [[TMP6]], !dbg [[DBG271]] +// CHECK-DEBUG-NEXT: call void @__captured_stmt.14(ptr [[I106]], i32 [[TMP10]], ptr [[AGG_CAPTURED108]]), !dbg [[DBG269]] +// CHECK-DEBUG-NEXT: [[TMP11:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG273:![0-9]+]] +// CHECK-DEBUG-NEXT: [[CONV121:%.*]] = sitofp i32 [[TMP11]] to double, !dbg [[DBG273]] +// CHECK-DEBUG-NEXT: [[TMP12:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG271]] +// CHECK-DEBUG-NEXT: [[ADD122:%.*]] = fadd double [[CONV121]], [[TMP12]], !dbg [[DBG274:![0-9]+]] +// CHECK-DEBUG-NEXT: [[CONV123:%.*]] = fptrunc double [[ADD122]] to float, !dbg [[DBG273]] +// CHECK-DEBUG-NEXT: [[TMP13:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG275:![0-9]+]] +// CHECK-DEBUG-NEXT: store float [[CONV123]], ptr [[TMP13]], align 4, !dbg [[DBG276:![0-9]+]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC115]], !dbg [[DBG269]] +// CHECK-DEBUG: omp_loop.inc115: +// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT120]] = add nuw i32 [[OMP_LOOP_IV118]], 1, !dbg [[DBG269]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER112]], !dbg [[DBG269]] +// CHECK-DEBUG: omp.par.exit101.exitStub: // CHECK-DEBUG-NEXT: ret void // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@_Z14parallel_for_2Pfid..omp_par -// CHECK-DEBUG-SAME: (ptr noalias [[TID_ADDR41:%.*]], ptr noalias [[ZERO_ADDR42:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] !dbg [[DBG249:![0-9]+]] { -// CHECK-DEBUG-NEXT: omp.par.entry43: +// CHECK-DEBUG-SAME: (ptr noalias [[TID_ADDR38:%.*]], ptr noalias [[ZERO_ADDR39:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] !dbg [[DBG277:![0-9]+]] { +// CHECK-DEBUG-NEXT: omp.par.entry40: // CHECK-DEBUG-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 0 -// CHECK-DEBUG-NEXT: [[LOADGEP_A_ADDR:%.*]] = load ptr, ptr [[GEP_A_ADDR]], align 8 +// CHECK-DEBUG-NEXT: [[LOADGEP_A_ADDR:%.*]] = load ptr, ptr [[GEP_A_ADDR]], align 8, !align [[META46]] // CHECK-DEBUG-NEXT: [[GEP_B_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 1 -// CHECK-DEBUG-NEXT: [[LOADGEP_B_ADDR:%.*]] = load ptr, ptr [[GEP_B_ADDR]], align 8 +// CHECK-DEBUG-NEXT: [[LOADGEP_B_ADDR:%.*]] = load ptr, ptr [[GEP_B_ADDR]], align 8, !align [[META88]] // CHECK-DEBUG-NEXT: [[GEP_R_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 2 -// CHECK-DEBUG-NEXT: [[LOADGEP_R_ADDR:%.*]] = load ptr, ptr [[GEP_R_ADDR]], align 8 -// CHECK-DEBUG-NEXT: [[P_LASTITER69:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_LOWERBOUND70:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_UPPERBOUND71:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_STRIDE72:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[TID_ADDR_LOCAL47:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR41]], align 4 -// CHECK-DEBUG-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL47]], align 4 -// CHECK-DEBUG-NEXT: [[TID48:%.*]] = load i32, ptr [[TID_ADDR_LOCAL47]], align 4 -// CHECK-DEBUG-NEXT: [[I51:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[AGG_CAPTURED52:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 -// CHECK-DEBUG-NEXT: [[AGG_CAPTURED53:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 -// CHECK-DEBUG-NEXT: [[DOTCOUNT_ADDR54:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOADGEP_A_ADDR]], [[META282:![0-9]+]], !DIExpression(), [[META283:![0-9]+]]) -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOADGEP_B_ADDR]], [[META284:![0-9]+]], !DIExpression(), [[META285:![0-9]+]]) -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOADGEP_R_ADDR]], [[META286:![0-9]+]], !DIExpression(), [[META287:![0-9]+]]) -// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION44:%.*]] -// CHECK-DEBUG: omp.par.region44: -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[I51]], [[META250:![0-9]+]], !DIExpression(), [[META256:![0-9]+]]) -// CHECK-DEBUG-NEXT: store i32 0, ptr [[I51]], align 4, !dbg [[META256]] -// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_7]], ptr [[AGG_CAPTURED52]], i32 0, i32 0, !dbg [[DBG257:![0-9]+]] -// CHECK-DEBUG-NEXT: store ptr [[I51]], ptr [[TMP2]], align 8, !dbg [[DBG257]] -// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_8]], ptr [[AGG_CAPTURED53]], i32 0, i32 0, !dbg [[DBG257]] -// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[I51]], align 4, !dbg [[DBG258:![0-9]+]] -// CHECK-DEBUG-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4, !dbg [[DBG257]] -// CHECK-DEBUG-NEXT: call void @__captured_stmt.9(ptr [[DOTCOUNT_ADDR54]], ptr [[AGG_CAPTURED52]]), !dbg [[DBG257]] -// CHECK-DEBUG-NEXT: [[DOTCOUNT55:%.*]] = load i32, ptr [[DOTCOUNT_ADDR54]], align 4, !dbg [[DBG257]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER56:%.*]], !dbg [[DBG257]] -// CHECK-DEBUG: omp_loop.preheader56: -// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND70]], align 4, !dbg [[DBG257]] -// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = sub i32 [[DOTCOUNT55]], 1, !dbg [[DBG257]] -// CHECK-DEBUG-NEXT: store i32 [[TMP5]], ptr [[P_UPPERBOUND71]], align 4, !dbg [[DBG257]] -// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE72]], align 4, !dbg [[DBG257]] -// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM73:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB25:[0-9]+]]), !dbg [[DBG257]] -// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB25]], i32 [[OMP_GLOBAL_THREAD_NUM73]], i32 34, ptr [[P_LASTITER69]], ptr [[P_LOWERBOUND70]], ptr [[P_UPPERBOUND71]], ptr [[P_STRIDE72]], i32 1, i32 0), !dbg [[DBG257]] -// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[P_LOWERBOUND70]], align 4, !dbg [[DBG257]] -// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[P_UPPERBOUND71]], align 4, !dbg [[DBG257]] -// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], [[TMP6]], !dbg [[DBG257]] -// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = add i32 [[TMP8]], 1, !dbg [[DBG257]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER57:%.*]], !dbg [[DBG257]] -// CHECK-DEBUG: omp_loop.header57: -// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV63:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER56]] ], [ [[OMP_LOOP_NEXT65:%.*]], [[OMP_LOOP_INC60:%.*]] ], !dbg [[DBG257]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND58:%.*]], !dbg [[DBG257]] -// CHECK-DEBUG: omp_loop.cond58: -// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP64:%.*]] = icmp ult i32 [[OMP_LOOP_IV63]], [[TMP9]], !dbg [[DBG257]] -// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP64]], label [[OMP_LOOP_BODY59:%.*]], label [[OMP_LOOP_EXIT61:%.*]], !dbg [[DBG257]] -// CHECK-DEBUG: omp_loop.exit61: -// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB25]], i32 [[OMP_GLOBAL_THREAD_NUM73]]), !dbg [[DBG257]] -// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM74:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB25]]), !dbg [[DBG259:![0-9]+]] -// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB26:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM74]]), !dbg [[DBG259]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER62:%.*]], !dbg [[DBG257]] -// CHECK-DEBUG: omp_loop.after62: -// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION44_PARALLEL_AFTER:%.*]], !dbg [[DBG260:![0-9]+]] -// CHECK-DEBUG: omp.par.region44.parallel.after: -// CHECK-DEBUG-NEXT: br label [[OMP_PAR_PRE_FINALIZE45:%.*]] -// CHECK-DEBUG: omp.par.pre_finalize45: -// CHECK-DEBUG-NEXT: br label [[FINI:%.*]] +// CHECK-DEBUG-NEXT: [[LOADGEP_R_ADDR:%.*]] = load ptr, ptr [[GEP_R_ADDR]], align 8, !align [[META88]] +// CHECK-DEBUG-NEXT: [[P_LASTITER66:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_LOWERBOUND67:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_UPPERBOUND68:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_STRIDE69:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[TID_ADDR_LOCAL44:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR38]], align 4 +// CHECK-DEBUG-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL44]], align 4 +// CHECK-DEBUG-NEXT: [[TID45:%.*]] = load i32, ptr [[TID_ADDR_LOCAL44]], align 4 +// CHECK-DEBUG-NEXT: [[I48:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[AGG_CAPTURED49:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 +// CHECK-DEBUG-NEXT: [[AGG_CAPTURED50:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 +// CHECK-DEBUG-NEXT: [[DOTCOUNT_ADDR51:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOADGEP_A_ADDR]], [[META278:![0-9]+]], !DIExpression(), [[META279:![0-9]+]]) +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOADGEP_B_ADDR]], [[META280:![0-9]+]], !DIExpression(), [[META281:![0-9]+]]) +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOADGEP_R_ADDR]], [[META282:![0-9]+]], !DIExpression(), [[META283:![0-9]+]]) +// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION41:%.*]] +// CHECK-DEBUG: omp.par.region41: +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[I48]], [[META284:![0-9]+]], !DIExpression(), [[META290:![0-9]+]]) +// CHECK-DEBUG-NEXT: store i32 0, ptr [[I48]], align 4, !dbg [[META290]] +// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_7]], ptr [[AGG_CAPTURED49]], i32 0, i32 0, !dbg [[DBG291:![0-9]+]] +// CHECK-DEBUG-NEXT: store ptr [[I48]], ptr [[TMP2]], align 8, !dbg [[DBG291]] +// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_8]], ptr [[AGG_CAPTURED50]], i32 0, i32 0, !dbg [[DBG291]] +// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[I48]], align 4, !dbg [[DBG292:![0-9]+]] +// CHECK-DEBUG-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4, !dbg [[DBG291]] +// CHECK-DEBUG-NEXT: call void @__captured_stmt.9(ptr [[DOTCOUNT_ADDR51]], ptr [[AGG_CAPTURED49]]), !dbg [[DBG291]] +// CHECK-DEBUG-NEXT: [[DOTCOUNT52:%.*]] = load i32, ptr [[DOTCOUNT_ADDR51]], align 4, !dbg [[DBG291]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER53:%.*]], !dbg [[DBG291]] +// CHECK-DEBUG: omp_loop.preheader53: +// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND67]], align 4, !dbg [[DBG291]] +// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = sub i32 [[DOTCOUNT52]], 1, !dbg [[DBG291]] +// CHECK-DEBUG-NEXT: store i32 [[TMP5]], ptr [[P_UPPERBOUND68]], align 4, !dbg [[DBG291]] +// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE69]], align 4, !dbg [[DBG291]] +// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM70:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB25:[0-9]+]]), !dbg [[DBG291]] +// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB25]], i32 [[OMP_GLOBAL_THREAD_NUM70]], i32 34, ptr [[P_LASTITER66]], ptr [[P_LOWERBOUND67]], ptr [[P_UPPERBOUND68]], ptr [[P_STRIDE69]], i32 1, i32 0), !dbg [[DBG291]] +// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[P_LOWERBOUND67]], align 4, !dbg [[DBG291]] +// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[P_UPPERBOUND68]], align 4, !dbg [[DBG291]] +// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], [[TMP6]], !dbg [[DBG291]] +// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = add i32 [[TMP8]], 1, !dbg [[DBG291]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER54:%.*]], !dbg [[DBG291]] +// CHECK-DEBUG: omp_loop.header54: +// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV60:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER53]] ], [ [[OMP_LOOP_NEXT62:%.*]], [[OMP_LOOP_INC57:%.*]] ], !dbg [[DBG291]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND55:%.*]], !dbg [[DBG291]] +// CHECK-DEBUG: omp_loop.cond55: +// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP61:%.*]] = icmp ult i32 [[OMP_LOOP_IV60]], [[TMP9]], !dbg [[DBG291]] +// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP61]], label [[OMP_LOOP_BODY56:%.*]], label [[OMP_LOOP_EXIT58:%.*]], !dbg [[DBG291]] +// CHECK-DEBUG: omp_loop.exit58: +// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB25]], i32 [[OMP_GLOBAL_THREAD_NUM70]]), !dbg [[DBG291]] +// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM71:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB25]]), !dbg [[DBG293:![0-9]+]] +// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB26:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM71]]), !dbg [[DBG293]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER59:%.*]], !dbg [[DBG291]] +// CHECK-DEBUG: omp_loop.after59: +// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION41_PARALLEL_AFTER:%.*]], !dbg [[DBG294:![0-9]+]] +// CHECK-DEBUG: omp.par.region41.parallel.after: +// CHECK-DEBUG-NEXT: br label [[OMP_PAR_PRE_FINALIZE42:%.*]] +// CHECK-DEBUG: omp.par.pre_finalize42: +// CHECK-DEBUG-NEXT: br label [[DOTFINI:%.*]] // CHECK-DEBUG: .fini: -// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]], !dbg [[DBG260]] -// CHECK-DEBUG: omp_loop.body59: -// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = add i32 [[OMP_LOOP_IV63]], [[TMP6]], !dbg [[DBG259]] -// CHECK-DEBUG-NEXT: call void @__captured_stmt.10(ptr [[I51]], i32 [[TMP10]], ptr [[AGG_CAPTURED53]]), !dbg [[DBG257]] -// CHECK-DEBUG-NEXT: [[TMP11:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG261:![0-9]+]] -// CHECK-DEBUG-NEXT: [[CONV66:%.*]] = sitofp i32 [[TMP11]] to double, !dbg [[DBG261]] -// CHECK-DEBUG-NEXT: [[TMP12:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG259]] -// CHECK-DEBUG-NEXT: [[ADD67:%.*]] = fadd double [[CONV66]], [[TMP12]], !dbg [[DBG262:![0-9]+]] -// CHECK-DEBUG-NEXT: [[CONV68:%.*]] = fptrunc double [[ADD67]] to float, !dbg [[DBG261]] -// CHECK-DEBUG-NEXT: [[TMP13:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG263:![0-9]+]] -// CHECK-DEBUG-NEXT: store float [[CONV68]], ptr [[TMP13]], align 4, !dbg [[DBG264:![0-9]+]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC60]], !dbg [[DBG257]] -// CHECK-DEBUG: omp_loop.inc60: -// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT65]] = add nuw i32 [[OMP_LOOP_IV63]], 1, !dbg [[DBG257]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER57]], !dbg [[DBG257]] -// CHECK-DEBUG: omp.par.exit46.exitStub: +// CHECK-DEBUG-NEXT: br label [[OMP_PAR_EXIT43_EXITSTUB:%.*]], !dbg [[DBG294]] +// CHECK-DEBUG: omp_loop.body56: +// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = add i32 [[OMP_LOOP_IV60]], [[TMP6]], !dbg [[DBG293]] +// CHECK-DEBUG-NEXT: call void @__captured_stmt.10(ptr [[I48]], i32 [[TMP10]], ptr [[AGG_CAPTURED50]]), !dbg [[DBG291]] +// CHECK-DEBUG-NEXT: [[TMP11:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG295:![0-9]+]] +// CHECK-DEBUG-NEXT: [[CONV63:%.*]] = sitofp i32 [[TMP11]] to double, !dbg [[DBG295]] +// CHECK-DEBUG-NEXT: [[TMP12:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG293]] +// CHECK-DEBUG-NEXT: [[ADD64:%.*]] = fadd double [[CONV63]], [[TMP12]], !dbg [[DBG296:![0-9]+]] +// CHECK-DEBUG-NEXT: [[CONV65:%.*]] = fptrunc double [[ADD64]] to float, !dbg [[DBG295]] +// CHECK-DEBUG-NEXT: [[TMP13:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG297:![0-9]+]] +// CHECK-DEBUG-NEXT: store float [[CONV65]], ptr [[TMP13]], align 4, !dbg [[DBG298:![0-9]+]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC57]], !dbg [[DBG291]] +// CHECK-DEBUG: omp_loop.inc57: +// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT62]] = add nuw i32 [[OMP_LOOP_IV60]], 1, !dbg [[DBG291]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER54]], !dbg [[DBG291]] +// CHECK-DEBUG: omp.par.exit43.exitStub: // CHECK-DEBUG-NEXT: ret void // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@__captured_stmt.5 -// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG265:![0-9]+]] { +// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] !dbg [[DBG299:![0-9]+]] { // CHECK-DEBUG-NEXT: entry: // CHECK-DEBUG-NEXT: [[DISTANCE_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 @@ -2502,67 +2502,67 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG-NEXT: [[DOTSTOP:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[DOTSTEP:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: store ptr [[DISTANCE]], ptr [[DISTANCE_ADDR]], align 8 -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DISTANCE_ADDR]], [[META266:![0-9]+]], !DIExpression(), [[META267:![0-9]+]]) +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DISTANCE_ADDR]], [[META300:![0-9]+]], !DIExpression(), [[META301:![0-9]+]]) // CHECK-DEBUG-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META268:![0-9]+]], !DIExpression(), [[META267]]) +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META302:![0-9]+]], !DIExpression(), [[META301]]) // CHECK-DEBUG-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTART]], [[META269:![0-9]+]], !DIExpression(), [[META271:![0-9]+]]) -// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG272:![0-9]+]] -// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG272]] -// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG272]] -// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[META271]] -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTOP]], [[META274:![0-9]+]], !DIExpression(), [[META275:![0-9]+]]) -// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[META275]] -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTEP]], [[META276:![0-9]+]], !DIExpression(), [[META275]]) -// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[META275]] -// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META275]] -// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META275]] -// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[META275]] -// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[META275]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTART]], [[META303:![0-9]+]], !DIExpression(), [[META305:![0-9]+]]) +// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG306:![0-9]+]] +// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG306]], !nonnull [[META12]], !align [[META46]] +// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG306]] +// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[META305]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTOP]], [[META308:![0-9]+]], !DIExpression(), [[META309:![0-9]+]]) +// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[META309]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTEP]], [[META310:![0-9]+]], !DIExpression(), [[META309]]) +// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[META309]] +// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META309]] +// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META309]] +// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[META309]] +// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[META309]] // CHECK-DEBUG: cond.true: -// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META275]] -// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META275]] -// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[META275]] -// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META275]] -// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[META275]] -// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[META275]] -// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META275]] -// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[META275]] -// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[META275]] +// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META309]] +// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META309]] +// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[META309]] +// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META309]] +// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[META309]] +// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[META309]] +// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META309]] +// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[META309]] +// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[META309]] // CHECK-DEBUG: cond.false: -// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[META275]] +// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[META309]] // CHECK-DEBUG: cond.end: -// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[META275]] -// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[META275]] -// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[META275]] -// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG277:![0-9]+]] +// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[META309]] +// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[META309]], !nonnull [[META12]], !align [[META46]] +// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[META309]] +// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG311:![0-9]+]] // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@__captured_stmt.6 -// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG279:![0-9]+]] { +// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] !dbg [[DBG313:![0-9]+]] { // CHECK-DEBUG-NEXT: entry: // CHECK-DEBUG-NEXT: [[LOOPVAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: [[LOGICAL_ADDR:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: store ptr [[LOOPVAR]], ptr [[LOOPVAR_ADDR]], align 8 -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOOPVAR_ADDR]], [[META280:![0-9]+]], !DIExpression(), [[META281:![0-9]+]]) +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOOPVAR_ADDR]], [[META314:![0-9]+]], !DIExpression(), [[META315:![0-9]+]]) // CHECK-DEBUG-NEXT: store i32 [[LOGICAL]], ptr [[LOGICAL_ADDR]], align 4 -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOGICAL_ADDR]], [[META282:![0-9]+]], !DIExpression(), [[META281]]) +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOGICAL_ADDR]], [[META316:![0-9]+]], !DIExpression(), [[META315]]) // CHECK-DEBUG-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META283:![0-9]+]], !DIExpression(), [[META281]]) +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META317:![0-9]+]], !DIExpression(), [[META315]]) // CHECK-DEBUG-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG284:![0-9]+]] -// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG284]] -// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG286:![0-9]+]] -// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG286]] -// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG286]] -// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG286]] -// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[META281]] -// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG284]] +// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG318:![0-9]+]] +// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG318]] +// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG320:![0-9]+]] +// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG320]] +// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG320]] +// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG320]], !nonnull [[META12]], !align [[META46]] +// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[META315]] +// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG318]] // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@__captured_stmt.7 -// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG287:![0-9]+]] { +// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] !dbg [[DBG321:![0-9]+]] { // CHECK-DEBUG-NEXT: entry: // CHECK-DEBUG-NEXT: [[DISTANCE_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 @@ -2570,67 +2570,67 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG-NEXT: [[DOTSTOP:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[DOTSTEP:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: store ptr [[DISTANCE]], ptr [[DISTANCE_ADDR]], align 8 -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DISTANCE_ADDR]], [[META288:![0-9]+]], !DIExpression(), [[META289:![0-9]+]]) +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DISTANCE_ADDR]], [[META322:![0-9]+]], !DIExpression(), [[META323:![0-9]+]]) // CHECK-DEBUG-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META290:![0-9]+]], !DIExpression(), [[META289]]) +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META324:![0-9]+]], !DIExpression(), [[META323]]) // CHECK-DEBUG-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTART]], [[META291:![0-9]+]], !DIExpression(), [[META293:![0-9]+]]) -// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG294:![0-9]+]] -// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG294]] -// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG294]] -// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[META293]] -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTOP]], [[META296:![0-9]+]], !DIExpression(), [[META297:![0-9]+]]) -// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[META297]] -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTEP]], [[META298:![0-9]+]], !DIExpression(), [[META297]]) -// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[META297]] -// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META297]] -// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META297]] -// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[META297]] -// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[META297]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTART]], [[META325:![0-9]+]], !DIExpression(), [[META327:![0-9]+]]) +// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG328:![0-9]+]] +// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG328]], !nonnull [[META12]], !align [[META46]] +// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG328]] +// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[META327]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTOP]], [[META330:![0-9]+]], !DIExpression(), [[META331:![0-9]+]]) +// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[META331]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTEP]], [[META332:![0-9]+]], !DIExpression(), [[META331]]) +// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[META331]] +// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META331]] +// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META331]] +// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[META331]] +// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[META331]] // CHECK-DEBUG: cond.true: -// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META297]] -// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META297]] -// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[META297]] -// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META297]] -// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[META297]] -// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[META297]] -// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META297]] -// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[META297]] -// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[META297]] +// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META331]] +// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META331]] +// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[META331]] +// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META331]] +// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[META331]] +// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[META331]] +// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META331]] +// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[META331]] +// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[META331]] // CHECK-DEBUG: cond.false: -// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[META297]] +// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[META331]] // CHECK-DEBUG: cond.end: -// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[META297]] -// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[META297]] -// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[META297]] -// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG299:![0-9]+]] +// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[META331]] +// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[META331]], !nonnull [[META12]], !align [[META46]] +// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[META331]] +// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG333:![0-9]+]] // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@__captured_stmt.8 -// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG301:![0-9]+]] { +// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] !dbg [[DBG335:![0-9]+]] { // CHECK-DEBUG-NEXT: entry: // CHECK-DEBUG-NEXT: [[LOOPVAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: [[LOGICAL_ADDR:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: store ptr [[LOOPVAR]], ptr [[LOOPVAR_ADDR]], align 8 -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOOPVAR_ADDR]], [[META302:![0-9]+]], !DIExpression(), [[META303:![0-9]+]]) +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOOPVAR_ADDR]], [[META336:![0-9]+]], !DIExpression(), [[META337:![0-9]+]]) // CHECK-DEBUG-NEXT: store i32 [[LOGICAL]], ptr [[LOGICAL_ADDR]], align 4 -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOGICAL_ADDR]], [[META304:![0-9]+]], !DIExpression(), [[META303]]) +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOGICAL_ADDR]], [[META338:![0-9]+]], !DIExpression(), [[META337]]) // CHECK-DEBUG-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META305:![0-9]+]], !DIExpression(), [[META303]]) +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META339:![0-9]+]], !DIExpression(), [[META337]]) // CHECK-DEBUG-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG306:![0-9]+]] -// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG306]] -// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG308:![0-9]+]] -// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG308]] -// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG308]] -// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG308]] -// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[META303]] -// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG306]] +// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG340:![0-9]+]] +// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG340]] +// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG342:![0-9]+]] +// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG342]] +// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG342]] +// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG342]], !nonnull [[META12]], !align [[META46]] +// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[META337]] +// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG340]] // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@__captured_stmt.9 -// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG309:![0-9]+]] { +// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] !dbg [[DBG343:![0-9]+]] { // CHECK-DEBUG-NEXT: entry: // CHECK-DEBUG-NEXT: [[DISTANCE_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 @@ -2638,67 +2638,67 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG-NEXT: [[DOTSTOP:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[DOTSTEP:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: store ptr [[DISTANCE]], ptr [[DISTANCE_ADDR]], align 8 -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DISTANCE_ADDR]], [[META310:![0-9]+]], !DIExpression(), [[META311:![0-9]+]]) +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DISTANCE_ADDR]], [[META344:![0-9]+]], !DIExpression(), [[META345:![0-9]+]]) // CHECK-DEBUG-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META312:![0-9]+]], !DIExpression(), [[META311]]) +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META346:![0-9]+]], !DIExpression(), [[META345]]) // CHECK-DEBUG-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTART]], [[META313:![0-9]+]], !DIExpression(), [[META315:![0-9]+]]) -// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG316:![0-9]+]] -// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG316]] -// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG316]] -// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[META315]] -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTOP]], [[META318:![0-9]+]], !DIExpression(), [[META319:![0-9]+]]) -// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[META319]] -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTEP]], [[META320:![0-9]+]], !DIExpression(), [[META319]]) -// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[META319]] -// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META319]] -// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META319]] -// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[META319]] -// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[META319]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTART]], [[META347:![0-9]+]], !DIExpression(), [[META349:![0-9]+]]) +// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG350:![0-9]+]] +// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG350]], !nonnull [[META12]], !align [[META46]] +// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG350]] +// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[META349]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTOP]], [[META352:![0-9]+]], !DIExpression(), [[META353:![0-9]+]]) +// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[META353]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTEP]], [[META354:![0-9]+]], !DIExpression(), [[META353]]) +// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[META353]] +// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META353]] +// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META353]] +// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[META353]] +// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[META353]] // CHECK-DEBUG: cond.true: -// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META319]] -// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META319]] -// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[META319]] -// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META319]] -// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[META319]] -// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[META319]] -// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META319]] -// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[META319]] -// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[META319]] +// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META353]] +// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META353]] +// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[META353]] +// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META353]] +// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[META353]] +// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[META353]] +// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META353]] +// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[META353]] +// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[META353]] // CHECK-DEBUG: cond.false: -// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[META319]] +// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[META353]] // CHECK-DEBUG: cond.end: -// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[META319]] -// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[META319]] -// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[META319]] -// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG321:![0-9]+]] +// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[META353]] +// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[META353]], !nonnull [[META12]], !align [[META46]] +// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[META353]] +// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG355:![0-9]+]] // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@__captured_stmt.10 -// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG323:![0-9]+]] { +// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] !dbg [[DBG357:![0-9]+]] { // CHECK-DEBUG-NEXT: entry: // CHECK-DEBUG-NEXT: [[LOOPVAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: [[LOGICAL_ADDR:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: store ptr [[LOOPVAR]], ptr [[LOOPVAR_ADDR]], align 8 -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOOPVAR_ADDR]], [[META324:![0-9]+]], !DIExpression(), [[META325:![0-9]+]]) +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOOPVAR_ADDR]], [[META358:![0-9]+]], !DIExpression(), [[META359:![0-9]+]]) // CHECK-DEBUG-NEXT: store i32 [[LOGICAL]], ptr [[LOGICAL_ADDR]], align 4 -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOGICAL_ADDR]], [[META326:![0-9]+]], !DIExpression(), [[META325]]) +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOGICAL_ADDR]], [[META360:![0-9]+]], !DIExpression(), [[META359]]) // CHECK-DEBUG-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META327:![0-9]+]], !DIExpression(), [[META325]]) +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META361:![0-9]+]], !DIExpression(), [[META359]]) // CHECK-DEBUG-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG328:![0-9]+]] -// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG328]] -// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG330:![0-9]+]] -// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG330]] -// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG330]] -// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG330]] -// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[META325]] -// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG328]] +// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG362:![0-9]+]] +// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG362]] +// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG364:![0-9]+]] +// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG364]] +// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG364]] +// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG364]], !nonnull [[META12]], !align [[META46]] +// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[META359]] +// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG362]] // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@__captured_stmt.11 -// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG331:![0-9]+]] { +// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] !dbg [[DBG365:![0-9]+]] { // CHECK-DEBUG-NEXT: entry: // CHECK-DEBUG-NEXT: [[DISTANCE_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 @@ -2706,67 +2706,67 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG-NEXT: [[DOTSTOP:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[DOTSTEP:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: store ptr [[DISTANCE]], ptr [[DISTANCE_ADDR]], align 8 -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DISTANCE_ADDR]], [[META332:![0-9]+]], !DIExpression(), [[META333:![0-9]+]]) +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DISTANCE_ADDR]], [[META366:![0-9]+]], !DIExpression(), [[META367:![0-9]+]]) // CHECK-DEBUG-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META334:![0-9]+]], !DIExpression(), [[META333]]) +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META368:![0-9]+]], !DIExpression(), [[META367]]) // CHECK-DEBUG-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTART]], [[META335:![0-9]+]], !DIExpression(), [[META337:![0-9]+]]) -// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_9:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG338:![0-9]+]] -// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG338]] -// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG338]] -// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[META337]] -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTOP]], [[META340:![0-9]+]], !DIExpression(), [[META341:![0-9]+]]) -// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[META341]] -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTEP]], [[META342:![0-9]+]], !DIExpression(), [[META341]]) -// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[META341]] -// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META341]] -// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META341]] -// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[META341]] -// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[META341]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTART]], [[META369:![0-9]+]], !DIExpression(), [[META371:![0-9]+]]) +// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_9:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG372:![0-9]+]] +// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG372]], !nonnull [[META12]], !align [[META46]] +// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG372]] +// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[META371]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTOP]], [[META374:![0-9]+]], !DIExpression(), [[META375:![0-9]+]]) +// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[META375]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTEP]], [[META376:![0-9]+]], !DIExpression(), [[META375]]) +// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[META375]] +// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META375]] +// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META375]] +// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[META375]] +// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[META375]] // CHECK-DEBUG: cond.true: -// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META341]] -// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META341]] -// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[META341]] -// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META341]] -// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[META341]] -// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[META341]] -// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META341]] -// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[META341]] -// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[META341]] +// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META375]] +// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META375]] +// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[META375]] +// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META375]] +// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[META375]] +// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[META375]] +// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META375]] +// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[META375]] +// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[META375]] // CHECK-DEBUG: cond.false: -// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[META341]] +// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[META375]] // CHECK-DEBUG: cond.end: -// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[META341]] -// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[META341]] -// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[META341]] -// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG343:![0-9]+]] +// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[META375]] +// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[META375]], !nonnull [[META12]], !align [[META46]] +// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[META375]] +// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG377:![0-9]+]] // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@__captured_stmt.12 -// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG345:![0-9]+]] { +// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] !dbg [[DBG379:![0-9]+]] { // CHECK-DEBUG-NEXT: entry: // CHECK-DEBUG-NEXT: [[LOOPVAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: [[LOGICAL_ADDR:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: store ptr [[LOOPVAR]], ptr [[LOOPVAR_ADDR]], align 8 -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOOPVAR_ADDR]], [[META346:![0-9]+]], !DIExpression(), [[META347:![0-9]+]]) +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOOPVAR_ADDR]], [[META380:![0-9]+]], !DIExpression(), [[META381:![0-9]+]]) // CHECK-DEBUG-NEXT: store i32 [[LOGICAL]], ptr [[LOGICAL_ADDR]], align 4 -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOGICAL_ADDR]], [[META348:![0-9]+]], !DIExpression(), [[META347]]) +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOGICAL_ADDR]], [[META382:![0-9]+]], !DIExpression(), [[META381]]) // CHECK-DEBUG-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META349:![0-9]+]], !DIExpression(), [[META347]]) +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META383:![0-9]+]], !DIExpression(), [[META381]]) // CHECK-DEBUG-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG350:![0-9]+]] -// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG350]] -// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG352:![0-9]+]] -// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG352]] -// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG352]] -// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG352]] -// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[META347]] -// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG350]] +// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG384:![0-9]+]] +// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG384]] +// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG386:![0-9]+]] +// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG386]] +// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG386]] +// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG386]], !nonnull [[META12]], !align [[META46]] +// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[META381]] +// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG384]] // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@__captured_stmt.13 -// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG353:![0-9]+]] { +// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] !dbg [[DBG387:![0-9]+]] { // CHECK-DEBUG-NEXT: entry: // CHECK-DEBUG-NEXT: [[DISTANCE_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 @@ -2774,67 +2774,67 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG-NEXT: [[DOTSTOP:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[DOTSTEP:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: store ptr [[DISTANCE]], ptr [[DISTANCE_ADDR]], align 8 -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DISTANCE_ADDR]], [[META354:![0-9]+]], !DIExpression(), [[META355:![0-9]+]]) +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DISTANCE_ADDR]], [[META388:![0-9]+]], !DIExpression(), [[META389:![0-9]+]]) // CHECK-DEBUG-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META356:![0-9]+]], !DIExpression(), [[META355]]) +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META390:![0-9]+]], !DIExpression(), [[META389]]) // CHECK-DEBUG-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTART]], [[META357:![0-9]+]], !DIExpression(), [[META359:![0-9]+]]) -// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_11:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG360:![0-9]+]] -// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG360]] -// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG360]] -// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[META359]] -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTOP]], [[META362:![0-9]+]], !DIExpression(), [[META363:![0-9]+]]) -// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[META363]] -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTEP]], [[META364:![0-9]+]], !DIExpression(), [[META363]]) -// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[META363]] -// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META363]] -// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META363]] -// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[META363]] -// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[META363]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTART]], [[META391:![0-9]+]], !DIExpression(), [[META393:![0-9]+]]) +// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_11:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG394:![0-9]+]] +// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG394]], !nonnull [[META12]], !align [[META46]] +// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG394]] +// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[META393]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTOP]], [[META396:![0-9]+]], !DIExpression(), [[META397:![0-9]+]]) +// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[META397]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTEP]], [[META398:![0-9]+]], !DIExpression(), [[META397]]) +// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[META397]] +// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META397]] +// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META397]] +// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[META397]] +// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[META397]] // CHECK-DEBUG: cond.true: -// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META363]] -// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META363]] -// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[META363]] -// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META363]] -// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[META363]] -// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[META363]] -// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META363]] -// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[META363]] -// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[META363]] +// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META397]] +// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META397]] +// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[META397]] +// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META397]] +// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[META397]] +// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[META397]] +// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META397]] +// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[META397]] +// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[META397]] // CHECK-DEBUG: cond.false: -// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[META363]] +// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[META397]] // CHECK-DEBUG: cond.end: -// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[META363]] -// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[META363]] -// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[META363]] -// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG365:![0-9]+]] +// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[META397]] +// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[META397]], !nonnull [[META12]], !align [[META46]] +// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[META397]] +// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG399:![0-9]+]] // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@__captured_stmt.14 -// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG367:![0-9]+]] { +// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] !dbg [[DBG401:![0-9]+]] { // CHECK-DEBUG-NEXT: entry: // CHECK-DEBUG-NEXT: [[LOOPVAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: [[LOGICAL_ADDR:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: store ptr [[LOOPVAR]], ptr [[LOOPVAR_ADDR]], align 8 -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOOPVAR_ADDR]], [[META368:![0-9]+]], !DIExpression(), [[META369:![0-9]+]]) +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOOPVAR_ADDR]], [[META402:![0-9]+]], !DIExpression(), [[META403:![0-9]+]]) // CHECK-DEBUG-NEXT: store i32 [[LOGICAL]], ptr [[LOGICAL_ADDR]], align 4 -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOGICAL_ADDR]], [[META370:![0-9]+]], !DIExpression(), [[META369]]) +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOGICAL_ADDR]], [[META404:![0-9]+]], !DIExpression(), [[META403]]) // CHECK-DEBUG-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META371:![0-9]+]], !DIExpression(), [[META369]]) +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META405:![0-9]+]], !DIExpression(), [[META403]]) // CHECK-DEBUG-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG372:![0-9]+]] -// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG372]] -// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG374:![0-9]+]] -// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG374]] -// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG374]] -// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG374]] -// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[META369]] -// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG372]] +// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG406:![0-9]+]] +// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG406]] +// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG408:![0-9]+]] +// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG408]] +// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG408]] +// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG408]], !nonnull [[META12]], !align [[META46]] +// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[META403]] +// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG406]] // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@__captured_stmt.15 -// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG375:![0-9]+]] { +// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] !dbg [[DBG409:![0-9]+]] { // CHECK-DEBUG-NEXT: entry: // CHECK-DEBUG-NEXT: [[DISTANCE_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 @@ -2842,67 +2842,67 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG-NEXT: [[DOTSTOP:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[DOTSTEP:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: store ptr [[DISTANCE]], ptr [[DISTANCE_ADDR]], align 8 -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DISTANCE_ADDR]], [[META376:![0-9]+]], !DIExpression(), [[META377:![0-9]+]]) +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DISTANCE_ADDR]], [[META410:![0-9]+]], !DIExpression(), [[META411:![0-9]+]]) // CHECK-DEBUG-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META378:![0-9]+]], !DIExpression(), [[META377]]) +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META412:![0-9]+]], !DIExpression(), [[META411]]) // CHECK-DEBUG-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTART]], [[META379:![0-9]+]], !DIExpression(), [[META381:![0-9]+]]) -// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_13:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG382:![0-9]+]] -// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG382]] -// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG382]] -// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[META381]] -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTOP]], [[META384:![0-9]+]], !DIExpression(), [[META385:![0-9]+]]) -// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[META385]] -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTEP]], [[META386:![0-9]+]], !DIExpression(), [[META385]]) -// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[META385]] -// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META385]] -// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META385]] -// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[META385]] -// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[META385]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTART]], [[META413:![0-9]+]], !DIExpression(), [[META415:![0-9]+]]) +// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_13:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG416:![0-9]+]] +// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG416]], !nonnull [[META12]], !align [[META46]] +// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG416]] +// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[META415]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTOP]], [[META418:![0-9]+]], !DIExpression(), [[META419:![0-9]+]]) +// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[META419]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTEP]], [[META420:![0-9]+]], !DIExpression(), [[META419]]) +// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[META419]] +// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META419]] +// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META419]] +// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[META419]] +// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[META419]] // CHECK-DEBUG: cond.true: -// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META385]] -// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META385]] -// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[META385]] -// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META385]] -// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[META385]] -// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[META385]] -// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META385]] -// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[META385]] -// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[META385]] +// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META419]] +// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META419]] +// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[META419]] +// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META419]] +// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[META419]] +// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[META419]] +// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META419]] +// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[META419]] +// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[META419]] // CHECK-DEBUG: cond.false: -// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[META385]] +// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[META419]] // CHECK-DEBUG: cond.end: -// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[META385]] -// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[META385]] -// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[META385]] -// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG387:![0-9]+]] +// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[META419]] +// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[META419]], !nonnull [[META12]], !align [[META46]] +// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[META419]] +// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG421:![0-9]+]] // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@__captured_stmt.16 -// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG389:![0-9]+]] { +// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] !dbg [[DBG423:![0-9]+]] { // CHECK-DEBUG-NEXT: entry: // CHECK-DEBUG-NEXT: [[LOOPVAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: [[LOGICAL_ADDR:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: store ptr [[LOOPVAR]], ptr [[LOOPVAR_ADDR]], align 8 -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOOPVAR_ADDR]], [[META390:![0-9]+]], !DIExpression(), [[META391:![0-9]+]]) +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOOPVAR_ADDR]], [[META424:![0-9]+]], !DIExpression(), [[META425:![0-9]+]]) // CHECK-DEBUG-NEXT: store i32 [[LOGICAL]], ptr [[LOGICAL_ADDR]], align 4 -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOGICAL_ADDR]], [[META392:![0-9]+]], !DIExpression(), [[META391]]) +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOGICAL_ADDR]], [[META426:![0-9]+]], !DIExpression(), [[META425]]) // CHECK-DEBUG-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META393:![0-9]+]], !DIExpression(), [[META391]]) +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META427:![0-9]+]], !DIExpression(), [[META425]]) // CHECK-DEBUG-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG394:![0-9]+]] -// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG394]] -// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG396:![0-9]+]] -// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG396]] -// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG396]] -// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG396]] -// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[META391]] -// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG394]] +// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG428:![0-9]+]] +// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG428]] +// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG430:![0-9]+]] +// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG430]] +// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG430]] +// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG430]], !nonnull [[META12]], !align [[META46]] +// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[META425]] +// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG428]] // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@__captured_stmt.17 -// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG397:![0-9]+]] { +// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] !dbg [[DBG431:![0-9]+]] { // CHECK-DEBUG-NEXT: entry: // CHECK-DEBUG-NEXT: [[DISTANCE_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 @@ -2910,67 +2910,67 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG-NEXT: [[DOTSTOP:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[DOTSTEP:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: store ptr [[DISTANCE]], ptr [[DISTANCE_ADDR]], align 8 -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DISTANCE_ADDR]], [[META398:![0-9]+]], !DIExpression(), [[META399:![0-9]+]]) +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DISTANCE_ADDR]], [[META432:![0-9]+]], !DIExpression(), [[META433:![0-9]+]]) // CHECK-DEBUG-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META400:![0-9]+]], !DIExpression(), [[META399]]) +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META434:![0-9]+]], !DIExpression(), [[META433]]) // CHECK-DEBUG-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTART]], [[META401:![0-9]+]], !DIExpression(), [[META403:![0-9]+]]) -// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_15:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG404:![0-9]+]] -// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG404]] -// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG404]] -// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[META403]] -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTOP]], [[META406:![0-9]+]], !DIExpression(), [[META407:![0-9]+]]) -// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[META407]] -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTEP]], [[META408:![0-9]+]], !DIExpression(), [[META407]]) -// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[META407]] -// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META407]] -// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META407]] -// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[META407]] -// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[META407]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTART]], [[META435:![0-9]+]], !DIExpression(), [[META437:![0-9]+]]) +// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_15:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG438:![0-9]+]] +// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG438]], !nonnull [[META12]], !align [[META46]] +// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG438]] +// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[META437]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTOP]], [[META440:![0-9]+]], !DIExpression(), [[META441:![0-9]+]]) +// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[META441]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTEP]], [[META442:![0-9]+]], !DIExpression(), [[META441]]) +// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[META441]] +// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META441]] +// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META441]] +// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[META441]] +// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[META441]] // CHECK-DEBUG: cond.true: -// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META407]] -// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META407]] -// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[META407]] -// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META407]] -// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[META407]] -// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[META407]] -// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META407]] -// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[META407]] -// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[META407]] +// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META441]] +// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META441]] +// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[META441]] +// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META441]] +// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[META441]] +// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[META441]] +// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META441]] +// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[META441]] +// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[META441]] // CHECK-DEBUG: cond.false: -// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[META407]] +// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[META441]] // CHECK-DEBUG: cond.end: -// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[META407]] -// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[META407]] -// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[META407]] -// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG409:![0-9]+]] +// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[META441]] +// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[META441]], !nonnull [[META12]], !align [[META46]] +// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[META441]] +// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG443:![0-9]+]] // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@__captured_stmt.18 -// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG411:![0-9]+]] { +// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] !dbg [[DBG445:![0-9]+]] { // CHECK-DEBUG-NEXT: entry: // CHECK-DEBUG-NEXT: [[LOOPVAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: [[LOGICAL_ADDR:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: store ptr [[LOOPVAR]], ptr [[LOOPVAR_ADDR]], align 8 -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOOPVAR_ADDR]], [[META412:![0-9]+]], !DIExpression(), [[META413:![0-9]+]]) +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOOPVAR_ADDR]], [[META446:![0-9]+]], !DIExpression(), [[META447:![0-9]+]]) // CHECK-DEBUG-NEXT: store i32 [[LOGICAL]], ptr [[LOGICAL_ADDR]], align 4 -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOGICAL_ADDR]], [[META414:![0-9]+]], !DIExpression(), [[META413]]) +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOGICAL_ADDR]], [[META448:![0-9]+]], !DIExpression(), [[META447]]) // CHECK-DEBUG-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META415:![0-9]+]], !DIExpression(), [[META413]]) +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META449:![0-9]+]], !DIExpression(), [[META447]]) // CHECK-DEBUG-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_16:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG416:![0-9]+]] -// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG416]] -// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG418:![0-9]+]] -// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG418]] -// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG418]] -// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG418]] -// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[META413]] -// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG416]] +// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_16:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG450:![0-9]+]] +// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG450]] +// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG452:![0-9]+]] +// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG452]] +// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG452]] +// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG452]], !nonnull [[META12]], !align [[META46]] +// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[META447]] +// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG450]] // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@__captured_stmt.19 -// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG419:![0-9]+]] { +// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] !dbg [[DBG453:![0-9]+]] { // CHECK-DEBUG-NEXT: entry: // CHECK-DEBUG-NEXT: [[DISTANCE_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 @@ -2978,61 +2978,61 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG-NEXT: [[DOTSTOP:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[DOTSTEP:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: store ptr [[DISTANCE]], ptr [[DISTANCE_ADDR]], align 8 -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DISTANCE_ADDR]], [[META420:![0-9]+]], !DIExpression(), [[META421:![0-9]+]]) +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DISTANCE_ADDR]], [[META454:![0-9]+]], !DIExpression(), [[META455:![0-9]+]]) // CHECK-DEBUG-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META422:![0-9]+]], !DIExpression(), [[META421]]) +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META456:![0-9]+]], !DIExpression(), [[META455]]) // CHECK-DEBUG-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTART]], [[META423:![0-9]+]], !DIExpression(), [[META425:![0-9]+]]) -// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_17:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG426:![0-9]+]] -// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG426]] -// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG426]] -// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[META425]] -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTOP]], [[META428:![0-9]+]], !DIExpression(), [[META429:![0-9]+]]) -// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[META429]] -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTEP]], [[META430:![0-9]+]], !DIExpression(), [[META429]]) -// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[META429]] -// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META429]] -// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META429]] -// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[META429]] -// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[META429]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTART]], [[META457:![0-9]+]], !DIExpression(), [[META459:![0-9]+]]) +// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_17:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG460:![0-9]+]] +// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG460]], !nonnull [[META12]], !align [[META46]] +// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG460]] +// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[META459]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTOP]], [[META462:![0-9]+]], !DIExpression(), [[META463:![0-9]+]]) +// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[META463]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTEP]], [[META464:![0-9]+]], !DIExpression(), [[META463]]) +// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[META463]] +// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META463]] +// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META463]] +// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[META463]] +// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[META463]] // CHECK-DEBUG: cond.true: -// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META429]] -// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META429]] -// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[META429]] -// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META429]] -// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[META429]] -// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[META429]] -// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META429]] -// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[META429]] -// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[META429]] +// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META463]] +// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META463]] +// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[META463]] +// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META463]] +// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[META463]] +// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[META463]] +// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META463]] +// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[META463]] +// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[META463]] // CHECK-DEBUG: cond.false: -// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[META429]] +// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[META463]] // CHECK-DEBUG: cond.end: -// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[META429]] -// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[META429]] -// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[META429]] -// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG431:![0-9]+]] +// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[META463]] +// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[META463]], !nonnull [[META12]], !align [[META46]] +// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[META463]] +// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG465:![0-9]+]] // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@__captured_stmt.20 -// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG433:![0-9]+]] { +// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] !dbg [[DBG467:![0-9]+]] { // CHECK-DEBUG-NEXT: entry: // CHECK-DEBUG-NEXT: [[LOOPVAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: [[LOGICAL_ADDR:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: store ptr [[LOOPVAR]], ptr [[LOOPVAR_ADDR]], align 8 -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOOPVAR_ADDR]], [[META434:![0-9]+]], !DIExpression(), [[META435:![0-9]+]]) +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOOPVAR_ADDR]], [[META468:![0-9]+]], !DIExpression(), [[META469:![0-9]+]]) // CHECK-DEBUG-NEXT: store i32 [[LOGICAL]], ptr [[LOGICAL_ADDR]], align 4 -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOGICAL_ADDR]], [[META436:![0-9]+]], !DIExpression(), [[META435]]) +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOGICAL_ADDR]], [[META470:![0-9]+]], !DIExpression(), [[META469]]) // CHECK-DEBUG-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 -// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META437:![0-9]+]], !DIExpression(), [[META435]]) +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META471:![0-9]+]], !DIExpression(), [[META469]]) // CHECK-DEBUG-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_18:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG438:![0-9]+]] -// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG438]] -// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG440:![0-9]+]] -// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG440]] -// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG440]] -// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG440]] -// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[META435]] -// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG438]] +// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_18:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG472:![0-9]+]] +// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG472]] +// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG474:![0-9]+]] +// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG474]] +// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG474]] +// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG474]], !nonnull [[META12]], !align [[META46]] +// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[META469]] +// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG472]] // diff --git a/clang/test/OpenMP/nested_loop_codegen.cpp b/clang/test/OpenMP/nested_loop_codegen.cpp index e01fd0da31ee..a41fcb2f9e39 100644 --- a/clang/test/OpenMP/nested_loop_codegen.cpp +++ b/clang/test/OpenMP/nested_loop_codegen.cpp @@ -88,7 +88,7 @@ int inline_decl() { // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8, !nonnull [[META3:![0-9]+]], !align [[META4:![0-9]+]] // CHECK1-NEXT: store i32 0, ptr [[TMP0]], align 4 // CHECK1-NEXT: br label [[FOR_COND:%.*]] // CHECK1: for.cond: @@ -152,7 +152,7 @@ int inline_decl() { // CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 // CHECK1-NEXT: [[INC4:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK1-NEXT: store i32 [[INC4]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK1: for.end: // CHECK1-NEXT: ret void // @@ -185,8 +185,8 @@ int inline_decl() { // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8 // CHECK1-NEXT: store ptr [[RES]], ptr [[RES_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[RES_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8, !nonnull [[META3]], !align [[META4]] +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[RES_ADDR]], align 8, !nonnull [[META3]], !align [[META4]] // CHECK1-NEXT: store i32 0, ptr [[TMP0]], align 4 // CHECK1-NEXT: br label [[FOR_COND:%.*]] // CHECK1: for.cond: @@ -250,7 +250,7 @@ int inline_decl() { // CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP0]], align 4 // CHECK1-NEXT: [[INC4:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK1-NEXT: store i32 [[INC4]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK1: for.end: // CHECK1-NEXT: ret void // @@ -286,115 +286,115 @@ int inline_decl() { // CHECK2-NEXT: #dbg_declare(ptr [[DOTBOUND_TID__ADDR]], [[META29:![0-9]+]], !DIExpression(), [[META28]]) // CHECK2-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8 // CHECK2-NEXT: #dbg_declare(ptr [[I_ADDR]], [[META30:![0-9]+]], !DIExpression(), [[META31:![0-9]+]]) -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8, !dbg [[DBG32:![0-9]+]] -// CHECK2-NEXT: store i32 0, ptr [[TMP0]], align 4, !dbg [[DBG33:![0-9]+]] -// CHECK2-NEXT: br label [[FOR_COND:%.*]], !dbg [[DBG35:![0-9]+]] +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8, !dbg [[DBG32:![0-9]+]], !nonnull [[META11:![0-9]+]], !align [[META33:![0-9]+]] +// CHECK2-NEXT: store i32 0, ptr [[TMP0]], align 4, !dbg [[DBG34:![0-9]+]] +// CHECK2-NEXT: br label [[FOR_COND:%.*]], !dbg [[DBG36:![0-9]+]] // CHECK2: for.cond: -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4, !dbg [[DBG36:![0-9]+]] -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP1]], 10, !dbg [[DBG38:![0-9]+]] -// CHECK2-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]], !dbg [[DBG39:![0-9]+]] +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4, !dbg [[DBG37:![0-9]+]] +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP1]], 10, !dbg [[DBG39:![0-9]+]] +// CHECK2-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]], !dbg [[DBG40:![0-9]+]] // CHECK2: for.body: -// CHECK2-NEXT: #dbg_declare(ptr [[DOTOMP_IV]], [[META40:![0-9]+]], !DIExpression(), [[META43:![0-9]+]]) -// CHECK2-NEXT: #dbg_declare(ptr [[DOTOMP_LB]], [[META44:![0-9]+]], !DIExpression(), [[META43]]) -// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG45:![0-9]+]] -// CHECK2-NEXT: #dbg_declare(ptr [[DOTOMP_UB]], [[META46:![0-9]+]], !DIExpression(), [[META43]]) -// CHECK2-NEXT: store i32 4, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG45]] -// CHECK2-NEXT: #dbg_declare(ptr [[DOTOMP_STRIDE]], [[META47:![0-9]+]], !DIExpression(), [[META43]]) -// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG45]] -// CHECK2-NEXT: #dbg_declare(ptr [[DOTOMP_IS_LAST]], [[META48:![0-9]+]], !DIExpression(), [[META43]]) -// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG45]] -// CHECK2-NEXT: #dbg_declare(ptr [[K]], [[META49:![0-9]+]], !DIExpression(), [[META43]]) -// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG50:![0-9]+]] -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG50]] -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG51:![0-9]+]] -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG45]] -// CHECK2-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[TMP4]], 4, !dbg [[DBG45]] -// CHECK2-NEXT: br i1 [[CMP1]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG45]] +// CHECK2-NEXT: #dbg_declare(ptr [[DOTOMP_IV]], [[META41:![0-9]+]], !DIExpression(), [[META44:![0-9]+]]) +// CHECK2-NEXT: #dbg_declare(ptr [[DOTOMP_LB]], [[META45:![0-9]+]], !DIExpression(), [[META44]]) +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG46:![0-9]+]] +// CHECK2-NEXT: #dbg_declare(ptr [[DOTOMP_UB]], [[META47:![0-9]+]], !DIExpression(), [[META44]]) +// CHECK2-NEXT: store i32 4, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG46]] +// CHECK2-NEXT: #dbg_declare(ptr [[DOTOMP_STRIDE]], [[META48:![0-9]+]], !DIExpression(), [[META44]]) +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG46]] +// CHECK2-NEXT: #dbg_declare(ptr [[DOTOMP_IS_LAST]], [[META49:![0-9]+]], !DIExpression(), [[META44]]) +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG46]] +// CHECK2-NEXT: #dbg_declare(ptr [[K]], [[META50:![0-9]+]], !DIExpression(), [[META44]]) +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG51:![0-9]+]] +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG51]] +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG52:![0-9]+]] +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG46]] +// CHECK2-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[TMP4]], 4, !dbg [[DBG46]] +// CHECK2-NEXT: br i1 [[CMP1]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG46]] // CHECK2: cond.true: -// CHECK2-NEXT: br label [[COND_END:%.*]], !dbg [[DBG45]] +// CHECK2-NEXT: br label [[COND_END:%.*]], !dbg [[DBG46]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG45]] -// CHECK2-NEXT: br label [[COND_END]], !dbg [[DBG45]] +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG46]] +// CHECK2-NEXT: br label [[COND_END]], !dbg [[DBG46]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 4, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ], !dbg [[DBG45]] -// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG45]] -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG45]] -// CHECK2-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG45]] -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG50]] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 4, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ], !dbg [[DBG46]] +// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG46]] +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG46]] +// CHECK2-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG46]] +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG51]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG45]] -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG45]] -// CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]], !dbg [[DBG52:![0-9]+]] -// CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG50]] +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG46]] +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG46]] +// CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]], !dbg [[DBG53:![0-9]+]] +// CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG51]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG45]] -// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1, !dbg [[DBG53:![0-9]+]] -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]], !dbg [[DBG53]] -// CHECK2-NEXT: store i32 [[ADD]], ptr [[K]], align 4, !dbg [[DBG53]] -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[K]], align 4, !dbg [[DBG54:![0-9]+]] -// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP10]], 1, !dbg [[DBG54]] -// CHECK2-NEXT: store i32 [[INC]], ptr [[K]], align 4, !dbg [[DBG54]] -// CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG56:![0-9]+]] +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG46]] +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1, !dbg [[DBG54:![0-9]+]] +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]], !dbg [[DBG54]] +// CHECK2-NEXT: store i32 [[ADD]], ptr [[K]], align 4, !dbg [[DBG54]] +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[K]], align 4, !dbg [[DBG55:![0-9]+]] +// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP10]], 1, !dbg [[DBG55]] +// CHECK2-NEXT: store i32 [[INC]], ptr [[K]], align 4, !dbg [[DBG55]] +// CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG57:![0-9]+]] // CHECK2: omp.body.continue: -// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG51]] +// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG52]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG45]] -// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1, !dbg [[DBG52]] -// CHECK2-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG52]] -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG51]], !llvm.loop [[LOOP57:![0-9]+]] +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG46]] +// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1, !dbg [[DBG53]] +// CHECK2-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG53]] +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG52]], !llvm.loop [[LOOP58:![0-9]+]] // CHECK2: omp.inner.for.end: -// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]], !dbg [[DBG51]] +// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]], !dbg [[DBG52]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG51]] -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4, !dbg [[DBG51]] -// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3:[0-9]+]], i32 [[TMP13]]), !dbg [[DBG58:![0-9]+]] -// CHECK2-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG58]] -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4, !dbg [[DBG58]] -// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4:[0-9]+]], i32 [[TMP15]]), !dbg [[DBG58]] -// CHECK2-NEXT: br label [[FOR_INC:%.*]], !dbg [[DBG59:![0-9]+]] +// CHECK2-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG52]] +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4, !dbg [[DBG52]] +// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3:[0-9]+]], i32 [[TMP13]]), !dbg [[DBG59:![0-9]+]] +// CHECK2-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG59]] +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4, !dbg [[DBG59]] +// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4:[0-9]+]], i32 [[TMP15]]), !dbg [[DBG59]] +// CHECK2-NEXT: br label [[FOR_INC:%.*]], !dbg [[DBG60:![0-9]+]] // CHECK2: for.inc: -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4, !dbg [[DBG60:![0-9]+]] -// CHECK2-NEXT: [[INC4:%.*]] = add nsw i32 [[TMP16]], 1, !dbg [[DBG60]] -// CHECK2-NEXT: store i32 [[INC4]], ptr [[TMP0]], align 4, !dbg [[DBG60]] -// CHECK2-NEXT: br label [[FOR_COND]], !dbg [[DBG61:![0-9]+]], !llvm.loop [[LOOP62:![0-9]+]] +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4, !dbg [[DBG61:![0-9]+]] +// CHECK2-NEXT: [[INC4:%.*]] = add nsw i32 [[TMP16]], 1, !dbg [[DBG61]] +// CHECK2-NEXT: store i32 [[INC4]], ptr [[TMP0]], align 4, !dbg [[DBG61]] +// CHECK2-NEXT: br label [[FOR_COND]], !dbg [[DBG62:![0-9]+]], !llvm.loop [[LOOP63:![0-9]+]] // CHECK2: for.end: -// CHECK2-NEXT: ret void, !dbg [[DBG65:![0-9]+]] +// CHECK2-NEXT: ret void, !dbg [[DBG66:![0-9]+]] // // // CHECK2-LABEL: define {{[^@]+}}@_Z12outline_declv.omp_outlined -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR1]] !dbg [[DBG66:![0-9]+]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR1]] !dbg [[DBG67:![0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: #dbg_declare(ptr [[DOTGLOBAL_TID__ADDR]], [[META67:![0-9]+]], !DIExpression(), [[META68:![0-9]+]]) +// CHECK2-NEXT: #dbg_declare(ptr [[DOTGLOBAL_TID__ADDR]], [[META68:![0-9]+]], !DIExpression(), [[META69:![0-9]+]]) // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: #dbg_declare(ptr [[DOTBOUND_TID__ADDR]], [[META69:![0-9]+]], !DIExpression(), [[META68]]) +// CHECK2-NEXT: #dbg_declare(ptr [[DOTBOUND_TID__ADDR]], [[META70:![0-9]+]], !DIExpression(), [[META69]]) // CHECK2-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8 -// CHECK2-NEXT: #dbg_declare(ptr [[I_ADDR]], [[META70:![0-9]+]], !DIExpression(), [[META68]]) -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8, !dbg [[DBG71:![0-9]+]] -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG71]] -// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG71]] -// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[I_ADDR]], align 8, !dbg [[DBG71]] -// CHECK2-NEXT: call void @_Z12outline_declv.omp_outlined_debug__(ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]) #[[ATTR2:[0-9]+]], !dbg [[DBG71]] -// CHECK2-NEXT: ret void, !dbg [[DBG71]] +// CHECK2-NEXT: #dbg_declare(ptr [[I_ADDR]], [[META71:![0-9]+]], !DIExpression(), [[META69]]) +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8, !dbg [[DBG72:![0-9]+]], !nonnull [[META11]], !align [[META33]] +// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG72]] +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG72]] +// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[I_ADDR]], align 8, !dbg [[DBG72]] +// CHECK2-NEXT: call void @_Z12outline_declv.omp_outlined_debug__(ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]) #[[ATTR2:[0-9]+]], !dbg [[DBG72]] +// CHECK2-NEXT: ret void, !dbg [[DBG72]] // // // CHECK2-LABEL: define {{[^@]+}}@_Z11inline_declv -// CHECK2-SAME: () #[[ATTR0]] !dbg [[DBG74:![0-9]+]] { +// CHECK2-SAME: () #[[ATTR0]] !dbg [[DBG75:![0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[RES:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: #dbg_declare(ptr [[I]], [[META75:![0-9]+]], !DIExpression(), [[META76:![0-9]+]]) -// CHECK2-NEXT: #dbg_declare(ptr [[RES]], [[META77:![0-9]+]], !DIExpression(), [[META78:![0-9]+]]) -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB13:[0-9]+]], i32 2, ptr @_Z11inline_declv.omp_outlined, ptr [[I]], ptr [[RES]]), !dbg [[DBG79:![0-9]+]] -// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[RES]], align 4, !dbg [[DBG80:![0-9]+]] -// CHECK2-NEXT: ret i32 [[TMP0]], !dbg [[DBG81:![0-9]+]] +// CHECK2-NEXT: #dbg_declare(ptr [[I]], [[META76:![0-9]+]], !DIExpression(), [[META77:![0-9]+]]) +// CHECK2-NEXT: #dbg_declare(ptr [[RES]], [[META78:![0-9]+]], !DIExpression(), [[META79:![0-9]+]]) +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB13:[0-9]+]], i32 2, ptr @_Z11inline_declv.omp_outlined, ptr [[I]], ptr [[RES]]), !dbg [[DBG80:![0-9]+]] +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[RES]], align 4, !dbg [[DBG81:![0-9]+]] +// CHECK2-NEXT: ret i32 [[TMP0]], !dbg [[DBG82:![0-9]+]] // // // CHECK2-LABEL: define {{[^@]+}}@_Z11inline_declv.omp_outlined_debug__ -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[I:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[RES:%.*]]) #[[ATTR1]] !dbg [[DBG82:![0-9]+]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[I:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[RES:%.*]]) #[[ATTR1]] !dbg [[DBG83:![0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 @@ -408,112 +408,112 @@ int inline_decl() { // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[K:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: #dbg_declare(ptr [[DOTGLOBAL_TID__ADDR]], [[META85:![0-9]+]], !DIExpression(), [[META86:![0-9]+]]) +// CHECK2-NEXT: #dbg_declare(ptr [[DOTGLOBAL_TID__ADDR]], [[META86:![0-9]+]], !DIExpression(), [[META87:![0-9]+]]) // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: #dbg_declare(ptr [[DOTBOUND_TID__ADDR]], [[META87:![0-9]+]], !DIExpression(), [[META86]]) +// CHECK2-NEXT: #dbg_declare(ptr [[DOTBOUND_TID__ADDR]], [[META88:![0-9]+]], !DIExpression(), [[META87]]) // CHECK2-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8 -// CHECK2-NEXT: #dbg_declare(ptr [[I_ADDR]], [[META88:![0-9]+]], !DIExpression(), [[META89:![0-9]+]]) +// CHECK2-NEXT: #dbg_declare(ptr [[I_ADDR]], [[META89:![0-9]+]], !DIExpression(), [[META90:![0-9]+]]) // CHECK2-NEXT: store ptr [[RES]], ptr [[RES_ADDR]], align 8 -// CHECK2-NEXT: #dbg_declare(ptr [[RES_ADDR]], [[META90:![0-9]+]], !DIExpression(), [[META91:![0-9]+]]) -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8, !dbg [[DBG92:![0-9]+]] -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[RES_ADDR]], align 8, !dbg [[DBG92]] -// CHECK2-NEXT: store i32 0, ptr [[TMP0]], align 4, !dbg [[DBG93:![0-9]+]] -// CHECK2-NEXT: br label [[FOR_COND:%.*]], !dbg [[DBG95:![0-9]+]] +// CHECK2-NEXT: #dbg_declare(ptr [[RES_ADDR]], [[META91:![0-9]+]], !DIExpression(), [[META92:![0-9]+]]) +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8, !dbg [[DBG93:![0-9]+]], !nonnull [[META11]], !align [[META33]] +// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[RES_ADDR]], align 8, !dbg [[DBG93]], !nonnull [[META11]], !align [[META33]] +// CHECK2-NEXT: store i32 0, ptr [[TMP0]], align 4, !dbg [[DBG94:![0-9]+]] +// CHECK2-NEXT: br label [[FOR_COND:%.*]], !dbg [[DBG96:![0-9]+]] // CHECK2: for.cond: -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4, !dbg [[DBG96:![0-9]+]] -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP2]], 10, !dbg [[DBG98:![0-9]+]] -// CHECK2-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]], !dbg [[DBG99:![0-9]+]] +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4, !dbg [[DBG97:![0-9]+]] +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP2]], 10, !dbg [[DBG99:![0-9]+]] +// CHECK2-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]], !dbg [[DBG100:![0-9]+]] // CHECK2: for.body: -// CHECK2-NEXT: #dbg_declare(ptr [[DOTOMP_IV]], [[META100:![0-9]+]], !DIExpression(), [[META103:![0-9]+]]) -// CHECK2-NEXT: #dbg_declare(ptr [[DOTOMP_LB]], [[META104:![0-9]+]], !DIExpression(), [[META103]]) -// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG105:![0-9]+]] -// CHECK2-NEXT: #dbg_declare(ptr [[DOTOMP_UB]], [[META106:![0-9]+]], !DIExpression(), [[META103]]) -// CHECK2-NEXT: store i32 4, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG105]] -// CHECK2-NEXT: #dbg_declare(ptr [[DOTOMP_STRIDE]], [[META107:![0-9]+]], !DIExpression(), [[META103]]) -// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG105]] -// CHECK2-NEXT: #dbg_declare(ptr [[DOTOMP_IS_LAST]], [[META108:![0-9]+]], !DIExpression(), [[META103]]) -// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG105]] -// CHECK2-NEXT: #dbg_declare(ptr [[K]], [[META109:![0-9]+]], !DIExpression(), [[META103]]) -// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG110:![0-9]+]] -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4, !dbg [[DBG110]] -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB8:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG111:![0-9]+]] -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG105]] -// CHECK2-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[TMP5]], 4, !dbg [[DBG105]] -// CHECK2-NEXT: br i1 [[CMP1]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG105]] +// CHECK2-NEXT: #dbg_declare(ptr [[DOTOMP_IV]], [[META101:![0-9]+]], !DIExpression(), [[META104:![0-9]+]]) +// CHECK2-NEXT: #dbg_declare(ptr [[DOTOMP_LB]], [[META105:![0-9]+]], !DIExpression(), [[META104]]) +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG106:![0-9]+]] +// CHECK2-NEXT: #dbg_declare(ptr [[DOTOMP_UB]], [[META107:![0-9]+]], !DIExpression(), [[META104]]) +// CHECK2-NEXT: store i32 4, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG106]] +// CHECK2-NEXT: #dbg_declare(ptr [[DOTOMP_STRIDE]], [[META108:![0-9]+]], !DIExpression(), [[META104]]) +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG106]] +// CHECK2-NEXT: #dbg_declare(ptr [[DOTOMP_IS_LAST]], [[META109:![0-9]+]], !DIExpression(), [[META104]]) +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG106]] +// CHECK2-NEXT: #dbg_declare(ptr [[K]], [[META110:![0-9]+]], !DIExpression(), [[META104]]) +// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG111:![0-9]+]] +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4, !dbg [[DBG111]] +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB8:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG112:![0-9]+]] +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG106]] +// CHECK2-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[TMP5]], 4, !dbg [[DBG106]] +// CHECK2-NEXT: br i1 [[CMP1]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG106]] // CHECK2: cond.true: -// CHECK2-NEXT: br label [[COND_END:%.*]], !dbg [[DBG105]] +// CHECK2-NEXT: br label [[COND_END:%.*]], !dbg [[DBG106]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG105]] -// CHECK2-NEXT: br label [[COND_END]], !dbg [[DBG105]] +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG106]] +// CHECK2-NEXT: br label [[COND_END]], !dbg [[DBG106]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 4, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ], !dbg [[DBG105]] -// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG105]] -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG105]] -// CHECK2-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG105]] -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG110]] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 4, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ], !dbg [[DBG106]] +// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG106]] +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG106]] +// CHECK2-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG106]] +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG111]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG105]] -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG105]] -// CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]], !dbg [[DBG112:![0-9]+]] -// CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG110]] +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG106]] +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG106]] +// CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]], !dbg [[DBG113:![0-9]+]] +// CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG111]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG105]] -// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1, !dbg [[DBG113:![0-9]+]] -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]], !dbg [[DBG113]] -// CHECK2-NEXT: store i32 [[ADD]], ptr [[K]], align 4, !dbg [[DBG113]] -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG114:![0-9]+]] -// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP11]], 1, !dbg [[DBG114]] -// CHECK2-NEXT: store i32 [[INC]], ptr [[TMP1]], align 4, !dbg [[DBG114]] -// CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG116:![0-9]+]] +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG106]] +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1, !dbg [[DBG114:![0-9]+]] +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]], !dbg [[DBG114]] +// CHECK2-NEXT: store i32 [[ADD]], ptr [[K]], align 4, !dbg [[DBG114]] +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG115:![0-9]+]] +// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP11]], 1, !dbg [[DBG115]] +// CHECK2-NEXT: store i32 [[INC]], ptr [[TMP1]], align 4, !dbg [[DBG115]] +// CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG117:![0-9]+]] // CHECK2: omp.body.continue: -// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG111]] +// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG112]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG105]] -// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1, !dbg [[DBG112]] -// CHECK2-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG112]] -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG111]], !llvm.loop [[LOOP117:![0-9]+]] +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG106]] +// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1, !dbg [[DBG113]] +// CHECK2-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG113]] +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG112]], !llvm.loop [[LOOP118:![0-9]+]] // CHECK2: omp.inner.for.end: -// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]], !dbg [[DBG111]] +// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]], !dbg [[DBG112]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG111]] -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4, !dbg [[DBG111]] -// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB10:[0-9]+]], i32 [[TMP14]]), !dbg [[DBG118:![0-9]+]] -// CHECK2-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG118]] -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4, !dbg [[DBG118]] -// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB11:[0-9]+]], i32 [[TMP16]]), !dbg [[DBG118]] -// CHECK2-NEXT: br label [[FOR_INC:%.*]], !dbg [[DBG119:![0-9]+]] +// CHECK2-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG112]] +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4, !dbg [[DBG112]] +// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB10:[0-9]+]], i32 [[TMP14]]), !dbg [[DBG119:![0-9]+]] +// CHECK2-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG119]] +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4, !dbg [[DBG119]] +// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB11:[0-9]+]], i32 [[TMP16]]), !dbg [[DBG119]] +// CHECK2-NEXT: br label [[FOR_INC:%.*]], !dbg [[DBG120:![0-9]+]] // CHECK2: for.inc: -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP0]], align 4, !dbg [[DBG120:![0-9]+]] -// CHECK2-NEXT: [[INC4:%.*]] = add nsw i32 [[TMP17]], 1, !dbg [[DBG120]] -// CHECK2-NEXT: store i32 [[INC4]], ptr [[TMP0]], align 4, !dbg [[DBG120]] -// CHECK2-NEXT: br label [[FOR_COND]], !dbg [[DBG121:![0-9]+]], !llvm.loop [[LOOP122:![0-9]+]] +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP0]], align 4, !dbg [[DBG121:![0-9]+]] +// CHECK2-NEXT: [[INC4:%.*]] = add nsw i32 [[TMP17]], 1, !dbg [[DBG121]] +// CHECK2-NEXT: store i32 [[INC4]], ptr [[TMP0]], align 4, !dbg [[DBG121]] +// CHECK2-NEXT: br label [[FOR_COND]], !dbg [[DBG122:![0-9]+]], !llvm.loop [[LOOP123:![0-9]+]] // CHECK2: for.end: -// CHECK2-NEXT: ret void, !dbg [[DBG124:![0-9]+]] +// CHECK2-NEXT: ret void, !dbg [[DBG125:![0-9]+]] // // // CHECK2-LABEL: define {{[^@]+}}@_Z11inline_declv.omp_outlined -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[I:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[RES:%.*]]) #[[ATTR1]] !dbg [[DBG125:![0-9]+]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[I:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[RES:%.*]]) #[[ATTR1]] !dbg [[DBG126:![0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[RES_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: #dbg_declare(ptr [[DOTGLOBAL_TID__ADDR]], [[META126:![0-9]+]], !DIExpression(), [[META127:![0-9]+]]) +// CHECK2-NEXT: #dbg_declare(ptr [[DOTGLOBAL_TID__ADDR]], [[META127:![0-9]+]], !DIExpression(), [[META128:![0-9]+]]) // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: #dbg_declare(ptr [[DOTBOUND_TID__ADDR]], [[META128:![0-9]+]], !DIExpression(), [[META127]]) +// CHECK2-NEXT: #dbg_declare(ptr [[DOTBOUND_TID__ADDR]], [[META129:![0-9]+]], !DIExpression(), [[META128]]) // CHECK2-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8 -// CHECK2-NEXT: #dbg_declare(ptr [[I_ADDR]], [[META129:![0-9]+]], !DIExpression(), [[META127]]) +// CHECK2-NEXT: #dbg_declare(ptr [[I_ADDR]], [[META130:![0-9]+]], !DIExpression(), [[META128]]) // CHECK2-NEXT: store ptr [[RES]], ptr [[RES_ADDR]], align 8 -// CHECK2-NEXT: #dbg_declare(ptr [[RES_ADDR]], [[META130:![0-9]+]], !DIExpression(), [[META127]]) -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8, !dbg [[DBG131:![0-9]+]] -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[RES_ADDR]], align 8, !dbg [[DBG131]] -// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG131]] -// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG131]] -// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[I_ADDR]], align 8, !dbg [[DBG131]] -// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[RES_ADDR]], align 8, !dbg [[DBG131]] -// CHECK2-NEXT: call void @_Z11inline_declv.omp_outlined_debug__(ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]], ptr [[TMP5]]) #[[ATTR2]], !dbg [[DBG131]] -// CHECK2-NEXT: ret void, !dbg [[DBG131]] +// CHECK2-NEXT: #dbg_declare(ptr [[RES_ADDR]], [[META131:![0-9]+]], !DIExpression(), [[META128]]) +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8, !dbg [[DBG132:![0-9]+]], !nonnull [[META11]], !align [[META33]] +// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[RES_ADDR]], align 8, !dbg [[DBG132]], !nonnull [[META11]], !align [[META33]] +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG132]] +// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG132]] +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[I_ADDR]], align 8, !dbg [[DBG132]] +// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[RES_ADDR]], align 8, !dbg [[DBG132]] +// CHECK2-NEXT: call void @_Z11inline_declv.omp_outlined_debug__(ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]], ptr [[TMP5]]) #[[ATTR2]], !dbg [[DBG132]] +// CHECK2-NEXT: ret void, !dbg [[DBG132]] // // // CHECK3-LABEL: define {{[^@]+}}@_Z12outline_declv @@ -522,14 +522,13 @@ int inline_decl() { // CHECK3-NEXT: [[STRUCTARG:%.*]] = alloca { ptr, ptr }, align 8 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[K:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK3-NEXT: br label [[OMP_PARALLEL:%.*]] // CHECK3: omp_parallel: // CHECK3-NEXT: [[GEP_I:%.*]] = getelementptr { ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 0 // CHECK3-NEXT: store ptr [[I]], ptr [[GEP_I]], align 8 // CHECK3-NEXT: [[GEP_K:%.*]] = getelementptr { ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 1 // CHECK3-NEXT: store ptr [[K]], ptr [[GEP_K]], align 8 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z12outline_declv..omp_par, ptr [[STRUCTARG]]) +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @_Z12outline_declv..omp_par, ptr [[STRUCTARG]]) // CHECK3-NEXT: br label [[OMP_PAR_EXIT:%.*]] // CHECK3: omp.par.exit: // CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[K]], align 4 @@ -540,9 +539,9 @@ int inline_decl() { // CHECK3-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: omp.par.entry: // CHECK3-NEXT: [[GEP_I:%.*]] = getelementptr { ptr, ptr }, ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[LOADGEP_I:%.*]] = load ptr, ptr [[GEP_I]], align 8 +// CHECK3-NEXT: [[LOADGEP_I:%.*]] = load ptr, ptr [[GEP_I]], align 8, !align [[META3:![0-9]+]] // CHECK3-NEXT: [[GEP_K:%.*]] = getelementptr { ptr, ptr }, ptr [[TMP0]], i32 0, i32 1 -// CHECK3-NEXT: [[LOADGEP_K:%.*]] = load ptr, ptr [[GEP_K]], align 8 +// CHECK3-NEXT: [[LOADGEP_K:%.*]] = load ptr, ptr [[GEP_K]], align 8, !align [[META3]] // CHECK3-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 @@ -567,7 +566,9 @@ int inline_decl() { // CHECK3: omp.par.region.parallel.after: // CHECK3-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] // CHECK3: omp.par.pre_finalize: -// CHECK3-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +// CHECK3-NEXT: br label [[DOTFINI:%.*]] +// CHECK3: .fini: +// CHECK3-NEXT: br label [[OMP_PAR_EXIT_EXITSTUB:%.*]] // CHECK3: for.body: // CHECK3-NEXT: store i32 0, ptr [[LOADGEP_K]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 @@ -583,8 +584,8 @@ int inline_decl() { // CHECK3-NEXT: [[TMP6:%.*]] = sub i32 [[DOTCOUNT]], 1 // CHECK3-NEXT: store i32 [[TMP6]], ptr [[P_UPPERBOUND]], align 4 // CHECK3-NEXT: store i32 1, ptr [[P_STRIDE]], align 4 -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK3-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM2]], i32 34, ptr [[P_LASTITER]], ptr [[P_LOWERBOUND]], ptr [[P_UPPERBOUND]], ptr [[P_STRIDE]], i32 1, i32 0) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK3-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, ptr [[P_LASTITER]], ptr [[P_LOWERBOUND]], ptr [[P_UPPERBOUND]], ptr [[P_STRIDE]], i32 1, i32 0) // CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[P_LOWERBOUND]], align 4 // CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[P_UPPERBOUND]], align 4 // CHECK3-NEXT: [[TMP9:%.*]] = sub i32 [[TMP8]], [[TMP7]] @@ -597,17 +598,17 @@ int inline_decl() { // CHECK3-NEXT: [[OMP_LOOP_CMP:%.*]] = icmp ult i32 [[OMP_LOOP_IV]], [[TMP10]] // CHECK3-NEXT: br i1 [[OMP_LOOP_CMP]], label [[OMP_LOOP_BODY:%.*]], label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp_loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) // CHECK3-NEXT: br label [[OMP_LOOP_AFTER:%.*]] // CHECK3: omp_loop.after: // CHECK3-NEXT: br label [[FOR_INC:%.*]] // CHECK3: for.inc: // CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[LOADGEP_I]], align 4 -// CHECK3-NEXT: [[INC4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: store i32 [[INC4]], ptr [[LOADGEP_I]], align 4 -// CHECK3-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK3-NEXT: [[INC3:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK3-NEXT: store i32 [[INC3]], ptr [[LOADGEP_I]], align 4 +// CHECK3-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] // CHECK3: omp_loop.body: // CHECK3-NEXT: [[TMP12:%.*]] = add i32 [[OMP_LOOP_IV]], [[TMP7]] // CHECK3-NEXT: call void @__captured_stmt.1(ptr [[LOADGEP_K]], i32 [[TMP12]], ptr [[AGG_CAPTURED1]]) @@ -623,7 +624,7 @@ int inline_decl() { // // // CHECK3-LABEL: define {{[^@]+}}@__captured_stmt -// CHECK3-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK3-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DISTANCE_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 @@ -634,7 +635,7 @@ int inline_decl() { // CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !nonnull [[META6:![0-9]+]], !align [[META3]] // CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 // CHECK3-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4 // CHECK3-NEXT: store i32 5, ptr [[DOTSTOP]], align 4 @@ -657,13 +658,13 @@ int inline_decl() { // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: // CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ] -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !nonnull [[META6]], !align [[META3]] // CHECK3-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4 // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@__captured_stmt.1 -// CHECK3-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[LOOPVAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[LOGICAL_ADDR:%.*]] = alloca i32, align 4 @@ -677,7 +678,7 @@ int inline_decl() { // CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4 // CHECK3-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]] // CHECK3-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]] -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !nonnull [[META6]], !align [[META3]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4 // CHECK3-NEXT: ret void // @@ -688,7 +689,6 @@ int inline_decl() { // CHECK3-NEXT: [[STRUCTARG:%.*]] = alloca { ptr, ptr }, align 8 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[RES:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK3-NEXT: br label [[OMP_PARALLEL:%.*]] // CHECK3: omp_parallel: // CHECK3-NEXT: [[GEP_I:%.*]] = getelementptr { ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 0 @@ -696,7 +696,7 @@ int inline_decl() { // CHECK3-NEXT: [[GEP_RES:%.*]] = getelementptr { ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 1 // CHECK3-NEXT: store ptr [[RES]], ptr [[GEP_RES]], align 8 // CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z11inline_declv..omp_par, ptr [[STRUCTARG]]) -// CHECK3-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +// CHECK3-NEXT: br label [[OMP_PAR_EXIT:%.*]] // CHECK3: omp.par.exit: // CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[RES]], align 4 // CHECK3-NEXT: ret i32 [[TMP0]] @@ -706,9 +706,9 @@ int inline_decl() { // CHECK3-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: omp.par.entry: // CHECK3-NEXT: [[GEP_I:%.*]] = getelementptr { ptr, ptr }, ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[LOADGEP_I:%.*]] = load ptr, ptr [[GEP_I]], align 8 +// CHECK3-NEXT: [[LOADGEP_I:%.*]] = load ptr, ptr [[GEP_I]], align 8, !align [[META3]] // CHECK3-NEXT: [[GEP_RES:%.*]] = getelementptr { ptr, ptr }, ptr [[TMP0]], i32 0, i32 1 -// CHECK3-NEXT: [[LOADGEP_RES:%.*]] = load ptr, ptr [[GEP_RES]], align 8 +// CHECK3-NEXT: [[LOADGEP_RES:%.*]] = load ptr, ptr [[GEP_RES]], align 8, !align [[META3]] // CHECK3-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 @@ -734,7 +734,9 @@ int inline_decl() { // CHECK3: omp.par.region.parallel.after: // CHECK3-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] // CHECK3: omp.par.pre_finalize: -// CHECK3-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +// CHECK3-NEXT: br label [[DOTFINI:%.*]] +// CHECK3: .fini: +// CHECK3-NEXT: br label [[OMP_PAR_EXIT_EXITSTUB:%.*]] // CHECK3: for.body: // CHECK3-NEXT: store i32 0, ptr [[K]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 0 @@ -750,8 +752,8 @@ int inline_decl() { // CHECK3-NEXT: [[TMP6:%.*]] = sub i32 [[DOTCOUNT]], 1 // CHECK3-NEXT: store i32 [[TMP6]], ptr [[P_UPPERBOUND]], align 4 // CHECK3-NEXT: store i32 1, ptr [[P_STRIDE]], align 4 -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK3-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM2]], i32 34, ptr [[P_LASTITER]], ptr [[P_LOWERBOUND]], ptr [[P_UPPERBOUND]], ptr [[P_STRIDE]], i32 1, i32 0) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK3-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, ptr [[P_LASTITER]], ptr [[P_LOWERBOUND]], ptr [[P_UPPERBOUND]], ptr [[P_STRIDE]], i32 1, i32 0) // CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[P_LOWERBOUND]], align 4 // CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[P_UPPERBOUND]], align 4 // CHECK3-NEXT: [[TMP9:%.*]] = sub i32 [[TMP8]], [[TMP7]] @@ -764,17 +766,17 @@ int inline_decl() { // CHECK3-NEXT: [[OMP_LOOP_CMP:%.*]] = icmp ult i32 [[OMP_LOOP_IV]], [[TMP10]] // CHECK3-NEXT: br i1 [[OMP_LOOP_CMP]], label [[OMP_LOOP_BODY:%.*]], label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp_loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) // CHECK3-NEXT: br label [[OMP_LOOP_AFTER:%.*]] // CHECK3: omp_loop.after: // CHECK3-NEXT: br label [[FOR_INC:%.*]] // CHECK3: for.inc: // CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[LOADGEP_I]], align 4 -// CHECK3-NEXT: [[INC4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: store i32 [[INC4]], ptr [[LOADGEP_I]], align 4 -// CHECK3-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK3-NEXT: [[INC3:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK3-NEXT: store i32 [[INC3]], ptr [[LOADGEP_I]], align 4 +// CHECK3-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK3: omp_loop.body: // CHECK3-NEXT: [[TMP12:%.*]] = add i32 [[OMP_LOOP_IV]], [[TMP7]] // CHECK3-NEXT: call void @__captured_stmt.3(ptr [[K]], i32 [[TMP12]], ptr [[AGG_CAPTURED1]]) @@ -790,7 +792,7 @@ int inline_decl() { // // // CHECK3-LABEL: define {{[^@]+}}@__captured_stmt.2 -// CHECK3-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DISTANCE_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 @@ -801,7 +803,7 @@ int inline_decl() { // CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !nonnull [[META6]], !align [[META3]] // CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 // CHECK3-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4 // CHECK3-NEXT: store i32 5, ptr [[DOTSTOP]], align 4 @@ -824,13 +826,13 @@ int inline_decl() { // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: // CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ] -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !nonnull [[META6]], !align [[META3]] // CHECK3-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4 // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@__captured_stmt.3 -// CHECK3-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[LOOPVAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[LOGICAL_ADDR:%.*]] = alloca i32, align 4 @@ -844,7 +846,7 @@ int inline_decl() { // CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4 // CHECK3-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]] // CHECK3-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]] -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !nonnull [[META6]], !align [[META3]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4 // CHECK3-NEXT: ret void // @@ -857,27 +859,26 @@ int inline_decl() { // CHECK4-NEXT: [[K:%.*]] = alloca i32, align 4 // CHECK4-NEXT: #dbg_declare(ptr [[I]], [[META14:![0-9]+]], !DIExpression(), [[META15:![0-9]+]]) // CHECK4-NEXT: #dbg_declare(ptr [[K]], [[META16:![0-9]+]], !DIExpression(), [[META15]]) -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]), !dbg [[DBG17:![0-9]+]] // CHECK4-NEXT: br label [[OMP_PARALLEL:%.*]] // CHECK4: omp_parallel: // CHECK4-NEXT: [[GEP_I:%.*]] = getelementptr { ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 0 // CHECK4-NEXT: store ptr [[I]], ptr [[GEP_I]], align 8 // CHECK4-NEXT: [[GEP_K:%.*]] = getelementptr { ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 1 // CHECK4-NEXT: store ptr [[K]], ptr [[GEP_K]], align 8 -// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z12outline_declv..omp_par, ptr [[STRUCTARG]]), !dbg [[DBG18:![0-9]+]] -// CHECK4-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @_Z12outline_declv..omp_par, ptr [[STRUCTARG]]), !dbg [[DBG17:![0-9]+]] +// CHECK4-NEXT: br label [[OMP_PAR_EXIT:%.*]] // CHECK4: omp.par.exit: -// CHECK4-NEXT: [[TMP0:%.*]] = load i32, ptr [[K]], align 4, !dbg [[DBG20:![0-9]+]] -// CHECK4-NEXT: ret i32 [[TMP0]], !dbg [[DBG20]] +// CHECK4-NEXT: [[TMP0:%.*]] = load i32, ptr [[K]], align 4, !dbg [[DBG19:![0-9]+]] +// CHECK4-NEXT: ret i32 [[TMP0]], !dbg [[DBG19]] // // // CHECK4-LABEL: define {{[^@]+}}@_Z12outline_declv..omp_par -// CHECK4-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1:[0-9]+]] !dbg [[DBG21:![0-9]+]] { +// CHECK4-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1:[0-9]+]] !dbg [[DBG20:![0-9]+]] { // CHECK4-NEXT: omp.par.entry: // CHECK4-NEXT: [[GEP_I:%.*]] = getelementptr { ptr, ptr }, ptr [[TMP0]], i32 0, i32 0 -// CHECK4-NEXT: [[LOADGEP_I:%.*]] = load ptr, ptr [[GEP_I]], align 8 +// CHECK4-NEXT: [[LOADGEP_I:%.*]] = load ptr, ptr [[GEP_I]], align 8, !align [[META22:![0-9]+]] // CHECK4-NEXT: [[GEP_K:%.*]] = getelementptr { ptr, ptr }, ptr [[TMP0]], i32 0, i32 1 -// CHECK4-NEXT: [[LOADGEP_K:%.*]] = load ptr, ptr [[GEP_K]], align 8 +// CHECK4-NEXT: [[LOADGEP_K:%.*]] = load ptr, ptr [[GEP_K]], align 8, !align [[META22]] // CHECK4-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 @@ -889,80 +890,80 @@ int inline_decl() { // CHECK4-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK4-NEXT: [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK4-NEXT: [[DOTCOUNT_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: #dbg_declare(ptr [[LOADGEP_I]], [[META24:![0-9]+]], !DIExpression(), [[META25:![0-9]+]]) -// CHECK4-NEXT: #dbg_declare(ptr [[LOADGEP_K]], [[META26:![0-9]+]], !DIExpression(), [[META25]]) +// CHECK4-NEXT: #dbg_declare(ptr [[LOADGEP_I]], [[META23:![0-9]+]], !DIExpression(), [[META24:![0-9]+]]) +// CHECK4-NEXT: #dbg_declare(ptr [[LOADGEP_K]], [[META25:![0-9]+]], !DIExpression(), [[META24]]) // CHECK4-NEXT: br label [[OMP_PAR_REGION:%.*]] // CHECK4: omp.par.region: -// CHECK4-NEXT: store i32 0, ptr [[LOADGEP_I]], align 4, !dbg [[DBG23:![0-9]+]] -// CHECK4-NEXT: br label [[FOR_COND:%.*]], !dbg [[DBG23]] +// CHECK4-NEXT: store i32 0, ptr [[LOADGEP_I]], align 4, !dbg [[DBG26:![0-9]+]] +// CHECK4-NEXT: br label [[FOR_COND:%.*]], !dbg [[DBG26]] // CHECK4: for.cond: -// CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr [[LOADGEP_I]], align 4, !dbg [[DBG25:![0-9]+]] -// CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP2]], 10, !dbg [[DBG25]] -// CHECK4-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]], !dbg [[DBG23]] +// CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr [[LOADGEP_I]], align 4, !dbg [[DBG28:![0-9]+]] +// CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP2]], 10, !dbg [[DBG28]] +// CHECK4-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]], !dbg [[DBG26]] // CHECK4: for.end: -// CHECK4-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]], !dbg [[DBG27:![0-9]+]] +// CHECK4-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]], !dbg [[DBG30:![0-9]+]] // CHECK4: omp.par.region.parallel.after: // CHECK4-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] // CHECK4: omp.par.pre_finalize: -// CHECK4-NEXT: br label [[FINI:%.*]] +// CHECK4-NEXT: br label [[DOTFINI:%.*]] // CHECK4: .fini: -// CHECK4-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]], !dbg [[DBG27]] +// CHECK4-NEXT: br label [[OMP_PAR_EXIT_EXITSTUB:%.*]], !dbg [[DBG30]] // CHECK4: for.body: -// CHECK4-NEXT: store i32 0, ptr [[LOADGEP_K]], align 4, !dbg [[DBG28:![0-9]+]] -// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0, !dbg [[DBG28]] -// CHECK4-NEXT: store ptr [[LOADGEP_K]], ptr [[TMP3]], align 8, !dbg [[DBG28]] -// CHECK4-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED1]], i32 0, i32 0, !dbg [[DBG28]] -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[LOADGEP_K]], align 4, !dbg [[DBG32:![0-9]+]] -// CHECK4-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4, !dbg [[DBG28]] -// CHECK4-NEXT: call void @__captured_stmt(ptr [[DOTCOUNT_ADDR]], ptr [[AGG_CAPTURED]]), !dbg [[DBG28]] -// CHECK4-NEXT: [[DOTCOUNT:%.*]] = load i32, ptr [[DOTCOUNT_ADDR]], align 4, !dbg [[DBG28]] -// CHECK4-NEXT: br label [[OMP_LOOP_PREHEADER:%.*]], !dbg [[DBG28]] +// CHECK4-NEXT: store i32 0, ptr [[LOADGEP_K]], align 4, !dbg [[DBG31:![0-9]+]] +// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0, !dbg [[DBG31]] +// CHECK4-NEXT: store ptr [[LOADGEP_K]], ptr [[TMP3]], align 8, !dbg [[DBG31]] +// CHECK4-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED1]], i32 0, i32 0, !dbg [[DBG31]] +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[LOADGEP_K]], align 4, !dbg [[DBG35:![0-9]+]] +// CHECK4-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4, !dbg [[DBG31]] +// CHECK4-NEXT: call void @__captured_stmt(ptr [[DOTCOUNT_ADDR]], ptr [[AGG_CAPTURED]]), !dbg [[DBG31]] +// CHECK4-NEXT: [[DOTCOUNT:%.*]] = load i32, ptr [[DOTCOUNT_ADDR]], align 4, !dbg [[DBG31]] +// CHECK4-NEXT: br label [[OMP_LOOP_PREHEADER:%.*]], !dbg [[DBG31]] // CHECK4: omp_loop.preheader: -// CHECK4-NEXT: store i32 0, ptr [[P_LOWERBOUND]], align 4, !dbg [[DBG28]] -// CHECK4-NEXT: [[TMP6:%.*]] = sub i32 [[DOTCOUNT]], 1, !dbg [[DBG28]] -// CHECK4-NEXT: store i32 [[TMP6]], ptr [[P_UPPERBOUND]], align 4, !dbg [[DBG28]] -// CHECK4-NEXT: store i32 1, ptr [[P_STRIDE]], align 4, !dbg [[DBG28]] -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3:[0-9]+]]), !dbg [[DBG28]] -// CHECK4-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]], i32 34, ptr [[P_LASTITER]], ptr [[P_LOWERBOUND]], ptr [[P_UPPERBOUND]], ptr [[P_STRIDE]], i32 1, i32 0), !dbg [[DBG28]] -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[P_LOWERBOUND]], align 4, !dbg [[DBG28]] -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[P_UPPERBOUND]], align 4, !dbg [[DBG28]] -// CHECK4-NEXT: [[TMP9:%.*]] = sub i32 [[TMP8]], [[TMP7]], !dbg [[DBG28]] -// CHECK4-NEXT: [[TMP10:%.*]] = add i32 [[TMP9]], 1, !dbg [[DBG28]] -// CHECK4-NEXT: br label [[OMP_LOOP_HEADER:%.*]], !dbg [[DBG28]] +// CHECK4-NEXT: store i32 0, ptr [[P_LOWERBOUND]], align 4, !dbg [[DBG31]] +// CHECK4-NEXT: [[TMP6:%.*]] = sub i32 [[DOTCOUNT]], 1, !dbg [[DBG31]] +// CHECK4-NEXT: store i32 [[TMP6]], ptr [[P_UPPERBOUND]], align 4, !dbg [[DBG31]] +// CHECK4-NEXT: store i32 1, ptr [[P_STRIDE]], align 4, !dbg [[DBG31]] +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3:[0-9]+]]), !dbg [[DBG31]] +// CHECK4-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, ptr [[P_LASTITER]], ptr [[P_LOWERBOUND]], ptr [[P_UPPERBOUND]], ptr [[P_STRIDE]], i32 1, i32 0), !dbg [[DBG31]] +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[P_LOWERBOUND]], align 4, !dbg [[DBG31]] +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[P_UPPERBOUND]], align 4, !dbg [[DBG31]] +// CHECK4-NEXT: [[TMP9:%.*]] = sub i32 [[TMP8]], [[TMP7]], !dbg [[DBG31]] +// CHECK4-NEXT: [[TMP10:%.*]] = add i32 [[TMP9]], 1, !dbg [[DBG31]] +// CHECK4-NEXT: br label [[OMP_LOOP_HEADER:%.*]], !dbg [[DBG31]] // CHECK4: omp_loop.header: -// CHECK4-NEXT: [[OMP_LOOP_IV:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER]] ], [ [[OMP_LOOP_NEXT:%.*]], [[OMP_LOOP_INC:%.*]] ], !dbg [[DBG28]] -// CHECK4-NEXT: br label [[OMP_LOOP_COND:%.*]], !dbg [[DBG28]] +// CHECK4-NEXT: [[OMP_LOOP_IV:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER]] ], [ [[OMP_LOOP_NEXT:%.*]], [[OMP_LOOP_INC:%.*]] ], !dbg [[DBG31]] +// CHECK4-NEXT: br label [[OMP_LOOP_COND:%.*]], !dbg [[DBG31]] // CHECK4: omp_loop.cond: -// CHECK4-NEXT: [[OMP_LOOP_CMP:%.*]] = icmp ult i32 [[OMP_LOOP_IV]], [[TMP10]], !dbg [[DBG28]] -// CHECK4-NEXT: br i1 [[OMP_LOOP_CMP]], label [[OMP_LOOP_BODY:%.*]], label [[OMP_LOOP_EXIT:%.*]], !dbg [[DBG28]] +// CHECK4-NEXT: [[OMP_LOOP_CMP:%.*]] = icmp ult i32 [[OMP_LOOP_IV]], [[TMP10]], !dbg [[DBG31]] +// CHECK4-NEXT: br i1 [[OMP_LOOP_CMP]], label [[OMP_LOOP_BODY:%.*]], label [[OMP_LOOP_EXIT:%.*]], !dbg [[DBG31]] // CHECK4: omp_loop.exit: -// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]]), !dbg [[DBG28]] -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3]]), !dbg [[DBG33:![0-9]+]] -// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM3]]), !dbg [[DBG33]] -// CHECK4-NEXT: br label [[OMP_LOOP_AFTER:%.*]], !dbg [[DBG28]] +// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM]]), !dbg [[DBG31]] +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3]]), !dbg [[DBG36:![0-9]+]] +// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM2]]), !dbg [[DBG36]] +// CHECK4-NEXT: br label [[OMP_LOOP_AFTER:%.*]], !dbg [[DBG31]] // CHECK4: omp_loop.after: -// CHECK4-NEXT: br label [[FOR_INC:%.*]], !dbg [[DBG34:![0-9]+]] +// CHECK4-NEXT: br label [[FOR_INC:%.*]], !dbg [[DBG37:![0-9]+]] // CHECK4: for.inc: -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, ptr [[LOADGEP_I]], align 4, !dbg [[DBG25]] -// CHECK4-NEXT: [[INC4:%.*]] = add nsw i32 [[TMP11]], 1, !dbg [[DBG25]] -// CHECK4-NEXT: store i32 [[INC4]], ptr [[LOADGEP_I]], align 4, !dbg [[DBG25]] -// CHECK4-NEXT: br label [[FOR_COND]], !dbg [[DBG25]], !llvm.loop [[LOOP35:![0-9]+]] +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, ptr [[LOADGEP_I]], align 4, !dbg [[DBG28]] +// CHECK4-NEXT: [[INC3:%.*]] = add nsw i32 [[TMP11]], 1, !dbg [[DBG28]] +// CHECK4-NEXT: store i32 [[INC3]], ptr [[LOADGEP_I]], align 4, !dbg [[DBG28]] +// CHECK4-NEXT: br label [[FOR_COND]], !dbg [[DBG28]], !llvm.loop [[LOOP38:![0-9]+]] // CHECK4: omp_loop.body: -// CHECK4-NEXT: [[TMP12:%.*]] = add i32 [[OMP_LOOP_IV]], [[TMP7]], !dbg [[DBG33]] -// CHECK4-NEXT: call void @__captured_stmt.1(ptr [[LOADGEP_K]], i32 [[TMP12]], ptr [[AGG_CAPTURED1]]), !dbg [[DBG28]] -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[LOADGEP_K]], align 4, !dbg [[DBG37:![0-9]+]] -// CHECK4-NEXT: [[INC:%.*]] = add nsw i32 [[TMP13]], 1, !dbg [[DBG37]] -// CHECK4-NEXT: store i32 [[INC]], ptr [[LOADGEP_K]], align 4, !dbg [[DBG37]] -// CHECK4-NEXT: br label [[OMP_LOOP_INC]], !dbg [[DBG28]] +// CHECK4-NEXT: [[TMP12:%.*]] = add i32 [[OMP_LOOP_IV]], [[TMP7]], !dbg [[DBG36]] +// CHECK4-NEXT: call void @__captured_stmt.1(ptr [[LOADGEP_K]], i32 [[TMP12]], ptr [[AGG_CAPTURED1]]), !dbg [[DBG31]] +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[LOADGEP_K]], align 4, !dbg [[DBG40:![0-9]+]] +// CHECK4-NEXT: [[INC:%.*]] = add nsw i32 [[TMP13]], 1, !dbg [[DBG40]] +// CHECK4-NEXT: store i32 [[INC]], ptr [[LOADGEP_K]], align 4, !dbg [[DBG40]] +// CHECK4-NEXT: br label [[OMP_LOOP_INC]], !dbg [[DBG31]] // CHECK4: omp_loop.inc: -// CHECK4-NEXT: [[OMP_LOOP_NEXT]] = add nuw i32 [[OMP_LOOP_IV]], 1, !dbg [[DBG28]] -// CHECK4-NEXT: br label [[OMP_LOOP_HEADER]], !dbg [[DBG28]] +// CHECK4-NEXT: [[OMP_LOOP_NEXT]] = add nuw i32 [[OMP_LOOP_IV]], 1, !dbg [[DBG31]] +// CHECK4-NEXT: br label [[OMP_LOOP_HEADER]], !dbg [[DBG31]] // CHECK4: omp.par.exit.exitStub: // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@__captured_stmt -// CHECK4-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] !dbg [[DBG39:![0-9]+]] { +// CHECK4-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] !dbg [[DBG42:![0-9]+]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DISTANCE_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 @@ -970,94 +971,93 @@ int inline_decl() { // CHECK4-NEXT: [[DOTSTOP:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTSTEP:%.*]] = alloca i32, align 4 // CHECK4-NEXT: store ptr [[DISTANCE]], ptr [[DISTANCE_ADDR]], align 8 -// CHECK4-NEXT: #dbg_declare(ptr [[DISTANCE_ADDR]], [[META47:![0-9]+]], !DIExpression(), [[META48:![0-9]+]]) +// CHECK4-NEXT: #dbg_declare(ptr [[DISTANCE_ADDR]], [[META50:![0-9]+]], !DIExpression(), [[META51:![0-9]+]]) // CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 -// CHECK4-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META49:![0-9]+]], !DIExpression(), [[META48]]) +// CHECK4-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META52:![0-9]+]], !DIExpression(), [[META51]]) // CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK4-NEXT: #dbg_declare(ptr [[DOTSTART]], [[META50:![0-9]+]], !DIExpression(), [[META52:![0-9]+]]) -// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG53:![0-9]+]] -// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG53]] -// CHECK4-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG53]] -// CHECK4-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[META52]] -// CHECK4-NEXT: #dbg_declare(ptr [[DOTSTOP]], [[META55:![0-9]+]], !DIExpression(), [[META56:![0-9]+]]) -// CHECK4-NEXT: store i32 5, ptr [[DOTSTOP]], align 4, !dbg [[META56]] -// CHECK4-NEXT: #dbg_declare(ptr [[DOTSTEP]], [[META57:![0-9]+]], !DIExpression(), [[META56]]) -// CHECK4-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[META56]] -// CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META56]] -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META56]] -// CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[META56]] -// CHECK4-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[META56]] +// CHECK4-NEXT: #dbg_declare(ptr [[DOTSTART]], [[META53:![0-9]+]], !DIExpression(), [[META55:![0-9]+]]) +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG56:![0-9]+]] +// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG56]], !nonnull [[META13:![0-9]+]], !align [[META22]] +// CHECK4-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG56]] +// CHECK4-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[META55]] +// CHECK4-NEXT: #dbg_declare(ptr [[DOTSTOP]], [[META58:![0-9]+]], !DIExpression(), [[META59:![0-9]+]]) +// CHECK4-NEXT: store i32 5, ptr [[DOTSTOP]], align 4, !dbg [[META59]] +// CHECK4-NEXT: #dbg_declare(ptr [[DOTSTEP]], [[META60:![0-9]+]], !DIExpression(), [[META59]]) +// CHECK4-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[META59]] +// CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META59]] +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META59]] +// CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[META59]] +// CHECK4-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[META59]] // CHECK4: cond.true: -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META56]] -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META56]] -// CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[META56]] -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META56]] -// CHECK4-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[META56]] -// CHECK4-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[META56]] -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META56]] -// CHECK4-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[META56]] -// CHECK4-NEXT: br label [[COND_END:%.*]], !dbg [[META56]] +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META59]] +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META59]] +// CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[META59]] +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META59]] +// CHECK4-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[META59]] +// CHECK4-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[META59]] +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META59]] +// CHECK4-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[META59]] +// CHECK4-NEXT: br label [[COND_END:%.*]], !dbg [[META59]] // CHECK4: cond.false: -// CHECK4-NEXT: br label [[COND_END]], !dbg [[META56]] +// CHECK4-NEXT: br label [[COND_END]], !dbg [[META59]] // CHECK4: cond.end: -// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[META56]] -// CHECK4-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[META56]] -// CHECK4-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[META56]] -// CHECK4-NEXT: ret void, !dbg [[DBG58:![0-9]+]] +// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[META59]] +// CHECK4-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[META59]], !nonnull [[META13]], !align [[META22]] +// CHECK4-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[META59]] +// CHECK4-NEXT: ret void, !dbg [[DBG61:![0-9]+]] // // // CHECK4-LABEL: define {{[^@]+}}@__captured_stmt.1 -// CHECK4-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG60:![0-9]+]] { +// CHECK4-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] !dbg [[DBG63:![0-9]+]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[LOOPVAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[LOGICAL_ADDR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: store ptr [[LOOPVAR]], ptr [[LOOPVAR_ADDR]], align 8 -// CHECK4-NEXT: #dbg_declare(ptr [[LOOPVAR_ADDR]], [[META68:![0-9]+]], !DIExpression(), [[META69:![0-9]+]]) +// CHECK4-NEXT: #dbg_declare(ptr [[LOOPVAR_ADDR]], [[META71:![0-9]+]], !DIExpression(), [[META72:![0-9]+]]) // CHECK4-NEXT: store i32 [[LOGICAL]], ptr [[LOGICAL_ADDR]], align 4 -// CHECK4-NEXT: #dbg_declare(ptr [[LOGICAL_ADDR]], [[META70:![0-9]+]], !DIExpression(), [[META69]]) +// CHECK4-NEXT: #dbg_declare(ptr [[LOGICAL_ADDR]], [[META73:![0-9]+]], !DIExpression(), [[META72]]) // CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 -// CHECK4-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META71:![0-9]+]], !DIExpression(), [[META69]]) +// CHECK4-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META74:![0-9]+]], !DIExpression(), [[META72]]) // CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG72:![0-9]+]] -// CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG72]] -// CHECK4-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG74:![0-9]+]] -// CHECK4-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG74]] -// CHECK4-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG74]] -// CHECK4-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG74]] -// CHECK4-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[META69]] -// CHECK4-NEXT: ret void, !dbg [[DBG72]] +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG75:![0-9]+]] +// CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG75]] +// CHECK4-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG77:![0-9]+]] +// CHECK4-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG77]] +// CHECK4-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG77]] +// CHECK4-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG77]], !nonnull [[META13]], !align [[META22]] +// CHECK4-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[META72]] +// CHECK4-NEXT: ret void, !dbg [[DBG75]] // // // CHECK4-LABEL: define {{[^@]+}}@_Z11inline_declv -// CHECK4-SAME: () #[[ATTR0]] !dbg [[DBG77:![0-9]+]] { +// CHECK4-SAME: () #[[ATTR0]] !dbg [[DBG80:![0-9]+]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[STRUCTARG:%.*]] = alloca { ptr, ptr }, align 8 // CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[RES:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: #dbg_declare(ptr [[I]], [[META78:![0-9]+]], !DIExpression(), [[META79:![0-9]+]]) -// CHECK4-NEXT: #dbg_declare(ptr [[RES]], [[META80:![0-9]+]], !DIExpression(), [[META79]]) -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB6:[0-9]+]]), !dbg [[DBG81:![0-9]+]] +// CHECK4-NEXT: #dbg_declare(ptr [[I]], [[META81:![0-9]+]], !DIExpression(), [[META82:![0-9]+]]) +// CHECK4-NEXT: #dbg_declare(ptr [[RES]], [[META83:![0-9]+]], !DIExpression(), [[META82]]) // CHECK4-NEXT: br label [[OMP_PARALLEL:%.*]] // CHECK4: omp_parallel: // CHECK4-NEXT: [[GEP_I:%.*]] = getelementptr { ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 0 // CHECK4-NEXT: store ptr [[I]], ptr [[GEP_I]], align 8 // CHECK4-NEXT: [[GEP_RES:%.*]] = getelementptr { ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 1 // CHECK4-NEXT: store ptr [[RES]], ptr [[GEP_RES]], align 8 -// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB6]], i32 1, ptr @_Z11inline_declv..omp_par, ptr [[STRUCTARG]]), !dbg [[DBG82:![0-9]+]] -// CHECK4-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB6:[0-9]+]], i32 1, ptr @_Z11inline_declv..omp_par, ptr [[STRUCTARG]]), !dbg [[DBG84:![0-9]+]] +// CHECK4-NEXT: br label [[OMP_PAR_EXIT:%.*]] // CHECK4: omp.par.exit: -// CHECK4-NEXT: [[TMP0:%.*]] = load i32, ptr [[RES]], align 4, !dbg [[DBG84:![0-9]+]] -// CHECK4-NEXT: ret i32 [[TMP0]], !dbg [[DBG84]] +// CHECK4-NEXT: [[TMP0:%.*]] = load i32, ptr [[RES]], align 4, !dbg [[DBG86:![0-9]+]] +// CHECK4-NEXT: ret i32 [[TMP0]], !dbg [[DBG86]] // // // CHECK4-LABEL: define {{[^@]+}}@_Z11inline_declv..omp_par -// CHECK4-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] !dbg [[DBG85:![0-9]+]] { +// CHECK4-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] !dbg [[DBG87:![0-9]+]] { // CHECK4-NEXT: omp.par.entry: // CHECK4-NEXT: [[GEP_I:%.*]] = getelementptr { ptr, ptr }, ptr [[TMP0]], i32 0, i32 0 -// CHECK4-NEXT: [[LOADGEP_I:%.*]] = load ptr, ptr [[GEP_I]], align 8 +// CHECK4-NEXT: [[LOADGEP_I:%.*]] = load ptr, ptr [[GEP_I]], align 8, !align [[META22]] // CHECK4-NEXT: [[GEP_RES:%.*]] = getelementptr { ptr, ptr }, ptr [[TMP0]], i32 0, i32 1 -// CHECK4-NEXT: [[LOADGEP_RES:%.*]] = load ptr, ptr [[GEP_RES]], align 8 +// CHECK4-NEXT: [[LOADGEP_RES:%.*]] = load ptr, ptr [[GEP_RES]], align 8, !align [[META22]] // CHECK4-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 @@ -1070,81 +1070,81 @@ int inline_decl() { // CHECK4-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK4-NEXT: [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK4-NEXT: [[DOTCOUNT_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: #dbg_declare(ptr [[LOADGEP_I]], [[META91:![0-9]+]], !DIExpression(), [[META92:![0-9]+]]) -// CHECK4-NEXT: #dbg_declare(ptr [[LOADGEP_RES]], [[META93:![0-9]+]], !DIExpression(), [[META92]]) +// CHECK4-NEXT: #dbg_declare(ptr [[LOADGEP_I]], [[META88:![0-9]+]], !DIExpression(), [[META89:![0-9]+]]) +// CHECK4-NEXT: #dbg_declare(ptr [[LOADGEP_RES]], [[META90:![0-9]+]], !DIExpression(), [[META89]]) // CHECK4-NEXT: br label [[OMP_PAR_REGION:%.*]] // CHECK4: omp.par.region: -// CHECK4-NEXT: store i32 0, ptr [[LOADGEP_I]], align 4, !dbg [[DBG86:![0-9]+]] -// CHECK4-NEXT: br label [[FOR_COND:%.*]], !dbg [[DBG86]] +// CHECK4-NEXT: store i32 0, ptr [[LOADGEP_I]], align 4, !dbg [[DBG91:![0-9]+]] +// CHECK4-NEXT: br label [[FOR_COND:%.*]], !dbg [[DBG91]] // CHECK4: for.cond: -// CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr [[LOADGEP_I]], align 4, !dbg [[DBG88:![0-9]+]] -// CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP2]], 10, !dbg [[DBG88]] -// CHECK4-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]], !dbg [[DBG86]] +// CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr [[LOADGEP_I]], align 4, !dbg [[DBG93:![0-9]+]] +// CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP2]], 10, !dbg [[DBG93]] +// CHECK4-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]], !dbg [[DBG91]] // CHECK4: for.end: -// CHECK4-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]], !dbg [[DBG90:![0-9]+]] +// CHECK4-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]], !dbg [[DBG95:![0-9]+]] // CHECK4: omp.par.region.parallel.after: // CHECK4-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] // CHECK4: omp.par.pre_finalize: -// CHECK4-NEXT: br label [[FINI:%.*]] +// CHECK4-NEXT: br label [[DOTFINI:%.*]] // CHECK4: .fini: -// CHECK4-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]], !dbg [[DBG90]] +// CHECK4-NEXT: br label [[OMP_PAR_EXIT_EXITSTUB:%.*]], !dbg [[DBG95]] // CHECK4: for.body: -// CHECK4-NEXT: #dbg_declare(ptr [[K]], [[META91:![0-9]+]], !DIExpression(), [[META95:![0-9]+]]) -// CHECK4-NEXT: store i32 0, ptr [[K]], align 4, !dbg [[META95]] -// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 0, !dbg [[META95]] -// CHECK4-NEXT: store ptr [[K]], ptr [[TMP3]], align 8, !dbg [[META95]] -// CHECK4-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_2]], ptr [[AGG_CAPTURED1]], i32 0, i32 0, !dbg [[META95]] -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[K]], align 4, !dbg [[DBG96:![0-9]+]] -// CHECK4-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4, !dbg [[META95]] -// CHECK4-NEXT: call void @__captured_stmt.2(ptr [[DOTCOUNT_ADDR]], ptr [[AGG_CAPTURED]]), !dbg [[META95]] -// CHECK4-NEXT: [[DOTCOUNT:%.*]] = load i32, ptr [[DOTCOUNT_ADDR]], align 4, !dbg [[META95]] -// CHECK4-NEXT: br label [[OMP_LOOP_PREHEADER:%.*]], !dbg [[META95]] +// CHECK4-NEXT: #dbg_declare(ptr [[K]], [[META96:![0-9]+]], !DIExpression(), [[META100:![0-9]+]]) +// CHECK4-NEXT: store i32 0, ptr [[K]], align 4, !dbg [[META100]] +// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 0, !dbg [[META100]] +// CHECK4-NEXT: store ptr [[K]], ptr [[TMP3]], align 8, !dbg [[META100]] +// CHECK4-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_2]], ptr [[AGG_CAPTURED1]], i32 0, i32 0, !dbg [[META100]] +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[K]], align 4, !dbg [[DBG101:![0-9]+]] +// CHECK4-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4, !dbg [[META100]] +// CHECK4-NEXT: call void @__captured_stmt.2(ptr [[DOTCOUNT_ADDR]], ptr [[AGG_CAPTURED]]), !dbg [[META100]] +// CHECK4-NEXT: [[DOTCOUNT:%.*]] = load i32, ptr [[DOTCOUNT_ADDR]], align 4, !dbg [[META100]] +// CHECK4-NEXT: br label [[OMP_LOOP_PREHEADER:%.*]], !dbg [[META100]] // CHECK4: omp_loop.preheader: -// CHECK4-NEXT: store i32 0, ptr [[P_LOWERBOUND]], align 4, !dbg [[META95]] -// CHECK4-NEXT: [[TMP6:%.*]] = sub i32 [[DOTCOUNT]], 1, !dbg [[META95]] -// CHECK4-NEXT: store i32 [[TMP6]], ptr [[P_UPPERBOUND]], align 4, !dbg [[META95]] -// CHECK4-NEXT: store i32 1, ptr [[P_STRIDE]], align 4, !dbg [[META95]] -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB8:[0-9]+]]), !dbg [[META95]] -// CHECK4-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB8]], i32 [[OMP_GLOBAL_THREAD_NUM2]], i32 34, ptr [[P_LASTITER]], ptr [[P_LOWERBOUND]], ptr [[P_UPPERBOUND]], ptr [[P_STRIDE]], i32 1, i32 0), !dbg [[META95]] -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[P_LOWERBOUND]], align 4, !dbg [[META95]] -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[P_UPPERBOUND]], align 4, !dbg [[META95]] -// CHECK4-NEXT: [[TMP9:%.*]] = sub i32 [[TMP8]], [[TMP7]], !dbg [[META95]] -// CHECK4-NEXT: [[TMP10:%.*]] = add i32 [[TMP9]], 1, !dbg [[META95]] -// CHECK4-NEXT: br label [[OMP_LOOP_HEADER:%.*]], !dbg [[META95]] +// CHECK4-NEXT: store i32 0, ptr [[P_LOWERBOUND]], align 4, !dbg [[META100]] +// CHECK4-NEXT: [[TMP6:%.*]] = sub i32 [[DOTCOUNT]], 1, !dbg [[META100]] +// CHECK4-NEXT: store i32 [[TMP6]], ptr [[P_UPPERBOUND]], align 4, !dbg [[META100]] +// CHECK4-NEXT: store i32 1, ptr [[P_STRIDE]], align 4, !dbg [[META100]] +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB8:[0-9]+]]), !dbg [[META100]] +// CHECK4-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB8]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, ptr [[P_LASTITER]], ptr [[P_LOWERBOUND]], ptr [[P_UPPERBOUND]], ptr [[P_STRIDE]], i32 1, i32 0), !dbg [[META100]] +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[P_LOWERBOUND]], align 4, !dbg [[META100]] +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[P_UPPERBOUND]], align 4, !dbg [[META100]] +// CHECK4-NEXT: [[TMP9:%.*]] = sub i32 [[TMP8]], [[TMP7]], !dbg [[META100]] +// CHECK4-NEXT: [[TMP10:%.*]] = add i32 [[TMP9]], 1, !dbg [[META100]] +// CHECK4-NEXT: br label [[OMP_LOOP_HEADER:%.*]], !dbg [[META100]] // CHECK4: omp_loop.header: -// CHECK4-NEXT: [[OMP_LOOP_IV:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER]] ], [ [[OMP_LOOP_NEXT:%.*]], [[OMP_LOOP_INC:%.*]] ], !dbg [[META95]] -// CHECK4-NEXT: br label [[OMP_LOOP_COND:%.*]], !dbg [[META95]] +// CHECK4-NEXT: [[OMP_LOOP_IV:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER]] ], [ [[OMP_LOOP_NEXT:%.*]], [[OMP_LOOP_INC:%.*]] ], !dbg [[META100]] +// CHECK4-NEXT: br label [[OMP_LOOP_COND:%.*]], !dbg [[META100]] // CHECK4: omp_loop.cond: -// CHECK4-NEXT: [[OMP_LOOP_CMP:%.*]] = icmp ult i32 [[OMP_LOOP_IV]], [[TMP10]], !dbg [[META95]] -// CHECK4-NEXT: br i1 [[OMP_LOOP_CMP]], label [[OMP_LOOP_BODY:%.*]], label [[OMP_LOOP_EXIT:%.*]], !dbg [[META95]] +// CHECK4-NEXT: [[OMP_LOOP_CMP:%.*]] = icmp ult i32 [[OMP_LOOP_IV]], [[TMP10]], !dbg [[META100]] +// CHECK4-NEXT: br i1 [[OMP_LOOP_CMP]], label [[OMP_LOOP_BODY:%.*]], label [[OMP_LOOP_EXIT:%.*]], !dbg [[META100]] // CHECK4: omp_loop.exit: -// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB8]], i32 [[OMP_GLOBAL_THREAD_NUM2]]), !dbg [[META95]] -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB8]]), !dbg [[DBG97:![0-9]+]] -// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB9:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM3]]), !dbg [[DBG97]] -// CHECK4-NEXT: br label [[OMP_LOOP_AFTER:%.*]], !dbg [[META95]] +// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB8]], i32 [[OMP_GLOBAL_THREAD_NUM]]), !dbg [[META100]] +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB8]]), !dbg [[DBG102:![0-9]+]] +// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB9:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM2]]), !dbg [[DBG102]] +// CHECK4-NEXT: br label [[OMP_LOOP_AFTER:%.*]], !dbg [[META100]] // CHECK4: omp_loop.after: -// CHECK4-NEXT: br label [[FOR_INC:%.*]], !dbg [[DBG98:![0-9]+]] +// CHECK4-NEXT: br label [[FOR_INC:%.*]], !dbg [[DBG103:![0-9]+]] // CHECK4: for.inc: -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, ptr [[LOADGEP_I]], align 4, !dbg [[DBG88]] -// CHECK4-NEXT: [[INC4:%.*]] = add nsw i32 [[TMP11]], 1, !dbg [[DBG88]] -// CHECK4-NEXT: store i32 [[INC4]], ptr [[LOADGEP_I]], align 4, !dbg [[DBG88]] -// CHECK4-NEXT: br label [[FOR_COND]], !dbg [[DBG88]], !llvm.loop [[LOOP99:![0-9]+]] +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, ptr [[LOADGEP_I]], align 4, !dbg [[DBG93]] +// CHECK4-NEXT: [[INC3:%.*]] = add nsw i32 [[TMP11]], 1, !dbg [[DBG93]] +// CHECK4-NEXT: store i32 [[INC3]], ptr [[LOADGEP_I]], align 4, !dbg [[DBG93]] +// CHECK4-NEXT: br label [[FOR_COND]], !dbg [[DBG93]], !llvm.loop [[LOOP104:![0-9]+]] // CHECK4: omp_loop.body: -// CHECK4-NEXT: [[TMP12:%.*]] = add i32 [[OMP_LOOP_IV]], [[TMP7]], !dbg [[DBG97]] -// CHECK4-NEXT: call void @__captured_stmt.3(ptr [[K]], i32 [[TMP12]], ptr [[AGG_CAPTURED1]]), !dbg [[META95]] -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[LOADGEP_RES]], align 4, !dbg [[DBG100:![0-9]+]] -// CHECK4-NEXT: [[INC:%.*]] = add nsw i32 [[TMP13]], 1, !dbg [[DBG100]] -// CHECK4-NEXT: store i32 [[INC]], ptr [[LOADGEP_RES]], align 4, !dbg [[DBG100]] -// CHECK4-NEXT: br label [[OMP_LOOP_INC]], !dbg [[META95]] +// CHECK4-NEXT: [[TMP12:%.*]] = add i32 [[OMP_LOOP_IV]], [[TMP7]], !dbg [[DBG102]] +// CHECK4-NEXT: call void @__captured_stmt.3(ptr [[K]], i32 [[TMP12]], ptr [[AGG_CAPTURED1]]), !dbg [[META100]] +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[LOADGEP_RES]], align 4, !dbg [[DBG105:![0-9]+]] +// CHECK4-NEXT: [[INC:%.*]] = add nsw i32 [[TMP13]], 1, !dbg [[DBG105]] +// CHECK4-NEXT: store i32 [[INC]], ptr [[LOADGEP_RES]], align 4, !dbg [[DBG105]] +// CHECK4-NEXT: br label [[OMP_LOOP_INC]], !dbg [[META100]] // CHECK4: omp_loop.inc: -// CHECK4-NEXT: [[OMP_LOOP_NEXT]] = add nuw i32 [[OMP_LOOP_IV]], 1, !dbg [[META95]] -// CHECK4-NEXT: br label [[OMP_LOOP_HEADER]], !dbg [[META95]] +// CHECK4-NEXT: [[OMP_LOOP_NEXT]] = add nuw i32 [[OMP_LOOP_IV]], 1, !dbg [[META100]] +// CHECK4-NEXT: br label [[OMP_LOOP_HEADER]], !dbg [[META100]] // CHECK4: omp.par.exit.exitStub: // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@__captured_stmt.2 -// CHECK4-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG102:![0-9]+]] { +// CHECK4-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] !dbg [[DBG107:![0-9]+]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DISTANCE_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 @@ -1152,61 +1152,61 @@ int inline_decl() { // CHECK4-NEXT: [[DOTSTOP:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTSTEP:%.*]] = alloca i32, align 4 // CHECK4-NEXT: store ptr [[DISTANCE]], ptr [[DISTANCE_ADDR]], align 8 -// CHECK4-NEXT: #dbg_declare(ptr [[DISTANCE_ADDR]], [[META103:![0-9]+]], !DIExpression(), [[META104:![0-9]+]]) +// CHECK4-NEXT: #dbg_declare(ptr [[DISTANCE_ADDR]], [[META108:![0-9]+]], !DIExpression(), [[META109:![0-9]+]]) // CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 -// CHECK4-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META105:![0-9]+]], !DIExpression(), [[META104]]) +// CHECK4-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META110:![0-9]+]], !DIExpression(), [[META109]]) // CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK4-NEXT: #dbg_declare(ptr [[DOTSTART]], [[META106:![0-9]+]], !DIExpression(), [[META108:![0-9]+]]) -// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG109:![0-9]+]] -// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG109]] -// CHECK4-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG109]] -// CHECK4-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[META108]] -// CHECK4-NEXT: #dbg_declare(ptr [[DOTSTOP]], [[META111:![0-9]+]], !DIExpression(), [[META112:![0-9]+]]) -// CHECK4-NEXT: store i32 5, ptr [[DOTSTOP]], align 4, !dbg [[META112]] -// CHECK4-NEXT: #dbg_declare(ptr [[DOTSTEP]], [[META113:![0-9]+]], !DIExpression(), [[META112]]) -// CHECK4-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[META112]] -// CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META112]] -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META112]] -// CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[META112]] -// CHECK4-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[META112]] +// CHECK4-NEXT: #dbg_declare(ptr [[DOTSTART]], [[META111:![0-9]+]], !DIExpression(), [[META113:![0-9]+]]) +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG114:![0-9]+]] +// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG114]], !nonnull [[META13]], !align [[META22]] +// CHECK4-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG114]] +// CHECK4-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[META113]] +// CHECK4-NEXT: #dbg_declare(ptr [[DOTSTOP]], [[META116:![0-9]+]], !DIExpression(), [[META117:![0-9]+]]) +// CHECK4-NEXT: store i32 5, ptr [[DOTSTOP]], align 4, !dbg [[META117]] +// CHECK4-NEXT: #dbg_declare(ptr [[DOTSTEP]], [[META118:![0-9]+]], !DIExpression(), [[META117]]) +// CHECK4-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[META117]] +// CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META117]] +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META117]] +// CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[META117]] +// CHECK4-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[META117]] // CHECK4: cond.true: -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META112]] -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META112]] -// CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[META112]] -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META112]] -// CHECK4-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[META112]] -// CHECK4-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[META112]] -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META112]] -// CHECK4-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[META112]] -// CHECK4-NEXT: br label [[COND_END:%.*]], !dbg [[META112]] +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META117]] +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META117]] +// CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[META117]] +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META117]] +// CHECK4-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[META117]] +// CHECK4-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[META117]] +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META117]] +// CHECK4-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[META117]] +// CHECK4-NEXT: br label [[COND_END:%.*]], !dbg [[META117]] // CHECK4: cond.false: -// CHECK4-NEXT: br label [[COND_END]], !dbg [[META112]] +// CHECK4-NEXT: br label [[COND_END]], !dbg [[META117]] // CHECK4: cond.end: -// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[META112]] -// CHECK4-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[META112]] -// CHECK4-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[META112]] -// CHECK4-NEXT: ret void, !dbg [[DBG114:![0-9]+]] +// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[META117]] +// CHECK4-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[META117]], !nonnull [[META13]], !align [[META22]] +// CHECK4-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[META117]] +// CHECK4-NEXT: ret void, !dbg [[DBG119:![0-9]+]] // // // CHECK4-LABEL: define {{[^@]+}}@__captured_stmt.3 -// CHECK4-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG116:![0-9]+]] { +// CHECK4-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] !dbg [[DBG121:![0-9]+]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[LOOPVAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[LOGICAL_ADDR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: store ptr [[LOOPVAR]], ptr [[LOOPVAR_ADDR]], align 8 -// CHECK4-NEXT: #dbg_declare(ptr [[LOOPVAR_ADDR]], [[META117:![0-9]+]], !DIExpression(), [[META118:![0-9]+]]) +// CHECK4-NEXT: #dbg_declare(ptr [[LOOPVAR_ADDR]], [[META122:![0-9]+]], !DIExpression(), [[META123:![0-9]+]]) // CHECK4-NEXT: store i32 [[LOGICAL]], ptr [[LOGICAL_ADDR]], align 4 -// CHECK4-NEXT: #dbg_declare(ptr [[LOGICAL_ADDR]], [[META119:![0-9]+]], !DIExpression(), [[META118]]) +// CHECK4-NEXT: #dbg_declare(ptr [[LOGICAL_ADDR]], [[META124:![0-9]+]], !DIExpression(), [[META123]]) // CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 -// CHECK4-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META120:![0-9]+]], !DIExpression(), [[META118]]) +// CHECK4-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META125:![0-9]+]], !DIExpression(), [[META123]]) // CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG121:![0-9]+]] -// CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG121]] -// CHECK4-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG123:![0-9]+]] -// CHECK4-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG123]] -// CHECK4-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG123]] -// CHECK4-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG123]] -// CHECK4-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[META118]] -// CHECK4-NEXT: ret void, !dbg [[DBG121]] +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG126:![0-9]+]] +// CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG126]] +// CHECK4-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG128:![0-9]+]] +// CHECK4-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG128]] +// CHECK4-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG128]] +// CHECK4-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG128]], !nonnull [[META13]], !align [[META22]] +// CHECK4-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[META123]] +// CHECK4-NEXT: ret void, !dbg [[DBG126]] // diff --git a/clang/test/OpenMP/parallel_codegen.cpp b/clang/test/OpenMP/parallel_codegen.cpp index 9f6004e37db9..224b94ee86d2 100644 --- a/clang/test/OpenMP/parallel_codegen.cpp +++ b/clang/test/OpenMP/parallel_codegen.cpp @@ -111,7 +111,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !nonnull [[META3:![0-9]+]], !align [[META4:![0-9]+]] // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: invoke void @_Z3fooIiEvT_(i32 noundef [[TMP2]]) @@ -181,8 +181,8 @@ int main (int argc, char **argv) { // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store ptr [[GLOBAL]], ptr [[GLOBAL_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !nonnull [[META3]], !align [[META4]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ADDR]], align 8, !nonnull [[META3]], !align [[META4]] // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1 // CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: invoke void @_Z3fooIiEvT_(i32 noundef [[TMP3]]) @@ -212,7 +212,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !nonnull [[META3]], !align [[META4]] // CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @main.omp_outlined.2.omp_outlined, i64 [[TMP0]], ptr [[TMP1]]) // CHECK1-NEXT: ret void // @@ -229,7 +229,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !nonnull [[META3]], !align [[META4]] // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: invoke void @_Z3fooIiEvT_(i32 noundef [[TMP2]]) @@ -274,7 +274,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 // CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8, !nonnull [[META3]], !align [[META7:![0-9]+]] // CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP0]], align 8 // CHECK1-NEXT: invoke void @_Z3fooIPPcEvT_(ptr noundef [[TMP2]]) @@ -350,16 +350,16 @@ int main (int argc, char **argv) { // CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK2-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META51:![0-9]+]], !DIExpression(), [[META52:![0-9]+]]) // CHECK2-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8, !dbg [[DBG53:![0-9]+]] -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG53]] -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1, !dbg [[DBG54:![0-9]+]] -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !dbg [[DBG54]] +// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG53]], !nonnull [[META17:![0-9]+]], !align [[META54:![0-9]+]] +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1, !dbg [[DBG55:![0-9]+]] +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !dbg [[DBG55]] // CHECK2-NEXT: invoke void @_Z3fooIiEvT_(i32 noundef [[TMP2]]) // CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !dbg [[DBG53]] // CHECK2: invoke.cont: -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr @global, align 4, !dbg [[DBG55:![0-9]+]] -// CHECK2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1, !dbg [[DBG56:![0-9]+]] -// CHECK2-NEXT: store i32 [[TMP3]], ptr [[ARRAYIDX1]], align 4, !dbg [[DBG57:![0-9]+]] -// CHECK2-NEXT: ret void, !dbg [[DBG55]] +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr @global, align 4, !dbg [[DBG56:![0-9]+]] +// CHECK2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1, !dbg [[DBG57:![0-9]+]] +// CHECK2-NEXT: store i32 [[TMP3]], ptr [[ARRAYIDX1]], align 4, !dbg [[DBG58:![0-9]+]] +// CHECK2-NEXT: ret void, !dbg [[DBG56]] // CHECK2: terminate.lpad: // CHECK2-NEXT: [[TMP4:%.*]] = landingpad { ptr, i32 } // CHECK2-NEXT: catch ptr null, !dbg [[DBG53]] @@ -369,36 +369,36 @@ int main (int argc, char **argv) { // // // CHECK2-LABEL: define {{[^@]+}}@main.omp_outlined -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] !dbg [[DBG58:![0-9]+]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] !dbg [[DBG59:![0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: #dbg_declare(ptr [[DOTGLOBAL_TID__ADDR]], [[META59:![0-9]+]], !DIExpression(), [[META60:![0-9]+]]) +// CHECK2-NEXT: #dbg_declare(ptr [[DOTGLOBAL_TID__ADDR]], [[META60:![0-9]+]], !DIExpression(), [[META61:![0-9]+]]) // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: #dbg_declare(ptr [[DOTBOUND_TID__ADDR]], [[META61:![0-9]+]], !DIExpression(), [[META60]]) +// CHECK2-NEXT: #dbg_declare(ptr [[DOTBOUND_TID__ADDR]], [[META62:![0-9]+]], !DIExpression(), [[META61]]) // CHECK2-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK2-NEXT: #dbg_declare(ptr [[VLA_ADDR]], [[META62:![0-9]+]], !DIExpression(), [[META60]]) +// CHECK2-NEXT: #dbg_declare(ptr [[VLA_ADDR]], [[META63:![0-9]+]], !DIExpression(), [[META61]]) // CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META63:![0-9]+]], !DIExpression(), [[META60]]) -// CHECK2-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8, !dbg [[DBG64:![0-9]+]] -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG64]] -// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG64]] -// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG64]] -// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG64]] -// CHECK2-NEXT: call void @main.omp_outlined_debug__(ptr [[TMP2]], ptr [[TMP3]], i64 [[TMP0]], ptr [[TMP4]]) #[[ATTR5:[0-9]+]], !dbg [[DBG64]] -// CHECK2-NEXT: ret void, !dbg [[DBG64]] +// CHECK2-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META64:![0-9]+]], !DIExpression(), [[META61]]) +// CHECK2-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8, !dbg [[DBG65:![0-9]+]] +// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG65]], !nonnull [[META17]], !align [[META54]] +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG65]] +// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG65]] +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG65]] +// CHECK2-NEXT: call void @main.omp_outlined_debug__(ptr [[TMP2]], ptr [[TMP3]], i64 [[TMP0]], ptr [[TMP4]]) #[[ATTR5:[0-9]+]], !dbg [[DBG65]] +// CHECK2-NEXT: ret void, !dbg [[DBG65]] // // // CHECK2-LABEL: define {{[^@]+}}@_Z3fooIiEvT_ -// CHECK2-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR3:[0-9]+]] comdat !dbg [[DBG65:![0-9]+]] { +// CHECK2-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR3:[0-9]+]] comdat !dbg [[DBG66:![0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 -// CHECK2-NEXT: #dbg_declare(ptr [[ARGC_ADDR]], [[META70:![0-9]+]], !DIExpression(), [[META71:![0-9]+]]) -// CHECK2-NEXT: ret void, !dbg [[DBG72:![0-9]+]] +// CHECK2-NEXT: #dbg_declare(ptr [[ARGC_ADDR]], [[META71:![0-9]+]], !DIExpression(), [[META72:![0-9]+]]) +// CHECK2-NEXT: ret void, !dbg [[DBG73:![0-9]+]] // // // CHECK2-LABEL: define {{[^@]+}}@__clang_call_terminate @@ -409,7 +409,7 @@ int main (int argc, char **argv) { // // // CHECK2-LABEL: define {{[^@]+}}@main.omp_outlined_debug__.2 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]]) #[[ATTR2]] !dbg [[DBG75:![0-9]+]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]]) #[[ATTR2]] !dbg [[DBG76:![0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 @@ -418,46 +418,46 @@ int main (int argc, char **argv) { // CHECK2-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: #dbg_declare(ptr [[DOTGLOBAL_TID__ADDR]], [[META78:![0-9]+]], !DIExpression(), [[META79:![0-9]+]]) +// CHECK2-NEXT: #dbg_declare(ptr [[DOTGLOBAL_TID__ADDR]], [[META79:![0-9]+]], !DIExpression(), [[META80:![0-9]+]]) // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: #dbg_declare(ptr [[DOTBOUND_TID__ADDR]], [[META80:![0-9]+]], !DIExpression(), [[META79]]) +// CHECK2-NEXT: #dbg_declare(ptr [[DOTBOUND_TID__ADDR]], [[META81:![0-9]+]], !DIExpression(), [[META80]]) // CHECK2-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK2-NEXT: #dbg_declare(ptr [[VLA_ADDR]], [[META81:![0-9]+]], !DIExpression(), [[META79]]) -// CHECK2-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8, !dbg [[DBG82:![0-9]+]] -// CHECK2-NEXT: #dbg_declare(ptr [[GLOBAL]], [[META83:![0-9]+]], !DIExpression(), [[META79]]) -// CHECK2-NEXT: [[TMP1:%.*]] = call ptr @llvm.stacksave.p0(), !dbg [[DBG82]] -// CHECK2-NEXT: store ptr [[TMP1]], ptr [[SAVED_STACK]], align 8, !dbg [[DBG82]] -// CHECK2-NEXT: [[VLA1:%.*]] = alloca i32, i64 [[TMP0]], align 16, !dbg [[DBG82]] -// CHECK2-NEXT: store i64 [[TMP0]], ptr [[__VLA_EXPR0]], align 8, !dbg [[DBG82]] -// CHECK2-NEXT: #dbg_declare(ptr [[__VLA_EXPR0]], [[META84:![0-9]+]], !DIExpression(), [[META79]]) -// CHECK2-NEXT: #dbg_declare(ptr [[VLA1]], [[META85:![0-9]+]], !DIExpression(), [[META79]]) -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 3, ptr @main.omp_outlined_debug__.2.omp_outlined, i64 [[TMP0]], ptr [[VLA1]], ptr [[GLOBAL]]), !dbg [[DBG82]] -// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8, !dbg [[DBG86:![0-9]+]] -// CHECK2-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP2]]), !dbg [[DBG86]] -// CHECK2-NEXT: ret void, !dbg [[DBG88:![0-9]+]] +// CHECK2-NEXT: #dbg_declare(ptr [[VLA_ADDR]], [[META82:![0-9]+]], !DIExpression(), [[META80]]) +// CHECK2-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8, !dbg [[DBG83:![0-9]+]] +// CHECK2-NEXT: #dbg_declare(ptr [[GLOBAL]], [[META84:![0-9]+]], !DIExpression(), [[META80]]) +// CHECK2-NEXT: [[TMP1:%.*]] = call ptr @llvm.stacksave.p0(), !dbg [[DBG83]] +// CHECK2-NEXT: store ptr [[TMP1]], ptr [[SAVED_STACK]], align 8, !dbg [[DBG83]] +// CHECK2-NEXT: [[VLA1:%.*]] = alloca i32, i64 [[TMP0]], align 16, !dbg [[DBG83]] +// CHECK2-NEXT: store i64 [[TMP0]], ptr [[__VLA_EXPR0]], align 8, !dbg [[DBG83]] +// CHECK2-NEXT: #dbg_declare(ptr [[__VLA_EXPR0]], [[META85:![0-9]+]], !DIExpression(), [[META80]]) +// CHECK2-NEXT: #dbg_declare(ptr [[VLA1]], [[META86:![0-9]+]], !DIExpression(), [[META80]]) +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 3, ptr @main.omp_outlined_debug__.2.omp_outlined, i64 [[TMP0]], ptr [[VLA1]], ptr [[GLOBAL]]), !dbg [[DBG83]] +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8, !dbg [[DBG87:![0-9]+]] +// CHECK2-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP2]]), !dbg [[DBG87]] +// CHECK2-NEXT: ret void, !dbg [[DBG89:![0-9]+]] // // // CHECK2-LABEL: define {{[^@]+}}@main.omp_outlined.1 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]]) #[[ATTR2]] !dbg [[DBG89:![0-9]+]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]]) #[[ATTR2]] !dbg [[DBG90:![0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: #dbg_declare(ptr [[DOTGLOBAL_TID__ADDR]], [[META90:![0-9]+]], !DIExpression(), [[META91:![0-9]+]]) +// CHECK2-NEXT: #dbg_declare(ptr [[DOTGLOBAL_TID__ADDR]], [[META91:![0-9]+]], !DIExpression(), [[META92:![0-9]+]]) // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: #dbg_declare(ptr [[DOTBOUND_TID__ADDR]], [[META92:![0-9]+]], !DIExpression(), [[META91]]) +// CHECK2-NEXT: #dbg_declare(ptr [[DOTBOUND_TID__ADDR]], [[META93:![0-9]+]], !DIExpression(), [[META92]]) // CHECK2-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK2-NEXT: #dbg_declare(ptr [[VLA_ADDR]], [[META93:![0-9]+]], !DIExpression(), [[META91]]) -// CHECK2-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8, !dbg [[DBG94:![0-9]+]] -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG94]] -// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG94]] -// CHECK2-NEXT: call void @main.omp_outlined_debug__.2(ptr [[TMP1]], ptr [[TMP2]], i64 [[TMP0]]) #[[ATTR5]], !dbg [[DBG94]] -// CHECK2-NEXT: ret void, !dbg [[DBG94]] +// CHECK2-NEXT: #dbg_declare(ptr [[VLA_ADDR]], [[META94:![0-9]+]], !DIExpression(), [[META92]]) +// CHECK2-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8, !dbg [[DBG95:![0-9]+]] +// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG95]] +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG95]] +// CHECK2-NEXT: call void @main.omp_outlined_debug__.2(ptr [[TMP1]], ptr [[TMP2]], i64 [[TMP0]]) #[[ATTR5]], !dbg [[DBG95]] +// CHECK2-NEXT: ret void, !dbg [[DBG95]] // // // CHECK2-LABEL: define {{[^@]+}}@main.omp_outlined_debug__.2.omp_outlined_debug__ -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[GLOBAL:%.*]]) #[[ATTR2]] personality ptr @__gxx_personality_v0 !dbg [[DBG95:![0-9]+]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[GLOBAL:%.*]]) #[[ATTR2]] personality ptr @__gxx_personality_v0 !dbg [[DBG96:![0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 @@ -465,37 +465,37 @@ int main (int argc, char **argv) { // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[GLOBAL_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: #dbg_declare(ptr [[DOTGLOBAL_TID__ADDR]], [[META98:![0-9]+]], !DIExpression(), [[META99:![0-9]+]]) +// CHECK2-NEXT: #dbg_declare(ptr [[DOTGLOBAL_TID__ADDR]], [[META99:![0-9]+]], !DIExpression(), [[META100:![0-9]+]]) // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: #dbg_declare(ptr [[DOTBOUND_TID__ADDR]], [[META100:![0-9]+]], !DIExpression(), [[META99]]) +// CHECK2-NEXT: #dbg_declare(ptr [[DOTBOUND_TID__ADDR]], [[META101:![0-9]+]], !DIExpression(), [[META100]]) // CHECK2-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK2-NEXT: #dbg_declare(ptr [[VLA_ADDR]], [[META101:![0-9]+]], !DIExpression(), [[META99]]) +// CHECK2-NEXT: #dbg_declare(ptr [[VLA_ADDR]], [[META102:![0-9]+]], !DIExpression(), [[META100]]) // CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META102:![0-9]+]], !DIExpression(), [[META103:![0-9]+]]) +// CHECK2-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META103:![0-9]+]], !DIExpression(), [[META104:![0-9]+]]) // CHECK2-NEXT: store ptr [[GLOBAL]], ptr [[GLOBAL_ADDR]], align 8 -// CHECK2-NEXT: #dbg_declare(ptr [[GLOBAL_ADDR]], [[META104:![0-9]+]], !DIExpression(), [[META105:![0-9]+]]) -// CHECK2-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8, !dbg [[DBG106:![0-9]+]] -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG106]] -// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ADDR]], align 8, !dbg [[DBG106]] -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1, !dbg [[DBG107:![0-9]+]] -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !dbg [[DBG107]] +// CHECK2-NEXT: #dbg_declare(ptr [[GLOBAL_ADDR]], [[META105:![0-9]+]], !DIExpression(), [[META106:![0-9]+]]) +// CHECK2-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8, !dbg [[DBG107:![0-9]+]] +// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG107]], !nonnull [[META17]], !align [[META54]] +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ADDR]], align 8, !dbg [[DBG107]], !nonnull [[META17]], !align [[META54]] +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1, !dbg [[DBG108:![0-9]+]] +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !dbg [[DBG108]] // CHECK2-NEXT: invoke void @_Z3fooIiEvT_(i32 noundef [[TMP3]]) -// CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !dbg [[DBG106]] +// CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !dbg [[DBG107]] // CHECK2: invoke.cont: -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG108:![0-9]+]] -// CHECK2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1, !dbg [[DBG109:![0-9]+]] -// CHECK2-NEXT: store i32 [[TMP4]], ptr [[ARRAYIDX1]], align 4, !dbg [[DBG110:![0-9]+]] -// CHECK2-NEXT: ret void, !dbg [[DBG108]] +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG109:![0-9]+]] +// CHECK2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1, !dbg [[DBG110:![0-9]+]] +// CHECK2-NEXT: store i32 [[TMP4]], ptr [[ARRAYIDX1]], align 4, !dbg [[DBG111:![0-9]+]] +// CHECK2-NEXT: ret void, !dbg [[DBG109]] // CHECK2: terminate.lpad: // CHECK2-NEXT: [[TMP5:%.*]] = landingpad { ptr, i32 } -// CHECK2-NEXT: catch ptr null, !dbg [[DBG106]] -// CHECK2-NEXT: [[TMP6:%.*]] = extractvalue { ptr, i32 } [[TMP5]], 0, !dbg [[DBG106]] -// CHECK2-NEXT: call void @__clang_call_terminate(ptr [[TMP6]]) #[[ATTR6]], !dbg [[DBG106]] -// CHECK2-NEXT: unreachable, !dbg [[DBG106]] +// CHECK2-NEXT: catch ptr null, !dbg [[DBG107]] +// CHECK2-NEXT: [[TMP6:%.*]] = extractvalue { ptr, i32 } [[TMP5]], 0, !dbg [[DBG107]] +// CHECK2-NEXT: call void @__clang_call_terminate(ptr [[TMP6]]) #[[ATTR6]], !dbg [[DBG107]] +// CHECK2-NEXT: unreachable, !dbg [[DBG107]] // // // CHECK2-LABEL: define {{[^@]+}}@main.omp_outlined_debug__.2.omp_outlined -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[GLOBAL:%.*]]) #[[ATTR2]] !dbg [[DBG111:![0-9]+]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[GLOBAL:%.*]]) #[[ATTR2]] !dbg [[DBG112:![0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 @@ -503,147 +503,147 @@ int main (int argc, char **argv) { // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[GLOBAL_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: #dbg_declare(ptr [[DOTGLOBAL_TID__ADDR]], [[META112:![0-9]+]], !DIExpression(), [[META113:![0-9]+]]) +// CHECK2-NEXT: #dbg_declare(ptr [[DOTGLOBAL_TID__ADDR]], [[META113:![0-9]+]], !DIExpression(), [[META114:![0-9]+]]) // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: #dbg_declare(ptr [[DOTBOUND_TID__ADDR]], [[META114:![0-9]+]], !DIExpression(), [[META113]]) +// CHECK2-NEXT: #dbg_declare(ptr [[DOTBOUND_TID__ADDR]], [[META115:![0-9]+]], !DIExpression(), [[META114]]) // CHECK2-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK2-NEXT: #dbg_declare(ptr [[VLA_ADDR]], [[META115:![0-9]+]], !DIExpression(), [[META113]]) +// CHECK2-NEXT: #dbg_declare(ptr [[VLA_ADDR]], [[META116:![0-9]+]], !DIExpression(), [[META114]]) // CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META116:![0-9]+]], !DIExpression(), [[META113]]) +// CHECK2-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META117:![0-9]+]], !DIExpression(), [[META114]]) // CHECK2-NEXT: store ptr [[GLOBAL]], ptr [[GLOBAL_ADDR]], align 8 -// CHECK2-NEXT: #dbg_declare(ptr [[GLOBAL_ADDR]], [[META117:![0-9]+]], !DIExpression(), [[META113]]) -// CHECK2-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8, !dbg [[DBG118:![0-9]+]] -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG118]] -// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ADDR]], align 8, !dbg [[DBG118]] -// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG118]] -// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG118]] -// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG118]] -// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[GLOBAL_ADDR]], align 8, !dbg [[DBG118]] -// CHECK2-NEXT: call void @main.omp_outlined_debug__.2.omp_outlined_debug__(ptr [[TMP3]], ptr [[TMP4]], i64 [[TMP0]], ptr [[TMP5]], ptr [[TMP6]]) #[[ATTR5]], !dbg [[DBG118]] -// CHECK2-NEXT: ret void, !dbg [[DBG118]] +// CHECK2-NEXT: #dbg_declare(ptr [[GLOBAL_ADDR]], [[META118:![0-9]+]], !DIExpression(), [[META114]]) +// CHECK2-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8, !dbg [[DBG119:![0-9]+]] +// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG119]], !nonnull [[META17]], !align [[META54]] +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ADDR]], align 8, !dbg [[DBG119]], !nonnull [[META17]], !align [[META54]] +// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG119]] +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG119]] +// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG119]] +// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[GLOBAL_ADDR]], align 8, !dbg [[DBG119]] +// CHECK2-NEXT: call void @main.omp_outlined_debug__.2.omp_outlined_debug__(ptr [[TMP3]], ptr [[TMP4]], i64 [[TMP0]], ptr [[TMP5]], ptr [[TMP6]]) #[[ATTR5]], !dbg [[DBG119]] +// CHECK2-NEXT: ret void, !dbg [[DBG119]] // // // CHECK2-LABEL: define {{[^@]+}}@main.omp_outlined_debug__.4 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] !dbg [[DBG119:![0-9]+]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] !dbg [[DBG120:![0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: #dbg_declare(ptr [[DOTGLOBAL_TID__ADDR]], [[META120:![0-9]+]], !DIExpression(), [[META121:![0-9]+]]) +// CHECK2-NEXT: #dbg_declare(ptr [[DOTGLOBAL_TID__ADDR]], [[META121:![0-9]+]], !DIExpression(), [[META122:![0-9]+]]) // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: #dbg_declare(ptr [[DOTBOUND_TID__ADDR]], [[META122:![0-9]+]], !DIExpression(), [[META121]]) +// CHECK2-NEXT: #dbg_declare(ptr [[DOTBOUND_TID__ADDR]], [[META123:![0-9]+]], !DIExpression(), [[META122]]) // CHECK2-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK2-NEXT: #dbg_declare(ptr [[VLA_ADDR]], [[META123:![0-9]+]], !DIExpression(), [[META121]]) +// CHECK2-NEXT: #dbg_declare(ptr [[VLA_ADDR]], [[META124:![0-9]+]], !DIExpression(), [[META122]]) // CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META124:![0-9]+]], !DIExpression(), [[META125:![0-9]+]]) -// CHECK2-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8, !dbg [[DBG126:![0-9]+]] -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG126]] -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB7:[0-9]+]], i32 2, ptr @main.omp_outlined_debug__.4.omp_outlined, i64 [[TMP0]], ptr [[TMP1]]), !dbg [[DBG126]] -// CHECK2-NEXT: ret void, !dbg [[DBG127:![0-9]+]] +// CHECK2-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META125:![0-9]+]], !DIExpression(), [[META126:![0-9]+]]) +// CHECK2-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8, !dbg [[DBG127:![0-9]+]] +// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG127]], !nonnull [[META17]], !align [[META54]] +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB7:[0-9]+]], i32 2, ptr @main.omp_outlined_debug__.4.omp_outlined, i64 [[TMP0]], ptr [[TMP1]]), !dbg [[DBG127]] +// CHECK2-NEXT: ret void, !dbg [[DBG128:![0-9]+]] // // // CHECK2-LABEL: define {{[^@]+}}@main.omp_outlined.3 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] !dbg [[DBG128:![0-9]+]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] !dbg [[DBG129:![0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: #dbg_declare(ptr [[DOTGLOBAL_TID__ADDR]], [[META129:![0-9]+]], !DIExpression(), [[META130:![0-9]+]]) +// CHECK2-NEXT: #dbg_declare(ptr [[DOTGLOBAL_TID__ADDR]], [[META130:![0-9]+]], !DIExpression(), [[META131:![0-9]+]]) // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: #dbg_declare(ptr [[DOTBOUND_TID__ADDR]], [[META131:![0-9]+]], !DIExpression(), [[META130]]) +// CHECK2-NEXT: #dbg_declare(ptr [[DOTBOUND_TID__ADDR]], [[META132:![0-9]+]], !DIExpression(), [[META131]]) // CHECK2-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK2-NEXT: #dbg_declare(ptr [[VLA_ADDR]], [[META132:![0-9]+]], !DIExpression(), [[META130]]) +// CHECK2-NEXT: #dbg_declare(ptr [[VLA_ADDR]], [[META133:![0-9]+]], !DIExpression(), [[META131]]) // CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META133:![0-9]+]], !DIExpression(), [[META130]]) -// CHECK2-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8, !dbg [[DBG134:![0-9]+]] -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG134]] -// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG134]] -// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG134]] -// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG134]] -// CHECK2-NEXT: call void @main.omp_outlined_debug__.4(ptr [[TMP2]], ptr [[TMP3]], i64 [[TMP0]], ptr [[TMP4]]) #[[ATTR5]], !dbg [[DBG134]] -// CHECK2-NEXT: ret void, !dbg [[DBG134]] +// CHECK2-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META134:![0-9]+]], !DIExpression(), [[META131]]) +// CHECK2-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8, !dbg [[DBG135:![0-9]+]] +// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG135]], !nonnull [[META17]], !align [[META54]] +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG135]] +// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG135]] +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG135]] +// CHECK2-NEXT: call void @main.omp_outlined_debug__.4(ptr [[TMP2]], ptr [[TMP3]], i64 [[TMP0]], ptr [[TMP4]]) #[[ATTR5]], !dbg [[DBG135]] +// CHECK2-NEXT: ret void, !dbg [[DBG135]] // // // CHECK2-LABEL: define {{[^@]+}}@main.omp_outlined_debug__.4.omp_outlined_debug__ -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] personality ptr @__gxx_personality_v0 !dbg [[DBG135:![0-9]+]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] personality ptr @__gxx_personality_v0 !dbg [[DBG136:![0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: #dbg_declare(ptr [[DOTGLOBAL_TID__ADDR]], [[META136:![0-9]+]], !DIExpression(), [[META137:![0-9]+]]) +// CHECK2-NEXT: #dbg_declare(ptr [[DOTGLOBAL_TID__ADDR]], [[META137:![0-9]+]], !DIExpression(), [[META138:![0-9]+]]) // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: #dbg_declare(ptr [[DOTBOUND_TID__ADDR]], [[META138:![0-9]+]], !DIExpression(), [[META137]]) +// CHECK2-NEXT: #dbg_declare(ptr [[DOTBOUND_TID__ADDR]], [[META139:![0-9]+]], !DIExpression(), [[META138]]) // CHECK2-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK2-NEXT: #dbg_declare(ptr [[VLA_ADDR]], [[META139:![0-9]+]], !DIExpression(), [[META137]]) +// CHECK2-NEXT: #dbg_declare(ptr [[VLA_ADDR]], [[META140:![0-9]+]], !DIExpression(), [[META138]]) // CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META140:![0-9]+]], !DIExpression(), [[META141:![0-9]+]]) -// CHECK2-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8, !dbg [[DBG142:![0-9]+]] -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG142]] -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1, !dbg [[DBG143:![0-9]+]] -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !dbg [[DBG143]] +// CHECK2-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META141:![0-9]+]], !DIExpression(), [[META142:![0-9]+]]) +// CHECK2-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8, !dbg [[DBG143:![0-9]+]] +// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG143]], !nonnull [[META17]], !align [[META54]] +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1, !dbg [[DBG144:![0-9]+]] +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !dbg [[DBG144]] // CHECK2-NEXT: invoke void @_Z3fooIiEvT_(i32 noundef [[TMP2]]) -// CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !dbg [[DBG142]] +// CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !dbg [[DBG143]] // CHECK2: invoke.cont: -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr @global, align 4, !dbg [[DBG144:![0-9]+]] -// CHECK2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1, !dbg [[DBG145:![0-9]+]] -// CHECK2-NEXT: store i32 [[TMP3]], ptr [[ARRAYIDX1]], align 4, !dbg [[DBG146:![0-9]+]] -// CHECK2-NEXT: ret void, !dbg [[DBG144]] +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr @global, align 4, !dbg [[DBG145:![0-9]+]] +// CHECK2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1, !dbg [[DBG146:![0-9]+]] +// CHECK2-NEXT: store i32 [[TMP3]], ptr [[ARRAYIDX1]], align 4, !dbg [[DBG147:![0-9]+]] +// CHECK2-NEXT: ret void, !dbg [[DBG145]] // CHECK2: terminate.lpad: // CHECK2-NEXT: [[TMP4:%.*]] = landingpad { ptr, i32 } -// CHECK2-NEXT: catch ptr null, !dbg [[DBG142]] -// CHECK2-NEXT: [[TMP5:%.*]] = extractvalue { ptr, i32 } [[TMP4]], 0, !dbg [[DBG142]] -// CHECK2-NEXT: call void @__clang_call_terminate(ptr [[TMP5]]) #[[ATTR6]], !dbg [[DBG142]] -// CHECK2-NEXT: unreachable, !dbg [[DBG142]] +// CHECK2-NEXT: catch ptr null, !dbg [[DBG143]] +// CHECK2-NEXT: [[TMP5:%.*]] = extractvalue { ptr, i32 } [[TMP4]], 0, !dbg [[DBG143]] +// CHECK2-NEXT: call void @__clang_call_terminate(ptr [[TMP5]]) #[[ATTR6]], !dbg [[DBG143]] +// CHECK2-NEXT: unreachable, !dbg [[DBG143]] // // // CHECK2-LABEL: define {{[^@]+}}@main.omp_outlined_debug__.4.omp_outlined -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] !dbg [[DBG147:![0-9]+]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] !dbg [[DBG148:![0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: #dbg_declare(ptr [[DOTGLOBAL_TID__ADDR]], [[META148:![0-9]+]], !DIExpression(), [[META149:![0-9]+]]) +// CHECK2-NEXT: #dbg_declare(ptr [[DOTGLOBAL_TID__ADDR]], [[META149:![0-9]+]], !DIExpression(), [[META150:![0-9]+]]) // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: #dbg_declare(ptr [[DOTBOUND_TID__ADDR]], [[META150:![0-9]+]], !DIExpression(), [[META149]]) +// CHECK2-NEXT: #dbg_declare(ptr [[DOTBOUND_TID__ADDR]], [[META151:![0-9]+]], !DIExpression(), [[META150]]) // CHECK2-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK2-NEXT: #dbg_declare(ptr [[VLA_ADDR]], [[META151:![0-9]+]], !DIExpression(), [[META149]]) +// CHECK2-NEXT: #dbg_declare(ptr [[VLA_ADDR]], [[META152:![0-9]+]], !DIExpression(), [[META150]]) // CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META152:![0-9]+]], !DIExpression(), [[META149]]) -// CHECK2-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8, !dbg [[DBG153:![0-9]+]] -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG153]] -// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG153]] -// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG153]] -// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG153]] -// CHECK2-NEXT: call void @main.omp_outlined_debug__.4.omp_outlined_debug__(ptr [[TMP2]], ptr [[TMP3]], i64 [[TMP0]], ptr [[TMP4]]) #[[ATTR5]], !dbg [[DBG153]] -// CHECK2-NEXT: ret void, !dbg [[DBG153]] +// CHECK2-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META153:![0-9]+]], !DIExpression(), [[META150]]) +// CHECK2-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8, !dbg [[DBG154:![0-9]+]] +// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG154]], !nonnull [[META17]], !align [[META54]] +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG154]] +// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG154]] +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG154]] +// CHECK2-NEXT: call void @main.omp_outlined_debug__.4.omp_outlined_debug__(ptr [[TMP2]], ptr [[TMP3]], i64 [[TMP0]], ptr [[TMP4]]) #[[ATTR5]], !dbg [[DBG154]] +// CHECK2-NEXT: ret void, !dbg [[DBG154]] // // // CHECK2-LABEL: define {{[^@]+}}@_Z5tmainIPPcEiT_ -// CHECK2-SAME: (ptr noundef [[ARGC:%.*]]) #[[ATTR3]] comdat !dbg [[DBG154:![0-9]+]] { +// CHECK2-SAME: (ptr noundef [[ARGC:%.*]]) #[[ATTR3]] comdat !dbg [[DBG155:![0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK2-NEXT: #dbg_declare(ptr [[ARGC_ADDR]], [[META159:![0-9]+]], !DIExpression(), [[META160:![0-9]+]]) -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8, !dbg [[DBG161:![0-9]+]] -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i64 0, !dbg [[DBG161]] -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8, !dbg [[DBG161]] -// CHECK2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 0, !dbg [[DBG161]] -// CHECK2-NEXT: [[TMP2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1, !dbg [[DBG161]] -// CHECK2-NEXT: [[TMP3:%.*]] = zext i8 [[TMP2]] to i64, !dbg [[DBG162:![0-9]+]] -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB11:[0-9]+]], i32 2, ptr @_Z5tmainIPPcEiT_.omp_outlined, ptr [[ARGC_ADDR]], i64 [[TMP3]]), !dbg [[DBG163:![0-9]+]] -// CHECK2-NEXT: ret i32 0, !dbg [[DBG164:![0-9]+]] +// CHECK2-NEXT: #dbg_declare(ptr [[ARGC_ADDR]], [[META167:![0-9]+]], !DIExpression(), [[META168:![0-9]+]]) +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8, !dbg [[DBG169:![0-9]+]] +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i64 0, !dbg [[DBG169]] +// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8, !dbg [[DBG169]] +// CHECK2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 0, !dbg [[DBG169]] +// CHECK2-NEXT: [[TMP2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1, !dbg [[DBG169]] +// CHECK2-NEXT: [[TMP3:%.*]] = zext i8 [[TMP2]] to i64, !dbg [[DBG170:![0-9]+]] +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB11:[0-9]+]], i32 2, ptr @_Z5tmainIPPcEiT_.omp_outlined, ptr [[ARGC_ADDR]], i64 [[TMP3]]), !dbg [[DBG171:![0-9]+]] +// CHECK2-NEXT: ret i32 0, !dbg [[DBG172:![0-9]+]] // // // CHECK2-LABEL: define {{[^@]+}}@_Z5tmainIPPcEiT_.omp_outlined_debug__ -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[ARGC:%.*]], i64 noundef [[VLA:%.*]]) #[[ATTR2]] personality ptr @__gxx_personality_v0 !dbg [[DBG165:![0-9]+]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[ARGC:%.*]], i64 noundef [[VLA:%.*]]) #[[ATTR2]] personality ptr @__gxx_personality_v0 !dbg [[DBG173:![0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 @@ -651,64 +651,64 @@ int main (int argc, char **argv) { // CHECK2-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[VAR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: #dbg_declare(ptr [[DOTGLOBAL_TID__ADDR]], [[META169:![0-9]+]], !DIExpression(), [[META170:![0-9]+]]) +// CHECK2-NEXT: #dbg_declare(ptr [[DOTGLOBAL_TID__ADDR]], [[META177:![0-9]+]], !DIExpression(), [[META178:![0-9]+]]) // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: #dbg_declare(ptr [[DOTBOUND_TID__ADDR]], [[META171:![0-9]+]], !DIExpression(), [[META170]]) +// CHECK2-NEXT: #dbg_declare(ptr [[DOTBOUND_TID__ADDR]], [[META179:![0-9]+]], !DIExpression(), [[META178]]) // CHECK2-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK2-NEXT: #dbg_declare(ptr [[ARGC_ADDR]], [[META172:![0-9]+]], !DIExpression(), [[META173:![0-9]+]]) +// CHECK2-NEXT: #dbg_declare(ptr [[ARGC_ADDR]], [[META180:![0-9]+]], !DIExpression(), [[META181:![0-9]+]]) // CHECK2-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK2-NEXT: #dbg_declare(ptr [[VLA_ADDR]], [[META174:![0-9]+]], !DIExpression(), [[META170]]) -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8, !dbg [[DBG175:![0-9]+]] -// CHECK2-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8, !dbg [[DBG175]] -// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP0]], align 8, !dbg [[DBG176:![0-9]+]] +// CHECK2-NEXT: #dbg_declare(ptr [[VLA_ADDR]], [[META182:![0-9]+]], !DIExpression(), [[META178]]) +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8, !dbg [[DBG183:![0-9]+]], !nonnull [[META17]], !align [[META184:![0-9]+]] +// CHECK2-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8, !dbg [[DBG183]] +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP0]], align 8, !dbg [[DBG185:![0-9]+]] // CHECK2-NEXT: invoke void @_Z3fooIPPcEvT_(ptr noundef [[TMP2]]) -// CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !dbg [[DBG178:![0-9]+]] +// CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !dbg [[DBG187:![0-9]+]] // CHECK2: invoke.cont: -// CHECK2-NEXT: #dbg_declare(ptr [[VAR]], [[META179:![0-9]+]], !DIExpression(), [[META186:![0-9]+]]) -// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR]], align 8, !dbg [[DBG187:![0-9]+]] -// CHECK2-NEXT: [[TMP4:%.*]] = mul nsw i64 0, [[TMP1]], !dbg [[DBG187]] -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i64 [[TMP4]], !dbg [[DBG187]] -// CHECK2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX]], i64 0, !dbg [[DBG187]] -// CHECK2-NEXT: ret void, !dbg [[DBG188:![0-9]+]] +// CHECK2-NEXT: #dbg_declare(ptr [[VAR]], [[META188:![0-9]+]], !DIExpression(), [[META189:![0-9]+]]) +// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR]], align 8, !dbg [[DBG190:![0-9]+]] +// CHECK2-NEXT: [[TMP4:%.*]] = mul nsw i64 0, [[TMP1]], !dbg [[DBG190]] +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i64 [[TMP4]], !dbg [[DBG190]] +// CHECK2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX]], i64 0, !dbg [[DBG190]] +// CHECK2-NEXT: ret void, !dbg [[DBG191:![0-9]+]] // CHECK2: terminate.lpad: // CHECK2-NEXT: [[TMP5:%.*]] = landingpad { ptr, i32 } -// CHECK2-NEXT: catch ptr null, !dbg [[DBG178]] -// CHECK2-NEXT: [[TMP6:%.*]] = extractvalue { ptr, i32 } [[TMP5]], 0, !dbg [[DBG178]] -// CHECK2-NEXT: call void @__clang_call_terminate(ptr [[TMP6]]) #[[ATTR6]], !dbg [[DBG178]] -// CHECK2-NEXT: unreachable, !dbg [[DBG178]] +// CHECK2-NEXT: catch ptr null, !dbg [[DBG187]] +// CHECK2-NEXT: [[TMP6:%.*]] = extractvalue { ptr, i32 } [[TMP5]], 0, !dbg [[DBG187]] +// CHECK2-NEXT: call void @__clang_call_terminate(ptr [[TMP6]]) #[[ATTR6]], !dbg [[DBG187]] +// CHECK2-NEXT: unreachable, !dbg [[DBG187]] // // // CHECK2-LABEL: define {{[^@]+}}@_Z5tmainIPPcEiT_.omp_outlined -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[ARGC:%.*]], i64 noundef [[VLA:%.*]]) #[[ATTR2]] !dbg [[DBG189:![0-9]+]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[ARGC:%.*]], i64 noundef [[VLA:%.*]]) #[[ATTR2]] !dbg [[DBG192:![0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: #dbg_declare(ptr [[DOTGLOBAL_TID__ADDR]], [[META190:![0-9]+]], !DIExpression(), [[META191:![0-9]+]]) +// CHECK2-NEXT: #dbg_declare(ptr [[DOTGLOBAL_TID__ADDR]], [[META193:![0-9]+]], !DIExpression(), [[META194:![0-9]+]]) // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: #dbg_declare(ptr [[DOTBOUND_TID__ADDR]], [[META192:![0-9]+]], !DIExpression(), [[META191]]) +// CHECK2-NEXT: #dbg_declare(ptr [[DOTBOUND_TID__ADDR]], [[META195:![0-9]+]], !DIExpression(), [[META194]]) // CHECK2-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK2-NEXT: #dbg_declare(ptr [[ARGC_ADDR]], [[META193:![0-9]+]], !DIExpression(), [[META191]]) +// CHECK2-NEXT: #dbg_declare(ptr [[ARGC_ADDR]], [[META196:![0-9]+]], !DIExpression(), [[META194]]) // CHECK2-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK2-NEXT: #dbg_declare(ptr [[VLA_ADDR]], [[META194:![0-9]+]], !DIExpression(), [[META191]]) -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8, !dbg [[DBG195:![0-9]+]] -// CHECK2-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8, !dbg [[DBG195]] -// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG195]] -// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG195]] -// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8, !dbg [[DBG195]] -// CHECK2-NEXT: call void @_Z5tmainIPPcEiT_.omp_outlined_debug__(ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]], i64 [[TMP1]]) #[[ATTR5]], !dbg [[DBG195]] -// CHECK2-NEXT: ret void, !dbg [[DBG195]] +// CHECK2-NEXT: #dbg_declare(ptr [[VLA_ADDR]], [[META197:![0-9]+]], !DIExpression(), [[META194]]) +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8, !dbg [[DBG198:![0-9]+]], !nonnull [[META17]], !align [[META184]] +// CHECK2-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8, !dbg [[DBG198]] +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG198]] +// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG198]] +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8, !dbg [[DBG198]] +// CHECK2-NEXT: call void @_Z5tmainIPPcEiT_.omp_outlined_debug__(ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]], i64 [[TMP1]]) #[[ATTR5]], !dbg [[DBG198]] +// CHECK2-NEXT: ret void, !dbg [[DBG198]] // // // CHECK2-LABEL: define {{[^@]+}}@_Z3fooIPPcEvT_ -// CHECK2-SAME: (ptr noundef [[ARGC:%.*]]) #[[ATTR3]] comdat !dbg [[DBG196:![0-9]+]] { +// CHECK2-SAME: (ptr noundef [[ARGC:%.*]]) #[[ATTR3]] comdat !dbg [[DBG199:![0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK2-NEXT: #dbg_declare(ptr [[ARGC_ADDR]], [[META199:![0-9]+]], !DIExpression(), [[META200:![0-9]+]]) -// CHECK2-NEXT: ret void, !dbg [[DBG201:![0-9]+]] +// CHECK2-NEXT: #dbg_declare(ptr [[ARGC_ADDR]], [[META202:![0-9]+]], !DIExpression(), [[META203:![0-9]+]]) +// CHECK2-NEXT: ret void, !dbg [[DBG204:![0-9]+]] // // // CHECK3-LABEL: define {{[^@]+}}@main @@ -729,13 +729,12 @@ int main (int argc, char **argv) { // CHECK3-NEXT: store ptr [[TMP2]], ptr [[SAVED_STACK]], align 8 // CHECK3-NEXT: [[VLA:%.*]] = alloca i32, i64 [[TMP1]], align 16 // CHECK3-NEXT: store i64 [[TMP1]], ptr [[__VLA_EXPR0]], align 8 -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK3-NEXT: br label [[OMP_PARALLEL:%.*]] // CHECK3: omp_parallel: // CHECK3-NEXT: [[GEP_VLA:%.*]] = getelementptr { ptr }, ptr [[STRUCTARG]], i32 0, i32 0 // CHECK3-NEXT: store ptr [[VLA]], ptr [[GEP_VLA]], align 8 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @main..omp_par, ptr [[STRUCTARG]]) -// CHECK3-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @main..omp_par, ptr [[STRUCTARG]]) +// CHECK3-NEXT: br label [[OMP_PAR_EXIT:%.*]] // CHECK3: omp.par.exit: // CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 // CHECK3-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIPPcEiT_(ptr noundef [[TMP3]]) @@ -750,7 +749,7 @@ int main (int argc, char **argv) { // CHECK3-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: omp.par.entry: // CHECK3-NEXT: [[GEP_VLA:%.*]] = getelementptr { ptr }, ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[LOADGEP_VLA:%.*]] = load ptr, ptr [[GEP_VLA]], align 8 +// CHECK3-NEXT: [[LOADGEP_VLA:%.*]] = load ptr, ptr [[GEP_VLA]], align 8, !align [[META3:![0-9]+]] // CHECK3-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL]], align 4 @@ -767,13 +766,15 @@ int main (int argc, char **argv) { // CHECK3: omp.par.region.parallel.after: // CHECK3-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] // CHECK3: omp.par.pre_finalize: -// CHECK3-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +// CHECK3-NEXT: br label [[DOTFINI:%.*]] +// CHECK3: .fini: +// CHECK3-NEXT: br label [[OMP_PAR_EXIT_EXITSTUB:%.*]] // CHECK3: omp.par.exit.exitStub: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@_Z3fooIiEvT_ -// CHECK3-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat { +// CHECK3-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR3:[0-9]+]] comdat { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 @@ -793,7 +794,6 @@ int main (int argc, char **argv) { // CHECK3-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 0 // CHECK3-NEXT: [[TMP2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 // CHECK3-NEXT: [[TMP3:%.*]] = zext i8 [[TMP2]] to i64 -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK3-NEXT: store i64 [[TMP3]], ptr [[DOTRELOADED]], align 8 // CHECK3-NEXT: br label [[OMP_PARALLEL:%.*]] // CHECK3: omp_parallel: @@ -802,7 +802,7 @@ int main (int argc, char **argv) { // CHECK3-NEXT: [[GEP_ARGC_ADDR:%.*]] = getelementptr { ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 1 // CHECK3-NEXT: store ptr [[ARGC_ADDR]], ptr [[GEP_ARGC_ADDR]], align 8 // CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z5tmainIPPcEiT_..omp_par, ptr [[STRUCTARG]]) -// CHECK3-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +// CHECK3-NEXT: br label [[OMP_PAR_EXIT:%.*]] // CHECK3: omp.par.exit: // CHECK3-NEXT: ret i32 0 // @@ -811,9 +811,9 @@ int main (int argc, char **argv) { // CHECK3-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR6:[0-9]+]] { // CHECK3-NEXT: omp.par.entry: // CHECK3-NEXT: [[GEP__RELOADED:%.*]] = getelementptr { ptr, ptr }, ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[LOADGEP__RELOADED:%.*]] = load ptr, ptr [[GEP__RELOADED]], align 8 +// CHECK3-NEXT: [[LOADGEP__RELOADED:%.*]] = load ptr, ptr [[GEP__RELOADED]], align 8, !align [[META4:![0-9]+]] // CHECK3-NEXT: [[GEP_ARGC_ADDR:%.*]] = getelementptr { ptr, ptr }, ptr [[TMP0]], i32 0, i32 1 -// CHECK3-NEXT: [[LOADGEP_ARGC_ADDR:%.*]] = load ptr, ptr [[GEP_ARGC_ADDR]], align 8 +// CHECK3-NEXT: [[LOADGEP_ARGC_ADDR:%.*]] = load ptr, ptr [[GEP_ARGC_ADDR]], align 8, !align [[META4]] // CHECK3-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL]], align 4 @@ -832,13 +832,15 @@ int main (int argc, char **argv) { // CHECK3: omp.par.region.parallel.after: // CHECK3-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] // CHECK3: omp.par.pre_finalize: -// CHECK3-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +// CHECK3-NEXT: br label [[DOTFINI:%.*]] +// CHECK3: .fini: +// CHECK3-NEXT: br label [[OMP_PAR_EXIT_EXITSTUB:%.*]] // CHECK3: omp.par.exit.exitStub: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@_Z3fooIPPcEvT_ -// CHECK3-SAME: (ptr noundef [[ARGC:%.*]]) #[[ATTR4]] comdat { +// CHECK3-SAME: (ptr noundef [[ARGC:%.*]]) #[[ATTR3]] comdat { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 @@ -867,76 +869,74 @@ int main (int argc, char **argv) { // CHECK4-NEXT: store i64 [[TMP1]], ptr [[__VLA_EXPR0]], align 8, !dbg [[DBG21]] // CHECK4-NEXT: #dbg_declare(ptr [[__VLA_EXPR0]], [[META22:![0-9]+]], !DIExpression(), [[META24:![0-9]+]]) // CHECK4-NEXT: #dbg_declare(ptr [[VLA]], [[META25:![0-9]+]], !DIExpression(), [[DBG21]]) -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]), !dbg [[DBG29:![0-9]+]] // CHECK4-NEXT: br label [[OMP_PARALLEL:%.*]] // CHECK4: omp_parallel: // CHECK4-NEXT: [[GEP_VLA:%.*]] = getelementptr { ptr }, ptr [[STRUCTARG]], i32 0, i32 0 // CHECK4-NEXT: store ptr [[VLA]], ptr [[GEP_VLA]], align 8 -// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @main..omp_par, ptr [[STRUCTARG]]), !dbg [[DBG30:![0-9]+]] -// CHECK4-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @main..omp_par, ptr [[STRUCTARG]]), !dbg [[DBG29:![0-9]+]] +// CHECK4-NEXT: br label [[OMP_PAR_EXIT:%.*]] // CHECK4: omp.par.exit: -// CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8, !dbg [[DBG31:![0-9]+]] -// CHECK4-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIPPcEiT_(ptr noundef [[TMP3]]), !dbg [[DBG31]] -// CHECK4-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4, !dbg [[DBG31]] -// CHECK4-NEXT: [[TMP4:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8, !dbg [[DBG32:![0-9]+]] -// CHECK4-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP4]]), !dbg [[DBG32]] -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[RETVAL]], align 4, !dbg [[DBG32]] -// CHECK4-NEXT: ret i32 [[TMP5]], !dbg [[DBG32]] +// CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8, !dbg [[DBG30:![0-9]+]] +// CHECK4-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIPPcEiT_(ptr noundef [[TMP3]]), !dbg [[DBG30]] +// CHECK4-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4, !dbg [[DBG30]] +// CHECK4-NEXT: [[TMP4:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8, !dbg [[DBG31:![0-9]+]] +// CHECK4-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP4]]), !dbg [[DBG31]] +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[RETVAL]], align 4, !dbg [[DBG31]] +// CHECK4-NEXT: ret i32 [[TMP5]], !dbg [[DBG31]] // // // CHECK4-LABEL: define {{[^@]+}}@main..omp_par -// CHECK4-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1:[0-9]+]] !dbg [[DBG33:![0-9]+]] { +// CHECK4-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1:[0-9]+]] !dbg [[DBG32:![0-9]+]] { // CHECK4-NEXT: omp.par.entry: // CHECK4-NEXT: [[GEP_VLA:%.*]] = getelementptr { ptr }, ptr [[TMP0]], i32 0, i32 0 -// CHECK4-NEXT: [[LOADGEP_VLA:%.*]] = load ptr, ptr [[GEP_VLA]], align 8 +// CHECK4-NEXT: [[LOADGEP_VLA:%.*]] = load ptr, ptr [[GEP_VLA]], align 8, !align [[META34:![0-9]+]] // CHECK4-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL]], align 4 // CHECK4-NEXT: [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4 -// CHECK4-NEXT: #dbg_declare(ptr [[LOADGEP_VLA]], [[META36:![0-9]+]], !DIExpression(), [[META37:![0-9]+]]) +// CHECK4-NEXT: #dbg_declare(ptr [[LOADGEP_VLA]], [[META35:![0-9]+]], !DIExpression(), [[META36:![0-9]+]]) // CHECK4-NEXT: br label [[OMP_PAR_REGION:%.*]] // CHECK4: omp.par.region: -// CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[LOADGEP_VLA]], i64 1, !dbg [[DBG35:![0-9]+]] -// CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !dbg [[DBG35]] -// CHECK4-NEXT: call void @_Z3fooIiEvT_(i32 noundef [[TMP2]]), !dbg [[DBG35]] -// CHECK4-NEXT: [[TMP3:%.*]] = load i32, ptr @global, align 4, !dbg [[DBG35]] -// CHECK4-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[LOADGEP_VLA]], i64 1, !dbg [[DBG35]] -// CHECK4-NEXT: store i32 [[TMP3]], ptr [[ARRAYIDX1]], align 4, !dbg [[DBG35]] -// CHECK4-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]], !dbg [[DBG35]] +// CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[LOADGEP_VLA]], i64 1, !dbg [[DBG37:![0-9]+]] +// CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !dbg [[DBG37]] +// CHECK4-NEXT: call void @_Z3fooIiEvT_(i32 noundef [[TMP2]]), !dbg [[DBG37]] +// CHECK4-NEXT: [[TMP3:%.*]] = load i32, ptr @global, align 4, !dbg [[DBG37]] +// CHECK4-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[LOADGEP_VLA]], i64 1, !dbg [[DBG37]] +// CHECK4-NEXT: store i32 [[TMP3]], ptr [[ARRAYIDX1]], align 4, !dbg [[DBG37]] +// CHECK4-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]], !dbg [[DBG37]] // CHECK4: omp.par.region.parallel.after: // CHECK4-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] // CHECK4: omp.par.pre_finalize: -// CHECK4-NEXT: br label [[FINI:%.*]] +// CHECK4-NEXT: br label [[DOTFINI:%.*]] // CHECK4: .fini: -// CHECK4-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]], !dbg [[DBG35]] +// CHECK4-NEXT: br label [[OMP_PAR_EXIT_EXITSTUB:%.*]], !dbg [[DBG37]] // CHECK4: omp.par.exit.exitStub: // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@_Z3fooIiEvT_ -// CHECK4-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat !dbg [[DBG36:![0-9]+]] { +// CHECK4-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR3:[0-9]+]] comdat !dbg [[DBG38:![0-9]+]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 -// CHECK4-NEXT: #dbg_declare(ptr [[ARGC_ADDR]], [[META41:![0-9]+]], !DIExpression(), [[META42:![0-9]+]]) -// CHECK4-NEXT: ret void, !dbg [[META42]] +// CHECK4-NEXT: #dbg_declare(ptr [[ARGC_ADDR]], [[META43:![0-9]+]], !DIExpression(), [[META44:![0-9]+]]) +// CHECK4-NEXT: ret void, !dbg [[META44]] // // // CHECK4-LABEL: define {{[^@]+}}@_Z5tmainIPPcEiT_ -// CHECK4-SAME: (ptr noundef [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat !dbg [[DBG43:![0-9]+]] { +// CHECK4-SAME: (ptr noundef [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat !dbg [[DBG45:![0-9]+]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[STRUCTARG:%.*]] = alloca { ptr, ptr }, align 8 // CHECK4-NEXT: [[DOTRELOADED:%.*]] = alloca i64, align 8 // CHECK4-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK4-NEXT: #dbg_declare(ptr [[ARGC_ADDR]], [[META48:![0-9]+]], !DIExpression(), [[META49:![0-9]+]]) -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8, !dbg [[DBG50:![0-9]+]] -// CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i64 0, !dbg [[DBG50]] -// CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8, !dbg [[DBG50]] -// CHECK4-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 0, !dbg [[DBG50]] -// CHECK4-NEXT: [[TMP2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1, !dbg [[DBG50]] -// CHECK4-NEXT: [[TMP3:%.*]] = zext i8 [[TMP2]] to i64, !dbg [[DBG50]] -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3:[0-9]+]]), !dbg [[DBG51:![0-9]+]] +// CHECK4-NEXT: #dbg_declare(ptr [[ARGC_ADDR]], [[META57:![0-9]+]], !DIExpression(), [[META58:![0-9]+]]) +// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8, !dbg [[DBG59:![0-9]+]] +// CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i64 0, !dbg [[DBG59]] +// CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8, !dbg [[DBG59]] +// CHECK4-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 0, !dbg [[DBG59]] +// CHECK4-NEXT: [[TMP2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1, !dbg [[DBG59]] +// CHECK4-NEXT: [[TMP3:%.*]] = zext i8 [[TMP2]] to i64, !dbg [[DBG59]] // CHECK4-NEXT: store i64 [[TMP3]], ptr [[DOTRELOADED]], align 8 // CHECK4-NEXT: br label [[OMP_PARALLEL:%.*]] // CHECK4: omp_parallel: @@ -944,51 +944,51 @@ int main (int argc, char **argv) { // CHECK4-NEXT: store ptr [[DOTRELOADED]], ptr [[GEP__RELOADED]], align 8 // CHECK4-NEXT: [[GEP_ARGC_ADDR:%.*]] = getelementptr { ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 1 // CHECK4-NEXT: store ptr [[ARGC_ADDR]], ptr [[GEP_ARGC_ADDR]], align 8 -// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @_Z5tmainIPPcEiT_..omp_par, ptr [[STRUCTARG]]), !dbg [[DBG52:![0-9]+]] -// CHECK4-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @_Z5tmainIPPcEiT_..omp_par, ptr [[STRUCTARG]]), !dbg [[DBG60:![0-9]+]] +// CHECK4-NEXT: br label [[OMP_PAR_EXIT:%.*]] // CHECK4: omp.par.exit: -// CHECK4-NEXT: ret i32 0, !dbg [[DBG54:![0-9]+]] +// CHECK4-NEXT: ret i32 0, !dbg [[DBG62:![0-9]+]] // // // CHECK4-LABEL: define {{[^@]+}}@_Z5tmainIPPcEiT_..omp_par -// CHECK4-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR6:[0-9]+]] !dbg [[DBG55:![0-9]+]] { +// CHECK4-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR6:[0-9]+]] !dbg [[DBG63:![0-9]+]] { // CHECK4-NEXT: omp.par.entry: // CHECK4-NEXT: [[GEP__RELOADED:%.*]] = getelementptr { ptr, ptr }, ptr [[TMP0]], i32 0, i32 0 -// CHECK4-NEXT: [[LOADGEP__RELOADED:%.*]] = load ptr, ptr [[GEP__RELOADED]], align 8 +// CHECK4-NEXT: [[LOADGEP__RELOADED:%.*]] = load ptr, ptr [[GEP__RELOADED]], align 8, !align [[META64:![0-9]+]] // CHECK4-NEXT: [[GEP_ARGC_ADDR:%.*]] = getelementptr { ptr, ptr }, ptr [[TMP0]], i32 0, i32 1 -// CHECK4-NEXT: [[LOADGEP_ARGC_ADDR:%.*]] = load ptr, ptr [[GEP_ARGC_ADDR]], align 8 +// CHECK4-NEXT: [[LOADGEP_ARGC_ADDR:%.*]] = load ptr, ptr [[GEP_ARGC_ADDR]], align 8, !align [[META64]] // CHECK4-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL]], align 4 // CHECK4-NEXT: [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4 // CHECK4-NEXT: [[VAR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[TMP2:%.*]] = load i64, ptr [[LOADGEP__RELOADED]], align 8 -// CHECK4-NEXT: #dbg_declare(ptr [[LOADGEP_ARGC_ADDR]], [[META60:![0-9]+]], !DIExpression(), [[META61:![0-9]+]]) +// CHECK4-NEXT: #dbg_declare(ptr [[LOADGEP_ARGC_ADDR]], [[META65:![0-9]+]], !DIExpression(), [[META66:![0-9]+]]) // CHECK4-NEXT: br label [[OMP_PAR_REGION:%.*]] // CHECK4: omp.par.region: -// CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr [[LOADGEP_ARGC_ADDR]], align 8, !dbg [[DBG56:![0-9]+]] -// CHECK4-NEXT: call void @_Z3fooIPPcEvT_(ptr noundef [[TMP3]]), !dbg [[DBG56]] -// CHECK4-NEXT: #dbg_declare(ptr [[VAR]], [[META58:![0-9]+]], !DIExpression(), [[META65:![0-9]+]]) -// CHECK4-NEXT: [[TMP4:%.*]] = load ptr, ptr [[VAR]], align 8, !dbg [[META65]] -// CHECK4-NEXT: [[TMP5:%.*]] = mul nsw i64 0, [[TMP2]], !dbg [[META65]] -// CHECK4-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, ptr [[TMP4]], i64 [[TMP5]], !dbg [[META65]] -// CHECK4-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX2]], i64 0, !dbg [[META65]] -// CHECK4-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]], !dbg [[DBG66:![0-9]+]] +// CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr [[LOADGEP_ARGC_ADDR]], align 8, !dbg [[DBG67:![0-9]+]] +// CHECK4-NEXT: call void @_Z3fooIPPcEvT_(ptr noundef [[TMP3]]), !dbg [[DBG67]] +// CHECK4-NEXT: #dbg_declare(ptr [[VAR]], [[META69:![0-9]+]], !DIExpression(), [[META70:![0-9]+]]) +// CHECK4-NEXT: [[TMP4:%.*]] = load ptr, ptr [[VAR]], align 8, !dbg [[META70]] +// CHECK4-NEXT: [[TMP5:%.*]] = mul nsw i64 0, [[TMP2]], !dbg [[META70]] +// CHECK4-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, ptr [[TMP4]], i64 [[TMP5]], !dbg [[META70]] +// CHECK4-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX2]], i64 0, !dbg [[META70]] +// CHECK4-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]], !dbg [[DBG71:![0-9]+]] // CHECK4: omp.par.region.parallel.after: // CHECK4-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] // CHECK4: omp.par.pre_finalize: -// CHECK4-NEXT: br label [[FINI:%.*]] +// CHECK4-NEXT: br label [[DOTFINI:%.*]] // CHECK4: .fini: -// CHECK4-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]], !dbg [[DBG66]] +// CHECK4-NEXT: br label [[OMP_PAR_EXIT_EXITSTUB:%.*]], !dbg [[DBG71]] // CHECK4: omp.par.exit.exitStub: // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@_Z3fooIPPcEvT_ -// CHECK4-SAME: (ptr noundef [[ARGC:%.*]]) #[[ATTR4]] comdat !dbg [[DBG69:![0-9]+]] { +// CHECK4-SAME: (ptr noundef [[ARGC:%.*]]) #[[ATTR3]] comdat !dbg [[DBG74:![0-9]+]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK4-NEXT: #dbg_declare(ptr [[ARGC_ADDR]], [[META72:![0-9]+]], !DIExpression(), [[META73:![0-9]+]]) -// CHECK4-NEXT: ret void, !dbg [[META73]] +// CHECK4-NEXT: #dbg_declare(ptr [[ARGC_ADDR]], [[META77:![0-9]+]], !DIExpression(), [[META78:![0-9]+]]) +// CHECK4-NEXT: ret void, !dbg [[META78]] // diff --git a/clang/test/OpenMP/taskgroup_codegen.cpp b/clang/test/OpenMP/taskgroup_codegen.cpp index 72653144d08d..719a870b8586 100644 --- a/clang/test/OpenMP/taskgroup_codegen.cpp +++ b/clang/test/OpenMP/taskgroup_codegen.cpp @@ -65,7 +65,7 @@ void parallel_taskgroup() { // CHECK1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: invoke void @_Z3foov() -// CHECK1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] +// CHECK1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] // CHECK1: invoke.cont: // CHECK1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: [[TMP1:%.*]] = load i8, ptr [[A]], align 1 @@ -73,7 +73,7 @@ void parallel_taskgroup() { // CHECK1-NEXT: ret i32 [[CONV]] // CHECK1: terminate.lpad: // CHECK1-NEXT: [[TMP2:%.*]] = landingpad { ptr, i32 } -// CHECK1-NEXT: catch ptr null +// CHECK1-NEXT: catch ptr null // CHECK1-NEXT: [[TMP3:%.*]] = extractvalue { ptr, i32 } [[TMP2]], 0 // CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP3]]) #[[ATTR8:[0-9]+]] // CHECK1-NEXT: unreachable @@ -104,13 +104,13 @@ void parallel_taskgroup() { // CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 // CHECK1-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP1]]) // CHECK1-NEXT: invoke void @_Z3foov() -// CHECK1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] +// CHECK1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] // CHECK1: invoke.cont: // CHECK1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP1]]) // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: // CHECK1-NEXT: [[TMP2:%.*]] = landingpad { ptr, i32 } -// CHECK1-NEXT: catch ptr null +// CHECK1-NEXT: catch ptr null // CHECK1-NEXT: [[TMP3:%.*]] = extractvalue { ptr, i32 } [[TMP2]], 0 // CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP3]]) #[[ATTR8]] // CHECK1-NEXT: unreachable @@ -128,14 +128,14 @@ void parallel_taskgroup() { // DEBUG1-NEXT: entry: // DEBUG1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 // DEBUG1-NEXT: [[A:%.*]] = alloca i8, align 1 -// DEBUG1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) +// DEBUG1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]), !dbg [[DBG13:![0-9]+]] // DEBUG1-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// DEBUG1-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP0]]), !dbg [[DBG13:![0-9]+]] +// DEBUG1-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP0]]), !dbg [[DBG13]] // DEBUG1-NEXT: store i8 2, ptr [[A]], align 1, !dbg [[DBG14:![0-9]+]] // DEBUG1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP0]]), !dbg [[DBG15:![0-9]+]] // DEBUG1-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB3:[0-9]+]], i32 [[TMP0]]), !dbg [[DBG16:![0-9]+]] // DEBUG1-NEXT: invoke void @_Z3foov() -// DEBUG1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !dbg [[DBG17:![0-9]+]] +// DEBUG1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !dbg [[DBG17:![0-9]+]] // DEBUG1: invoke.cont: // DEBUG1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB3]], i32 [[TMP0]]), !dbg [[DBG17]] // DEBUG1-NEXT: [[TMP1:%.*]] = load i8, ptr [[A]], align 1, !dbg [[DBG18:![0-9]+]] @@ -143,7 +143,7 @@ void parallel_taskgroup() { // DEBUG1-NEXT: ret i32 [[CONV]], !dbg [[DBG19:![0-9]+]] // DEBUG1: terminate.lpad: // DEBUG1-NEXT: [[TMP2:%.*]] = landingpad { ptr, i32 } -// DEBUG1-NEXT: catch ptr null, !dbg [[DBG17]] +// DEBUG1-NEXT: catch ptr null, !dbg [[DBG17]] // DEBUG1-NEXT: [[TMP3:%.*]] = extractvalue { ptr, i32 } [[TMP2]], 0, !dbg [[DBG17]] // DEBUG1-NEXT: call void @__clang_call_terminate(ptr [[TMP3]]) #[[ATTR8:[0-9]+]], !dbg [[DBG17]] // DEBUG1-NEXT: unreachable, !dbg [[DBG17]] @@ -174,13 +174,13 @@ void parallel_taskgroup() { // DEBUG1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4, !dbg [[DBG24]] // DEBUG1-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB5:[0-9]+]], i32 [[TMP1]]), !dbg [[DBG24]] // DEBUG1-NEXT: invoke void @_Z3foov() -// DEBUG1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !dbg [[DBG25:![0-9]+]] +// DEBUG1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !dbg [[DBG25:![0-9]+]] // DEBUG1: invoke.cont: // DEBUG1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB5]], i32 [[TMP1]]), !dbg [[DBG25]] // DEBUG1-NEXT: ret void, !dbg [[DBG26:![0-9]+]] // DEBUG1: terminate.lpad: // DEBUG1-NEXT: [[TMP2:%.*]] = landingpad { ptr, i32 } -// DEBUG1-NEXT: catch ptr null, !dbg [[DBG25]] +// DEBUG1-NEXT: catch ptr null, !dbg [[DBG25]] // DEBUG1-NEXT: [[TMP3:%.*]] = extractvalue { ptr, i32 } [[TMP2]], 0, !dbg [[DBG25]] // DEBUG1-NEXT: call void @__clang_call_terminate(ptr [[TMP3]]) #[[ATTR8]], !dbg [[DBG25]] // DEBUG1-NEXT: unreachable, !dbg [[DBG25]] @@ -219,11 +219,10 @@ void parallel_taskgroup() { // CHECK2-LABEL: define {{[^@]+}}@_Z18parallel_taskgroupv // CHECK2-SAME: () #[[ATTR0]] { // CHECK2-NEXT: entry: -// CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] // CHECK2: omp_parallel: // CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @_Z18parallel_taskgroupv..omp_par) -// CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +// CHECK2-NEXT: br label [[OMP_PAR_EXIT:%.*]] // CHECK2: omp.par.exit: // CHECK2-NEXT: ret void // @@ -237,17 +236,19 @@ void parallel_taskgroup() { // CHECK2-NEXT: [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4 // CHECK2-NEXT: br label [[OMP_PAR_REGION:%.*]] // CHECK2: omp.par.region: -// CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK2-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +// CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK2-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) // CHECK2-NEXT: call void @_Z3foov() // CHECK2-NEXT: br label [[TASKGROUP_EXIT:%.*]] // CHECK2: taskgroup.exit: -// CHECK2-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +// CHECK2-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) // CHECK2-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]] // CHECK2: omp.par.region.parallel.after: // CHECK2-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] // CHECK2: omp.par.pre_finalize: -// CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +// CHECK2-NEXT: br label [[DOTFINI:%.*]] +// CHECK2: .fini: +// CHECK2-NEXT: br label [[OMP_PAR_EXIT_EXITSTUB:%.*]] // CHECK2: omp.par.exit.exitStub: // CHECK2-NEXT: ret void // diff --git a/flang/test/Integration/OpenMP/parallel-private-reduction-worstcase.f90 b/flang/test/Integration/OpenMP/parallel-private-reduction-worstcase.f90 index fd59d39b552d..c6a46691d58f 100644 --- a/flang/test/Integration/OpenMP/parallel-private-reduction-worstcase.f90 +++ b/flang/test/Integration/OpenMP/parallel-private-reduction-worstcase.f90 @@ -165,7 +165,7 @@ end subroutine ! [various blocks implementing the reduction] -! CHECK: omp.region.cont37: ; preds = +! CHECK: omp.region.cont36: ; preds = ! CHECK-NEXT: %{{.*}} = phi ptr ! CHECK-NEXT: call void @__kmpc_end_reduce( ! CHECK-NEXT: br label %reduce.finalize @@ -182,18 +182,18 @@ end subroutine ! CHECK: omp.reduction.cleanup: ; preds = %.fini ! [null check] -! CHECK: br i1 %{{.*}}, label %omp.reduction.cleanup43, label %omp.reduction.cleanup44 +! CHECK: br i1 %{{.*}}, label %omp.reduction.cleanup42, label %omp.reduction.cleanup43 -! CHECK: omp.reduction.cleanup44: ; preds = %omp.reduction.cleanup43, %omp.reduction.cleanup -! CHECK-NEXT: br label %omp.region.cont42 +! CHECK: omp.reduction.cleanup43: ; preds = %omp.reduction.cleanup42, %omp.reduction.cleanup +! CHECK-NEXT: br label %omp.region.cont41 -! CHECK: omp.region.cont42: ; preds = %omp.reduction.cleanup44 +! CHECK: omp.region.cont41: ; preds = %omp.reduction.cleanup43 ! CHECK-NEXT: %{{.*}} = load ptr, ptr -! CHECK-NEXT: br label %omp.reduction.cleanup46 +! CHECK-NEXT: br label %omp.reduction.cleanup45 -! CHECK: omp.reduction.cleanup46: ; preds = %omp.region.cont42 +! CHECK: omp.reduction.cleanup45: ; preds = %omp.region.cont41 ! [null check] -! CHECK: br i1 %{{.*}}, label %omp.reduction.cleanup47, label %omp.reduction.cleanup48 +! CHECK: br i1 %{{.*}}, label %omp.reduction.cleanup46, label %omp.reduction.cleanup47 ! CHECK: omp.par.region30: ; preds = %omp.par.region29 ! CHECK-NEXT: call void @_FortranAStopStatement @@ -222,5 +222,5 @@ end subroutine ! [var extent was non-zero: malloc a private array] ! CHECK: br label %omp.private.init5 -! CHECK: omp.par.exit.exitStub: ; preds = %omp.region.cont52 +! CHECK: omp.par.exit.exitStub: ; preds = %omp.region.cont51 ! CHECK-NEXT: ret void diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 2ae0d70acdec..6775674d733f 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -1587,7 +1587,9 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createParallel( uint32_t SrcLocStrSize; Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize); - Value *ThreadID = getOrCreateThreadID(Ident); + const bool NeedThreadID = NumThreads || Config.isTargetDevice() || + (ProcBind != OMP_PROC_BIND_default); + Value *ThreadID = NeedThreadID ? getOrCreateThreadID(Ident) : nullptr; // If we generate code for the target device, we need to allocate // struct for aggregate params in the device default alloca address space. // OpenMP runtime requires that the params of the extracted functions are diff --git a/llvm/test/Transforms/OpenMP/parallel_region_merging.ll b/llvm/test/Transforms/OpenMP/parallel_region_merging.ll index 1bbac5cc3154..deeddce2e395 100644 --- a/llvm/test/Transforms/OpenMP/parallel_region_merging.ll +++ b/llvm/test/Transforms/OpenMP/parallel_region_merging.ll @@ -4676,13 +4676,12 @@ entry: ; CHECK2-NEXT: [[STRUCTARG:%.*]] = alloca { ptr }, align 8 ; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) ; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK2: omp_parallel: ; CHECK2-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { ptr }, ptr [[STRUCTARG]], i32 0, i32 0 ; CHECK2-NEXT: store ptr [[A_ADDR]], ptr [[GEP_A_ADDR]], align 8 -; CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @merge..omp_par, ptr [[STRUCTARG]]) -; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @merge..omp_par, ptr [[STRUCTARG]]) +; CHECK2-NEXT: br label [[OMP_PAR_EXIT:%.*]] ; CHECK2: omp.par.exit: ; CHECK2-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] ; CHECK2: entry.split.split: @@ -4693,7 +4692,7 @@ entry: ; CHECK2-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK2-NEXT: omp.par.entry: ; CHECK2-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { ptr }, ptr [[TMP0]], i32 0, i32 0 -; CHECK2-NEXT: [[LOADGEP_A_ADDR:%.*]] = load ptr, ptr [[GEP_A_ADDR]], align 8 +; CHECK2-NEXT: [[LOADGEP_A_ADDR:%.*]] = load ptr, ptr [[GEP_A_ADDR]], align 8, !align [[META2:![0-9]+]] ; CHECK2-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR]], align 4 ; CHECK2-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL]], align 4 @@ -4712,7 +4711,9 @@ entry: ; CHECK2: omp.par.region.split: ; CHECK2-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] ; CHECK2: omp.par.pre_finalize: -; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; CHECK2-NEXT: br label [[DOTFINI:%.*]] +; CHECK2: .fini: +; CHECK2-NEXT: br label [[OMP_PAR_EXIT_EXITSTUB:%.*]] ; CHECK2: omp.par.exit.exitStub: ; CHECK2-NEXT: ret void ; @@ -4822,13 +4823,12 @@ entry: ; CHECK2-NEXT: [[STRUCTARG:%.*]] = alloca { ptr }, align 8 ; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) ; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK2: omp_parallel: ; CHECK2-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { ptr }, ptr [[STRUCTARG]], i32 0, i32 0 ; CHECK2-NEXT: store ptr [[A_ADDR]], ptr [[GEP_A_ADDR]], align 8 ; CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @merge_seq..omp_par, ptr [[STRUCTARG]]) -; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK2-NEXT: br label [[OMP_PAR_EXIT:%.*]] ; CHECK2: omp.par.exit: ; CHECK2-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] ; CHECK2: entry.split.split: @@ -4841,7 +4841,7 @@ entry: ; CHECK2-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR0]] { ; CHECK2-NEXT: omp.par.entry: ; CHECK2-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { ptr }, ptr [[TMP0]], i32 0, i32 0 -; CHECK2-NEXT: [[LOADGEP_A_ADDR:%.*]] = load ptr, ptr [[GEP_A_ADDR]], align 8 +; CHECK2-NEXT: [[LOADGEP_A_ADDR:%.*]] = load ptr, ptr [[GEP_A_ADDR]], align 8, !align [[META2]] ; CHECK2-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR]], align 4 ; CHECK2-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL]], align 4 @@ -4869,7 +4869,9 @@ entry: ; CHECK2: omp.par.region.split: ; CHECK2-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] ; CHECK2: omp.par.pre_finalize: -; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; CHECK2-NEXT: br label [[DOTFINI:%.*]] +; CHECK2: .fini: +; CHECK2-NEXT: br label [[OMP_PAR_EXIT_EXITSTUB:%.*]] ; CHECK2: omp_region.body: ; CHECK2-NEXT: br label [[SEQ_PAR_MERGED:%.*]] ; CHECK2: seq.par.merged: @@ -4911,7 +4913,6 @@ entry: ; CHECK2-NEXT: [[F_RELOADED:%.*]] = alloca float, align 4 ; CHECK2-NEXT: [[F_ADDR:%.*]] = alloca float, align 4 ; CHECK2-NEXT: store float [[F]], ptr [[F_ADDR]], align 4 -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) ; CHECK2-NEXT: store float [[F]], ptr [[F_RELOADED]], align 4 ; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK2: omp_parallel: @@ -4922,7 +4923,7 @@ entry: ; CHECK2-NEXT: [[GEP_P:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 2 ; CHECK2-NEXT: store ptr [[P]], ptr [[GEP_P]], align 8 ; CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @merge_seq_float..omp_par, ptr [[STRUCTARG]]) -; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK2-NEXT: br label [[OMP_PAR_EXIT:%.*]] ; CHECK2: omp.par.exit: ; CHECK2-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] ; CHECK2: entry.split.split: @@ -4933,11 +4934,11 @@ entry: ; CHECK2-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR0]] { ; CHECK2-NEXT: omp.par.entry: ; CHECK2-NEXT: [[GEP_F_RELOADED:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 0 -; CHECK2-NEXT: [[LOADGEP_F_RELOADED:%.*]] = load ptr, ptr [[GEP_F_RELOADED]], align 8 +; CHECK2-NEXT: [[LOADGEP_F_RELOADED:%.*]] = load ptr, ptr [[GEP_F_RELOADED]], align 8, !align [[META2]] ; CHECK2-NEXT: [[GEP_F_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 1 -; CHECK2-NEXT: [[LOADGEP_F_ADDR:%.*]] = load ptr, ptr [[GEP_F_ADDR]], align 8 +; CHECK2-NEXT: [[LOADGEP_F_ADDR:%.*]] = load ptr, ptr [[GEP_F_ADDR]], align 8, !align [[META2]] ; CHECK2-NEXT: [[GEP_P:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 2 -; CHECK2-NEXT: [[LOADGEP_P:%.*]] = load ptr, ptr [[GEP_P]], align 8 +; CHECK2-NEXT: [[LOADGEP_P:%.*]] = load ptr, ptr [[GEP_P]], align 8, !align [[META5:![0-9]+]] ; CHECK2-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR]], align 4 ; CHECK2-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL]], align 4 @@ -4966,7 +4967,9 @@ entry: ; CHECK2: omp.par.region.split: ; CHECK2-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] ; CHECK2: omp.par.pre_finalize: -; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; CHECK2-NEXT: br label [[DOTFINI:%.*]] +; CHECK2: .fini: +; CHECK2-NEXT: br label [[OMP_PAR_EXIT_EXITSTUB:%.*]] ; CHECK2: omp_region.body: ; CHECK2-NEXT: br label [[SEQ_PAR_MERGED:%.*]] ; CHECK2: seq.par.merged: @@ -5009,7 +5012,6 @@ entry: ; CHECK2-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]] = alloca i64, align 8 ; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) ; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK2: omp_parallel: ; CHECK2-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 0 @@ -5017,7 +5019,7 @@ entry: ; CHECK2-NEXT: [[GEP_A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 1 ; CHECK2-NEXT: store ptr [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], ptr [[GEP_A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8 ; CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @merge_seq_firstprivate..omp_par, ptr [[STRUCTARG]]) -; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK2-NEXT: br label [[OMP_PAR_EXIT:%.*]] ; CHECK2: omp.par.exit: ; CHECK2-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] ; CHECK2: entry.split.split: @@ -5030,9 +5032,9 @@ entry: ; CHECK2-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR0]] { ; CHECK2-NEXT: omp.par.entry: ; CHECK2-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { ptr, ptr }, ptr [[TMP0]], i32 0, i32 0 -; CHECK2-NEXT: [[LOADGEP_A_ADDR:%.*]] = load ptr, ptr [[GEP_A_ADDR]], align 8 +; CHECK2-NEXT: [[LOADGEP_A_ADDR:%.*]] = load ptr, ptr [[GEP_A_ADDR]], align 8, !align [[META2]] ; CHECK2-NEXT: [[GEP_A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { ptr, ptr }, ptr [[TMP0]], i32 0, i32 1 -; CHECK2-NEXT: [[LOADGEP_A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]] = load ptr, ptr [[GEP_A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8 +; CHECK2-NEXT: [[LOADGEP_A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]] = load ptr, ptr [[GEP_A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8, !align [[META6:![0-9]+]] ; CHECK2-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR]], align 4 ; CHECK2-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL]], align 4 @@ -5061,7 +5063,9 @@ entry: ; CHECK2: omp.par.region.split: ; CHECK2-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] ; CHECK2: omp.par.pre_finalize: -; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; CHECK2-NEXT: br label [[DOTFINI:%.*]] +; CHECK2: .fini: +; CHECK2-NEXT: br label [[OMP_PAR_EXIT_EXITSTUB:%.*]] ; CHECK2: omp_region.body: ; CHECK2-NEXT: br label [[SEQ_PAR_MERGED:%.*]] ; CHECK2: seq.par.merged: @@ -5104,13 +5108,12 @@ entry: ; CHECK2-NEXT: [[STRUCTARG:%.*]] = alloca { ptr }, align 8 ; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) ; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK2: omp_parallel: ; CHECK2-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { ptr }, ptr [[STRUCTARG]], i32 0, i32 0 ; CHECK2-NEXT: store ptr [[A_ADDR]], ptr [[GEP_A_ADDR]], align 8 ; CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @merge_seq_sink_lt..omp_par, ptr [[STRUCTARG]]) -; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK2-NEXT: br label [[OMP_PAR_EXIT:%.*]] ; CHECK2: omp.par.exit: ; CHECK2-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] ; CHECK2: entry.split.split: @@ -5122,7 +5125,7 @@ entry: ; CHECK2-NEXT: omp.par.entry: ; CHECK2-NEXT: [[B:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { ptr }, ptr [[TMP0]], i32 0, i32 0 -; CHECK2-NEXT: [[LOADGEP_A_ADDR:%.*]] = load ptr, ptr [[GEP_A_ADDR]], align 8 +; CHECK2-NEXT: [[LOADGEP_A_ADDR:%.*]] = load ptr, ptr [[GEP_A_ADDR]], align 8, !align [[META2]] ; CHECK2-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR]], align 4 ; CHECK2-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL]], align 4 @@ -5150,7 +5153,9 @@ entry: ; CHECK2: omp.par.region.split: ; CHECK2-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] ; CHECK2: omp.par.pre_finalize: -; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; CHECK2-NEXT: br label [[DOTFINI:%.*]] +; CHECK2: .fini: +; CHECK2-NEXT: br label [[OMP_PAR_EXIT_EXITSTUB:%.*]] ; CHECK2: omp_region.body: ; CHECK2-NEXT: br label [[SEQ_PAR_MERGED:%.*]] ; CHECK2: seq.par.merged: @@ -5190,40 +5195,33 @@ entry: ; CHECK2-LABEL: define {{[^@]+}}@merge_seq_par_use ; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK2-NEXT: entry: -; CHECK2-NEXT: [[STRUCTARG:%.*]] = alloca { ptr, ptr, ptr }, align 8 +; CHECK2-NEXT: [[STRUCTARG:%.*]] = alloca { ptr, ptr }, align 8 ; CHECK2-NEXT: [[A_RELOADED:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; CHECK2-NEXT: [[B:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) ; CHECK2-NEXT: store i32 [[A]], ptr [[A_RELOADED]], align 4 ; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK2: omp_parallel: -; CHECK2-NEXT: [[GEP_A_RELOADED:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 0 +; CHECK2-NEXT: [[GEP_A_RELOADED:%.*]] = getelementptr { ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 0 ; CHECK2-NEXT: store ptr [[A_RELOADED]], ptr [[GEP_A_RELOADED]], align 8 -; CHECK2-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 1 +; CHECK2-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 1 ; CHECK2-NEXT: store ptr [[A_ADDR]], ptr [[GEP_A_ADDR]], align 8 -; CHECK2-NEXT: [[GEP_B:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 2 -; CHECK2-NEXT: store ptr [[B]], ptr [[GEP_B]], align 8 -; CHECK2-NEXT: call void @llvm.lifetime.start.p0(ptr [[B]]) ; CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @merge_seq_par_use..omp_par, ptr [[STRUCTARG]]) -; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK2-NEXT: br label [[OMP_PAR_EXIT:%.*]] ; CHECK2: omp.par.exit: ; CHECK2-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] ; CHECK2: entry.split.split: -; CHECK2-NEXT: call void @llvm.lifetime.end.p0(ptr noundef nonnull [[B]]) ; CHECK2-NEXT: ret void ; ; ; CHECK2-LABEL: define {{[^@]+}}@merge_seq_par_use..omp_par ; CHECK2-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR0]] { ; CHECK2-NEXT: omp.par.entry: -; CHECK2-NEXT: [[GEP_A_RELOADED:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 0 -; CHECK2-NEXT: [[LOADGEP_A_RELOADED:%.*]] = load ptr, ptr [[GEP_A_RELOADED]], align 8 -; CHECK2-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 1 -; CHECK2-NEXT: [[LOADGEP_A_ADDR:%.*]] = load ptr, ptr [[GEP_A_ADDR]], align 8 -; CHECK2-NEXT: [[GEP_B:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 2 -; CHECK2-NEXT: [[LOADGEP_B:%.*]] = load ptr, ptr [[GEP_B]], align 8 +; CHECK2-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK2-NEXT: [[GEP_A_RELOADED:%.*]] = getelementptr { ptr, ptr }, ptr [[TMP0]], i32 0, i32 0 +; CHECK2-NEXT: [[LOADGEP_A_RELOADED:%.*]] = load ptr, ptr [[GEP_A_RELOADED]], align 8, !align [[META2]] +; CHECK2-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { ptr, ptr }, ptr [[TMP0]], i32 0, i32 1 +; CHECK2-NEXT: [[LOADGEP_A_ADDR:%.*]] = load ptr, ptr [[GEP_A_ADDR]], align 8, !align [[META2]] ; CHECK2-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR]], align 4 ; CHECK2-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL]], align 4 @@ -5245,19 +5243,23 @@ entry: ; CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK2: omp.par.merged.split.split: -; CHECK2-NEXT: call void (ptr, ptr, ...) @.omp_outlined..17(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[LOADGEP_A_ADDR]], ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[LOADGEP_B]]) +; CHECK2-NEXT: call void (ptr, ptr, ...) @.omp_outlined..17(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[LOADGEP_A_ADDR]], ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[B]]) ; CHECK2-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK2: entry.split: ; CHECK2-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] ; CHECK2: omp.par.region.split: ; CHECK2-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] ; CHECK2: omp.par.pre_finalize: -; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; CHECK2-NEXT: br label [[DOTFINI:%.*]] +; CHECK2: .fini: +; CHECK2-NEXT: call void @llvm.lifetime.end.p0(ptr noundef nonnull [[B]]) +; CHECK2-NEXT: br label [[OMP_PAR_EXIT_EXITSTUB:%.*]] ; CHECK2: omp_region.body: ; CHECK2-NEXT: br label [[SEQ_PAR_MERGED:%.*]] ; CHECK2: seq.par.merged: +; CHECK2-NEXT: call void @llvm.lifetime.start.p0(ptr noundef nonnull align 4 dereferenceable(4) [[B]]) ; CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 1 -; CHECK2-NEXT: store i32 [[ADD]], ptr [[LOADGEP_B]], align 4 +; CHECK2-NEXT: store i32 [[ADD]], ptr [[B]], align 4 ; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] ; CHECK2: omp.par.merged.split: ; CHECK2-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] @@ -5296,7 +5298,6 @@ entry: ; CHECK2-NEXT: [[CANCEL2_ADDR:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: store i32 [[CANCEL1]], ptr [[CANCEL1_ADDR]], align 4 ; CHECK2-NEXT: store i32 [[CANCEL2]], ptr [[CANCEL2_ADDR]], align 4 -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) ; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK2: omp_parallel: ; CHECK2-NEXT: [[GEP_CANCEL1_ADDR:%.*]] = getelementptr { ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 0 @@ -5304,7 +5305,7 @@ entry: ; CHECK2-NEXT: [[GEP_CANCEL2_ADDR:%.*]] = getelementptr { ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 1 ; CHECK2-NEXT: store ptr [[CANCEL2_ADDR]], ptr [[GEP_CANCEL2_ADDR]], align 8 ; CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @merge_cancellable_regions..omp_par, ptr [[STRUCTARG]]) -; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK2-NEXT: br label [[OMP_PAR_EXIT:%.*]] ; CHECK2: omp.par.exit: ; CHECK2-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] ; CHECK2: entry.split.split: @@ -5315,9 +5316,9 @@ entry: ; CHECK2-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR0]] { ; CHECK2-NEXT: omp.par.entry: ; CHECK2-NEXT: [[GEP_CANCEL1_ADDR:%.*]] = getelementptr { ptr, ptr }, ptr [[TMP0]], i32 0, i32 0 -; CHECK2-NEXT: [[LOADGEP_CANCEL1_ADDR:%.*]] = load ptr, ptr [[GEP_CANCEL1_ADDR]], align 8 +; CHECK2-NEXT: [[LOADGEP_CANCEL1_ADDR:%.*]] = load ptr, ptr [[GEP_CANCEL1_ADDR]], align 8, !align [[META2]] ; CHECK2-NEXT: [[GEP_CANCEL2_ADDR:%.*]] = getelementptr { ptr, ptr }, ptr [[TMP0]], i32 0, i32 1 -; CHECK2-NEXT: [[LOADGEP_CANCEL2_ADDR:%.*]] = load ptr, ptr [[GEP_CANCEL2_ADDR]], align 8 +; CHECK2-NEXT: [[LOADGEP_CANCEL2_ADDR:%.*]] = load ptr, ptr [[GEP_CANCEL2_ADDR]], align 8, !align [[META2]] ; CHECK2-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR]], align 4 ; CHECK2-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL]], align 4 @@ -5336,7 +5337,9 @@ entry: ; CHECK2: omp.par.region.split: ; CHECK2-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] ; CHECK2: omp.par.pre_finalize: -; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; CHECK2-NEXT: br label [[DOTFINI:%.*]] +; CHECK2: .fini: +; CHECK2-NEXT: br label [[OMP_PAR_EXIT_EXITSTUB:%.*]] ; CHECK2: omp.par.exit.exitStub: ; CHECK2-NEXT: ret void ; @@ -5348,7 +5351,7 @@ entry: ; CHECK2-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0 ; CHECK2-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] ; CHECK2: if.then: -; CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4 +; CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !invariant.load [[META7:![0-9]+]] ; CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(ptr noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1) ; CHECK2-NEXT: ret void ; CHECK2: if.end: @@ -5362,7 +5365,7 @@ entry: ; CHECK2-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0 ; CHECK2-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] ; CHECK2: if.then: -; CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4 +; CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !invariant.load [[META7]] ; CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(ptr noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1) ; CHECK2-NEXT: ret void ; CHECK2: if.end: @@ -5378,7 +5381,6 @@ entry: ; CHECK2-NEXT: [[CANCEL2_ADDR:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: store i32 [[CANCEL1]], ptr [[CANCEL1_ADDR]], align 4 ; CHECK2-NEXT: store i32 [[CANCEL2]], ptr [[CANCEL2_ADDR]], align 4 -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) ; CHECK2-NEXT: store i32 [[CANCEL1]], ptr [[CANCEL1_RELOADED]], align 4 ; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK2: omp_parallel: @@ -5389,7 +5391,7 @@ entry: ; CHECK2-NEXT: [[GEP_CANCEL2_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 2 ; CHECK2-NEXT: store ptr [[CANCEL2_ADDR]], ptr [[GEP_CANCEL2_ADDR]], align 8 ; CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @merge_cancellable_regions_seq..omp_par, ptr [[STRUCTARG]]) -; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK2-NEXT: br label [[OMP_PAR_EXIT:%.*]] ; CHECK2: omp.par.exit: ; CHECK2-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] ; CHECK2: entry.split.split: @@ -5400,11 +5402,11 @@ entry: ; CHECK2-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR0]] { ; CHECK2-NEXT: omp.par.entry: ; CHECK2-NEXT: [[GEP_CANCEL1_RELOADED:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 0 -; CHECK2-NEXT: [[LOADGEP_CANCEL1_RELOADED:%.*]] = load ptr, ptr [[GEP_CANCEL1_RELOADED]], align 8 +; CHECK2-NEXT: [[LOADGEP_CANCEL1_RELOADED:%.*]] = load ptr, ptr [[GEP_CANCEL1_RELOADED]], align 8, !align [[META2]] ; CHECK2-NEXT: [[GEP_CANCEL1_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 1 -; CHECK2-NEXT: [[LOADGEP_CANCEL1_ADDR:%.*]] = load ptr, ptr [[GEP_CANCEL1_ADDR]], align 8 +; CHECK2-NEXT: [[LOADGEP_CANCEL1_ADDR:%.*]] = load ptr, ptr [[GEP_CANCEL1_ADDR]], align 8, !align [[META2]] ; CHECK2-NEXT: [[GEP_CANCEL2_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 2 -; CHECK2-NEXT: [[LOADGEP_CANCEL2_ADDR:%.*]] = load ptr, ptr [[GEP_CANCEL2_ADDR]], align 8 +; CHECK2-NEXT: [[LOADGEP_CANCEL2_ADDR:%.*]] = load ptr, ptr [[GEP_CANCEL2_ADDR]], align 8, !align [[META2]] ; CHECK2-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR]], align 4 ; CHECK2-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL]], align 4 @@ -5433,7 +5435,9 @@ entry: ; CHECK2: omp.par.region.split: ; CHECK2-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] ; CHECK2: omp.par.pre_finalize: -; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; CHECK2-NEXT: br label [[DOTFINI:%.*]] +; CHECK2: .fini: +; CHECK2-NEXT: br label [[OMP_PAR_EXIT_EXITSTUB:%.*]] ; CHECK2: omp_region.body: ; CHECK2-NEXT: br label [[SEQ_PAR_MERGED:%.*]] ; CHECK2: seq.par.merged: @@ -5459,7 +5463,7 @@ entry: ; CHECK2-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0 ; CHECK2-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] ; CHECK2: if.then: -; CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4 +; CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !invariant.load [[META7]] ; CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(ptr noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1) ; CHECK2-NEXT: ret void ; CHECK2: if.end: @@ -5473,7 +5477,7 @@ entry: ; CHECK2-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0 ; CHECK2-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] ; CHECK2: if.then: -; CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4 +; CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !invariant.load [[META7]] ; CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(ptr noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1) ; CHECK2-NEXT: ret void ; CHECK2: if.end: @@ -5486,13 +5490,12 @@ entry: ; CHECK2-NEXT: [[STRUCTARG:%.*]] = alloca { ptr }, align 8 ; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) ; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK2: omp_parallel: ; CHECK2-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { ptr }, ptr [[STRUCTARG]], i32 0, i32 0 ; CHECK2-NEXT: store ptr [[A_ADDR]], ptr [[GEP_A_ADDR]], align 8 ; CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @merge_3..omp_par, ptr [[STRUCTARG]]) -; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK2-NEXT: br label [[OMP_PAR_EXIT:%.*]] ; CHECK2: omp.par.exit: ; CHECK2-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] ; CHECK2: entry.split.split: @@ -5503,7 +5506,7 @@ entry: ; CHECK2-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR0]] { ; CHECK2-NEXT: omp.par.entry: ; CHECK2-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { ptr }, ptr [[TMP0]], i32 0, i32 0 -; CHECK2-NEXT: [[LOADGEP_A_ADDR:%.*]] = load ptr, ptr [[GEP_A_ADDR]], align 8 +; CHECK2-NEXT: [[LOADGEP_A_ADDR:%.*]] = load ptr, ptr [[GEP_A_ADDR]], align 8, !align [[META2]] ; CHECK2-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR]], align 4 ; CHECK2-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL]], align 4 @@ -5525,7 +5528,9 @@ entry: ; CHECK2: omp.par.region.split: ; CHECK2-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] ; CHECK2: omp.par.pre_finalize: -; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; CHECK2-NEXT: br label [[DOTFINI:%.*]] +; CHECK2: .fini: +; CHECK2-NEXT: br label [[OMP_PAR_EXIT_EXITSTUB:%.*]] ; CHECK2: omp.par.exit.exitStub: ; CHECK2-NEXT: ret void ; @@ -5563,7 +5568,6 @@ entry: ; CHECK2-NEXT: [[ADD_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) ; CHECK2-NEXT: store i32 [[A]], ptr [[A_RELOADED]], align 4 ; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK2: omp_parallel: @@ -5576,7 +5580,7 @@ entry: ; CHECK2-NEXT: [[GEP_ADD1_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { ptr, ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 3 ; CHECK2-NEXT: store ptr [[ADD1_SEQ_OUTPUT_ALLOC]], ptr [[GEP_ADD1_SEQ_OUTPUT_ALLOC]], align 8 ; CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @merge_3_seq..omp_par, ptr [[STRUCTARG]]) -; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK2-NEXT: br label [[OMP_PAR_EXIT:%.*]] ; CHECK2: omp.par.exit: ; CHECK2-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] ; CHECK2: entry.split.split: @@ -5589,13 +5593,13 @@ entry: ; CHECK2-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR0]] { ; CHECK2-NEXT: omp.par.entry: ; CHECK2-NEXT: [[GEP_A_RELOADED:%.*]] = getelementptr { ptr, ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 0 -; CHECK2-NEXT: [[LOADGEP_A_RELOADED:%.*]] = load ptr, ptr [[GEP_A_RELOADED]], align 8 +; CHECK2-NEXT: [[LOADGEP_A_RELOADED:%.*]] = load ptr, ptr [[GEP_A_RELOADED]], align 8, !align [[META2]] ; CHECK2-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { ptr, ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 1 -; CHECK2-NEXT: [[LOADGEP_A_ADDR:%.*]] = load ptr, ptr [[GEP_A_ADDR]], align 8 +; CHECK2-NEXT: [[LOADGEP_A_ADDR:%.*]] = load ptr, ptr [[GEP_A_ADDR]], align 8, !align [[META2]] ; CHECK2-NEXT: [[GEP_ADD_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { ptr, ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 2 -; CHECK2-NEXT: [[LOADGEP_ADD_SEQ_OUTPUT_ALLOC:%.*]] = load ptr, ptr [[GEP_ADD_SEQ_OUTPUT_ALLOC]], align 8 +; CHECK2-NEXT: [[LOADGEP_ADD_SEQ_OUTPUT_ALLOC:%.*]] = load ptr, ptr [[GEP_ADD_SEQ_OUTPUT_ALLOC]], align 8, !align [[META2]] ; CHECK2-NEXT: [[GEP_ADD1_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { ptr, ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 3 -; CHECK2-NEXT: [[LOADGEP_ADD1_SEQ_OUTPUT_ALLOC:%.*]] = load ptr, ptr [[GEP_ADD1_SEQ_OUTPUT_ALLOC]], align 8 +; CHECK2-NEXT: [[LOADGEP_ADD1_SEQ_OUTPUT_ALLOC:%.*]] = load ptr, ptr [[GEP_ADD1_SEQ_OUTPUT_ALLOC]], align 8, !align [[META2]] ; CHECK2-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR]], align 4 ; CHECK2-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL]], align 4 @@ -5623,10 +5627,10 @@ entry: ; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) ; CHECK2-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) ; CHECK2-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 -; CHECK2-NEXT: br i1 [[TMP6]], label [[OMP_REGION_BODY5:%.*]], label [[OMP_REGION_END4:%.*]] +; CHECK2-NEXT: br i1 [[TMP6]], label [[OMP_REGION_BODY6:%.*]], label [[OMP_REGION_END4:%.*]] ; CHECK2: omp_region.end4: -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) -; CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM6]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM7]]) ; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT_SPLIT:%.*]] ; CHECK2: omp.par.merged.split.split.split.split: ; CHECK2-NEXT: call void (ptr, ptr, ...) @.omp_outlined..27(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[LOADGEP_A_ADDR]]) @@ -5636,9 +5640,9 @@ entry: ; CHECK2: omp.par.region.split: ; CHECK2-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] ; CHECK2: omp.par.pre_finalize: -; CHECK2-NEXT: br label [[FINI:%.*]] +; CHECK2-NEXT: br label [[DOTFINI:%.*]] ; CHECK2: .fini: -; CHECK2-NEXT: br label [[OMP_PAR_EXIT_EXITSTUB:.*]] +; CHECK2-NEXT: br label [[OMP_PAR_EXIT_EXITSTUB:%.*]] ; CHECK2: omp_region.body6: ; CHECK2-NEXT: br label [[SEQ_PAR_MERGED2:%.*]] ; CHECK2: seq.par.merged2: @@ -5647,10 +5651,10 @@ entry: ; CHECK2-NEXT: store i32 [[ADD1]], ptr [[LOADGEP_ADD1_SEQ_OUTPUT_ALLOC]], align 4 ; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT:%.*]] ; CHECK2: omp.par.merged.split.split.split: -; CHECK2-NEXT: br label [[OMP_REGION_BODY5_SPLIT:%.*]] +; CHECK2-NEXT: br label [[OMP_REGION_BODY6_SPLIT:%.*]] ; CHECK2: omp_region.body6.split: ; CHECK2-NEXT: br label [[OMP_REGION_FINALIZE5:%.*]] -; CHECK2: omp_region.finalize{{.*}}: +; CHECK2: omp_region.finalize5: ; CHECK2-NEXT: call void @__kmpc_end_master(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) ; CHECK2-NEXT: br label [[OMP_REGION_END4]] ; CHECK2: omp_region.body: @@ -5811,13 +5815,12 @@ entry: ; CHECK2-NEXT: [[STRUCTARG:%.*]] = alloca { ptr }, align 8 ; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) ; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK2: omp_parallel: ; CHECK2-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { ptr }, ptr [[STRUCTARG]], i32 0, i32 0 ; CHECK2-NEXT: store ptr [[A_ADDR]], ptr [[GEP_A_ADDR]], align 8 ; CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @merge_2_unmergable_1..omp_par, ptr [[STRUCTARG]]) -; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK2-NEXT: br label [[OMP_PAR_EXIT:%.*]] ; CHECK2: omp.par.exit: ; CHECK2-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] ; CHECK2: entry.split.split: @@ -5830,7 +5833,7 @@ entry: ; CHECK2-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR0]] { ; CHECK2-NEXT: omp.par.entry: ; CHECK2-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { ptr }, ptr [[TMP0]], i32 0, i32 0 -; CHECK2-NEXT: [[LOADGEP_A_ADDR:%.*]] = load ptr, ptr [[GEP_A_ADDR]], align 8 +; CHECK2-NEXT: [[LOADGEP_A_ADDR:%.*]] = load ptr, ptr [[GEP_A_ADDR]], align 8, !align [[META2]] ; CHECK2-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR]], align 4 ; CHECK2-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL]], align 4 @@ -5849,7 +5852,9 @@ entry: ; CHECK2: omp.par.region.split: ; CHECK2-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] ; CHECK2: omp.par.pre_finalize: -; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; CHECK2-NEXT: br label [[DOTFINI:%.*]] +; CHECK2: .fini: +; CHECK2-NEXT: br label [[OMP_PAR_EXIT_EXITSTUB:%.*]] ; CHECK2: omp.par.exit.exitStub: ; CHECK2-NEXT: ret void ; diff --git a/mlir/test/Target/LLVMIR/openmp-dist_schedule_with_wsloop.mlir b/mlir/test/Target/LLVMIR/openmp-dist_schedule_with_wsloop.mlir index dad32b48e541..2ae3cacee791 100644 --- a/mlir/test/Target/LLVMIR/openmp-dist_schedule_with_wsloop.mlir +++ b/mlir/test/Target/LLVMIR/openmp-dist_schedule_with_wsloop.mlir @@ -23,8 +23,8 @@ llvm.func @distribute_wsloop_dist_schedule_chunked_schedule_chunked(%n: i32, %te llvm.return } // CHECK: define internal void @distribute_wsloop_dist_schedule_chunked_schedule_chunked..omp_par(ptr noalias %tid.addr, ptr noalias %zero.addr, ptr %0) #0 { -// CHECK: call void @__kmpc_for_static_init_4u(ptr @1, i32 %omp_global_thread_num9, i32 33, ptr %p.lastiter, ptr %p.lowerbound, ptr %p.upperbound, ptr %p.stride, i32 1, i32 64) -// CHECK: call void @__kmpc_for_static_init_4u(ptr @1, i32 %omp_global_thread_num9, i32 91, ptr %p.lastiter, ptr %p.lowerbound, ptr %p.upperbound, ptr %p.stride, i32 1, i32 %3) +// CHECK: call void @__kmpc_for_static_init_4u(ptr @1, i32 %omp_global_thread_num8, i32 33, ptr %p.lastiter, ptr %p.lowerbound, ptr %p.upperbound, ptr %p.stride, i32 1, i32 64) +// CHECK: call void @__kmpc_for_static_init_4u(ptr @1, i32 %omp_global_thread_num8, i32 91, ptr %p.lastiter, ptr %p.lowerbound, ptr %p.upperbound, ptr %p.stride, i32 1, i32 %3) llvm.func @distribute_wsloop_dist_schedule_chunked_schedule_chunked_i64(%n: i32, %teams: i32, %threads: i32) { %0 = llvm.mlir.constant(0 : i64) : i64 @@ -49,8 +49,8 @@ llvm.func @distribute_wsloop_dist_schedule_chunked_schedule_chunked_i64(%n: i32, llvm.return } // CHECK: define internal void @distribute_wsloop_dist_schedule_chunked_schedule_chunked_i64..omp_par(ptr noalias %tid.addr, ptr noalias %zero.addr, ptr %0) #0 { -// CHECK: call void @__kmpc_for_static_init_8u(ptr @1, i32 %omp_global_thread_num9, i32 33, ptr %p.lastiter, ptr %p.lowerbound, ptr %p.upperbound, ptr %p.stride, i64 1, i64 64) -// call void @__kmpc_for_static_init_8u(ptr @1, i32 %omp_global_thread_num9, i32 91, ptr %p.lastiter, ptr %p.lowerbound, ptr %p.upperbound, ptr %p.stride, i64 1, i64 1024) +// CHECK: call void @__kmpc_for_static_init_8u(ptr @1, i32 %omp_global_thread_num8, i32 33, ptr %p.lastiter, ptr %p.lowerbound, ptr %p.upperbound, ptr %p.stride, i64 1, i64 64) +// call void @__kmpc_for_static_init_8u(ptr @1, i32 %omp_global_thread_num8, i32 91, ptr %p.lastiter, ptr %p.lowerbound, ptr %p.upperbound, ptr %p.stride, i64 1, i64 1024) // ----- @@ -75,8 +75,8 @@ llvm.func @distribute_wsloop_dist_schedule_chunked(%n: i32, %teams: i32, %thread llvm.return } // CHECK: define internal void @distribute_wsloop_dist_schedule_chunked..omp_par(ptr noalias %tid.addr, ptr noalias %zero.addr, ptr %0) #0 { -// CHECK: call void @__kmpc_for_static_init_4u(ptr @1, i32 %omp_global_thread_num9, i32 34, ptr %p.lastiter, ptr %p.lowerbound, ptr %p.upperbound, ptr %p.stride, i32 1, i32 0) -// CHECK: call void @__kmpc_for_static_init_4u(ptr @1, i32 %omp_global_thread_num9, i32 91, ptr %p.lastiter, ptr %p.lowerbound, ptr %p.upperbound, ptr %p.stride, i32 1, i32 1024) +// CHECK: call void @__kmpc_for_static_init_4u(ptr @1, i32 %omp_global_thread_num8, i32 34, ptr %p.lastiter, ptr %p.lowerbound, ptr %p.upperbound, ptr %p.stride, i32 1, i32 0) +// CHECK: call void @__kmpc_for_static_init_4u(ptr @1, i32 %omp_global_thread_num8, i32 91, ptr %p.lastiter, ptr %p.lowerbound, ptr %p.upperbound, ptr %p.stride, i32 1, i32 1024) llvm.func @distribute_wsloop_dist_schedule_chunked_i64(%n: i32, %teams: i32, %threads: i32) { %0 = llvm.mlir.constant(0 : i64) : i64 @@ -100,8 +100,8 @@ llvm.func @distribute_wsloop_dist_schedule_chunked_i64(%n: i32, %teams: i32, %th llvm.return } // CHECK: define internal void @distribute_wsloop_dist_schedule_chunked_i64..omp_par(ptr noalias %tid.addr, ptr noalias %zero.addr, ptr %0) #0 { -// CHECK: call void @__kmpc_for_static_init_8u(ptr @1, i32 %omp_global_thread_num9, i32 34, ptr %p.lastiter, ptr %p.lowerbound, ptr %p.upperbound, ptr %p.stride, i64 1, i64 0) -// CHECK: call void @__kmpc_for_static_init_8u(ptr @1, i32 %omp_global_thread_num9, i32 91, ptr %p.lastiter, ptr %p.lowerbound, ptr %p.upperbound, ptr %p.stride, i64 1, i64 1024) +// CHECK: call void @__kmpc_for_static_init_8u(ptr @1, i32 %omp_global_thread_num8, i32 34, ptr %p.lastiter, ptr %p.lowerbound, ptr %p.upperbound, ptr %p.stride, i64 1, i64 0) +// CHECK: call void @__kmpc_for_static_init_8u(ptr @1, i32 %omp_global_thread_num8, i32 91, ptr %p.lastiter, ptr %p.lowerbound, ptr %p.upperbound, ptr %p.stride, i64 1, i64 1024) // ----- @@ -126,8 +126,8 @@ llvm.func @distribute_wsloop_schedule_chunked(%n: i32, %teams: i32, %threads: i3 llvm.return } // CHECK: define internal void @distribute_wsloop_schedule_chunked..omp_par(ptr noalias %tid.addr, ptr noalias %zero.addr, ptr %0) #0 { -// CHECK: call void @__kmpc_for_static_init_4u(ptr @1, i32 %omp_global_thread_num9, i32 33, ptr %p.lastiter, ptr %p.lowerbound, ptr %p.upperbound, ptr %p.stride, i32 1, i32 64) -// CHECK: call void @__kmpc_for_static_init_4u(ptr @1, i32 %omp_global_thread_num9, i32 92, ptr %p.lastiter, ptr %p.lowerbound, ptr %p.upperbound, ptr %p.stride, i32 1, i32 0) +// CHECK: call void @__kmpc_for_static_init_4u(ptr @1, i32 %omp_global_thread_num8, i32 33, ptr %p.lastiter, ptr %p.lowerbound, ptr %p.upperbound, ptr %p.stride, i32 1, i32 64) +// CHECK: call void @__kmpc_for_static_init_4u(ptr @1, i32 %omp_global_thread_num8, i32 92, ptr %p.lastiter, ptr %p.lowerbound, ptr %p.upperbound, ptr %p.stride, i32 1, i32 0) llvm.func @distribute_wsloop_schedule_chunked_i64(%n: i32, %teams: i32, %threads: i32) { %0 = llvm.mlir.constant(0 : i64) : i64 @@ -152,8 +152,8 @@ llvm.func @distribute_wsloop_schedule_chunked_i64(%n: i32, %teams: i32, %threads } // CHECK: define internal void @distribute_wsloop_schedule_chunked_i64..omp_par(ptr noalias %tid.addr, ptr noalias %zero.addr, ptr %0) #0 { -// CHECK: call void @__kmpc_for_static_init_8u(ptr @1, i32 %omp_global_thread_num9, i32 33, ptr %p.lastiter, ptr %p.lowerbound, ptr %p.upperbound, ptr %p.stride, i64 1, i64 64) -// CHECK: call void @__kmpc_for_static_init_8u(ptr @1, i32 %omp_global_thread_num9, i32 92, ptr %p.lastiter, ptr %p.lowerbound, ptr %p.upperbound, ptr %p.stride, i64 1, i64 0) +// CHECK: call void @__kmpc_for_static_init_8u(ptr @1, i32 %omp_global_thread_num8, i32 33, ptr %p.lastiter, ptr %p.lowerbound, ptr %p.upperbound, ptr %p.stride, i64 1, i64 64) +// CHECK: call void @__kmpc_for_static_init_8u(ptr @1, i32 %omp_global_thread_num8, i32 92, ptr %p.lastiter, ptr %p.lowerbound, ptr %p.upperbound, ptr %p.stride, i64 1, i64 0) // ----- @@ -177,8 +177,8 @@ llvm.func @distribute_wsloop_no_chunks(%n: i32, %teams: i32, %threads: i32) { llvm.return } // CHECK: define internal void @distribute_wsloop_no_chunks..omp_par(ptr noalias %tid.addr, ptr noalias %zero.addr, ptr %0) #0 { -// CHECK: call void @__kmpc_dist_for_static_init_4u(ptr @1, i32 %omp_global_thread_num9, i32 34, ptr %p.lastiter, ptr %p.lowerbound, ptr %p.upperbound, ptr %p.distupperbound, ptr %p.stride, i32 1, i32 0) -// CHECK: call void @__kmpc_dist_for_static_init_4u(ptr @1, i32 %omp_global_thread_num9, i32 92, ptr %p.lastiter, ptr %p.lowerbound, ptr %p.upperbound, ptr %p.distupperbound10, ptr %p.stride, i32 1, i32 0) +// CHECK: call void @__kmpc_dist_for_static_init_4u(ptr @1, i32 %omp_global_thread_num8, i32 34, ptr %p.lastiter, ptr %p.lowerbound, ptr %p.upperbound, ptr %p.distupperbound, ptr %p.stride, i32 1, i32 0) +// CHECK: call void @__kmpc_dist_for_static_init_4u(ptr @1, i32 %omp_global_thread_num8, i32 92, ptr %p.lastiter, ptr %p.lowerbound, ptr %p.upperbound, ptr %p.distupperbound9, ptr %p.stride, i32 1, i32 0) llvm.func @distribute_wsloop_no_chunks_i64(%n: i32, %teams: i32, %threads: i32) { %0 = llvm.mlir.constant(0 : i64) : i64 @@ -201,5 +201,5 @@ llvm.func @distribute_wsloop_no_chunks_i64(%n: i32, %teams: i32, %threads: i32) llvm.return } // CHECK: define internal void @distribute_wsloop_no_chunks_i64..omp_par(ptr noalias %tid.addr, ptr noalias %zero.addr, ptr %0) #0 { -// CHECK: call void @__kmpc_dist_for_static_init_8u(ptr @1, i32 %omp_global_thread_num9, i32 34, ptr %p.lastiter, ptr %p.lowerbound, ptr %p.upperbound, ptr %p.distupperbound, ptr %p.stride, i64 1, i64 0) -// CHECK: call void @__kmpc_dist_for_static_init_8u(ptr @1, i32 %omp_global_thread_num9, i32 92, ptr %p.lastiter, ptr %p.lowerbound, ptr %p.upperbound, ptr %p.distupperbound10, ptr %p.stride, i64 1, i64 0) \ No newline at end of file +// CHECK: call void @__kmpc_dist_for_static_init_8u(ptr @1, i32 %omp_global_thread_num8, i32 34, ptr %p.lastiter, ptr %p.lowerbound, ptr %p.upperbound, ptr %p.distupperbound, ptr %p.stride, i64 1, i64 0) +// CHECK: call void @__kmpc_dist_for_static_init_8u(ptr @1, i32 %omp_global_thread_num8, i32 92, ptr %p.lastiter, ptr %p.lowerbound, ptr %p.upperbound, ptr %p.distupperbound9, ptr %p.stride, i64 1, i64 0) \ No newline at end of file diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir index a03c19559e9c..fcb937dbc186 100644 --- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir +++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir @@ -156,11 +156,10 @@ llvm.func @test_omp_parallel_if_1(%arg0: i32) -> () { // CHECK: %[[IF_COND_VAR_1:.*]] = icmp slt i32 %[[IF_EXPR_1]], 0 -// CHECK: %[[GTN_IF_1:.*]] = call i32 @__kmpc_global_thread_num(ptr @[[SI_VAR_IF_1:.*]]) // CHECK: br label %[[OUTLINED_CALL_IF_BLOCK_1:.*]] // CHECK: [[OUTLINED_CALL_IF_BLOCK_1]]: // CHECK: %[[I32_IF_COND_VAR_1:.*]] = sext i1 %[[IF_COND_VAR_1]] to i32 -// CHECK: call void @__kmpc_fork_call_if(ptr @[[SI_VAR_IF_1]], i32 0, ptr @[[OMP_OUTLINED_FN_IF_1:.*]], i32 %[[I32_IF_COND_VAR_1]], ptr null) +// CHECK: call void @__kmpc_fork_call_if(ptr @[[SI_VAR_IF_1:.*]], i32 0, ptr @[[OMP_OUTLINED_FN_IF_1:.*]], i32 %[[I32_IF_COND_VAR_1]], ptr null) // CHECK: br label %[[OUTLINED_EXIT_IF_1:.*]] omp.parallel if(%1) { omp.barrier diff --git a/mlir/test/Target/LLVMIR/openmp-outline-infinite-loop.mlir b/mlir/test/Target/LLVMIR/openmp-outline-infinite-loop.mlir index 99f37c7e79be..fdc789c33d5f 100644 --- a/mlir/test/Target/LLVMIR/openmp-outline-infinite-loop.mlir +++ b/mlir/test/Target/LLVMIR/openmp-outline-infinite-loop.mlir @@ -13,7 +13,6 @@ llvm.func @parallel_infinite_loop() -> () { } // CHECK-LABEL: define void @parallel_infinite_loop() { -// CHECK: %[[VAL_2:.*]] = call i32 @__kmpc_global_thread_num(ptr @1) // CHECK: br label %[[VAL_3:.*]] // CHECK: omp_parallel: // CHECK: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @1, i32 0, ptr @parallel_infinite_loop..omp_par) diff --git a/mlir/test/Target/LLVMIR/openmp-parallel-reduction-cleanup.mlir b/mlir/test/Target/LLVMIR/openmp-parallel-reduction-cleanup.mlir index c92c16b2a370..81276732fa3a 100644 --- a/mlir/test/Target/LLVMIR/openmp-parallel-reduction-cleanup.mlir +++ b/mlir/test/Target/LLVMIR/openmp-parallel-reduction-cleanup.mlir @@ -45,7 +45,6 @@ llvm.func @malloc(%arg0 : i64) -> !llvm.ptr llvm.func @free(%arg0 : !llvm.ptr) -> () -// CHECK: %{{.+}} = // Call to the outlined function. // CHECK: call void {{.*}} @__kmpc_fork_call // CHECK-SAME: @[[OUTLINED:[A-Za-z_.][A-Za-z0-9_.]*]] diff --git a/mlir/test/Target/LLVMIR/openmp-parallel-reduction-multiblock.mlir b/mlir/test/Target/LLVMIR/openmp-parallel-reduction-multiblock.mlir index c79c369b69d7..42000405e055 100644 --- a/mlir/test/Target/LLVMIR/openmp-parallel-reduction-multiblock.mlir +++ b/mlir/test/Target/LLVMIR/openmp-parallel-reduction-multiblock.mlir @@ -31,7 +31,6 @@ llvm.func @missordered_blocks_(%arg0: !llvm.ptr {fir.bindc_name = "x"}, %arg1: ! // CHECK: %[[VAL_0:.*]] = alloca { ptr, ptr }, align 8 // CHECK: br label %[[VAL_1:.*]] // CHECK: entry: ; preds = %[[VAL_2:.*]] -// CHECK: %[[VAL_3:.*]] = call i32 @__kmpc_global_thread_num(ptr @1) // CHECK: br label %[[VAL_4:.*]] // CHECK: omp_parallel: ; preds = %[[VAL_1]] // CHECK: %[[VAL_5:.*]] = getelementptr { ptr, ptr }, ptr %[[VAL_0]], i32 0, i32 0 diff --git a/mlir/test/Target/LLVMIR/openmp-reduction-array-sections.mlir b/mlir/test/Target/LLVMIR/openmp-reduction-array-sections.mlir index bd3b77587b8a..bb1d9c68b38e 100644 --- a/mlir/test/Target/LLVMIR/openmp-reduction-array-sections.mlir +++ b/mlir/test/Target/LLVMIR/openmp-reduction-array-sections.mlir @@ -142,17 +142,17 @@ llvm.func @sectionsreduction_(%arg0: !llvm.ptr {fir.bindc_name = "x"}) attribute // CHECK: br label %[[VAL_46:.*]] // CHECK: omp.reduction.nonatomic.body: ; preds = %[[VAL_43]] // CHECK: br label %[[VAL_47:.*]] -// CHECK: omp.reduction.nonatomic.body16: ; preds = %[[VAL_48:.*]], %[[VAL_46]] +// CHECK: omp.reduction.nonatomic.body15: ; preds = %[[VAL_48:.*]], %[[VAL_46]] // CHECK: %[[VAL_49:.*]] = phi i64 [ %[[VAL_50:.*]], %[[VAL_48]] ], [ 0, %[[VAL_46]] ] // CHECK: %[[VAL_51:.*]] = icmp sgt i64 %[[VAL_49]], 0 // CHECK: br i1 %[[VAL_51]], label %[[VAL_48]], label %[[VAL_52:.*]] -// CHECK: omp.reduction.nonatomic.body18: ; preds = %[[VAL_47]] +// CHECK: omp.reduction.nonatomic.body17: ; preds = %[[VAL_47]] // CHECK: br label %[[VAL_53:.*]] -// CHECK: omp.region.cont15: ; preds = %[[VAL_52]] +// CHECK: omp.region.cont14: ; preds = %[[VAL_52]] // CHECK: %[[VAL_54:.*]] = phi ptr [ %[[VAL_19]], %[[VAL_52]] ] // CHECK: call void @__kmpc_end_reduce(ptr @1, i32 %[[VAL_40]], ptr @.gomp_critical_user_.reduction.var) // CHECK: br label %[[VAL_42]] -// CHECK: omp.reduction.nonatomic.body17: ; preds = %[[VAL_47]] +// CHECK: omp.reduction.nonatomic.body16: ; preds = %[[VAL_47]] // CHECK: %[[VAL_50]] = sub i64 %[[VAL_49]], 1 // CHECK: br label %[[VAL_47]] // CHECK: reduce.finalize: ; preds = %[[VAL_53]], %[[VAL_37]] @@ -164,9 +164,9 @@ llvm.func @sectionsreduction_(%arg0: !llvm.ptr {fir.bindc_name = "x"}) attribute // CHECK: %[[VAL_58:.*]] = ptrtoint ptr %[[VAL_56]] to i64 // CHECK: %[[VAL_59:.*]] = icmp ne i64 %[[VAL_58]], 0 // CHECK: br i1 %[[VAL_59]], label %[[VAL_60:.*]], label %[[VAL_61:.*]] -// CHECK: omp.reduction.cleanup22: ; preds = %[[VAL_60]], %[[VAL_57]] +// CHECK: omp.reduction.cleanup21: ; preds = %[[VAL_60]], %[[VAL_57]] // CHECK: br label %[[VAL_62:.*]] -// CHECK: omp.region.cont20: ; preds = %[[VAL_61]] +// CHECK: omp.region.cont19: ; preds = %[[VAL_61]] // CHECK: br label %[[VAL_63:.*]] // CHECK: omp.region.cont: ; preds = %[[VAL_62]] // CHECK: br label %[[VAL_64:.*]] @@ -174,7 +174,7 @@ llvm.func @sectionsreduction_(%arg0: !llvm.ptr {fir.bindc_name = "x"}) attribute // CHECK: br label %[[FINI:.fini.*]] // CHECK: [[FINI]]: // CHECK: br label %[[EXIT:.*]] -// CHECK: omp.reduction.cleanup21: ; preds = %[[VAL_57]] +// CHECK: omp.reduction.cleanup20: ; preds = %[[VAL_57]] // CHECK: br label %[[VAL_61]] // CHECK: omp_section_loop.body: ; preds = %[[VAL_32]] // CHECK: %[[VAL_66:.*]] = add i32 %[[VAL_30]], %[[VAL_24]] diff --git a/mlir/test/Target/LLVMIR/openmp-reduction-byref.mlir b/mlir/test/Target/LLVMIR/openmp-reduction-byref.mlir index a0ca31b7d811..cc7a52bcc41d 100644 --- a/mlir/test/Target/LLVMIR/openmp-reduction-byref.mlir +++ b/mlir/test/Target/LLVMIR/openmp-reduction-byref.mlir @@ -33,7 +33,6 @@ llvm.return %0 : i32 } -// CHECK: %{{.+}} = // Call to the outlined function. // CHECK: call void {{.*}} @__kmpc_fork_call // CHECK-SAME: @[[OUTLINED:[A-Za-z_.][A-Za-z0-9_.]*]] diff --git a/mlir/test/Target/LLVMIR/openmp-reduction-init-arg.mlir b/mlir/test/Target/LLVMIR/openmp-reduction-init-arg.mlir index cb30d3b2f447..d3be9ea2394c 100644 --- a/mlir/test/Target/LLVMIR/openmp-reduction-init-arg.mlir +++ b/mlir/test/Target/LLVMIR/openmp-reduction-init-arg.mlir @@ -37,7 +37,6 @@ module { // CHECK: %[[VAL_2:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, i64 1, align 8 // CHECK: br label %[[VAL_3:.*]] // CHECK: entry: ; preds = %[[VAL_4:.*]] -// CHECK: %[[VAL_5:.*]] = call i32 @__kmpc_global_thread_num(ptr @1) // CHECK: br label %[[VAL_6:.*]] // CHECK: omp_parallel: ; preds = %[[VAL_3]] // CHECK: %[[VAL_7:.*]] = getelementptr { ptr, ptr }, ptr %[[VAL_0]], i32 0, i32 0