
Previously, after the algorithm reached its fixpoint, the state was patched manually: BDVs (base defining values) for extractelement (EE) instructions were emitted early, and some (but not all) vector and vector<->scalar instructions were marked as conflicts. This caused two problems: not every instruction that required a BDV had one emitted, and, because the patching happened after the fixpoint, the extra BDVs did not propagate to their users. This change fixes both by rewriting the logic for BDV insertion and patching. Instead of inserting the BDV for an EE instruction early, it merely marks every EE instruction as a conflict. The two-phase insertion algorithm (first insert empty instructions and patch the BDVState, then actually connect the BDV instructions to their input bases) then ensures correct propagation to all of their users. Furthermore, the shufflevector instruction and all instances of the insertelement (IE) instruction are conservatively marked as conflicts as well, fixing the second problem. This change does not fix the handling of constant values and vectors in the BDV. --------- Co-authored-by: Petr Maj <pmaj@azul.com>
57 lines
4.5 KiB
LLVM
57 lines
4.5 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
|
|
; RUN: opt < %s -passes=rewrite-statepoints-for-gc -S 2>&1 | FileCheck %s
|
|
; Module-level data layout and target triple for the test. The functions below
; operate on addrspace(1) pointers; the layout string carries `ni:1` and `ni:2`
; components (non-integral address spaces in the LangRef data layout grammar).
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128-ni:1-p2:32:8:8:32-ni:2"
|
|
target triple = "x86_64-unknown-linux-gnu"
|
|
|
|
; A vector of addrspace(1) pointers is derived with a vector getelementptr from
; a zeroinitializer vector, one lane is extracted and re-inserted into a fresh
; vector, and that vector is used again after the call to @foo.
; The expected output keeps a separate base chain (an extractelement and an
; insertelement, both tagged !is_base_value), lists the derived vector and its
; base in the statepoint's "gc-live" bundle, relocates both, and rewrites the
; post-call extractelement to read the relocated vector.
define void @barney() gc "statepoint-example" {
|
|
; CHECK-LABEL: define void @barney() gc "statepoint-example" {
|
|
; CHECK-NEXT: bb:
|
|
; CHECK-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr i8, <2 x ptr addrspace(1)> zeroinitializer, <2 x i64> <i64 16, i64 8>
|
|
; CHECK-NEXT: [[EXTRACTELEMENT_BASE:%.*]] = extractelement <2 x ptr addrspace(1)> zeroinitializer, i32 0, !is_base_value !0
|
|
; CHECK-NEXT: [[EXTRACTELEMENT:%.*]] = extractelement <2 x ptr addrspace(1)> [[GETELEMENTPTR]], i32 0
|
|
; CHECK-NEXT: [[INSERTELEMENT_BASE:%.*]] = insertelement <2 x ptr addrspace(1)> zeroinitializer, ptr addrspace(1) [[EXTRACTELEMENT_BASE]], i32 0, !is_base_value !0
|
|
; CHECK-NEXT: [[INSERTELEMENT:%.*]] = insertelement <2 x ptr addrspace(1)> zeroinitializer, ptr addrspace(1) [[EXTRACTELEMENT]], i32 0
|
|
; CHECK-NEXT: [[STATEPOINT_TOKEN:%.*]] = call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 2882400000, i32 0, ptr elementtype(i8 ()) @foo, i32 0, i32 0, i32 0, i32 0) [ "gc-live"(<2 x ptr addrspace(1)> [[INSERTELEMENT]], <2 x ptr addrspace(1)> [[INSERTELEMENT_BASE]]) ]
|
|
; CHECK-NEXT: [[INSERTELEMENT_RELOCATED:%.*]] = call coldcc <2 x ptr addrspace(1)> @llvm.experimental.gc.relocate.v2p1(token [[STATEPOINT_TOKEN]], i32 1, i32 0)
|
|
; CHECK-NEXT: [[INSERTELEMENT_BASE_RELOCATED:%.*]] = call coldcc <2 x ptr addrspace(1)> @llvm.experimental.gc.relocate.v2p1(token [[STATEPOINT_TOKEN]], i32 1, i32 1)
|
|
; CHECK-NEXT: [[EXTRACTELEMENT1:%.*]] = extractelement <2 x ptr addrspace(1)> [[INSERTELEMENT_RELOCATED]], i32 0
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
bb:
|
|
; Derived pointer vector: element-wise offsets of 16 and 8 bytes from a zeroinitializer vector.
  %getelementptr = getelementptr i8, <2 x ptr addrspace(1)> zeroinitializer, <2 x i64> <i64 16, i64 8>
|
|
; Vector -> scalar: take lane 0 of the derived vector.
  %extractelement = extractelement <2 x ptr addrspace(1)> %getelementptr, i32 0
|
|
; Scalar -> vector: place the extracted pointer into lane 0 of a fresh vector.
  %insertelement = insertelement <2 x ptr addrspace(1)> zeroinitializer, ptr addrspace(1) %extractelement, i32 0
|
|
; The call that the expected output wraps in a gc.statepoint.
  %call = call i8 @foo()
|
|
; Use after the call: keeps %insertelement live across the call.
  %extractelement1 = extractelement <2 x ptr addrspace(1)> %insertelement, i32 0
|
|
  ret void
|
|
}
|
|
|
|
|
|
; Same shape as @barney, but a second insertelement (%other) is chained onto
; the first one and it is %other that is used after the call to @foo. This
; ensures that transitive uses of insertelement emit proper code as well: the
; expected output carries a second !is_base_value insertelement built on top
; of the first base insertelement, and both %other and that base are listed
; in "gc-live" and relocated.
|
|
define void @bart() gc "statepoint-example" {
|
|
; CHECK-LABEL: define void @bart() gc "statepoint-example" {
|
|
; CHECK-NEXT: always_continue:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, <2 x ptr addrspace(1)> zeroinitializer, <2 x i64> <i64 16, i64 8>
|
|
; CHECK-NEXT: [[BASE_EE:%.*]] = extractelement <2 x ptr addrspace(1)> zeroinitializer, i32 0, !is_base_value !0
|
|
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x ptr addrspace(1)> [[TMP0]], i32 0
|
|
; CHECK-NEXT: [[BASE_IE:%.*]] = insertelement <2 x ptr addrspace(1)> zeroinitializer, ptr addrspace(1) [[BASE_EE]], i32 0, !is_base_value !0
|
|
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x ptr addrspace(1)> zeroinitializer, ptr addrspace(1) [[TMP1]], i32 0
|
|
; CHECK-NEXT: [[OTHER_BASE:%.*]] = insertelement <2 x ptr addrspace(1)> [[BASE_IE]], ptr addrspace(1) [[BASE_EE]], i32 0, !is_base_value !0
|
|
; CHECK-NEXT: [[OTHER:%.*]] = insertelement <2 x ptr addrspace(1)> [[TMP2]], ptr addrspace(1) [[TMP1]], i32 0
|
|
; CHECK-NEXT: [[STATEPOINT_TOKEN:%.*]] = call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 2882400000, i32 0, ptr elementtype(i8 ()) @foo, i32 0, i32 0, i32 0, i32 0) [ "gc-live"(<2 x ptr addrspace(1)> [[OTHER]], <2 x ptr addrspace(1)> [[OTHER_BASE]]) ]
|
|
; CHECK-NEXT: [[OTHER_RELOCATED:%.*]] = call coldcc <2 x ptr addrspace(1)> @llvm.experimental.gc.relocate.v2p1(token [[STATEPOINT_TOKEN]], i32 1, i32 0)
|
|
; CHECK-NEXT: [[OTHER_BASE_RELOCATED:%.*]] = call coldcc <2 x ptr addrspace(1)> @llvm.experimental.gc.relocate.v2p1(token [[STATEPOINT_TOKEN]], i32 1, i32 1)
|
|
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x ptr addrspace(1)> [[OTHER_RELOCATED]], i32 0
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
always_continue:
|
|
; Same getelementptr / extractelement / insertelement chain as in @barney, using unnamed values.
  %0 = getelementptr i8, <2 x ptr addrspace(1)> zeroinitializer, <2 x i64> <i64 16, i64 8>
|
|
  %1 = extractelement <2 x ptr addrspace(1)> %0, i32 0
|
|
  %2 = insertelement <2 x ptr addrspace(1)> zeroinitializer, ptr addrspace(1) %1, i32 0
|
|
; Second insertelement layered on %2: the transitive insertelement user this test targets.
  %other = insertelement <2 x ptr addrspace(1)> %2, ptr addrspace(1) %1, i32 0
|
|
; The call that the expected output wraps in a gc.statepoint.
  %3 = call i8 @foo()
|
|
; Use after the call: keeps %other live across the call.
  %4 = extractelement <2 x ptr addrspace(1)> %other, i32 0
|
|
  ret void
|
|
}
|
|
|
|
; External callee invoked by @barney and @bart; the expected output wraps each call to it in a gc.statepoint.
declare i8 @foo() |