Petr Maj a1358225c5
Improvements to RS4GC BDV Algorithm (#69795)
Previously, after the algorithm reached a fixpoint, the state was manually
patched by emitting BDVs (base defining values) for extractelement (EE)
instructions earlier, while marking some (but not all) vector and
vector<->scalar instructions as conflicts. This caused two problems: not
all instructions that required BDVs had them emitted, and, because the
patching happened after the fixpoint, the extra BDVs did not propagate to
their users.

This change fixes both by rewriting the logic for BDV insertion and
patching. Instead of inserting the BDV for an EE instruction earlier, it
merely marks every EE instruction as a conflict. The two-phase insertion
algorithm (first insert empty instructions and patch the BDVState, then
actually connect the BDV instructions to their input bases) then ensures
correct propagation to all users. Furthermore, shufflevector instructions
and all IE (insertelement) instructions are also conservatively marked as
conflicts, fixing the second problem.

This change does not fix the handling of constant values and vectors in
the BDV. 

---------

Co-authored-by: Petr Maj <pmaj@azul.com>
2023-11-02 20:19:40 -04:00

57 lines
4.5 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
; RUN: opt < %s -passes=rewrite-statepoints-for-gc -S 2>&1 | FileCheck %s
; This test feeds the functions below through the rewrite-statepoints-for-gc
; pass and verifies the textual IR it produces. Both functions build vectors
; of addrspace(1) pointers with extractelement/insertelement and keep the
; result live across a call, so the expected output must contain a separate
; base value (tagged !is_base_value) for each of those instructions.
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128-ni:1-p2:32:8:8:32-ni:2"
target triple = "x86_64-unknown-linux-gnu"
; Single extractelement -> insertelement chain that is live across a call.
; The expected output duplicates both the extractelement and the insertelement
; as "_base" instructions operating on the zeroinitializer vector (the base of
; the vector GEP), and passes the derived vector together with its base vector
; in the "gc-live" bundle of the statepoint.
define void @barney() gc "statepoint-example" {
; CHECK-LABEL: define void @barney() gc "statepoint-example" {
; CHECK-NEXT: bb:
; CHECK-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr i8, <2 x ptr addrspace(1)> zeroinitializer, <2 x i64> <i64 16, i64 8>
; CHECK-NEXT: [[EXTRACTELEMENT_BASE:%.*]] = extractelement <2 x ptr addrspace(1)> zeroinitializer, i32 0, !is_base_value !0
; CHECK-NEXT: [[EXTRACTELEMENT:%.*]] = extractelement <2 x ptr addrspace(1)> [[GETELEMENTPTR]], i32 0
; CHECK-NEXT: [[INSERTELEMENT_BASE:%.*]] = insertelement <2 x ptr addrspace(1)> zeroinitializer, ptr addrspace(1) [[EXTRACTELEMENT_BASE]], i32 0, !is_base_value !0
; CHECK-NEXT: [[INSERTELEMENT:%.*]] = insertelement <2 x ptr addrspace(1)> zeroinitializer, ptr addrspace(1) [[EXTRACTELEMENT]], i32 0
; CHECK-NEXT: [[STATEPOINT_TOKEN:%.*]] = call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 2882400000, i32 0, ptr elementtype(i8 ()) @foo, i32 0, i32 0, i32 0, i32 0) [ "gc-live"(<2 x ptr addrspace(1)> [[INSERTELEMENT]], <2 x ptr addrspace(1)> [[INSERTELEMENT_BASE]]) ]
; CHECK-NEXT: [[INSERTELEMENT_RELOCATED:%.*]] = call coldcc <2 x ptr addrspace(1)> @llvm.experimental.gc.relocate.v2p1(token [[STATEPOINT_TOKEN]], i32 1, i32 0)
; CHECK-NEXT: [[INSERTELEMENT_BASE_RELOCATED:%.*]] = call coldcc <2 x ptr addrspace(1)> @llvm.experimental.gc.relocate.v2p1(token [[STATEPOINT_TOKEN]], i32 1, i32 1)
; CHECK-NEXT: [[EXTRACTELEMENT1:%.*]] = extractelement <2 x ptr addrspace(1)> [[INSERTELEMENT_RELOCATED]], i32 0
; CHECK-NEXT: ret void
;
bb:
; Derived pointers: a vector GEP with per-lane offsets off a zeroinitializer vector.
%getelementptr = getelementptr i8, <2 x ptr addrspace(1)> zeroinitializer, <2 x i64> <i64 16, i64 8>
; Vector -> scalar: pull lane 0 out of the derived vector.
%extractelement = extractelement <2 x ptr addrspace(1)> %getelementptr, i32 0
; Scalar -> vector: place the derived scalar into lane 0 of a fresh vector.
%insertelement = insertelement <2 x ptr addrspace(1)> zeroinitializer, ptr addrspace(1) %extractelement, i32 0
; This call is what the expected output shows rewritten into a gc.statepoint.
%call = call i8 @foo()
; Use after the call keeps %insertelement live across it, forcing a relocation.
%extractelement1 = extractelement <2 x ptr addrspace(1)> %insertelement, i32 0
ret void
}
; Same shape as @barney, but adds a second insertelement (%other) that consumes
; the first one, to ensure that transitive uses of an insertelement emit proper
; code as well: the expected output has a base for %other that is built from
; the base of the first insertelement and the base of the extractelement.
define void @bart() gc "statepoint-example" {
; CHECK-LABEL: define void @bart() gc "statepoint-example" {
; CHECK-NEXT: always_continue:
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, <2 x ptr addrspace(1)> zeroinitializer, <2 x i64> <i64 16, i64 8>
; CHECK-NEXT: [[BASE_EE:%.*]] = extractelement <2 x ptr addrspace(1)> zeroinitializer, i32 0, !is_base_value !0
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x ptr addrspace(1)> [[TMP0]], i32 0
; CHECK-NEXT: [[BASE_IE:%.*]] = insertelement <2 x ptr addrspace(1)> zeroinitializer, ptr addrspace(1) [[BASE_EE]], i32 0, !is_base_value !0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x ptr addrspace(1)> zeroinitializer, ptr addrspace(1) [[TMP1]], i32 0
; CHECK-NEXT: [[OTHER_BASE:%.*]] = insertelement <2 x ptr addrspace(1)> [[BASE_IE]], ptr addrspace(1) [[BASE_EE]], i32 0, !is_base_value !0
; CHECK-NEXT: [[OTHER:%.*]] = insertelement <2 x ptr addrspace(1)> [[TMP2]], ptr addrspace(1) [[TMP1]], i32 0
; CHECK-NEXT: [[STATEPOINT_TOKEN:%.*]] = call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 2882400000, i32 0, ptr elementtype(i8 ()) @foo, i32 0, i32 0, i32 0, i32 0) [ "gc-live"(<2 x ptr addrspace(1)> [[OTHER]], <2 x ptr addrspace(1)> [[OTHER_BASE]]) ]
; CHECK-NEXT: [[OTHER_RELOCATED:%.*]] = call coldcc <2 x ptr addrspace(1)> @llvm.experimental.gc.relocate.v2p1(token [[STATEPOINT_TOKEN]], i32 1, i32 0)
; CHECK-NEXT: [[OTHER_BASE_RELOCATED:%.*]] = call coldcc <2 x ptr addrspace(1)> @llvm.experimental.gc.relocate.v2p1(token [[STATEPOINT_TOKEN]], i32 1, i32 1)
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x ptr addrspace(1)> [[OTHER_RELOCATED]], i32 0
; CHECK-NEXT: ret void
;
always_continue:
; Derived pointers: a vector GEP with per-lane offsets off a zeroinitializer vector.
%0 = getelementptr i8, <2 x ptr addrspace(1)> zeroinitializer, <2 x i64> <i64 16, i64 8>
; Vector -> scalar: pull lane 0 out of the derived vector.
%1 = extractelement <2 x ptr addrspace(1)> %0, i32 0
; Scalar -> vector: first insertelement, into a fresh zeroinitializer vector.
%2 = insertelement <2 x ptr addrspace(1)> zeroinitializer, ptr addrspace(1) %1, i32 0
; Second insertelement takes the first one as its vector operand (the transitive use).
%other = insertelement <2 x ptr addrspace(1)> %2, ptr addrspace(1) %1, i32 0
; This call is what the expected output shows rewritten into a gc.statepoint.
%3 = call i8 @foo()
; Use after the call keeps %other live across it, forcing a relocation.
%4 = extractelement <2 x ptr addrspace(1)> %other, i32 0
ret void
}
; External callee with no definition in this file; the calls to it in the
; functions above are the ones expected to be wrapped in a gc.statepoint.
declare i8 @foo()