
Previously, after the algorithm reached a fixpoint, the state was manually patched by emitting BDVs (base defining values) for extractelement (EE) instructions earlier, while marking some (but not all) vector and vector<->scalar instructions as conflicts. This caused issues, as not all instructions that required BDVs had them emitted and, because the patching happened after the fixpoint, the extra BDVs did not propagate to their users. This change fixes both by rewriting the logic for BDV insertion and patching. Instead of inserting the BDV for an EE instruction earlier, it merely marks every EE instruction as a conflict. The two-phase insertion algorithm (first insert empty instructions and patch the BDVState, then actually connect the BDV instructions to their input bases) then ensures correct propagation to all of their users. Furthermore, the shufflevector instruction as well as all insertelement (IE) instructions are conservatively marked as conflicts as well, fixing the second problem. This change does not fix the handling of constant values and vectors in the BDV. --------- Co-authored-by: Petr Maj <pmaj@azul.com>
29 lines
1.0 KiB
LLVM
29 lines
1.0 KiB
LLVM
; RUN: opt -S -passes=rewrite-statepoints-for-gc < %s | FileCheck %s
|
|
;
|
|
; A test to make sure that we can look through bitcasts of
|
|
; vector types when a base pointer is contained in a vector.
|
|
|
|
; Module-level data layout. The trailing "ni:1" component declares address
; space 1 as a non-integral pointer address space; every pointer in this test
; is a `ptr addrspace(1)`.
; NOTE(review): presumably addrspace(1) is what the "statepoint-example" GC
; strategy treats as GC-managed references -- confirm against the LLVM
; Statepoints documentation.
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1"
|
|
; Target triple the test module is written for.
target triple = "x86_64-unknown-linux-gnu"
|
|
|
|
; External function with no visible body. @test calls it once, and that call
; is the one the FileCheck directives in @test expect to see rewritten into a
; statepoint.
declare ptr addrspace(1) @foo()
|
|
|
|
; @test: regression test for base-pointer discovery through a vector bitcast.
;
; Shape of the input IR (see the instructions at the bottom of the body):
;   1. %bc  - a bitcast of an undef <8 x ptr addrspace(1)> vector to the same
;             vector type.
;   2. %ptr - lane 7 of %bc, extracted as a scalar pointer.
;   3. a call to @foo carrying an empty "deopt" operand bundle.
;   4. an atomic load through %ptr *after* the call, so %ptr is still in use
;      once the call has returned.
;
; The FileCheck directives below require, in this order: the bitcast, two
; extractelement instructions (captured as p1 and p2), the statepoint
; intrinsic, a gc.relocate named after p2, a gc.relocate named after p1, and
; finally the atomic load.
;
; NOTE(review): the input contains only one extractelement, so the second one
; is presumably inserted by the pass as the base value for %ptr -- confirm
; against actual `opt` output.
; NOTE(review): the "uwtable" attribute named in the comment below does not
; appear on the define line visible here; the comment may be stale.
; Function Attrs: uwtable
|
|
define i32 @test() gc "statepoint-example" {
|
|
; CHECK-LABEL: @test
|
|
entry:
|
|
; CHECK-LABEL: entry
|
|
; CHECK: %bc = bitcast
|
|
; The p1 pattern admits '.' in the captured name; the p2 pattern below does not.
; CHECK: %[[p1:[A-Za-z0-9_.]+]] = extractelement
|
|
; CHECK: %[[p2:[A-Za-z0-9_]+]] = extractelement
|
|
; CHECK: llvm.experimental.gc.statepoint
|
|
; The relocations are expected in the opposite order to the captures: p2 first,
; then p1.
; CHECK: %[[p2]].relocated = {{.+}} @llvm.experimental.gc.relocate
|
|
; CHECK: %[[p1]].relocated = {{.+}} @llvm.experimental.gc.relocate
|
|
; CHECK: load atomic
|
|
; Source and destination vector types are identical, so this bitcast changes
; no types; it is here so the pass has to look through a vector bitcast.
%bc = bitcast <8 x ptr addrspace(1)> undef to <8 x ptr addrspace(1)>
|
|
; Scalar pointer taken from lane 7 of the bitcast vector.
%ptr= extractelement <8 x ptr addrspace(1)> %bc, i32 7
|
|
; The call site the directives above expect to become a statepoint. Its result
; (%0) is never used.
%0 = call ptr addrspace(1) @foo() [ "deopt"() ]
|
|
; Use of %ptr after the call: this is what makes %ptr live across the call.
%1 = load atomic i32, ptr addrspace(1) %ptr unordered, align 4
|
|
ret i32 %1
|
|
}
|