diff --git a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp index fce06dfde0ed..633bddd9a7eb 100644 --- a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp +++ b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp @@ -690,6 +690,17 @@ void GISelValueTracking::computeKnownBitsImpl(Register R, KnownBits &Known, } break; } + case TargetOpcode::G_CTTZ: + case TargetOpcode::G_CTTZ_ZERO_UNDEF: { + KnownBits SrcOpKnown; + computeKnownBitsImpl(MI.getOperand(1).getReg(), SrcOpKnown, DemandedElts, + Depth + 1); + // If we have a known 1, its position is our upper bound + unsigned PossibleTZ = SrcOpKnown.countMaxTrailingZeros(); + unsigned LowBits = llvm::bit_width(PossibleTZ); + Known.Zero.setBitsFrom(LowBits); + break; + } case TargetOpcode::G_CTLZ: case TargetOpcode::G_CTLZ_ZERO_UNDEF: { KnownBits SrcOpKnown; diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-cttz.mir b/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-cttz.mir new file mode 100644 index 000000000000..0c66517f31bd --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-cttz.mir @@ -0,0 +1,111 @@ +# NOTE: Assertions have been autogenerated by utils/update_givaluetracking_test_checks.py UTC_ARGS: --version 6 +# RUN: llc -mtriple=aarch64 -passes="print" -filetype=null %s 2>&1 | FileCheck %s + +--- +name: CTTZNoKnown8 +body: | + bb.1: + ; CHECK-LABEL: name: @CTTZNoKnown8 + ; CHECK-NEXT: %0:_ KnownBits:???????? SignBits:1 + ; CHECK-NEXT: %1:_ KnownBits:000000000000???? SignBits:12 + %0:_(s8) = COPY $b0 + %1:_(s16) = G_CTTZ %0 +... + +--- +name: CTTZNoKnown16 +body: | + bb.1: + ; CHECK-LABEL: name: @CTTZNoKnown16 + ; CHECK-NEXT: %0:_ KnownBits:???????????????? SignBits:1 + ; CHECK-NEXT: %1:_ KnownBits:00000000000????? SignBits:11 + %0:_(s16) = COPY $h0 + %1:_(s16) = G_CTTZ %0 +... + +--- +name: CTTZNoKnown32 +body: | + bb.1: + ; CHECK-LABEL: name: @CTTZNoKnown32 + ; CHECK-NEXT: %0:_ KnownBits:???????????????????????????????? SignBits:1 + ; CHECK-NEXT: %1:_ KnownBits:0000000000?????? SignBits:10 + %0:_(s32) = COPY $s0 + %1:_(s16) = G_CTTZ %0 +... + + +--- +name: CTTZHalfKnown8 +body: | + bb.1: + ; CHECK-LABEL: name: @CTTZHalfKnown8 + ; CHECK-NEXT: %0:_ KnownBits:???????? SignBits:1 + ; CHECK-NEXT: %1:_ KnownBits:00000100 SignBits:5 + ; CHECK-NEXT: %2:_ KnownBits:?????1?? SignBits:1 + ; CHECK-NEXT: %3:_ KnownBits:000000?? SignBits:6 + %0:_(s8) = COPY $b0 + %1:_(s8) = G_CONSTANT i8 4 + %2:_(s8) = G_OR %0, %1 + %3:_(s8) = G_CTTZ %2 +... + +--- +name: CTTZHalfKnown16 +body: | + bb.1: + ; CHECK-LABEL: name: @CTTZHalfKnown16 + ; CHECK-NEXT: %0:_ KnownBits:???????????????? SignBits:1 + ; CHECK-NEXT: %1:_ KnownBits:0000000001000000 SignBits:9 + ; CHECK-NEXT: %2:_ KnownBits:?????????1?????? SignBits:1 + ; CHECK-NEXT: %3:_ KnownBits:0000000000000??? SignBits:13 + %0:_(s16) = COPY $h0 + %1:_(s16) = G_CONSTANT i16 64 + %2:_(s16) = G_OR %0, %1 + %3:_(s16) = G_CTTZ %2 +... + +--- +name: CTTZHalfConst8Zero +body: | + bb.1: + ; CHECK-LABEL: name: @CTTZHalfConst8Zero + ; CHECK-NEXT: %0:_ KnownBits:00000000 SignBits:8 + ; CHECK-NEXT: %1:_ KnownBits:0000???? SignBits:4 + %0:_(s8) = G_CONSTANT i8 0 + %1:_(s8) = G_CTTZ %0 +... + +--- +name: CTTZHalfConst16Zero +body: | + bb.1: + ; CHECK-LABEL: name: @CTTZHalfConst16Zero + ; CHECK-NEXT: %0:_ KnownBits:0000000000000000 SignBits:16 + ; CHECK-NEXT: %1:_ KnownBits:00000000000????? SignBits:11 + %0:_(s16) = G_CONSTANT i16 0 + %1:_(s16) = G_CTTZ %0 +... + +--- +name: CTTZHalfConst8 +body: | + bb.1: + ; CHECK-LABEL: name: @CTTZHalfConst8 + ; CHECK-NEXT: %0:_ KnownBits:00000100 SignBits:5 + ; CHECK-NEXT: %1:_ KnownBits:000000?? SignBits:6 + %0:_(s8) = G_CONSTANT i8 4 + %1:_(s8) = G_CTTZ %0 +... + +--- +name: CTTZHalfConst16 +body: | + bb.1: + ; CHECK-LABEL: name: @CTTZHalfConst16 + ; CHECK-NEXT: %0:_ KnownBits:0000000000000100 SignBits:13 + ; CHECK-NEXT: %1:_ KnownBits:00000000000000?? SignBits:14 + %0:_(s16) = G_CONSTANT i16 4 + %1:_(s16) = G_CTTZ %0 +... + diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz-zero-undef.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz-zero-undef.mir index ab82a1bb5cc6..ecad6cb8bbd1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz-zero-undef.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz-zero-undef.mir @@ -64,9 +64,7 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[COPY]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[CTTZ_ZERO_UNDEF]], [[C]] - ; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[CTTZ_ZERO_UNDEF]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s16) = G_CTTZ_ZERO_UNDEF %0 %2:_(s32) = G_ZEXT %1 @@ -85,9 +83,7 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[COPY]](s32) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTTZ_ZERO_UNDEF]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s16) = G_TRUNC %0 %2:_(s16) = G_CTTZ_ZERO_UNDEF %1 @@ -152,11 +148,8 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTTZ_ZERO_UNDEF]](s32) ; CHECK-NEXT: [[CTTZ_ZERO_UNDEF1:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[LSHR]](s32) ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[CTTZ_ZERO_UNDEF1]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[SHL]] ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 @@ -177,9 +170,7 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[COPY]](s32) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTTZ_ZERO_UNDEF]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s7) = G_TRUNC %0 %2:_(s7) = G_CTTZ_ZERO_UNDEF %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz.mir index 0ef31a602961..46d5bdbd309c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz.mir @@ -93,9 +93,7 @@ body: | ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY]], [[C]] ; CHECK-NEXT: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[OR]](s32) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTTZ_ZERO_UNDEF]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s16) = G_TRUNC %0 %2:_(s16) = G_CTTZ %1 @@ -169,11 +167,8 @@ body: | ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[C1]] ; CHECK-NEXT: [[CTTZ_ZERO_UNDEF1:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[OR1]](s32) ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[CTTZ_ZERO_UNDEF1]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[SHL]] ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 @@ -196,9 +191,7 @@ body: | ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY]], [[C]] ; CHECK-NEXT: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[OR]](s32) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTTZ_ZERO_UNDEF]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s7) = G_TRUNC %0 %2:_(s7) = G_CTTZ %1 diff --git a/llvm/test/CodeGen/AMDGPU/cttz.ll b/llvm/test/CodeGen/AMDGPU/cttz.ll index d17cdeb8917f..6cc0f9f99823 100644 --- a/llvm/test/CodeGen/AMDGPU/cttz.ll +++ b/llvm/test/CodeGen/AMDGPU/cttz.ll @@ -1561,7 +1561,6 @@ define amdgpu_kernel void @v_cttz_i7_sel_eq_neg1(ptr addrspace(1) noalias %out, ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0x7f, vcc_lo ; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 0x7f, v0 ; GFX10-GISEL-NEXT: global_store_byte v1, v0, s[0:1] ; GFX10-GISEL-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-trailing-zeros-undef.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-trailing-zeros-undef.mir index 8255e01d7186..148aac1ab46f 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/legalize-trailing-zeros-undef.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-trailing-zeros-undef.mir @@ -18,9 +18,7 @@ body: | ; X64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 34359738368 ; X64-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[COPY]], [[C]] ; X64-NEXT: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s64) = G_CTTZ_ZERO_UNDEF [[OR]](s64) - ; X64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 34359738367 - ; X64-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[CTTZ_ZERO_UNDEF]], [[C1]] - ; X64-NEXT: RET 0, implicit [[AND]](s64) + ; X64-NEXT: RET 0, implicit [[CTTZ_ZERO_UNDEF]](s64) ; ; X86-LABEL: name: test_cttz35 ; X86: [[COPY:%[0-9]+]]:_(s64) = COPY $rdx diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-trailing-zeros.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-trailing-zeros.mir index a429e89e8f25..17469289821f 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/legalize-trailing-zeros.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-trailing-zeros.mir @@ -18,9 +18,7 @@ body: | ; X64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 34359738368 ; X64-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[COPY]], [[C]] ; X64-NEXT: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s64) = G_CTTZ_ZERO_UNDEF [[OR]](s64) - ; X64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 34359738367 - ; X64-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[CTTZ_ZERO_UNDEF]], [[C1]] - ; X64-NEXT: RET 0, implicit [[AND]](s64) + ; X64-NEXT: RET 0, implicit [[CTTZ_ZERO_UNDEF]](s64) ; ; X86-LABEL: name: test_cttz35 ; X86: [[COPY:%[0-9]+]]:_(s64) = COPY $rdx