llvm-project/llvm/test/Transforms/InstCombine/matrix-multiplication-negation.ll
Luke Lau d0c1ea928c
[InstCombine] Pull unary shuffles through fneg/fabs (#144933)
This canonicalizes fneg/fabs (shuffle X, poison, mask) -> shuffle
(fneg/fabs X), poison, mask

This undoes part of b331a7ebc1e02f9939d1a4a1509e7eb6cdda3d38 and
a8f13dbdeb31be37ee15b5febb7cc2137bbece67, but keeps the binary shuffle
case i.e. shuffle fneg, fneg, mask.

By pulling out the shuffle we bring it in line with the same
canonicalisation we perform on binary ops and intrinsics, which the
original commit acknowledges goes in the opposite direction.

However nowadays VectorCombine is more powerful and can do more
optimisations when the shuffle is pulled out, so I think we should
revisit this. In particular we get more shuffles folded and can perform
scalarization.
2025-06-30 10:40:12 +01:00

325 lines
20 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=instcombine -S | FileCheck %s
; The negated first operand %a has 6 elements, %b has 3 and the result has 2.
; The result has the fewest vector elements between the result and the two
; operands, so the negation can be moved there: the expected output multiplies
; the un-negated %a and applies a single fneg to the <2 x double> product.
define <2 x double> @test_negation_move_to_result(<6 x double> %a, <3 x double> %b) {
; CHECK-LABEL: @test_negation_move_to_result(
; CHECK-NEXT: [[TMP1:%.*]] = call <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double> [[A:%.*]], <3 x double> [[B:%.*]], i32 2, i32 3, i32 1)
; CHECK-NEXT: [[RES:%.*]] = fneg <2 x double> [[TMP1]]
; CHECK-NEXT: ret <2 x double> [[RES]]
;
%a.neg = fneg <6 x double> %a
%res = tail call <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double> %a.neg, <3 x double> %b, i32 2, i32 3, i32 1)
ret <2 x double> %res
}
; The result (2 elements) has the fewest vector elements between the result
; and the two operands (6 and 3 elements), so the negation can be moved there.
; The fast flag on the multiply should be preserved: the expected output has
; `fast` on both the multiply and the fneg that is created on its result.
define <2 x double> @test_negation_move_to_result_with_fastflags(<6 x double> %a, <3 x double> %b) {
; CHECK-LABEL: @test_negation_move_to_result_with_fastflags(
; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double> [[A:%.*]], <3 x double> [[B:%.*]], i32 2, i32 3, i32 1)
; CHECK-NEXT: [[RES:%.*]] = fneg fast <2 x double> [[TMP1]]
; CHECK-NEXT: ret <2 x double> [[RES]]
;
%a.neg = fneg <6 x double> %a
%res = tail call fast <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double> %a.neg, <3 x double> %b, i32 2, i32 3, i32 1)
ret <2 x double> %res
}
; Same shape as @test_negation_move_to_result, but the multiply carries only
; the nnan flag. The expected output keeps nnan on the multiply and also puts
; it on the fneg that is created on the <2 x double> result.
define <2 x double> @test_negation_move_to_result_with_nnan_flag(<6 x double> %a, <3 x double> %b) {
; CHECK-LABEL: @test_negation_move_to_result_with_nnan_flag(
; CHECK-NEXT: [[TMP1:%.*]] = call nnan <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double> [[A:%.*]], <3 x double> [[B:%.*]], i32 2, i32 3, i32 1)
; CHECK-NEXT: [[RES:%.*]] = fneg nnan <2 x double> [[TMP1]]
; CHECK-NEXT: ret <2 x double> [[RES]]
;
%a.neg = fneg <6 x double> %a
%res = tail call nnan <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double> %a.neg, <3 x double> %b, i32 2, i32 3, i32 1)
ret <2 x double> %res
}
; Same shape as @test_negation_move_to_result, but the multiply carries only
; the nsz flag. The expected output keeps nsz on the multiply and also puts
; it on the fneg that is created on the <2 x double> result.
define <2 x double> @test_negation_move_to_result_with_nsz_flag(<6 x double> %a, <3 x double> %b) {
; CHECK-LABEL: @test_negation_move_to_result_with_nsz_flag(
; CHECK-NEXT: [[TMP1:%.*]] = call nsz <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double> [[A:%.*]], <3 x double> [[B:%.*]], i32 2, i32 3, i32 1)
; CHECK-NEXT: [[RES:%.*]] = fneg nsz <2 x double> [[TMP1]]
; CHECK-NEXT: ret <2 x double> [[RES]]
;
%a.neg = fneg <6 x double> %a
%res = tail call nsz <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double> %a.neg, <3 x double> %b, i32 2, i32 3, i32 1)
ret <2 x double> %res
}
; Here the fast flag is on the original fneg only; the multiply has no
; fast-math flags. The expected output moves the negation to the
; <2 x double> result, and neither the multiply nor the new fneg carries
; any fast-math flag.
define <2 x double> @test_negation_move_to_result_with_fastflag_on_negation(<6 x double> %a, <3 x double> %b) {
; CHECK-LABEL: @test_negation_move_to_result_with_fastflag_on_negation(
; CHECK-NEXT: [[TMP1:%.*]] = call <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double> [[A:%.*]], <3 x double> [[B:%.*]], i32 2, i32 3, i32 1)
; CHECK-NEXT: [[RES:%.*]] = fneg <2 x double> [[TMP1]]
; CHECK-NEXT: ret <2 x double> [[RES]]
;
%a.neg = fneg fast<6 x double> %a
%res = tail call <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double> %a.neg, <3 x double> %b, i32 2, i32 3, i32 1)
ret <2 x double> %res
}
; The negated first operand %a has 27 elements, %b has 3 and the result has 9.
; %b has the fewest vector elements between the result and the two operands,
; so the negation can be moved there: the expected output negates %b and
; passes the un-negated %a to the multiply.
define <9 x double> @test_move_negation_to_second_operand(<27 x double> %a, <3 x double> %b) {
; CHECK-LABEL: @test_move_negation_to_second_operand(
; CHECK-NEXT: [[TMP1:%.*]] = fneg <3 x double> [[B:%.*]]
; CHECK-NEXT: [[RES:%.*]] = tail call <9 x double> @llvm.matrix.multiply.v9f64.v27f64.v3f64(<27 x double> [[A:%.*]], <3 x double> [[TMP1]], i32 9, i32 3, i32 1)
; CHECK-NEXT: ret <9 x double> [[RES]]
;
%a.neg = fneg <27 x double> %a
%res = tail call <9 x double> @llvm.matrix.multiply.v9f64.v27f64.v3f64(<27 x double> %a.neg, <3 x double> %b, i32 9, i32 3, i32 1)
ret <9 x double> %res
}
; %b (3 elements) has the fewest vector elements between the result (9) and
; the two operands (27 and 3), so the negation can be moved there.
; The fast flag should be preserved on the multiply. Note that the expected
; fneg on %b carries no fast-math flags.
define <9 x double> @test_move_negation_to_second_operand_with_fast_flags(<27 x double> %a, <3 x double> %b) {
; CHECK-LABEL: @test_move_negation_to_second_operand_with_fast_flags(
; CHECK-NEXT: [[TMP1:%.*]] = fneg <3 x double> [[B:%.*]]
; CHECK-NEXT: [[RES:%.*]] = tail call fast <9 x double> @llvm.matrix.multiply.v9f64.v27f64.v3f64(<27 x double> [[A:%.*]], <3 x double> [[TMP1]], i32 9, i32 3, i32 1)
; CHECK-NEXT: ret <9 x double> [[RES]]
;
%a.neg = fneg <27 x double> %a
%res = tail call fast <9 x double> @llvm.matrix.multiply.v9f64.v27f64.v3f64(<27 x double> %a.neg, <3 x double> %b, i32 9, i32 3, i32 1)
ret <9 x double> %res
}
; The negated second operand %b has 6 elements, %a has 3 and the result has 2.
; The result has the fewest vector elements between the result and the two
; operands, so the negation can be moved there from %b.
define <2 x double> @test_negation_move_to_result_from_second_operand(<3 x double> %a, <6 x double> %b){
; CHECK-LABEL: @test_negation_move_to_result_from_second_operand(
; CHECK-NEXT: [[TMP1:%.*]] = call <2 x double> @llvm.matrix.multiply.v2f64.v3f64.v6f64(<3 x double> [[A:%.*]], <6 x double> [[B:%.*]], i32 1, i32 3, i32 2)
; CHECK-NEXT: [[RES:%.*]] = fneg <2 x double> [[TMP1]]
; CHECK-NEXT: ret <2 x double> [[RES]]
;
%b.neg = fneg <6 x double> %b
%res = tail call <2 x double> @llvm.matrix.multiply.v2f64.v3f64.v6f64(<3 x double> %a, <6 x double> %b.neg, i32 1, i32 3, i32 2)
ret <2 x double> %res
}
; The negated second operand %b has 27 elements, %a has 3 and the result has 9.
; %a has the fewest vector elements between the result and the two operands,
; so the negation can be moved there: the expected output negates %a and
; passes the un-negated %b to the multiply.
define <9 x double> @test_move_negation_to_first_operand(<3 x double> %a, <27 x double> %b) {
; CHECK-LABEL: @test_move_negation_to_first_operand(
; CHECK-NEXT: [[TMP1:%.*]] = fneg <3 x double> [[A:%.*]]
; CHECK-NEXT: [[RES:%.*]] = tail call <9 x double> @llvm.matrix.multiply.v9f64.v3f64.v27f64(<3 x double> [[TMP1]], <27 x double> [[B:%.*]], i32 1, i32 3, i32 9)
; CHECK-NEXT: ret <9 x double> [[RES]]
;
%b.neg = fneg <27 x double> %b
%res = tail call <9 x double> @llvm.matrix.multiply.v9f64.v3f64.v27f64(<3 x double> %a, <27 x double> %b.neg, i32 1, i32 3, i32 9)
ret <9 x double> %res
}
; Negative test: the negation is already on %a (3 elements), which has the
; fewest vector elements between the result (15) and the two operands
; (3 and 5), so the negation is not moved and the IR is expected to stay as is.
define <15 x double> @test_negation_not_moved(<3 x double> %a, <5 x double> %b) {
; CHECK-LABEL: @test_negation_not_moved(
; CHECK-NEXT: [[A_NEG:%.*]] = fneg <3 x double> [[A:%.*]]
; CHECK-NEXT: [[RES:%.*]] = tail call <15 x double> @llvm.matrix.multiply.v15f64.v3f64.v5f64(<3 x double> [[A_NEG]], <5 x double> [[B:%.*]], i32 3, i32 1, i32 5)
; CHECK-NEXT: ret <15 x double> [[RES]]
;
%a.neg = fneg <3 x double> %a
%res = tail call <15 x double> @llvm.matrix.multiply.v15f64.v3f64.v5f64(<3 x double> %a.neg, <5 x double> %b, i32 3, i32 1, i32 5)
ret <15 x double> %res
}
; Negative test: the negation is already on %b (3 elements), which has the
; fewest vector elements between the result (15) and the two operands
; (5 and 3), so the negation is not moved and the IR is expected to stay as is.
define <15 x double> @test_negation_not_moved_second_operand(<5 x double> %a, <3 x double> %b) {
; CHECK-LABEL: @test_negation_not_moved_second_operand(
; CHECK-NEXT: [[B_NEG:%.*]] = fneg <3 x double> [[B:%.*]]
; CHECK-NEXT: [[RES:%.*]] = tail call <15 x double> @llvm.matrix.multiply.v15f64.v5f64.v3f64(<5 x double> [[A:%.*]], <3 x double> [[B_NEG]], i32 5, i32 1, i32 3)
; CHECK-NEXT: ret <15 x double> [[RES]]
;
%b.neg = fneg <3 x double> %b
%res = tail call <15 x double> @llvm.matrix.multiply.v15f64.v5f64.v3f64(<5 x double> %a, <3 x double> %b.neg, i32 5, i32 1, i32 3)
ret <15 x double> %res
}
; The fneg is applied to the 15-element result, while operand %a (3 elements)
; has the smallest vector element count, so moving the negation from the
; result to %a would negate fewer elements.
; The expected output below keeps the fneg on the result, i.e. the IR is left
; unchanged.
; NOTE(review): the earlier wording of this comment said the negation "should
; be moved" to %a, which the assertions do not show - confirm whether this is
; a known missed fold or an intentional negative test.
define <15 x double> @test_negation_on_result(<3 x double> %a, <5 x double> %b) {
; CHECK-LABEL: @test_negation_on_result(
; CHECK-NEXT: [[RES:%.*]] = tail call <15 x double> @llvm.matrix.multiply.v15f64.v3f64.v5f64(<3 x double> [[A:%.*]], <5 x double> [[B:%.*]], i32 3, i32 1, i32 5)
; CHECK-NEXT: [[RES_2:%.*]] = fneg <15 x double> [[RES]]
; CHECK-NEXT: ret <15 x double> [[RES_2]]
;
%res = tail call <15 x double> @llvm.matrix.multiply.v15f64.v3f64.v5f64(<3 x double> %a, <5 x double> %b, i32 3, i32 1, i32 5)
%res.2 = fneg <15 x double> %res
ret <15 x double> %res.2
}
; Both operands (%a with 6 elements, %b with 3) are negated, so both
; negations can be deleted: the expected output multiplies %a and %b directly
; and contains no fneg.
define <2 x double> @test_with_two_operands_negated1(<6 x double> %a, <3 x double> %b){
; CHECK-LABEL: @test_with_two_operands_negated1(
; CHECK-NEXT: [[RES:%.*]] = tail call <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double> [[A:%.*]], <3 x double> [[B:%.*]], i32 2, i32 3, i32 1)
; CHECK-NEXT: ret <2 x double> [[RES]]
;
%a.neg = fneg <6 x double> %a
%b.neg = fneg <3 x double> %b
%res = tail call <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double> %a.neg, <3 x double> %b.neg, i32 2, i32 3, i32 1)
ret <2 x double> %res
}
; Both operands (%a with 27 elements, %b with 3) are negated, so both
; negations will be removed: the expected output multiplies %a and %b
; directly and contains no fneg.
define <9 x double> @test_with_two_operands_negated2(<27 x double> %a, <3 x double> %b){
; CHECK-LABEL: @test_with_two_operands_negated2(
; CHECK-NEXT: [[RES:%.*]] = tail call <9 x double> @llvm.matrix.multiply.v9f64.v27f64.v3f64(<27 x double> [[A:%.*]], <3 x double> [[B:%.*]], i32 9, i32 3, i32 1)
; CHECK-NEXT: ret <9 x double> [[RES]]
;
%a.neg = fneg <27 x double> %a
%b.neg = fneg <3 x double> %b
%res = tail call <9 x double> @llvm.matrix.multiply.v9f64.v27f64.v3f64(<27 x double> %a.neg, <3 x double> %b.neg, i32 9, i32 3, i32 1)
ret <9 x double> %res
}
; Both operands are negated and the multiply carries the fast flag. Both
; negations will be removed, and the expected output keeps `fast` on the
; multiply.
define <9 x double> @test_with_two_operands_negated_with_fastflags(<27 x double> %a, <3 x double> %b){
; CHECK-LABEL: @test_with_two_operands_negated_with_fastflags(
; CHECK-NEXT: [[RES:%.*]] = tail call fast <9 x double> @llvm.matrix.multiply.v9f64.v27f64.v3f64(<27 x double> [[A:%.*]], <3 x double> [[B:%.*]], i32 9, i32 3, i32 1)
; CHECK-NEXT: ret <9 x double> [[RES]]
;
%a.neg = fneg <27 x double> %a
%b.neg = fneg <3 x double> %b
%res = tail call fast <9 x double> @llvm.matrix.multiply.v9f64.v27f64.v3f64(<27 x double> %a.neg, <3 x double> %b.neg, i32 9, i32 3, i32 1)
ret <9 x double> %res
}
; Commuted variant of @test_with_two_operands_negated2: here the first
; operand is the small one (3 elements) and the second is the large one (27).
; Both negations should be removed.
define <9 x double> @test_with_two_operands_negated2_commute(<3 x double> %a, <27 x double> %b){
; CHECK-LABEL: @test_with_two_operands_negated2_commute(
; CHECK-NEXT: [[RES:%.*]] = call <9 x double> @llvm.matrix.multiply.v9f64.v3f64.v27f64(<3 x double> [[A:%.*]], <27 x double> [[B:%.*]], i32 1, i32 3, i32 9)
; CHECK-NEXT: ret <9 x double> [[RES]]
;
%a.neg = fneg <3 x double> %a
%b.neg = fneg <27 x double> %b
%res = call <9 x double> @llvm.matrix.multiply.v9f64.v3f64.v27f64(<3 x double> %a.neg, <27 x double> %b.neg, i32 1, i32 3, i32 9)
ret <9 x double> %res
}
; Both operands are negated and have the same element count (2 each; the
; result has 4). The expected output removes both negations and multiplies
; %a and %b directly.
define <4 x double> @matrix_multiply_two_operands_negated_with_same_size(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: @matrix_multiply_two_operands_negated_with_same_size(
; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.matrix.multiply.v4f64.v2f64.v2f64(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], i32 2, i32 1, i32 2)
; CHECK-NEXT: ret <4 x double> [[RES]]
;
%a.neg = fneg <2 x double> %a
%b.neg = fneg <2 x double> %b
%res = call <4 x double> @llvm.matrix.multiply.v4f64.v2f64.v2f64(<2 x double> %a.neg, <2 x double> %b.neg, i32 2, i32 1, i32 2)
ret <4 x double> %res
}
; Both operands are negated, and %a.neg has a second use in a shufflevector.
; The expected output still drops both negations from the multiply (it takes
; the un-negated %a and %b), keeps the fneg of %a for the shuffle, and no
; longer contains the fneg of %b.
; The unused second shuffle operand is written as poison rather than the
; deprecated undef; it matches the poison operand in the expected output.
define <2 x double> @matrix_multiply_two_operands_with_multiple_uses(<6 x double> %a, <3 x double> %b) {
; CHECK-LABEL: @matrix_multiply_two_operands_with_multiple_uses(
; CHECK-NEXT: [[A_NEG:%.*]] = fneg <6 x double> [[A:%.*]]
; CHECK-NEXT: [[RES:%.*]] = tail call <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double> [[A]], <3 x double> [[B:%.*]], i32 2, i32 3, i32 1)
; CHECK-NEXT: [[RES_2:%.*]] = shufflevector <6 x double> [[A_NEG]], <6 x double> poison, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT: [[RES_3:%.*]] = fadd <2 x double> [[RES_2]], [[RES]]
; CHECK-NEXT: ret <2 x double> [[RES_3]]
;
%a.neg = fneg <6 x double> %a
%b.neg = fneg <3 x double> %b
%res = tail call <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double> %a.neg, <3 x double> %b.neg, i32 2, i32 3, i32 1)
%res.2 = shufflevector <6 x double> %a.neg, <6 x double> poison,
<2 x i32> <i32 0, i32 1>
%res.3 = fadd <2 x double> %res.2, %res
ret <2 x double> %res.3
}
; Both operands are negated, and each negated value is also stored to memory
; (%a.neg to %a_loc, %b.neg to %b_loc). The expected output keeps both fneg
; instructions for the stores, but the multiply takes the un-negated %a and
; %b. The stores in the expected output carry explicit alignments (256 and
; 32) that the input stores do not spell out.
define <9 x double> @matrix_multiply_two_operands_with_multiple_uses2(<27 x double> %a, <3 x double> %b, ptr %a_loc, ptr %b_loc){
; CHECK-LABEL: @matrix_multiply_two_operands_with_multiple_uses2(
; CHECK-NEXT: [[A_NEG:%.*]] = fneg <27 x double> [[A:%.*]]
; CHECK-NEXT: [[B_NEG:%.*]] = fneg <3 x double> [[B:%.*]]
; CHECK-NEXT: [[RES:%.*]] = tail call <9 x double> @llvm.matrix.multiply.v9f64.v27f64.v3f64(<27 x double> [[A]], <3 x double> [[B]], i32 9, i32 3, i32 1)
; CHECK-NEXT: store <27 x double> [[A_NEG]], ptr [[A_LOC:%.*]], align 256
; CHECK-NEXT: store <3 x double> [[B_NEG]], ptr [[B_LOC:%.*]], align 32
; CHECK-NEXT: ret <9 x double> [[RES]]
;
%a.neg = fneg <27 x double> %a
%b.neg = fneg <3 x double> %b
%res = tail call <9 x double> @llvm.matrix.multiply.v9f64.v27f64.v3f64(<27 x double> %a.neg, <3 x double> %b.neg, i32 9, i32 3, i32 1)
store <27 x double> %a.neg, ptr %a_loc
store <3 x double> %b.neg, ptr %b_loc
ret <9 x double> %res
}
; Negative test: only %a is negated (15 elements; %b has 20 and the result
; has 12), and %a.neg has a second use in a shufflevector. The expected
; output leaves the negation where it is: the multiply still takes the
; negated %a and no fneg is created on the result.
; The unused second shuffle operand is written as poison rather than the
; deprecated undef; it matches the poison operand in the expected output.
define <12 x double> @fneg_with_multiple_uses(<15 x double> %a, <20 x double> %b){
; CHECK-LABEL: @fneg_with_multiple_uses(
; CHECK-NEXT: [[A_NEG:%.*]] = fneg <15 x double> [[A:%.*]]
; CHECK-NEXT: [[RES:%.*]] = tail call <12 x double> @llvm.matrix.multiply.v12f64.v15f64.v20f64(<15 x double> [[A_NEG]], <20 x double> [[B:%.*]], i32 3, i32 5, i32 4)
; CHECK-NEXT: [[RES_2:%.*]] = shufflevector <15 x double> [[A_NEG]], <15 x double> poison, <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
; CHECK-NEXT: [[RES_3:%.*]] = fadd <12 x double> [[RES_2]], [[RES]]
; CHECK-NEXT: ret <12 x double> [[RES_3]]
;
%a.neg = fneg <15 x double> %a
%res = tail call <12 x double> @llvm.matrix.multiply.v12f64.v15f64.v20f64(<15 x double> %a.neg, <20 x double> %b, i32 3, i32 5, i32 4)
%res.2 = shufflevector <15 x double> %a.neg, <15 x double> poison,
<12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
%res.3 = fadd <12 x double> %res.2, %res
ret <12 x double> %res.3
}
; Negative test: only %a is negated, and %a.neg is also stored to %a_loc.
; The expected output leaves the negation where it is: the multiply still
; takes the negated %a and no fneg is created on the result. The store in the
; expected output carries an explicit alignment (128) that the input store
; does not spell out.
define <12 x double> @fneg_with_multiple_uses_2(<15 x double> %a, <20 x double> %b, ptr %a_loc){
; CHECK-LABEL: @fneg_with_multiple_uses_2(
; CHECK-NEXT: [[A_NEG:%.*]] = fneg <15 x double> [[A:%.*]]
; CHECK-NEXT: [[RES:%.*]] = tail call <12 x double> @llvm.matrix.multiply.v12f64.v15f64.v20f64(<15 x double> [[A_NEG]], <20 x double> [[B:%.*]], i32 3, i32 5, i32 4)
; CHECK-NEXT: store <15 x double> [[A_NEG]], ptr [[A_LOC:%.*]], align 128
; CHECK-NEXT: ret <12 x double> [[RES]]
;
%a.neg = fneg <15 x double> %a
%res = tail call <12 x double> @llvm.matrix.multiply.v12f64.v15f64.v20f64(<15 x double> %a.neg, <20 x double> %b, i32 3, i32 5, i32 4)
store <15 x double> %a.neg, ptr %a_loc
ret <12 x double> %res
}
; Two chained multiplies; only %a (27 elements) of the first one is negated.
; The negation should be moved to the second operand %b, given it has the
; smallest element count (3, versus 27 for %a and 9 for the first product).
; The second multiply is expected to stay unchanged.
define <72 x double> @chain_of_matrix_multiplies(<27 x double> %a, <3 x double> %b, <8 x double> %c) {
; CHECK-LABEL: @chain_of_matrix_multiplies(
; CHECK-NEXT: [[TMP1:%.*]] = fneg <3 x double> [[B:%.*]]
; CHECK-NEXT: [[RES:%.*]] = tail call <9 x double> @llvm.matrix.multiply.v9f64.v27f64.v3f64(<27 x double> [[A:%.*]], <3 x double> [[TMP1]], i32 9, i32 3, i32 1)
; CHECK-NEXT: [[RES_2:%.*]] = tail call <72 x double> @llvm.matrix.multiply.v72f64.v9f64.v8f64(<9 x double> [[RES]], <8 x double> [[C:%.*]], i32 9, i32 1, i32 8)
; CHECK-NEXT: ret <72 x double> [[RES_2]]
;
%a.neg = fneg <27 x double> %a
%res = tail call <9 x double> @llvm.matrix.multiply.v9f64.v27f64.v3f64(<27 x double> %a.neg, <3 x double> %b, i32 9, i32 3, i32 1)
%res.2 = tail call <72 x double> @llvm.matrix.multiply.v72f64.v9f64.v8f64(<9 x double> %res, <8 x double> %c, i32 9, i32 1, i32 8)
ret <72 x double> %res.2
}
; Two chained multiplies with two negations in the input: one on %b
; (5 elements) and one on the 15-element product of the first multiply.
; The first negation should be moved to %a, which has 3 elements.
; The second negation should be moved to the result of the second
; multiplication, which has 6 elements.
define <6 x double> @chain_of_matrix_multiplies_with_two_negations(<3 x double> %a, <5 x double> %b, <10 x double> %c) {
; CHECK-LABEL: @chain_of_matrix_multiplies_with_two_negations(
; CHECK-NEXT: [[TMP1:%.*]] = fneg <3 x double> [[A:%.*]]
; CHECK-NEXT: [[RES:%.*]] = tail call <15 x double> @llvm.matrix.multiply.v15f64.v3f64.v5f64(<3 x double> [[TMP1]], <5 x double> [[B:%.*]], i32 3, i32 1, i32 5)
; CHECK-NEXT: [[TMP2:%.*]] = call <6 x double> @llvm.matrix.multiply.v6f64.v15f64.v10f64(<15 x double> [[RES]], <10 x double> [[C:%.*]], i32 3, i32 5, i32 2)
; CHECK-NEXT: [[RES_2:%.*]] = fneg <6 x double> [[TMP2]]
; CHECK-NEXT: ret <6 x double> [[RES_2]]
;
%b.neg = fneg <5 x double> %b
%res = tail call <15 x double> @llvm.matrix.multiply.v15f64.v3f64.v5f64(<3 x double> %a, <5 x double> %b.neg, i32 3, i32 1, i32 5)
%res.neg = fneg <15 x double> %res
%res.2 = tail call <6 x double> @llvm.matrix.multiply.v6f64.v15f64.v10f64(<15 x double> %res.neg, <10 x double> %c, i32 3, i32 5, i32 2)
ret <6 x double> %res.2
}
; Two chained multiplies; only %a (15 elements) is negated. The first product
; has 12 elements and the final product has 6.
; The negation should be propagated to the result of the second matrix
; multiplication: the expected output has no fneg before the second multiply
; and a single fneg on the <6 x double> final result.
define <6 x double> @chain_of_matrix_multiplies_propagation(<15 x double> %a, <20 x double> %b, <8 x double> %c){
; CHECK-LABEL: @chain_of_matrix_multiplies_propagation(
; CHECK-NEXT: [[TMP1:%.*]] = call <12 x double> @llvm.matrix.multiply.v12f64.v15f64.v20f64(<15 x double> [[A:%.*]], <20 x double> [[B:%.*]], i32 3, i32 5, i32 4)
; CHECK-NEXT: [[TMP2:%.*]] = call <6 x double> @llvm.matrix.multiply.v6f64.v12f64.v8f64(<12 x double> [[TMP1]], <8 x double> [[C:%.*]], i32 3, i32 4, i32 2)
; CHECK-NEXT: [[RES_2:%.*]] = fneg <6 x double> [[TMP2]]
; CHECK-NEXT: ret <6 x double> [[RES_2]]
;
%a.neg = fneg <15 x double> %a
%res = tail call <12 x double> @llvm.matrix.multiply.v12f64.v15f64.v20f64(<15 x double> %a.neg, <20 x double> %b, i32 3, i32 5, i32 4)
%res.2 = tail call <6 x double> @llvm.matrix.multiply.v6f64.v12f64.v8f64(<12 x double> %res, <8 x double> %c, i32 3, i32 4, i32 2)
ret <6 x double> %res.2
}
; Declarations of the llvm.matrix.multiply overloads used by the tests in this
; file. Each overload name encodes the result, first-operand and
; second-operand vector types, and the three trailing i32 parameters are
; immediate arguments.
; NOTE(review): the v21f64.v15f64.v35f64 overload is not called by any
; function visible in this file, and the v9f64.v3f64.v27f64 declaration omits
; the #1 attribute-group reference that every other declaration carries -
; confirm whether either is intentional. The definition of attribute group #1
; is not visible in this part of the file.
declare <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double>, <3 x double>, i32 immarg, i32 immarg, i32 immarg) #1
declare <4 x double> @llvm.matrix.multiply.v4f64.v2f64.v2f64(<2 x double>, <2 x double>, i32 immarg, i32 immarg, i32 immarg) #1
declare <2 x double> @llvm.matrix.multiply.v2f64.v3f64.v6f64(<3 x double>, <6 x double>, i32 immarg, i32 immarg, i32 immarg) #1
declare <9 x double> @llvm.matrix.multiply.v9f64.v27f64.v3f64(<27 x double>, <3 x double>, i32 immarg, i32 immarg, i32 immarg) #1
declare <9 x double> @llvm.matrix.multiply.v9f64.v3f64.v27f64(<3 x double>, <27 x double>, i32 immarg, i32 immarg, i32 immarg)
declare <15 x double> @llvm.matrix.multiply.v15f64.v3f64.v5f64(<3 x double>, <5 x double>, i32 immarg, i32 immarg, i32 immarg) #1
declare <15 x double> @llvm.matrix.multiply.v15f64.v5f64.v3f64(<5 x double>, <3 x double>, i32 immarg, i32 immarg, i32 immarg) #1
declare <72 x double> @llvm.matrix.multiply.v72f64.v9f64.v8f64(<9 x double>, <8 x double>, i32 immarg, i32 immarg, i32 immarg) #1
declare <12 x double> @llvm.matrix.multiply.v12f64.v15f64.v20f64(<15 x double>, <20 x double>, i32 immarg, i32 immarg, i32 immarg) #1
declare <21 x double> @llvm.matrix.multiply.v21f64.v15f64.v35f64(<15 x double>, <35 x double>, i32 immarg, i32 immarg, i32 immarg) #1
declare <6 x double> @llvm.matrix.multiply.v6f64.v15f64.v10f64(<15 x double>, <10 x double>, i32 immarg, i32 immarg, i32 immarg) #1
declare <6 x double> @llvm.matrix.multiply.v6f64.v12f64.v8f64(<12 x double>, <8 x double>, i32 immarg, i32 immarg, i32 immarg) #1