- Notifications
You must be signed in to change notification settings - Fork 15.3k
[DAG] Recognise AVGFLOOR (((A >> 1) + (B >> 1)) + (A & B & 1)) patterns #169644
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
Recognise 'LSB' style AVGFLOOR patterns. I attempted to use the m_Reassociatable* pattern matchers, but encountered an issue: we can't correctly match m_Value/m_Deferred pairs within the same reassociation, as there appear to be no guarantees on the order of matching. I'll raise a bug for this; in the meantime, the test_lsb_i32 tests retain the pattern to show the missed matching opportunity. Fixes llvm#53648
| @llvm/pr-subscribers-llvm-selectiondag @llvm/pr-subscribers-backend-x86 Author: Simon Pilgrim (RKSimon) ChangesRecognise 'LSB' style AVGFLOOR patterns. I've attempted to use the m_Reassociatable* pattern matchers, but encountered an issue in that we can't correctly match m_Value/m_Deferred pairs in the same reassociation as it appears that we have no guarantees on the order of matching. I'll raise a bug for this, and in the meantime we have the pattern in the test_lsb_i32 tests to show the missed matching opportunity. Fixes #53648 Full diff: https://github.com/llvm/llvm-project/pull/169644.diff 3 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 6b79dbb46cadc..813cbeafeaec9 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -3154,19 +3154,31 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) { } // Attempt to form avgfloor(A, B) from (A & B) + ((A ^ B) >> 1) +// Attempt to form avgfloor(A, B) from ((A >> 1) + (B >> 1)) + (A & B & 1) SDValue DAGCombiner::foldAddToAvg(SDNode *N, const SDLoc &DL) { SDValue N0 = N->getOperand(0); EVT VT = N0.getValueType(); SDValue A, B; + // FIXME: m_ReassociatableAdd can't handle m_Value/m_Deferred mixing. if ((!LegalOperations || hasOperation(ISD::AVGFLOORU, VT)) && - sd_match(N, m_Add(m_And(m_Value(A), m_Value(B)), - m_Srl(m_Xor(m_Deferred(A), m_Deferred(B)), m_One())))) { + (sd_match(N, + m_Add(m_And(m_Value(A), m_Value(B)), + m_Srl(m_Xor(m_Deferred(A), m_Deferred(B)), m_One()))) || + sd_match(N, m_Add(m_Add(m_Srl(m_Value(A), m_One()), + m_Srl(m_Value(B), m_One())), + m_ReassociatableAnd(m_Deferred(A), m_Deferred(B), + m_One()))))) { return DAG.getNode(ISD::AVGFLOORU, DL, VT, A, B); } if ((!LegalOperations || hasOperation(ISD::AVGFLOORS, VT)) && - sd_match(N, m_Add(m_And(m_Value(A), m_Value(B)), - m_Sra(m_Xor(m_Deferred(A), m_Deferred(B)), m_One())))) { + (sd_match(N, + m_Add(m_And(m_Value(A), m_Value(B)), + m_Sra(m_Xor(m_Deferred(A), m_Deferred(B)), m_One()))) || + sd_match(N, m_Add(m_Add(m_Sra(m_Value(A), m_One()), + m_Sra(m_Value(B), m_One())), + m_ReassociatableAnd(m_Deferred(A), m_Deferred(B), + m_One()))))) { return DAG.getNode(ISD::AVGFLOORS, DL, VT, A, B); } diff --git a/llvm/test/CodeGen/X86/avgfloors-scalar.ll b/llvm/test/CodeGen/X86/avgfloors-scalar.ll index fd303192e6c50..c8bbc875834d1 100644 --- a/llvm/test/CodeGen/X86/avgfloors-scalar.ll +++ b/llvm/test/CodeGen/X86/avgfloors-scalar.ll @@ -38,26 +38,20 @@ define i8 @test_fixed_i8(i8 %a0, i8 %a1) nounwind { define i8 @test_lsb_i8(i8 %a0, i8 
%a1) nounwind { ; X86-LABEL: test_lsb_i8: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %eax, %edx -; X86-NEXT: sarb %dl -; X86-NEXT: andb %cl, %al -; X86-NEXT: sarb %cl -; X86-NEXT: addb %dl, %cl -; X86-NEXT: andb $1, %al -; X86-NEXT: addb %cl, %al +; X86-NEXT: movsbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movsbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: addl %ecx, %eax +; X86-NEXT: shrl %eax +; X86-NEXT: # kill: def $al killed $al killed $eax ; X86-NEXT: retl ; ; X64-LABEL: test_lsb_i8: ; X64: # %bb.0: -; X64-NEXT: movl %edi, %eax -; X64-NEXT: sarb %al -; X64-NEXT: andb %sil, %dil -; X64-NEXT: sarb %sil -; X64-NEXT: addb %sil, %al -; X64-NEXT: andb $1, %dil -; X64-NEXT: addb %dil, %al +; X64-NEXT: movsbl %sil, %ecx +; X64-NEXT: movsbl %dil, %eax +; X64-NEXT: addl %ecx, %eax +; X64-NEXT: shrl %eax +; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %s0 = ashr i8 %a0, 1 %s1 = ashr i8 %a1, 1 @@ -124,26 +118,17 @@ define i16 @test_lsb_i16(i16 %a0, i16 %a1) nounwind { ; X86: # %bb.0: ; X86-NEXT: movswl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movswl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %eax, %edx -; X86-NEXT: sarl %edx -; X86-NEXT: andl %ecx, %eax -; X86-NEXT: sarl %ecx -; X86-NEXT: addl %edx, %ecx -; X86-NEXT: andl $1, %eax ; X86-NEXT: addl %ecx, %eax +; X86-NEXT: shrl %eax ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: retl ; ; X64-LABEL: test_lsb_i16: ; X64: # %bb.0: -; X64-NEXT: movswl %si, %eax -; X64-NEXT: movswl %di, %ecx -; X64-NEXT: sarl %ecx -; X64-NEXT: sarl %eax +; X64-NEXT: movswl %si, %ecx +; X64-NEXT: movswl %di, %eax ; X64-NEXT: addl %ecx, %eax -; X64-NEXT: andl %esi, %edi -; X64-NEXT: andl $1, %edi -; X64-NEXT: addl %edi, %eax +; X64-NEXT: shrl %eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %s0 = ashr i16 %a0, 1 @@ -316,21 +301,19 @@ define i64 @test_lsb_i64(i64 %a0, i64 %a1) nounwind { ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi 
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl %edi, %ebx -; X86-NEXT: sarl %ebx -; X86-NEXT: shldl $31, %eax, %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %eax, %ebx +; X86-NEXT: xorl %esi, %ebx ; X86-NEXT: movl %ecx, %edx +; X86-NEXT: xorl %edi, %edx +; X86-NEXT: shrdl $1, %edx, %ebx +; X86-NEXT: andl %edi, %ecx ; X86-NEXT: sarl %edx -; X86-NEXT: shldl $31, %esi, %ecx -; X86-NEXT: addl %edi, %ecx -; X86-NEXT: adcl %ebx, %edx ; X86-NEXT: andl %esi, %eax -; X86-NEXT: andl $1, %eax -; X86-NEXT: addl %ecx, %eax -; X86-NEXT: adcl $0, %edx +; X86-NEXT: addl %ebx, %eax +; X86-NEXT: adcl %ecx, %edx ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: popl %ebx @@ -338,14 +321,11 @@ define i64 @test_lsb_i64(i64 %a0, i64 %a1) nounwind { ; ; X64-LABEL: test_lsb_i64: ; X64: # %bb.0: -; X64-NEXT: movq %rdi, %rcx -; X64-NEXT: sarq %rcx -; X64-NEXT: andl %esi, %edi ; X64-NEXT: movq %rsi, %rax -; X64-NEXT: sarq %rax -; X64-NEXT: addq %rcx, %rax -; X64-NEXT: andl $1, %edi -; X64-NEXT: addq %rdi, %rax +; X64-NEXT: andq %rdi, %rax +; X64-NEXT: xorq %rdi, %rsi +; X64-NEXT: sarq %rsi +; X64-NEXT: addq %rsi, %rax ; X64-NEXT: retq %s0 = ashr i64 %a0, 1 %s1 = ashr i64 %a1, 1 diff --git a/llvm/test/CodeGen/X86/avgflooru-scalar.ll b/llvm/test/CodeGen/X86/avgflooru-scalar.ll index 9ae4492bb4cd4..7ad10164ad484 100644 --- a/llvm/test/CodeGen/X86/avgflooru-scalar.ll +++ b/llvm/test/CodeGen/X86/avgflooru-scalar.ll @@ -40,24 +40,18 @@ define i8 @test_lsb_i8(i8 %a0, i8 %a1) nounwind { ; X86: # %bb.0: ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %eax, %edx -; X86-NEXT: shrb %dl -; X86-NEXT: andb %cl, %al -; X86-NEXT: shrb %cl -; X86-NEXT: addb %dl, %cl -; X86-NEXT: andb $1, %al -; X86-NEXT: addb %cl, %al +; X86-NEXT: addl %ecx, %eax +; X86-NEXT: 
shrl %eax +; X86-NEXT: # kill: def $al killed $al killed $eax ; X86-NEXT: retl ; ; X64-LABEL: test_lsb_i8: ; X64: # %bb.0: -; X64-NEXT: movl %edi, %eax -; X64-NEXT: shrb %al -; X64-NEXT: andb %sil, %dil -; X64-NEXT: shrb %sil -; X64-NEXT: addb %sil, %al -; X64-NEXT: andb $1, %dil -; X64-NEXT: addb %dil, %al +; X64-NEXT: movzbl %sil, %ecx +; X64-NEXT: movzbl %dil, %eax +; X64-NEXT: addl %ecx, %eax +; X64-NEXT: shrl %eax +; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %s0 = lshr i8 %a0, 1 %s1 = lshr i8 %a1, 1 @@ -124,26 +118,17 @@ define i16 @test_lsb_i16(i16 %a0, i16 %a1) nounwind { ; X86: # %bb.0: ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %eax, %edx -; X86-NEXT: shrl %edx -; X86-NEXT: andl %ecx, %eax -; X86-NEXT: shrl %ecx -; X86-NEXT: addl %edx, %ecx -; X86-NEXT: andl $1, %eax ; X86-NEXT: addl %ecx, %eax +; X86-NEXT: shrl %eax ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: retl ; ; X64-LABEL: test_lsb_i16: ; X64: # %bb.0: -; X64-NEXT: movzwl %si, %eax -; X64-NEXT: movzwl %di, %ecx -; X64-NEXT: shrl %ecx -; X64-NEXT: shrl %eax +; X64-NEXT: movzwl %si, %ecx +; X64-NEXT: movzwl %di, %eax ; X64-NEXT: addl %ecx, %eax -; X64-NEXT: andl %esi, %edi -; X64-NEXT: andl $1, %edi -; X64-NEXT: addl %edi, %eax +; X64-NEXT: shrl %eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %s0 = lshr i16 %a0, 1 @@ -300,40 +285,23 @@ define i64 @test_fixed_i64(i64 %a0, i64 %a1) nounwind { define i64 @test_lsb_i64(i64 %a0, i64 %a1) nounwind { ; X86-LABEL: test_lsb_i64: ; X86: # %bb.0: -; X86-NEXT: pushl %ebx -; X86-NEXT: pushl %edi -; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl %edi, %ebx -; X86-NEXT: shrl %ebx -; X86-NEXT: shldl $31, %eax, %edi -; X86-NEXT: movl %ecx, %edx -; X86-NEXT: shrl %edx -; X86-NEXT: shldl $31, 
%esi, %ecx -; X86-NEXT: addl %edi, %ecx -; X86-NEXT: adcl %ebx, %edx -; X86-NEXT: andl %esi, %eax -; X86-NEXT: andl $1, %eax -; X86-NEXT: addl %ecx, %eax -; X86-NEXT: adcl $0, %edx -; X86-NEXT: popl %esi -; X86-NEXT: popl %edi -; X86-NEXT: popl %ebx +; X86-NEXT: addl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: adcl {{[0-9]+}}(%esp), %eax +; X86-NEXT: setb %dl +; X86-NEXT: movzbl %dl, %edx +; X86-NEXT: shldl $31, %eax, %edx +; X86-NEXT: shldl $31, %ecx, %eax ; X86-NEXT: retl ; ; X64-LABEL: test_lsb_i64: ; X64: # %bb.0: -; X64-NEXT: movq %rdi, %rcx -; X64-NEXT: shrq %rcx -; X64-NEXT: andl %esi, %edi ; X64-NEXT: movq %rsi, %rax -; X64-NEXT: shrq %rax -; X64-NEXT: addq %rcx, %rax -; X64-NEXT: andl $1, %edi -; X64-NEXT: addq %rdi, %rax +; X64-NEXT: andq %rdi, %rax +; X64-NEXT: xorq %rdi, %rsi +; X64-NEXT: shrq %rsi +; X64-NEXT: addq %rsi, %rax ; X64-NEXT: retq %s0 = lshr i64 %a0, 1 %s1 = lshr i64 %a1, 1 |
Recognise 'LSB' style AVGFLOOR patterns.
I attempted to use the m_Reassociatable* pattern matchers, but encountered an issue: we can't correctly match m_Value/m_Deferred pairs within the same reassociation, as there appear to be no guarantees on the order of matching. I'll raise a bug for this; in the meantime, the test_lsb_i32 tests retain the pattern to show the missed matching opportunity.
Alive2: https://alive2.llvm.org/ce/z/nfSSk_
Fixes #53648