[ValueTracking] Compute known FPClass from dominating condition #80941

dtcxzyw · 2024-02-07T06:21:31Z

This patch improves computeKnownFPClass by using context-sensitive information from DomConditionCache.

Full diff: https://github.com/llvm/llvm-project/pull/80941.diff

3 Files Affected:

(modified) llvm/lib/Analysis/DomConditionCache.cpp (+5)
(modified) llvm/lib/Analysis/ValueTracking.cpp (+67-27)
(added) llvm/test/Transforms/InstCombine/fpclass-from-dom-cond.ll (+208)

diff --git a/llvm/lib/Analysis/DomConditionCache.cpp b/llvm/lib/Analysis/DomConditionCache.cpp index c7f4cab415888..071d8301b554c 100644 --- a/llvm/lib/Analysis/DomConditionCache.cpp +++ b/llvm/lib/Analysis/DomConditionCache.cpp @@ -53,6 +53,11 @@ static void findAffectedValues(Value *Cond, AddAffected(X); } } + // Handle patterns that computeKnownFPClass() support. + if (match(Cond, m_FCmp(Pred, m_Value(A), m_Constant()))) + AddAffected(A); + if (match(Cond, m_Intrinsic<Intrinsic::is_fpclass>(m_Value(A), m_Constant()))) + AddAffected(A); } void DomConditionCache::registerBranch(BranchInst *BI) { diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 58db81f470130..9ce18631b05bb 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -4213,9 +4213,56 @@ llvm::fcmpImpliesClass(CmpInst::Predicate Pred, const Function &F, Value *LHS, return fcmpImpliesClass(Pred, F, LHS, *ConstRHS, LookThroughSrc); } -static FPClassTest computeKnownFPClassFromAssumes(const Value *V, - const SimplifyQuery &Q) { - FPClassTest KnownFromAssume = fcAllFlags; +static void computeKnownFPClassFromCond(const Value *V, Value *Cond, + bool CondIsTrue, + const Instruction *CxtI, + KnownFPClass &KnownFromContext) { + CmpInst::Predicate Pred; + Value *LHS; + Value *RHS; + uint64_t ClassVal = 0; + // TODO: handle sign-bit check idiom + if (match(Cond, m_FCmp(Pred, m_Value(LHS), m_Value(RHS)))) { + const APFloat *CRHS; + if (match(RHS, m_APFloat(CRHS))) { + auto [CmpVal, MaskIfTrue, MaskIfFalse] = fcmpImpliesClass( + Pred, *CxtI->getParent()->getParent(), LHS, *CRHS, LHS != V); + if (CmpVal == V) + KnownFromContext.knownNot(~(CondIsTrue ? MaskIfTrue : MaskIfFalse)); + } + } else if (match(Cond, m_Intrinsic<Intrinsic::is_fpclass>( + m_Value(LHS), m_ConstantInt(ClassVal)))) { + FPClassTest Mask = static_cast<FPClassTest>(ClassVal); + KnownFromContext.knownNot(CondIsTrue ? ~Mask : Mask); + } +} + +static KnownFPClass computeKnownFPClassFromContext(const Value *V, + const SimplifyQuery &Q) { + KnownFPClass KnownFromContext; + + if (!Q.CxtI) + return KnownFromContext; + + if (Q.DC && Q.DT) { + // Handle dominating conditions. + for (BranchInst *BI : Q.DC->conditionsFor(V)) { + Value *Cond = BI->getCondition(); + + BasicBlockEdge Edge0(BI->getParent(), BI->getSuccessor(0)); + if (Q.DT->dominates(Edge0, Q.CxtI->getParent())) + computeKnownFPClassFromCond(V, Cond, /*CondIsTrue=*/true, Q.CxtI, + KnownFromContext); + + BasicBlockEdge Edge1(BI->getParent(), BI->getSuccessor(1)); + if (Q.DT->dominates(Edge1, Q.CxtI->getParent())) + computeKnownFPClassFromCond(V, Cond, /*CondIsTrue=*/false, Q.CxtI, + KnownFromContext); + } + } + + if (!Q.AC) + return KnownFromContext; // Try to restrict the floating-point classes based on information from // assumptions. @@ -4233,25 +4280,11 @@ static FPClassTest computeKnownFPClassFromAssumes(const Value *V, if (!isValidAssumeForContext(I, Q.CxtI, Q.DT)) continue; - CmpInst::Predicate Pred; - Value *LHS, *RHS; - uint64_t ClassVal = 0; - if (match(I->getArgOperand(0), m_FCmp(Pred, m_Value(LHS), m_Value(RHS)))) { - const APFloat *CRHS; - if (match(RHS, m_APFloat(CRHS))) { - auto [CmpVal, MaskIfTrue, MaskIfFalse] = - fcmpImpliesClass(Pred, *F, LHS, *CRHS, LHS != V); - if (CmpVal == V) - KnownFromAssume &= MaskIfTrue; - } - } else if (match(I->getArgOperand(0), - m_Intrinsic<Intrinsic::is_fpclass>( - m_Value(LHS), m_ConstantInt(ClassVal)))) { - KnownFromAssume &= static_cast<FPClassTest>(ClassVal); - } + computeKnownFPClassFromCond(V, I->getArgOperand(0), /*CondIsTrue=*/true, + Q.CxtI, KnownFromContext); } - return KnownFromAssume; + return KnownFromContext; } void computeKnownFPClass(const Value *V, const APInt &DemandedElts, @@ -4359,10 +4392,8 @@ void computeKnownFPClass(const Value *V, const APInt &DemandedElts, KnownNotFromFlags |= fcInf; } - if (Q.AC) { - FPClassTest AssumedClasses = computeKnownFPClassFromAssumes(V, Q); - KnownNotFromFlags |= ~AssumedClasses; - } + KnownFPClass AssumedClasses = computeKnownFPClassFromContext(V, Q); + KnownNotFromFlags |= ~AssumedClasses.KnownFPClasses; // We no longer need to find out about these bits from inputs if we can // assume this from flags/attributes. @@ -4370,6 +4401,12 @@ void computeKnownFPClass(const Value *V, const APInt &DemandedElts, auto ClearClassesFromFlags = make_scope_exit([=, &Known] { Known.knownNot(KnownNotFromFlags); + if (!Known.SignBit && AssumedClasses.SignBit) { + if (*AssumedClasses.SignBit) + Known.signBitMustBeOne(); + else + Known.signBitMustBeZero(); + } }); if (!Op) @@ -5271,7 +5308,8 @@ void computeKnownFPClass(const Value *V, const APInt &DemandedElts, bool First = true; - for (Value *IncValue : P->incoming_values()) { + for (const Use &U : P->operands()) { + Value *IncValue = U.get(); // Skip direct self references. if (IncValue == P) continue; @@ -5280,8 +5318,10 @@ void computeKnownFPClass(const Value *V, const APInt &DemandedElts, // Recurse, but cap the recursion to two levels, because we don't want // to waste time spinning around in loops. We need at least depth 2 to // detect known sign bits. - computeKnownFPClass(IncValue, DemandedElts, InterestedClasses, KnownSrc, - PhiRecursionLimit, Q); + computeKnownFPClass( + IncValue, DemandedElts, InterestedClasses, KnownSrc, + PhiRecursionLimit, + Q.getWithInstruction(P->getIncomingBlock(U)->getTerminator())); if (First) { Known = KnownSrc; diff --git a/llvm/test/Transforms/InstCombine/fpclass-from-dom-cond.ll b/llvm/test/Transforms/InstCombine/fpclass-from-dom-cond.ll new file mode 100644 index 0000000000000..48c6e91057094 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/fpclass-from-dom-cond.ll @@ -0,0 +1,208 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -S -passes=instcombine < %s | FileCheck %s + +define i1 @test1(float %x) { +; CHECK-LABEL: define i1 @test1( +; CHECK-SAME: float [[X:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[COND:%.*]] = fcmp ueq float [[X]], 0.000000e+00 +; CHECK-NEXT: br i1 [[COND]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; CHECK: if.then: +; CHECK-NEXT: ret i1 false +; CHECK: if.else: +; CHECK-NEXT: [[RET:%.*]] = call i1 @llvm.is.fpclass.f32(float [[X]], i32 780) +; CHECK-NEXT: ret i1 [[RET]] +; +entry: + %cond = fcmp ueq float %x, 0.000000e+00 + br i1 %cond, label %if.then, label %if.else + +if.then: + ret i1 false + +if.else: + %ret = call i1 @llvm.is.fpclass.f32(float %x, i32 783) + ret i1 %ret +} + +define i1 @test2(double %x) { +; CHECK-LABEL: define i1 @test2( +; CHECK-SAME: double [[X:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP:%.*]] = fcmp olt double [[X]], 0x3EB0C6F7A0000000 +; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; CHECK: if.then: +; CHECK-NEXT: ret i1 false +; CHECK: if.end: +; CHECK-NEXT: ret i1 false +; +entry: + %cmp = fcmp olt double %x, 0x3EB0C6F7A0000000 + br i1 %cmp, label %if.then, label %if.end +if.then: + ret i1 false +if.end: + %cmp.i = fcmp oeq double %x, 0.000000e+00 + ret i1 %cmp.i +} + +define i1 @test3(float %x) { +; CHECK-LABEL: define i1 @test3( +; CHECK-SAME: float [[X:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP:%.*]] = fcmp ogt float [[X]], 3.000000e+00 +; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[RET:%.*]] = fcmp oeq float [[X]], 0x7FF0000000000000 +; CHECK-NEXT: ret i1 [[RET]] +; CHECK: if.else: +; CHECK-NEXT: ret i1 false +; +entry: + %cmp = fcmp ogt float %x, 3.000000e+00 + br i1 %cmp, label %if.then, label %if.else +if.then: + %abs = call float @llvm.fabs.f32(float %x) + %ret = fcmp oeq float %abs, 0x7FF0000000000000 + ret i1 %ret +if.else: + ret i1 false +} + +define float @test4(float %x) { +; CHECK-LABEL: define float @test4( +; CHECK-SAME: float [[X:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP:%.*]] = fcmp olt float [[X]], 0x3EB0C6F7A0000000 +; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; CHECK: if.then: +; CHECK-NEXT: ret float 1.000000e+00 +; CHECK: if.end: +; CHECK-NEXT: [[DIV:%.*]] = fdiv float 1.000000e+00, [[X]] +; CHECK-NEXT: ret float [[DIV]] +; +entry: + %cmp = fcmp olt float %x, 0x3EB0C6F7A0000000 + br i1 %cmp, label %if.then, label %if.end + +if.then: + ret float 1.0 + +if.end: + %cmp.i = fcmp oeq float %x, 0.000000e+00 + %div = fdiv float 1.000000e+00, %x + %ret = select i1 %cmp.i, float 1.000000e+00, float %div + ret float %ret +} + +define i1 @test5(double %x, i1 %cond) { +; CHECK-LABEL: define i1 @test5( +; CHECK-SAME: double [[X:%.*]], i1 [[COND:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[COND]], label [[IF:%.*]], label [[EXIT:%.*]] +; CHECK: if: +; CHECK-NEXT: [[CMP:%.*]] = fcmp uno double [[X]], 0.000000e+00 +; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; CHECK: if.then: +; CHECK-NEXT: ret i1 false +; CHECK: if.end: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: [[Y:%.*]] = phi double [ -1.000000e+00, [[ENTRY:%.*]] ], [ [[X]], [[IF_END]] ] +; CHECK-NEXT: [[RET:%.*]] = tail call i1 @llvm.is.fpclass.f64(double [[Y]], i32 408) +; CHECK-NEXT: ret i1 [[RET]] +; +entry: + br i1 %cond, label %if, label %exit +if: + %cmp = fcmp uno double %x, 0.000000e+00 + br i1 %cmp, label %if.then, label %if.end +if.then: + ret i1 false +if.end: + br label %exit +exit: + %y = phi double [ -1.000000e+00, %entry ], [ %x, %if.end ] + %ret = tail call i1 @llvm.is.fpclass.f64(double %y, i32 411) + ret i1 %ret +} + +define i1 @test6(double %x) { +; CHECK-LABEL: define i1 @test6( +; CHECK-SAME: double [[X:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP:%.*]] = fcmp ogt double [[X]], 0.000000e+00 +; CHECK-NEXT: br i1 [[CMP]], label [[LAND_RHS:%.*]], label [[LAND_END:%.*]] +; CHECK: land.rhs: +; CHECK-NEXT: [[AND_I:%.*]] = bitcast double [[X]] to i64 +; CHECK-NEXT: [[CMP_I:%.*]] = icmp eq i64 [[AND_I]], 9218868437227405312 +; CHECK-NEXT: br label [[LAND_END]] +; CHECK: land.end: +; CHECK-NEXT: [[RET:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ [[CMP_I]], [[LAND_RHS]] ] +; CHECK-NEXT: ret i1 [[RET]] +; +entry: + %cmp = fcmp ogt double %x, 0.000000e+00 + br i1 %cmp, label %land.rhs, label %land.end + +land.rhs: + %abs = tail call double @llvm.fabs.f64(double %x) + %and.i = bitcast double %abs to i64 + %cmp.i = icmp eq i64 %and.i, 9218868437227405312 + br label %land.end + +land.end: + %ret = phi i1 [ false, %entry ], [ %cmp.i, %land.rhs ] + ret i1 %ret +} + +define i1 @test7(float %x) { +; CHECK-LABEL: define i1 @test7( +; CHECK-SAME: float [[X:%.*]]) { +; CHECK-NEXT: [[COND:%.*]] = call i1 @llvm.is.fpclass.f32(float [[X]], i32 345) +; CHECK-NEXT: br i1 [[COND]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[RET1:%.*]] = call i1 @llvm.is.fpclass.f32(float [[X]], i32 328) +; CHECK-NEXT: ret i1 [[RET1]] +; CHECK: if.else: +; CHECK-NEXT: [[RET2:%.*]] = call i1 @llvm.is.fpclass.f32(float [[X]], i32 128) +; CHECK-NEXT: ret i1 [[RET2]] +; + %cond = call i1 @llvm.is.fpclass.f32(float %x, i32 345) + br i1 %cond, label %if.then, label %if.else +if.then: + %ret1 = call i1 @llvm.is.fpclass.f32(float %x, i32 456) + ret i1 %ret1 +if.else: + %ret2 = call i1 @llvm.is.fpclass.f32(float %x, i32 456) + ret i1 %ret2 +} + +define i1 @test1_no_dominating(float %x, i1 %c) { +; CHECK-LABEL: define i1 @test1_no_dominating( +; CHECK-SAME: float [[X:%.*]], i1 [[C:%.*]]) { +; CHECK-NEXT: entry0: +; CHECK-NEXT: br i1 [[C]], label [[ENTRY:%.*]], label [[IF_ELSE:%.*]] +; CHECK: entry: +; CHECK-NEXT: [[COND:%.*]] = fcmp ueq float [[X]], 0.000000e+00 +; CHECK-NEXT: br i1 [[COND]], label [[IF_THEN:%.*]], label [[IF_ELSE]] +; CHECK: if.then: +; CHECK-NEXT: ret i1 false +; CHECK: if.else: +; CHECK-NEXT: [[RET:%.*]] = call i1 @llvm.is.fpclass.f32(float [[X]], i32 783) +; CHECK-NEXT: ret i1 [[RET]] +; +entry0: + br i1 %c, label %entry, label %if.else + +entry: + %cond = fcmp ueq float %x, 0.000000e+00 + br i1 %cond, label %if.then, label %if.else + +if.then: + ret i1 false + +if.else: + %ret = call i1 @llvm.is.fpclass.f32(float %x, i32 783) + ret i1 %ret +}

PR Link: llvm/llvm-project#80941

@src

This patch generalizes `simplifyAndOrOfFCmps` to simplify patterns like: ``` define i1 @src(float %x, float %y) { %or.cond.i = fcmp ord float %x, 0.000000e+00 %cmp.i.i34 = fcmp olt float %x, %y %cmp.i2.sink.i = and i1 %or.cond.i, %cmp.i.i34 ret i1 %cmp.i2.sink.i } define i1 @tgt(float %x, float %y) { %cmp.i.i34 = fcmp olt float %x, %y ret i1 %cmp.i.i34 } ``` Alive2: https://alive2.llvm.org/ce/z/9rydcx This patch and #80986 will fix the regression introduced by #80941. See also the IR diff dtcxzyw/llvm-opt-benchmark#199 (comment).

arsenm

Could use a few tests where the dominating condition is the fcmp + fabs/fneg case

llvm/lib/Analysis/ValueTracking.cpp

PR Link: llvm/llvm-project#80941

dtcxzyw · 2024-02-08T20:22:10Z

The regression dtcxzyw/llvm-opt-benchmark#199 (comment) has been fixed.

arsenm

lgtm with nit

llvm/lib/Analysis/DomConditionCache.cpp

dtcxzyw · 2024-02-12T10:21:51Z

Ping.

eaeltsin · 2024-02-15T20:37:12Z

Hi,

This seems to break a couple of our tests, can you please take a look?

Archive.zip is a source ll file and files obtained
by opt -O1 before and after this change.

Thank you!

dtcxzyw · 2024-02-16T04:20:14Z

Hi,

This seems to break a couple of our tests, can you please take a look?

Archive.zip is a source ll file and files obtained by opt -O1 before and after this change.

Thank you!

I will have a look.

dtcxzyw · 2024-02-16T04:52:24Z

Reduced test case: https://alive2.llvm.org/ce/z/YgQSin
I will post a fix later.

dtcxzyw · 2024-02-16T07:16:54Z

@eaeltsin Nice catch! Should be fixed by #81972.

This patch adds the missing `subnormal -> normal` part for `fpext` in `computeKnownFPClass`. Fixes the miscompilation reported by #80941 (comment).

This patch adds the missing `subnormal -> normal` part for `fpext` in `computeKnownFPClass`. Fixes the miscompilation reported by llvm#80941 (comment). (cherry picked from commit a5865c3)

dtcxzyw requested a review from nikic as a code owner February 7, 2024 06:21

llvmbot added llvm:analysis Includes value tracking, cost tables and constant folding llvm:transforms labels Feb 7, 2024

dtcxzyw requested review from arsenm, goldsteinn and jcranmer-intel February 7, 2024 06:21

dtcxzyw added a commit to dtcxzyw/llvm-opt-benchmark that referenced this pull request Feb 7, 2024

pre-commit: test PR80941

463ae01

PR Link: llvm/llvm-project#80941

This was referenced Feb 7, 2024

pre-commit: test PR80941 dtcxzyw/llvm-opt-benchmark#199

Closed

[InstSimplify] Generalize simplifyAndOrOfFCmps #81027

Merged

arsenm reviewed Feb 8, 2024

View reviewed changes

llvm/lib/Analysis/ValueTracking.cpp Outdated Show resolved Hide resolved

nikic mentioned this pull request Feb 8, 2024

[ValueTracking] Support dominating known bits condition in and/or #74728

Merged

dtcxzyw added 4 commits February 8, 2024 22:45

[ValueTracking] Add pre-commit tests. NFC.

f475b6d

[ValueTracking] Compute known FPClass from dominating condition

9fc28b7

fixup! [ValueTracking] Compute known FPClass from dominating condition

e1fa250

[ValueTracking] Add more tests.

90629b9

dtcxzyw force-pushed the perf/compute-known-fpclass-from-ctx-fcmp-isfpclass branch from b80a7db to 90629b9 Compare February 8, 2024 16:20

dtcxzyw added a commit to dtcxzyw/llvm-opt-benchmark that referenced this pull request Feb 8, 2024

pre-commit: test PR80941

fc65538

PR Link: llvm/llvm-project#80941

arsenm reviewed Feb 9, 2024

View reviewed changes

llvm/lib/Analysis/DomConditionCache.cpp Outdated Show resolved Hide resolved

llvm/lib/Analysis/DomConditionCache.cpp Outdated Show resolved Hide resolved

fixup! [ValueTracking] Compute known FPClass from dominating condition

5738f8e

dtcxzyw requested a review from arsenm February 11, 2024 01:22

arsenm approved these changes Feb 12, 2024

View reviewed changes

dtcxzyw merged commit 542a3cb into llvm:main Feb 13, 2024

dtcxzyw deleted the perf/compute-known-fpclass-from-ctx-fcmp-isfpclass branch February 13, 2024 03:18

dtcxzyw mentioned this pull request Feb 16, 2024

[ValueTracking] Fix computeKnownFPClass for fpext #81972

Merged

dtcxzyw added a commit that referenced this pull request Feb 17, 2024

[ValueTracking] Fix computeKnownFPClass for fpext (#81972)

a5865c3

This patch adds the missing `subnormal -> normal` part for `fpext` in `computeKnownFPClass`. Fixes the miscompilation reported by #80941 (comment).

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[ValueTracking] Compute known FPClass from dominating condition #80941

[ValueTracking] Compute known FPClass from dominating condition #80941

Uh oh!

dtcxzyw commented Feb 7, 2024

llvmbot commented Feb 7, 2024 •

edited

Loading

arsenm left a comment

Uh oh!

dtcxzyw commented Feb 8, 2024

arsenm left a comment

Uh oh!

Uh oh!

dtcxzyw commented Feb 12, 2024

eaeltsin commented Feb 15, 2024

dtcxzyw commented Feb 16, 2024

dtcxzyw commented Feb 16, 2024

dtcxzyw commented Feb 16, 2024

Labels

4 participants

[ValueTracking] Compute known FPClass from dominating condition #80941

[ValueTracking] Compute known FPClass from dominating condition #80941

Uh oh!

Conversation

dtcxzyw commented Feb 7, 2024

llvmbot commented Feb 7, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

arsenm left a comment

Choose a reason for hiding this comment

Uh oh!

dtcxzyw commented Feb 8, 2024

arsenm left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

dtcxzyw commented Feb 12, 2024

eaeltsin commented Feb 15, 2024

dtcxzyw commented Feb 16, 2024

dtcxzyw commented Feb 16, 2024

dtcxzyw commented Feb 16, 2024

Labels

4 participants

llvmbot commented Feb 7, 2024 •

edited

Loading