Skip to content

Commit a14b09d

Browse files
author
Yonghong Song
committed
[BPF] Support signed division at cpu v1 with constant divisor
The motivating example is shown below: $ cat t1.c struct S { int var[3]; }; int foo1 (struct S *a, struct S *b) { return a - b; } For cpu v1/v2/v3, the compilation will fail with the following error: $ clang --target=bpf -O2 -c t1.c -mcpu=v3 t1.c:4:5: error: unsupported signed division, please convert to unsigned div/mod. 4 | int foo1 (struct S *a, struct S *b) | ^ 1 error generated. The reason is that sdiv/smod is only supported at -mcpu=v4. At cpu v1/v2/v3, only udiv/umod is supported. But the above example (func foo1()) is reasonably common, and users have to work around the compilation failure by using udiv with conditionals. For x86, compiling the above t1.c and dumping the asm code gives: $ clang -O2 -c t1.c && llvm-objdump -d t1.o 0000000000000000 <foo1>: 0: 48 29 f7 subq %rsi, %rdi 3: 48 c1 ef 02 shrq $0x2, %rdi 7: 69 c7 ab aa aa aa imull $0xaaaaaaab, %edi, %eax # imm = 0xAAAAAAAB d: c3 retq Basically, sdiv can be replaced with sub, shr and imul. The latest gcc-bpf is also able to generate code similar to x86 with -mcpu=v1. See https://godbolt.org/z/feP9ETbjj So let us add clang support for sdiv (constant divisor) at -mcpu=v1 as well. But we still want to keep udiv untouched at -mcpu=v1. One more parameter, "bool IsSigned", is added to isIntDivCheap(). The "IsSigned" parameter is used only by the BPF backend to ensure udiv is not impacted. Note that only 32-bit sdiv (constant divisor) can be converted into sub/shr/imul. 64-bit sdiv (constant divisor) cannot be converted since bpf does not support 64-bit multiplication without potential overflow.
1 parent 5d45815 commit a14b09d

19 files changed

+133
-35
lines changed

llvm/include/llvm/CodeGen/BasicTTIImpl.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -565,7 +565,12 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
565565
if (!isa<ConstantInt>(Inst.getOperand(1)))
566566
return false;
567567
EVT VT = getTLI()->getValueType(DL, Inst.getType());
568-
return !getTLI()->isIntDivCheap(VT, Fn.getAttributes());
568+
569+
bool IsSigned = false; // UDiv/URem are unsigned; only SDiv/SRem below flip this to true.
570+
if (Inst.getOpcode() == Instruction::SDiv ||
571+
Inst.getOpcode() == Instruction::SRem)
572+
IsSigned = true; // Signed opcodes must query isIntDivCheap() as signed.
573+
return !getTLI()->isIntDivCheap(VT, IsSigned, Fn.getAttributes());
569574
}
570575
};
571576

llvm/include/llvm/CodeGen/TargetLowering.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -549,7 +549,9 @@ class TargetLoweringBase {
549549
/// several shifts, adds, and multiplies for this target.
550550
/// The definition of "cheaper" may depend on whether we're optimizing
551551
/// for speed or for size.
552-
virtual bool isIntDivCheap(EVT VT, AttributeList Attr) const { return false; }
552+
virtual bool isIntDivCheap(EVT VT, bool IsSigned, AttributeList Attr) const {
553+
return false; // Base default: division is never reported as cheap; VT, IsSigned and Attr are ignored here.
554+
}
553555

554556
/// Return true if the target can handle a standalone remainder operation.
555557
virtual bool hasStandaloneRem(EVT VT) const {

llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5376,7 +5376,7 @@ bool CombinerHelper::matchUDivByConst(MachineInstr &MI) {
53765376
const auto &TLI = getTargetLowering();
53775377
LLVMContext &Ctx = MF.getFunction().getContext();
53785378
auto &DL = MF.getDataLayout();
5379-
if (TLI.isIntDivCheap(getApproximateEVTForLLT(DstTy, DL, Ctx), Attr))
5379+
if (TLI.isIntDivCheap(getApproximateEVTForLLT(DstTy, DL, Ctx), false, Attr))
53805380
return false;
53815381

53825382
// Don't do this for minsize because the instruction sequence is usually
@@ -5426,7 +5426,7 @@ bool CombinerHelper::matchSDivByConst(MachineInstr &MI) {
54265426
const auto &TLI = getTargetLowering();
54275427
LLVMContext &Ctx = MF.getFunction().getContext();
54285428
auto &DL = MF.getDataLayout();
5429-
if (TLI.isIntDivCheap(getApproximateEVTForLLT(DstTy, DL, Ctx), Attr))
5429+
if (TLI.isIntDivCheap(getApproximateEVTForLLT(DstTy, DL, Ctx), true, Attr))
54305430
return false;
54315431

54325432
// Don't do this for minsize because the instruction sequence is usually

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4844,7 +4844,7 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
48444844
// If the divisor is constant, then return DIVREM only if isIntDivCheap() is
48454845
// true. Otherwise, we break the simplification logic in visitREM().
48464846
AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4847-
if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
4847+
if (!N1C || TLI.isIntDivCheap(N->getValueType(0), true, Attr))
48484848
if (SDValue DivRem = useDivRem(N))
48494849
return DivRem;
48504850

@@ -4929,7 +4929,7 @@ SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
49294929
// trade-offs.
49304930
AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
49314931
if (isConstantOrConstantVector(N1) &&
4932-
!TLI.isIntDivCheap(N->getValueType(0), Attr))
4932+
!TLI.isIntDivCheap(N->getValueType(0), true, Attr))
49334933
if (SDValue Op = BuildSDIV(N))
49344934
return Op;
49354935

@@ -4984,7 +4984,7 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
49844984
// If the divisor is constant, then return DIVREM only if isIntDivCheap() is
49854985
// true. Otherwise, we break the simplification logic in visitREM().
49864986
AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4987-
if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
4987+
if (!N1C || TLI.isIntDivCheap(N->getValueType(0), false, Attr))
49884988
if (SDValue DivRem = useDivRem(N))
49894989
return DivRem;
49904990

@@ -5033,7 +5033,7 @@ SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
50335033
// fold (udiv x, c) -> alternate
50345034
AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
50355035
if (isConstantOrConstantVector(N1) &&
5036-
!TLI.isIntDivCheap(N->getValueType(0), Attr))
5036+
!TLI.isIntDivCheap(N->getValueType(0), false, Attr))
50375037
if (SDValue Op = BuildUDIV(N))
50385038
return Op;
50395039

@@ -5115,7 +5115,7 @@ SDValue DAGCombiner::visitREM(SDNode *N) {
51155115
// by skipping the simplification if isIntDivCheap(). When div is not cheap,
51165116
// combine will not return a DIVREM. Regardless, checking cheapness here
51175117
// makes sense since the simplification results in fatter code.
5118-
if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
5118+
if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, isSigned, Attr)) {
51195119
if (isSigned) {
51205120
// check if we can build faster implementation for srem
51215121
if (SDValue OptimizedRem = buildOptimizedSREM(N0, N1, N))

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5377,11 +5377,13 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
53775377
N0.hasOneUse() && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
53785378
// When division is cheap or optimizing for minimum size,
53795379
// fall through to DIVREM creation by skipping this fold.
5380-
if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttr(Attribute::MinSize)) {
5381-
if (N0.getOpcode() == ISD::UREM) {
5380+
bool IsSigned = N0.getOpcode() == ISD::SREM;
5381+
if (!isIntDivCheap(VT, IsSigned, Attr) &&
5382+
!Attr.hasFnAttr(Attribute::MinSize)) {
5383+
if (!IsSigned) {
53825384
if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl))
53835385
return Folded;
5384-
} else if (N0.getOpcode() == ISD::SREM) {
5386+
} else {
53855387
if (SDValue Folded = buildSREMEqFold(VT, N0, N1, Cond, DCI, dl))
53865388
return Folded;
53875389
}
@@ -6233,7 +6235,7 @@ SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
62336235
SelectionDAG &DAG,
62346236
SmallVectorImpl<SDNode *> &Created) const {
62356237
AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
6236-
if (isIntDivCheap(N->getValueType(0), Attr))
6238+
if (isIntDivCheap(N->getValueType(0), true, Attr))
62376239
return SDValue(N, 0); // Lower SDIV as SDIV
62386240
return SDValue();
62396241
}
@@ -6243,7 +6245,7 @@ TargetLowering::BuildSREMPow2(SDNode *N, const APInt &Divisor,
62436245
SelectionDAG &DAG,
62446246
SmallVectorImpl<SDNode *> &Created) const {
62456247
AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
6246-
if (isIntDivCheap(N->getValueType(0), Attr))
6248+
if (isIntDivCheap(N->getValueType(0), true, Attr))
62476249
return SDValue(N, 0); // Lower SREM as SREM
62486250
return SDValue();
62496251
}

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18544,7 +18544,7 @@ AArch64TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
1854418544
SelectionDAG &DAG,
1854518545
SmallVectorImpl<SDNode *> &Created) const {
1854618546
AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
18547-
if (isIntDivCheap(N->getValueType(0), Attr))
18547+
if (isIntDivCheap(N->getValueType(0), true, Attr))
1854818548
return SDValue(N, 0); // Lower SDIV as SDIV
1854918549

1855018550
EVT VT = N->getValueType(0);
@@ -18574,7 +18574,7 @@ AArch64TargetLowering::BuildSREMPow2(SDNode *N, const APInt &Divisor,
1857418574
SelectionDAG &DAG,
1857518575
SmallVectorImpl<SDNode *> &Created) const {
1857618576
AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
18577-
if (isIntDivCheap(N->getValueType(0), Attr))
18577+
if (isIntDivCheap(N->getValueType(0), true, Attr))
1857818578
return SDValue(N, 0); // Lower SREM as SREM
1857918579

1858018580
EVT VT = N->getValueType(0);
@@ -27816,7 +27816,8 @@ void AArch64TargetLowering::insertCopiesSplitCSR(
2781627816
}
2781727817
}
2781827818

27819-
bool AArch64TargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
27819+
bool AArch64TargetLowering::isIntDivCheap(EVT VT, bool IsSigned,
27820+
AttributeList Attr) const {
2782027821
// Integer division on AArch64 is expensive. However, when aggressively
2782127822
// optimizing for code size, we prefer to use a div instruction, as it is
2782227823
// usually smaller than the alternative sequence.

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -845,7 +845,7 @@ class AArch64TargetLowering : public TargetLowering {
845845
return AArch64::X1;
846846
}
847847

848-
bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
848+
bool isIntDivCheap(EVT VT, bool IsSigned, AttributeList Attr) const override;
849849

850850
bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
851851
const MachineFunction &MF) const override {

llvm/lib/Target/BPF/BPFISelLowering.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -238,6 +238,11 @@ bool BPFTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
238238
return TargetLoweringBase::isZExtFree(Val, VT2);
239239
}
240240

241+
bool BPFTargetLowering::isIntDivCheap(EVT VT, bool IsSigned,
242+
AttributeList Attr) const {
243+
return HasMovsx || !IsSigned; // Unsigned division is always reported cheap; signed division only when HasMovsx is set. VT and Attr are not consulted.
244+
}
245+
241246
BPFTargetLowering::ConstraintType
242247
BPFTargetLowering::getConstraintType(StringRef Constraint) const {
243248
if (Constraint.size() == 1) {

llvm/lib/Target/BPF/BPFISelLowering.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,7 @@ class BPFTargetLowering : public TargetLowering {
118118
return Op.size() >= 8 ? MVT::i64 : MVT::i32;
119119
}
120120

121-
bool isIntDivCheap(EVT VT, AttributeList Attr) const override { return true; }
121+
bool isIntDivCheap(EVT VT, bool IsSigned, AttributeList Attr) const override;
122122

123123
bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
124124
Type *Ty) const override {

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12680,7 +12680,7 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
1268012680
// to multiply by magic constant.
1268112681
AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
1268212682
if (N->getOperand(1).getOpcode() == ISD::Constant &&
12683-
!isIntDivCheap(N->getValueType(0), Attr))
12683+
!isIntDivCheap(N->getValueType(0), N->getOpcode() == ISD::SDIV, Attr))
1268412684
return;
1268512685

1268612686
// If the input is i32, use ANY_EXTEND since the W instructions don't read
@@ -21275,7 +21275,8 @@ SDValue RISCVTargetLowering::joinRegisterPartsIntoValue(
2127521275
return SDValue();
2127621276
}
2127721277

21278-
bool RISCVTargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
21278+
bool RISCVTargetLowering::isIntDivCheap(EVT VT, bool IsSigned,
21279+
AttributeList Attr) const {
2127921280
// When aggressively optimizing for code size, we prefer to use a div
2128021281
// instruction, as it is usually smaller than the alternative sequence.
2128121282
// TODO: Add vector division?
@@ -21745,7 +21746,7 @@ RISCVTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
2174521746
SelectionDAG &DAG,
2174621747
SmallVectorImpl<SDNode *> &Created) const {
2174721748
AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
21748-
if (isIntDivCheap(N->getValueType(0), Attr))
21749+
if (isIntDivCheap(N->getValueType(0), true, Attr))
2174921750
return SDValue(N, 0); // Lower SDIV as SDIV
2175021751

2175121752
// Only perform this transform if short forward branch opt is supported.

0 commit comments

Comments
 (0)