Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions llvm/docs/LangRef.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2408,6 +2408,17 @@ example:
attempt is made to diagnose unsupported uses. Currently this
attribute is respected by the AMDGPU and NVPTX backends.

``"denormal-fp-math-bf16"``
Same as ``"denormal-fp-math"``, but only controls the behavior of
the Brain Float16 type (or vectors of Brain Float16). If both are
present, this overrides ``"denormal-fp-math"``. Not all targets
support separately setting the denormal mode per type, and no
attempt is made to diagnose unsupported uses. Currently this
attribute is respected by the X86 backend.

If this attribute is not specified, the default is
``"preserve-sign,preserve-sign"``.

``"thunk"``
This attribute indicates that the function will delegate to some other
function with a tail call. The prototype of a thunk should not be used for
Expand Down
1 change: 1 addition & 0 deletions llvm/include/llvm/CodeGen/CommandFlags.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ bool getEnableNoTrappingFPMath();

DenormalMode::DenormalModeKind getDenormalFPMath();
DenormalMode::DenormalModeKind getDenormalFP32Math();
DenormalMode::DenormalModeKind getDenormalBF16Math();

bool getEnableHonorSignDependentRoundingFPMath();

Expand Down
16 changes: 16 additions & 0 deletions llvm/lib/CodeGen/CommandFlags.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ CGOPT(bool, EnableNoTrappingFPMath)
CGOPT(bool, EnableAIXExtendedAltivecABI)
CGOPT(DenormalMode::DenormalModeKind, DenormalFPMath)
CGOPT(DenormalMode::DenormalModeKind, DenormalFP32Math)
CGOPT(DenormalMode::DenormalModeKind, DenormalBF16Math)
CGOPT(bool, EnableHonorSignDependentRoundingFPMath)
CGOPT(FloatABI::ABIType, FloatABIForCalls)
CGOPT(FPOpFusion::FPOpFusionMode, FuseFPOps)
Expand Down Expand Up @@ -277,6 +278,13 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() {
DenormFlagEnumOptions);
CGBINDOPT(DenormalFP32Math);

static cl::opt<DenormalMode::DenormalModeKind> DenormalBF16Math(
"denormal-fp-math-bf16",
cl::desc("Select which denormal numbers the code is permitted to require "
"for bfloat"),
cl::init(DenormalMode::PreserveSign), DenormFlagEnumOptions);
CGBINDOPT(DenormalBF16Math);

static cl::opt<bool> EnableHonorSignDependentRoundingFPMath(
"enable-sign-dependent-rounding-fp-math", cl::Hidden,
cl::desc("Force codegen to assume rounding mode can change dynamically"),
Expand Down Expand Up @@ -719,6 +727,14 @@ void codegen::setFunctionAttributes(StringRef CPU, StringRef Features,
DenormalMode(DenormKind, DenormKind).str());
}

if (DenormalBF16MathView->getNumOccurrences() > 0 &&
!F.hasFnAttribute("denormal-fp-math-bf16")) {
// FIXME: Command line flag should expose separate input/output modes.
DenormalMode::DenormalModeKind DenormKind = getDenormalBF16Math();
NewAttrs.addAttribute("denormal-fp-math-bf16",
DenormalMode(DenormKind, DenormKind).str());
}

if (TrapFuncNameView->getNumOccurrences() > 0)
for (auto &B : F)
for (auto &I : B)
Expand Down
9 changes: 6 additions & 3 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2283,7 +2283,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
}
}

if (!Subtarget.useSoftFloat() &&
if (!Subtarget.useSoftFloat() && Subtarget.getDenormalMathFTZDAZBF16() &&
(Subtarget.hasAVXNECONVERT() || Subtarget.hasBF16())) {
addRegisterClass(MVT::v8bf16, Subtarget.hasAVX512() ? &X86::VR128XRegClass
: &X86::VR128RegClass);
Expand Down Expand Up @@ -8740,6 +8740,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
return LowerBUILD_VECTORvXi1(Op, dl, DAG, Subtarget);

if (VT.getVectorElementType() == MVT::bf16 &&
Subtarget.getDenormalMathFTZDAZBF16() &&
(Subtarget.hasAVXNECONVERT() || Subtarget.hasBF16()))
return LowerBUILD_VECTORvXbf16(Op, DAG, Subtarget);

Expand Down Expand Up @@ -21536,6 +21537,7 @@ SDValue X86TargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {

if (VT.getScalarType() == MVT::bf16) {
if (SVT.getScalarType() == MVT::f32 &&
Subtarget.getDenormalMathFTZDAZBF16() &&
((Subtarget.hasBF16() && Subtarget.hasVLX()) ||
Subtarget.hasAVXNECONVERT()))
return Op;
Expand Down Expand Up @@ -21644,8 +21646,9 @@ SDValue X86TargetLowering::LowerFP_TO_BF16(SDValue Op,
SDLoc DL(Op);

MVT SVT = Op.getOperand(0).getSimpleValueType();
if (SVT == MVT::f32 && ((Subtarget.hasBF16() && Subtarget.hasVLX()) ||
Subtarget.hasAVXNECONVERT())) {
if (SVT == MVT::f32 && Subtarget.getDenormalMathFTZDAZBF16() &&
((Subtarget.hasBF16() && Subtarget.hasVLX()) ||
Subtarget.hasAVXNECONVERT())) {
SDValue Res;
Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v4f32, Op.getOperand(0));
Res = DAG.getNode(X86ISD::CVTNEPS2BF16, DL, MVT::v8bf16, Res);
Expand Down
4 changes: 3 additions & 1 deletion llvm/lib/Target/X86/X86Subtarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -324,12 +324,14 @@ X86Subtarget::X86Subtarget(const Triple &TT, StringRef CPU, StringRef TuneCPU,
StringRef FS, const X86TargetMachine &TM,
MaybeAlign StackAlignOverride,
unsigned PreferVectorWidthOverride,
unsigned RequiredVectorWidth)
unsigned RequiredVectorWidth,
bool DenormalMathFTZDAZBF16)
: X86GenSubtargetInfo(TT, CPU, TuneCPU, FS),
PICStyle(PICStyles::Style::None), TM(TM), TargetTriple(TT),
StackAlignOverride(StackAlignOverride),
PreferVectorWidthOverride(PreferVectorWidthOverride),
RequiredVectorWidth(RequiredVectorWidth),
DenormalMathFTZDAZBF16(DenormalMathFTZDAZBF16),
InstrInfo(initializeSubtargetDependencies(CPU, TuneCPU, FS)),
TLInfo(TM, *this), FrameLowering(*this, getStackAlignment()) {
// Determine the PICStyle based on the target selected.
Expand Down
8 changes: 6 additions & 2 deletions llvm/lib/Target/X86/X86Subtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,9 @@ class X86Subtarget final : public X86GenSubtargetInfo {
/// Required vector width from function attribute.
unsigned RequiredVectorWidth;

/// Denormal math for bfloat from function attribute.
bool DenormalMathFTZDAZBF16 = false;

X86SelectionDAGInfo TSInfo;
// Ordering here is important. X86InstrInfo initializes X86RegisterInfo which
// X86TargetLowering needs.
Expand All @@ -119,8 +122,8 @@ class X86Subtarget final : public X86GenSubtargetInfo {
///
X86Subtarget(const Triple &TT, StringRef CPU, StringRef TuneCPU, StringRef FS,
const X86TargetMachine &TM, MaybeAlign StackAlignOverride,
unsigned PreferVectorWidthOverride,
unsigned RequiredVectorWidth);
unsigned PreferVectorWidthOverride, unsigned RequiredVectorWidth,
bool DenormalMathFTZDAZBF16);

const X86TargetLowering *getTargetLowering() const override {
return &TLInfo;
Expand Down Expand Up @@ -238,6 +241,7 @@ class X86Subtarget final : public X86GenSubtargetInfo {

unsigned getPreferVectorWidth() const { return PreferVectorWidth; }
unsigned getRequiredVectorWidth() const { return RequiredVectorWidth; }
/// Returns the bfloat denormal-math flag stored in DenormalMathFTZDAZBF16,
/// i.e. the value this subtarget was constructed with.
bool getDenormalMathFTZDAZBF16() const { return DenormalMathFTZDAZBF16; }

// Helper functions to determine when we should allow widening to 512-bit
// during codegen.
Expand Down
11 changes: 10 additions & 1 deletion llvm/lib/Target/X86/X86TargetMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,15 @@ X86TargetMachine::getSubtargetImpl(const Function &F) const {
}
}

// Extract denormal-fp-math-bf16 attribute.
// The flag starts out true and is cleared only when the attribute is present
// with a value other than "" or "preserve-sign,preserve-sign". The comparison
// is a plain string match, so any other spelling (for example a
// single-component value) also clears the flag.
// NOTE(review): the flag is passed to the X86Subtarget constructor further
// down, but the code visible here never folds it into Key -- confirm that two
// functions differing only in "denormal-fp-math-bf16" cannot end up sharing
// one cached subtarget.
bool DenormalMathFTZDAZBF16 = true;
Attribute DenormalBF16MathAttr = F.getFnAttribute("denormal-fp-math-bf16");
if (DenormalBF16MathAttr.isValid()) {
StringRef Val = DenormalBF16MathAttr.getValueAsString();
if (Val != "" && Val != "preserve-sign,preserve-sign")
DenormalMathFTZDAZBF16 = false;
}

// Add CPU to the Key.
Key += CPU;

Expand Down Expand Up @@ -339,7 +348,7 @@ X86TargetMachine::getSubtargetImpl(const Function &F) const {
I = std::make_unique<X86Subtarget>(
TargetTriple, CPU, TuneCPU, FS, *this,
MaybeAlign(F.getParent()->getOverrideStackAlignment()),
PreferVectorWidthOverride, RequiredVectorWidth);
PreferVectorWidthOverride, RequiredVectorWidth, DenormalMathFTZDAZBF16);
}
return I.get();
}
Expand Down
78 changes: 78 additions & 0 deletions llvm/test/CodeGen/X86/bfloat-ftz-daz.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-linux-gnu -mattr=avxneconvert | FileCheck %s --check-prefixes=FTZDAZ
; RUN: llc < %s -mtriple=x86_64-linux-gnu -denormal-fp-math-bf16=ieee -mattr=avxneconvert | FileCheck %s --check-prefixes=NOFTZDAZ
; RUN: llc < %s -mtriple=x86_64-linux-gnu -denormal-fp-math-bf16=preserve-sign -mattr=avxneconvert | FileCheck %s --check-prefixes=FTZDAZ
; RUN: llc < %s -mtriple=x86_64-linux-gnu -denormal-fp-math=ieee -mattr=avxneconvert | FileCheck %s --check-prefixes=FTZDAZ
; RUN: llc < %s -mtriple=x86_64-linux-gnu -denormal-fp-math=ieee -denormal-fp-math-bf16=ieee -mattr=avxneconvert | FileCheck %s --check-prefixes=NOFTZDAZ

define void @add_default_attr(ptr %pa, ptr %pb, ptr %pc) nounwind {
; FTZDAZ-LABEL: add_default_attr:
; FTZDAZ: # %bb.0:
; FTZDAZ-NEXT: movzwl (%rsi), %eax
; FTZDAZ-NEXT: shll $16, %eax
; FTZDAZ-NEXT: vmovd %eax, %xmm0
; FTZDAZ-NEXT: movzwl (%rdi), %eax
; FTZDAZ-NEXT: shll $16, %eax
; FTZDAZ-NEXT: vmovd %eax, %xmm1
; FTZDAZ-NEXT: vaddss %xmm0, %xmm1, %xmm0
; FTZDAZ-NEXT: {vex} vcvtneps2bf16 %xmm0, %xmm0
; FTZDAZ-NEXT: vpextrw $0, %xmm0, (%rdx)
; FTZDAZ-NEXT: retq
;
; NOFTZDAZ-LABEL: add_default_attr:
; NOFTZDAZ: # %bb.0:
; NOFTZDAZ-NEXT: pushq %rbx
; NOFTZDAZ-NEXT: movq %rdx, %rbx
; NOFTZDAZ-NEXT: movzwl (%rsi), %eax
; NOFTZDAZ-NEXT: shll $16, %eax
; NOFTZDAZ-NEXT: vmovd %eax, %xmm0
; NOFTZDAZ-NEXT: movzwl (%rdi), %eax
; NOFTZDAZ-NEXT: shll $16, %eax
; NOFTZDAZ-NEXT: vmovd %eax, %xmm1
; NOFTZDAZ-NEXT: vaddss %xmm0, %xmm1, %xmm0
; NOFTZDAZ-NEXT: callq __truncsfbf2@PLT
; NOFTZDAZ-NEXT: vpextrw $0, %xmm0, (%rbx)
; NOFTZDAZ-NEXT: popq %rbx
; NOFTZDAZ-NEXT: retq
%a = load bfloat, ptr %pa
%b = load bfloat, ptr %pb
%add = fadd bfloat %a, %b
store bfloat %add, ptr %pc
ret void
}

; Same bfloat load/fadd/store body as @add_default_attr, but this function
; carries an explicit "denormal-fp-math-bf16"="ieee,ieee" attribute.
; NOTE(review): the FTZDAZ expectations below still select vcvtneps2bf16 even
; though the function-level attribute requests ieee -- confirm this is the
; intended behavior and not the attribute being ignored for this function.
define void @add_no_ftz_daz_attr(ptr %pa, ptr %pb, ptr %pc) nounwind "denormal-fp-math-bf16"="ieee,ieee" {
; FTZDAZ-LABEL: add_no_ftz_daz_attr:
; FTZDAZ: # %bb.0:
; FTZDAZ-NEXT: movzwl (%rsi), %eax
; FTZDAZ-NEXT: shll $16, %eax
; FTZDAZ-NEXT: vmovd %eax, %xmm0
; FTZDAZ-NEXT: movzwl (%rdi), %eax
; FTZDAZ-NEXT: shll $16, %eax
; FTZDAZ-NEXT: vmovd %eax, %xmm1
; FTZDAZ-NEXT: vaddss %xmm0, %xmm1, %xmm0
; FTZDAZ-NEXT: {vex} vcvtneps2bf16 %xmm0, %xmm0
; FTZDAZ-NEXT: vpextrw $0, %xmm0, (%rdx)
; FTZDAZ-NEXT: retq
;
; NOFTZDAZ-LABEL: add_no_ftz_daz_attr:
; NOFTZDAZ: # %bb.0:
; NOFTZDAZ-NEXT: pushq %rbx
; NOFTZDAZ-NEXT: movq %rdx, %rbx
; NOFTZDAZ-NEXT: movzwl (%rsi), %eax
; NOFTZDAZ-NEXT: shll $16, %eax
; NOFTZDAZ-NEXT: vmovd %eax, %xmm0
; NOFTZDAZ-NEXT: movzwl (%rdi), %eax
; NOFTZDAZ-NEXT: shll $16, %eax
; NOFTZDAZ-NEXT: vmovd %eax, %xmm1
; NOFTZDAZ-NEXT: vaddss %xmm0, %xmm1, %xmm0
; NOFTZDAZ-NEXT: callq __truncsfbf2@PLT
; NOFTZDAZ-NEXT: vpextrw $0, %xmm0, (%rbx)
; NOFTZDAZ-NEXT: popq %rbx
; NOFTZDAZ-NEXT: retq
%a = load bfloat, ptr %pa
%b = load bfloat, ptr %pb
%add = fadd bfloat %a, %b
store bfloat %add, ptr %pc
ret void
}
23 changes: 23 additions & 0 deletions llvm/test/Other/opt-override-denormal-fp-math-bf16.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
; RUN: opt -S -denormal-fp-math-bf16=ieee %s | FileCheck -check-prefixes=IEEE,ALL %s
; RUN: opt -S -denormal-fp-math-bf16=preserve-sign %s | FileCheck -check-prefixes=PRESERVESIGN,ALL %s
; RUN: opt -S -denormal-fp-math-bf16=positive-zero %s | FileCheck -check-prefixes=POSITIVEZERO,ALL %s

; ALL: @no_denormal_fp_math_f32_attr() [[NOATTR:#[0-9]+]] {
define i32 @no_denormal_fp_math_f32_attr() #0 {
entry:
ret i32 0
}

; ALL: denormal_fp_math_attr_preserve_sign_ieee() [[ATTR:#[0-9]+]] {
define i32 @denormal_fp_math_attr_preserve_sign_ieee() #1 {
entry:
ret i32 0
}

; ALL-DAG: attributes [[ATTR]] = { nounwind "denormal-fp-math-bf16"="preserve-sign,ieee" }
; IEEE-DAG: attributes [[NOATTR]] = { nounwind "denormal-fp-math-bf16"="ieee,ieee" }
; PRESERVESIGN-DAG: attributes [[NOATTR]] = { nounwind "denormal-fp-math-bf16"="preserve-sign,preserve-sign" }
; POSITIVEZERO-DAG: attributes [[NOATTR]] = { nounwind "denormal-fp-math-bf16"="positive-zero,positive-zero" }

attributes #0 = { nounwind }
attributes #1 = { nounwind "denormal-fp-math-bf16"="preserve-sign,ieee" }
20 changes: 17 additions & 3 deletions llvm/test/Other/opt-override-denormal-fp-math-mixed.ll
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,17 @@
; RUN: opt -S -denormal-fp-math-f32=preserve-sign %s | FileCheck -check-prefixes=PRESERVESIGNF32,ALL %s
; RUN: opt -S -denormal-fp-math-f32=positive-zero %s | FileCheck -check-prefixes=POSITIVEZEROF32,ALL %s

; RUN: opt -S -denormal-fp-math-bf16=ieee %s | FileCheck -check-prefixes=IEEEBF16,ALL %s
; RUN: opt -S -denormal-fp-math-bf16=preserve-sign %s | FileCheck -check-prefixes=PRESERVESIGNBF16,ALL %s
; RUN: opt -S -denormal-fp-math-bf16=positive-zero %s | FileCheck -check-prefixes=POSITIVEZEROBF16,ALL %s

; RUN: opt -S -denormal-fp-math=ieee -denormal-fp-math-f32=ieee %s | FileCheck -check-prefixes=IEEE-BOTH,ALL %s
; RUN: opt -S -denormal-fp-math=preserve-sign -denormal-fp-math-f32=preserve-sign %s | FileCheck -check-prefixes=PRESERVESIGN-BOTH,ALL %s
; RUN: opt -S -denormal-fp-math=positive-zero -denormal-fp-math-f32=positive-zero %s | FileCheck -check-prefixes=POSITIVEZERO-BOTH,ALL %s


; RUN: opt -S -denormal-fp-math=ieee -denormal-fp-math-bf16=ieee %s | FileCheck -check-prefixes=IEEE-BOTH2,ALL %s
; RUN: opt -S -denormal-fp-math=preserve-sign -denormal-fp-math-bf16=preserve-sign %s | FileCheck -check-prefixes=PRESERVESIGN-BOTH2,ALL %s
; RUN: opt -S -denormal-fp-math=positive-zero -denormal-fp-math-bf16=positive-zero %s | FileCheck -check-prefixes=POSITIVEZERO-BOTH2,ALL %s

; ALL: @no_denormal_fp_math_attrs() [[NOATTR:#[0-9]+]] {
define i32 @no_denormal_fp_math_attrs() #0 {
Expand All @@ -24,7 +30,7 @@ entry:
ret i32 0
}

; ALL-DAG: attributes [[ATTR]] = { nounwind "denormal-fp-math"="preserve-sign,ieee" "denormal-fp-math-f32"="preserve-sign,ieee" }
; ALL-DAG: attributes [[ATTR]] = { nounwind "denormal-fp-math"="preserve-sign,ieee" "denormal-fp-math-bf16"="preserve-sign,ieee" "denormal-fp-math-f32"="preserve-sign,ieee" }

; IEEE-DAG: attributes [[NOATTR]] = { nounwind "denormal-fp-math"="ieee,ieee" }
; PRESERVESIGN-DAG: attributes [[NOATTR]] = { nounwind "denormal-fp-math"="preserve-sign,preserve-sign" }
Expand All @@ -34,9 +40,17 @@ entry:
; PRESERVESIGNF32-DAG: attributes [[NOATTR]] = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
; POSITIVEZEROF32-DAG: attributes [[NOATTR]] = { nounwind "denormal-fp-math-f32"="positive-zero,positive-zero" }

; IEEEBF16-DAG: attributes [[NOATTR]] = { nounwind "denormal-fp-math-bf16"="ieee,ieee" }
; PRESERVESIGNBF16-DAG: attributes [[NOATTR]] = { nounwind "denormal-fp-math-bf16"="preserve-sign,preserve-sign" }
; POSITIVEZEROBF16-DAG: attributes [[NOATTR]] = { nounwind "denormal-fp-math-bf16"="positive-zero,positive-zero" }

; IEEE-BOTH-DAG: attributes [[NOATTR]] = { nounwind "denormal-fp-math"="ieee,ieee" "denormal-fp-math-f32"="ieee,ieee" }
; PRESERVESIGN-BOTH-DAG: attributes [[NOATTR]] = { nounwind "denormal-fp-math"="preserve-sign,preserve-sign" "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
; POSITIVEZERO-BOTH-DAG: attributes [[NOATTR]] = { nounwind "denormal-fp-math"="positive-zero,positive-zero" "denormal-fp-math-f32"="positive-zero,positive-zero" }

; IEEE-BOTH2-DAG: attributes [[NOATTR]] = { nounwind "denormal-fp-math"="ieee,ieee" "denormal-fp-math-bf16"="ieee,ieee" }
; PRESERVESIGN-BOTH2-DAG: attributes [[NOATTR]] = { nounwind "denormal-fp-math"="preserve-sign,preserve-sign" "denormal-fp-math-bf16"="preserve-sign,preserve-sign" }
; POSITIVEZERO-BOTH2-DAG: attributes [[NOATTR]] = { nounwind "denormal-fp-math"="positive-zero,positive-zero" "denormal-fp-math-bf16"="positive-zero,positive-zero" }

attributes #0 = { nounwind }
attributes #1 = { nounwind "denormal-fp-math"="preserve-sign,ieee" "denormal-fp-math-f32"="preserve-sign,ieee" }
attributes #1 = { nounwind "denormal-fp-math"="preserve-sign,ieee" "denormal-fp-math-bf16"="preserve-sign,ieee" "denormal-fp-math-f32"="preserve-sign,ieee" }