llvm · MacDue · Jan 9, 2025 · Dec 9, 2024 · Dec 18, 2024 · Dec 18, 2024
diff --git a/llvm/include/llvm/IR/VectorTypeUtils.h b/llvm/include/llvm/IR/VectorTypeUtils.h
@@ -40,6 +40,10 @@ Type *toScalarizedStructTy(StructType *StructTy);
 /// are vectors of matching element count. This does not include empty structs.
 bool isVectorizedStructTy(StructType *StructTy);
 
+/// Returns true if `StructTy` is a non-empty, unpacked literal struct where all
+/// elements are scalars that can be used as vector element types.
+bool canVectorizeStructTy(StructType *StructTy);
+
 /// A helper for converting to vectorized types. For scalar types, this is
 /// equivalent to calling `toVectorTy`. For struct types, this returns a new
 /// struct where each element type has been widened to a vector type.
@@ -71,6 +75,18 @@ inline bool isVectorizedTy(Type *Ty) {
  return Ty->isVectorTy();
 }
 
+/// Reports whether `Ty` could be widened to a vectorized type: that is, `Ty` is
+/// void, a valid vector element type, or a struct accepted by
+/// `canVectorizeStructTy`.
+/// Note: A type being vectorizable does not make vectorizing it valid in every
+/// context. For example, a vectorized struct (as returned by toVectorizedTy)
+/// does not perform (de)interleaving, so it can't be used for vectorizing
+/// loads/stores.
+inline bool canVectorizeTy(Type *Ty) {
+  auto *AsStruct = dyn_cast<StructType>(Ty);
+  if (!AsStruct)
+    return Ty->isVoidTy() || VectorType::isValidElementType(Ty);
+  return canVectorizeStructTy(AsStruct);
+}
+
 /// Returns the types contained in `Ty`. For struct types, it returns the
 /// elements, all other types are returned directly.
 inline ArrayRef<Type *> getContainedTypes(Type *const &Ty) {

diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -422,6 +422,10 @@ class LoopVectorizationLegality {
  /// has a vectorized variant available.
  bool hasVectorCallVariants() const { return VecCallVariantsFound; }
 
+ /// Returns true if there is at least one function call in the loop which
+ /// returns a struct type and needs to be vectorized. Callers can use this to
+ /// reject such loops while widening struct-returning calls is unsupported.
+ bool hasStructVectorCall() const { return StructVecCallFound; }
+
  unsigned getNumStores() const { return LAI->getNumStores(); }
  unsigned getNumLoads() const { return LAI->getNumLoads(); }
 
@@ -644,6 +648,12 @@ class LoopVectorizationLegality {
  /// the use of those function variants.
  bool VecCallVariantsFound = false;
 
+ /// If we find a call (to be vectorized) that returns a struct type, record
+ /// that so we can bail out until this is supported.
+ /// TODO: Remove this flag once vectorizing calls with struct returns is
+ /// supported.
+ bool StructVecCallFound = false;
+
  /// Indicates whether this loop has an uncountable early exit, i.e. an
  /// uncountable exiting block that is not the latch.
  bool HasUncountableEarlyExit = false;

diff --git a/llvm/lib/IR/VectorTypeUtils.cpp b/llvm/lib/IR/VectorTypeUtils.cpp
@@ -52,3 +52,11 @@ bool llvm::isVectorizedStructTy(StructType *StructTy) {
  return Ty->isVectorTy() && cast<VectorType>(Ty)->getElementCount() == VF;
  });
 }
+
+/// Checks whether `StructTy` can be vectorized element-wise: it must be a
+/// non-empty, unpacked literal struct whose element types are all valid vector
+/// element types.
+bool llvm::canVectorizeStructTy(StructType *StructTy) {
+  ArrayRef<Type *> Members = StructTy->elements();
+  if (Members.empty() || !isUnpackedStructLiteral(StructTy))
+    return false;
+  return all_of(Members, VectorType::isValidElementType);
+}
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -778,6 +778,18 @@ static bool isTLIScalarize(const TargetLibraryInfo &TLI, const CallInst &CI) {
  return Scalarize;
 }
 
+/// Returns true if the call return type `Ty` can be widened by the loop
+/// vectorizer. Struct types are only accepted when all of their elements have
+/// the same type; the final decision for every type is made by
+/// `canVectorizeTy`.
+static bool canWidenCallReturnType(Type *Ty) {
+  auto *StructTy = dyn_cast<StructType>(Ty);
+  // TODO: Remove the homogeneous types restriction. This is just an initial
+  // simplification. When we want to support things like the overflow intrinsics
+  // we will have to lift this restriction.
+  if (StructTy && !StructTy->containsHomogeneousTypes())
+    return false;
+  // Query the original type rather than the `dyn_cast` result: `StructTy` is
+  // null for non-struct types, and `canVectorizeTy` dereferences its argument.
+  return canVectorizeTy(Ty);
+}
+
 bool LoopVectorizationLegality::canVectorizeInstrs() {
  BasicBlock *Header = TheLoop->getHeader();
 
@@ -942,11 +954,29 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
  if (CI && !VFDatabase::getMappings(*CI).empty())
  VecCallVariantsFound = true;
 
+ auto CanWidenInstructionTy = [this](Instruction const &Inst) {
+   Type *ResultTy = Inst.getType();
+   // Non-struct results only need to be of a vectorizable type.
+   if (!isa<StructType>(ResultTy))
+     return canVectorizeTy(ResultTy);
+
+   // For now, a struct value is only recognized as vectorizable when it is
+   // returned from a call, its type can be widened, and every user is an
+   // extractvalue.
+   bool IsWidenableStructCall =
+       isa<CallInst>(Inst) && canWidenCallReturnType(ResultTy) &&
+       all_of(Inst.users(), IsaPred<ExtractValueInst>);
+   if (!IsWidenableStructCall)
+     return false;
+
+   // TODO: Remove the `StructVecCallFound` flag once vectorizing calls
+   // with struct returns is supported.
+   StructVecCallFound = true;
+   return true;
+ };
+
  // Check that the instruction return type is vectorizable.
  // We can't vectorize casts from vector type to scalar type.
  // Also, we can't vectorize extractelement instructions.
- if ((!VectorType::isValidElementType(I.getType()) &&
- !I.getType()->isVoidTy()) ||
+ if (!CanWidenInstructionTy(I) ||
  (isa<CastInst>(I) &&
  !VectorType::isValidElementType(I.getOperand(0)->getType())) ||
  isa<ExtractElementInst>(I)) {

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -10348,6 +10348,13 @@ bool LoopVectorizePass::processLoop(Loop *L) {
  return false;
  }
 
+ if (LVL.hasStructVectorCall()) {
+ reportVectorizationFailure("Auto-vectorization of calls that return struct "
+ "types is not yet supported",
+ "StructCallVectorizationUnsupported", ORE, L);
+ return false;
+ }
+
  // Entrance to the VPlan-native vectorization path. Outer loops are processed
  // here. They may require CFG and instruction level transformations before
  // even evaluating whether vectorization is profitable. Since we cannot modify

diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-struct-return.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-struct-return.ll
@@ -0,0 +1,97 @@
+; RUN: opt < %s -mattr=+sve -passes=loop-vectorize -force-vector-interleave=1 -prefer-predicate-over-epilogue=predicate-dont-vectorize -S -pass-remarks-analysis=loop-vectorize 2>%t | FileCheck %s
+; RUN: cat %t | FileCheck --check-prefix=CHECK-REMARKS %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+; Tests basic vectorization of scalable homogeneous struct literal returns.
+
+; TODO: Support vectorization in this case.
+; CHECK-REMARKS: remark: {{.*}} loop not vectorized: Auto-vectorization of calls that return struct types is not yet supported
+; Loop of 1024 iterations: loads a float from %in, calls @foo (attribute
+; group #0), and stores the two elements of the returned { float, float } to
+; %out_a and %out_b. All pointer arguments are noalias.
+define void @struct_return_f32_widen(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) {
+; CHECK-LABEL: define void @struct_return_f32_widen
+; CHECK-NOT: vector.body:
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds float, ptr %in, i64 %iv
+ %in_val = load float, ptr %arrayidx, align 4
+ %call = tail call { float, float } @foo(float %in_val) #0
+ %extract_a = extractvalue { float, float } %call, 0
+ %extract_b = extractvalue { float, float } %call, 1
+ %arrayidx2 = getelementptr inbounds float, ptr %out_a, i64 %iv
+ store float %extract_a, ptr %arrayidx2, align 4
+ %arrayidx4 = getelementptr inbounds float, ptr %out_b, i64 %iv
+ store float %extract_b, ptr %arrayidx4, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond.not = icmp eq i64 %iv.next, 1024
+ br i1 %exitcond.not, label %exit, label %for.body
+
+exit:
+ ret void
+}
+
+; TODO: Support vectorization in this case.
+; CHECK-REMARKS: remark: {{.*}} loop not vectorized: Auto-vectorization of calls that return struct types is not yet supported
+; Loop of 1024 iterations: loads a double from %in, calls @bar (attribute
+; group #1), and stores the two elements of the returned { double, double } to
+; %out_a and %out_b. All pointer arguments are noalias.
+define void @struct_return_f64_widen(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) {
+; CHECK-LABEL: define void @struct_return_f64_widen
+; CHECK-NOT: vector.body:
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds double, ptr %in, i64 %iv
+ %in_val = load double, ptr %arrayidx, align 8
+ %call = tail call { double, double } @bar(double %in_val) #1
+ %extract_a = extractvalue { double, double } %call, 0
+ %extract_b = extractvalue { double, double } %call, 1
+ %arrayidx2 = getelementptr inbounds double, ptr %out_a, i64 %iv
+ store double %extract_a, ptr %arrayidx2, align 8
+ %arrayidx4 = getelementptr inbounds double, ptr %out_b, i64 %iv
+ store double %extract_b, ptr %arrayidx4, align 8
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond.not = icmp eq i64 %iv.next, 1024
+ br i1 %exitcond.not, label %exit, label %for.body
+
+exit:
+ ret void
+}
+
+; TODO: Support vectorization in this case.
+; CHECK-REMARKS: remark: {{.*}} loop not vectorized: Auto-vectorization of calls that return struct types is not yet supported
+; Same loop body as the float case that calls @foo, but the pointer arguments
+; are not marked noalias (presumably so that vectorizing would require runtime
+; alias checks, per the function name).
+define void @struct_return_f32_widen_rt_checks(ptr %in, ptr writeonly %out_a, ptr writeonly %out_b) {
+; CHECK-LABEL: define void @struct_return_f32_widen_rt_checks
+; CHECK-NOT: vector.body:
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds float, ptr %in, i64 %iv
+ %in_val = load float, ptr %arrayidx, align 4
+ %call = tail call { float, float } @foo(float %in_val) #0
+ %extract_a = extractvalue { float, float } %call, 0
+ %extract_b = extractvalue { float, float } %call, 1
+ %arrayidx2 = getelementptr inbounds float, ptr %out_a, i64 %iv
+ store float %extract_a, ptr %arrayidx2, align 4
+ %arrayidx4 = getelementptr inbounds float, ptr %out_b, i64 %iv
+ store float %extract_b, ptr %arrayidx4, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond.not = icmp eq i64 %iv.next, 1024
+ br i1 %exitcond.not, label %exit, label %for.body
+
+exit:
+ ret void
+}
+
+declare { float, float } @foo(float)
+declare { double, double } @bar(double)
+
+declare { <vscale x 4 x float>, <vscale x 4 x float> } @scalable_vec_masked_foo(<vscale x 4 x float>, <vscale x 4 x i1>)
+declare { <vscale x 2 x double>, <vscale x 2 x double> } @scalable_vec_masked_bar(<vscale x 2 x double>, <vscale x 2 x i1>)
+
+
+attributes #0 = { nounwind "vector-function-abi-variant"="_ZGVsMxv_foo(scalable_vec_masked_foo)" }
+attributes #1 = { nounwind "vector-function-abi-variant"="_ZGVsMxv_bar(scalable_vec_masked_bar)" }