Merged

79 commits
b2c334c
Merged PR 8479: Add QNN ExecutionProvider which enables OnnxRuntime i…
HectorSVC Sep 23, 2022
dcc47d9
Merged PR 8567: Support Where op with QDQ nodes as a node unit group
HectorSVC Sep 29, 2022
6a2db15
Merged PR 8613: Make Softmax fallback more accurate
HectorSVC Oct 5, 2022
a5eab82
Merged PR 9177: Make StyleGAN work on Linux and enable on build pipeline
SatyaJandhyalaAtMS Nov 8, 2022
0162e1b
Merged PR 9204: Replaced qnn_model_wrapper pointer with a reference.
SatyaJandhyalaAtMS Nov 11, 2022
4b25b1f
Merged PR 9226: Remove high threshold for QNN QDQ model test
SatyaJandhyalaAtMS Nov 16, 2022
410016c
Merged PR 9203: QNN v2 integration
HectorSVC Nov 17, 2022
83a63d7
Merged PR 9328: Fix the issues relate to execute Text Prediction QDQ …
HectorSVC Nov 21, 2022
3bdfe1a
Merged PR 9361: Change the way to identify the back-end capacity
HectorSVC Nov 28, 2022
cd401c0
Merged PR 9386: Create Windows ARM64 build pipeline
SatyaJandhyalaAtMS Nov 29, 2022
ff4d46e
Merged PR 9456: Add missing part Qnn device creation/releasing which …
HectorSVC Dec 2, 2022
2f22ca1
Merged PR 9476: Revert 'Add missing part Qnn device creation/releasin…
HectorSVC Dec 6, 2022
aa18a71
Merged PR 9429: Add support for single Transpose node in QDQ model
HectorSVC Dec 6, 2022
028498b
Merged PR 9432: Enable Quantized MobileNet test and create an interna…
SatyaJandhyalaAtMS Dec 6, 2022
3d70683
Merged PR 9521: minor fix to the unreachable code
HectorSVC Dec 8, 2022
9f8b209
Merged PR 9567: Moved QNN SDK version from 2.3 to 2.5
SatyaJandhyalaAtMS Dec 12, 2022
55e2879
Merged PR 9572: Sync with latest Github main
HectorSVC Dec 12, 2022
6ce8f78
Merged PR 9583: Enable ConvTest UT for Qnn. Exclude tests with dynami…
HectorSVC Dec 13, 2022
892d3f0
Merged PR 9661: Added TopK operator and removed unnecessary variable …
SatyaJandhyalaAtMS Dec 22, 2022
0f4311b
Merged PR 9822: Fixed unused parameter warning.
SatyaJandhyalaAtMS Jan 9, 2023
b75c3c8
Merged PR 9836: Enable Tanh, ReduceMin, Slice for node unit support
HectorSVC Jan 9, 2023
c15e7ac
Merged PR 9877: Enable Tile Op
HectorSVC Jan 13, 2023
cbe2339
Merged PR 10032: Convert Gather indices initializer data from int64 t…
HectorSVC Jan 24, 2023
f6b622e
Merged PR 9968: Add NonMaxSuppression operator support on QNN EP
SatyaJandhyalaAtMS Jan 26, 2023
4e72ed8
Merged PR 10131: Revert 'Add NonMaxSuppression operator support on QN…
SatyaJandhyalaAtMS Jan 29, 2023
d358d87
Merged PR 9866: Limit the transpose optimizer works for Transpose wit…
HectorSVC Feb 8, 2023
837a70c
Merged PR 10311: Update QNN version to 2.6.0
adrianlizarraga Feb 9, 2023
bb0cb62
Merge branch 'main' into qnn_ep_github
HectorSVC Feb 18, 2023
93f2883
resolve conflicts
HectorSVC Feb 22, 2023
7bc5bb3
disable some new tests for Qnn EP
HectorSVC Feb 22, 2023
80e714e
Disable LayerNormalization test for Qnn EP
HectorSVC Feb 22, 2023
1feb5f0
disable onnx node tests: resize_downsample_scales_linear_antialias & …
HectorSVC Feb 22, 2023
ff485ac
extend timeout limit
HectorSVC Feb 22, 2023
2b22726
Merge branch 'main' into qnn_ep_github
HectorSVC Feb 22, 2023
ea75d78
resolve merge conflicts
HectorSVC Feb 22, 2023
1b3aebb
disable some LayerNormTest tests
HectorSVC Feb 22, 2023
93dda20
Back out the fix for transpose optimizer issue. Wait for Scott's chan…
HectorSVC Feb 23, 2023
053def6
correct typo
HectorSVC Feb 23, 2023
ca046d4
Update tools/ci_build/github/azure-pipelines/android-arm64-v8a-QNN-cr…
HectorSVC Feb 23, 2023
bd2cbdd
remove provider options "runtime"
HectorSVC Feb 24, 2023
44728cb
Merge branch 'qnn_ep_github' of https://github.com/microsoft/onnxrunt…
HectorSVC Feb 24, 2023
277a860
Remove cmake_extra_defines from Linux build pipeline
HectorSVC Feb 24, 2023
1284fb0
Update tools/ci_build/github/azure-pipelines/linux-qnn-ci-pipeline.yml
HectorSVC Feb 24, 2023
f43f377
Reorganize QNN EP model tests into separate float32 and qdq folders
adrianlizarraga Feb 24, 2023
906f93d
use enum class
HectorSVC Feb 25, 2023
1aab226
Remove Qnn graph creation during graph partitioning since it's not re…
HectorSVC Feb 27, 2023
6ad1138
Update linux-qnn-ci-pipeline.yml to use a new pool
adrianlizarraga Feb 27, 2023
2d9363a
Update onnxruntime/core/providers/qnn/builder/opbuilder/conv_op_build…
HectorSVC Feb 27, 2023
947a026
Update onnxruntime/core/providers/qnn/builder/opbuilder/conv_op_build…
HectorSVC Feb 27, 2023
8f1f3c0
Update onnxruntime/core/providers/qnn/builder/opbuilder/conv_op_build…
HectorSVC Feb 27, 2023
83431b5
Add InstanceNormalization operator to QNN EP
adrianlizarraga Mar 1, 2023
6e55965
Fix param indentation
adrianlizarraga Mar 1, 2023
328237b
Add InstanceNormalization unit test that includes QNN EP
adrianlizarraga Mar 1, 2023
5e175d8
Start supporting inputs with rank > 2
adrianlizarraga Mar 1, 2023
202821e
Merge latest commits from main; Expect QNN InstanceNorm op input to h…
adrianlizarraga Mar 1, 2023
e198d44
Remove unnecessary comments
adrianlizarraga Mar 2, 2023
22f418f
Merge latest commits from main
adrianlizarraga Mar 2, 2023
e5843b2
Allow DQ->InstanceNorm->Q to be treated as a node unit. Fix bug when …
adrianlizarraga Mar 3, 2023
687d063
Add QNN unit tests for QDQ Conv and InstanceNorm ops
adrianlizarraga Mar 3, 2023
9b86707
Add unused parameter macro. Add comments
adrianlizarraga Mar 3, 2023
4a684d6
Make InstanceNormalization op layout sensitive for all EPs (not just …
adrianlizarraga Mar 3, 2023
ce229fc
Remove unnecessary epsilon attr validation; Remove unnecessary overri…
adrianlizarraga Mar 4, 2023
8c4dd8a
Add runtime check for Windows ARM64 that skips qdq op test if HTP bac…
adrianlizarraga Mar 4, 2023
6905816
Update onnxruntime/test/providers/qnn/qnn_basic_test.cc
adrianlizarraga Mar 4, 2023
9c5f880
Address comments
adrianlizarraga Mar 4, 2023
818341d
Merge latest commits from main
adrianlizarraga Mar 4, 2023
842da60
Cache result of runtime query for HTP support on Windows ARM64
adrianlizarraga Mar 4, 2023
92cccb7
Fix merge conflicts
adrianlizarraga Mar 4, 2023
7e73772
Make HTPBackendTestFixture visible on linux
adrianlizarraga Mar 4, 2023
b5af1d8
Clean up tests
adrianlizarraga Mar 4, 2023
123f523
Add new QDQ selector for InstanceNormalization op
adrianlizarraga Mar 8, 2023
0982f1f
Update QDQ InstanceNorm test case
adrianlizarraga Mar 8, 2023
5f44994
Add static casts, clean up
adrianlizarraga Mar 8, 2023
67ef4ce
Add QDQ support for the InstanceNormalization operator to the quantiz…
adrianlizarraga Mar 10, 2023
28b5e56
Run python black linter on test_op_instance_normalization.py
adrianlizarraga Mar 10, 2023
64d04d0
Remove unused imports from test_op_instance_normalization.py
adrianlizarraga Mar 10, 2023
cdf885c
Run python black linter on instnorm.py
adrianlizarraga Mar 10, 2023
a91eefc
Run python isort on quantize.py
adrianlizarraga Mar 10, 2023
83b75f8
Fix pylint warnings in test_op_instance_normalization.py
adrianlizarraga Mar 10, 2023
4 changes: 4 additions & 0 deletions cmake/onnxruntime_unittests.cmake
@@ -926,6 +926,10 @@ if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)

if (MSVC OR ${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
file(GLOB QNN_LIB_FILES LIST_DIRECTORIES false "${onnxruntime_QNN_HOME}/target/${QNN_ARCH_ABI}/lib/*.so" "${onnxruntime_QNN_HOME}/target/${QNN_ARCH_ABI}/lib/*.dll")
if (${QNN_ARCH_ABI} STREQUAL "aarch64-windows-msvc")
file(GLOB EXTRA_HTP_LIB LIST_DIRECTORIES false "${onnxruntime_QNN_HOME}/target/hexagon-v68/lib/unsigned/libQnnHtpV68Skel.so")
list(APPEND QNN_LIB_FILES ${EXTRA_HTP_LIB})
endif()
message(STATUS "QNN lib files: " ${QNN_LIB_FILES})
add_custom_command(
TARGET ${test_data_target} POST_BUILD
@@ -333,6 +333,25 @@ bool WhereNodeGroupSelector::Check(const GraphViewer &graph_viewer, const Node &

}

bool InstanceNormalizationNodeGroupSelector::Check(const GraphViewer& graph_viewer,
const Node& node,
const std::vector<const Node*>& dq_nodes,
const std::vector<const Node*>& q_nodes) const {
if (!CheckQDQNodes(graph_viewer, node, dq_nodes, q_nodes)) {
return false;
}

int32_t dt_input = dq_nodes[0]->InputDefs()[0]->TypeAsProto()->tensor_type().elem_type();
int32_t dt_scale = dq_nodes[1]->InputDefs()[0]->TypeAsProto()->tensor_type().elem_type();
int32_t dt_bias = dq_nodes[2]->InputDefs()[0]->TypeAsProto()->tensor_type().elem_type();
int32_t dt_output = q_nodes[0]->OutputDefs()[0]->TypeAsProto()->tensor_type().elem_type();

// Input, output, and scale need to be the same type. The bias is int32.
return (dt_input == dt_output) &&
(dt_input == dt_scale) &&
(dt_bias == ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32);
}

} // namespace QDQ
} // namespace onnxruntime
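For context, here is a minimal sketch of the QDQ node group this selector is meant to match, built with the public onnx helper API. The tensor names, shapes, and quantization parameters are illustrative only, not taken from this PR:

import onnx
from onnx import TensorProto, helper

# Three DequantizeLinear inputs (input, scale, int32 bias), the target node,
# and one QuantizeLinear on the output.
dq_x = helper.make_node("DequantizeLinear", ["x_q", "x_scale", "x_zp"], ["x"])
dq_s = helper.make_node("DequantizeLinear", ["s_q", "s_scale", "s_zp"], ["s"])
dq_b = helper.make_node("DequantizeLinear", ["b_q", "b_scale"], ["b"])  # int32 bias, no zero point
inorm = helper.make_node("InstanceNormalization", ["x", "s", "b"], ["y"], epsilon=1e-5)
q_y = helper.make_node("QuantizeLinear", ["y", "y_scale", "y_zp"], ["y_q"])

graph = helper.make_graph(
    [dq_x, dq_s, dq_b, inorm, q_y],
    "qdq_instance_norm",
    inputs=[helper.make_tensor_value_info("x_q", TensorProto.UINT8, [1, 2, 4, 4])],
    outputs=[helper.make_tensor_value_info("y_q", TensorProto.UINT8, [1, 2, 4, 4])],
    initializer=[
        helper.make_tensor("x_scale", TensorProto.FLOAT, [], [0.02]),
        helper.make_tensor("x_zp", TensorProto.UINT8, [], [128]),
        helper.make_tensor("s_q", TensorProto.UINT8, [2], [127, 127]),
        helper.make_tensor("s_scale", TensorProto.FLOAT, [], [0.01]),
        helper.make_tensor("s_zp", TensorProto.UINT8, [], [0]),
        helper.make_tensor("b_q", TensorProto.INT32, [2], [0, 0]),
        helper.make_tensor("b_scale", TensorProto.FLOAT, [], [0.0002]),
        helper.make_tensor("y_scale", TensorProto.FLOAT, [], [0.02]),
        helper.make_tensor("y_zp", TensorProto.UINT8, [], [128]),
    ],
)
onnx.checker.check_model(helper.make_model(graph))
# Matches the Check() constraints: dt_input == dt_output == dt_scale (uint8), bias is int32.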

@@ -141,6 +141,15 @@ class GemmNodeGroupSelector : public NodeGroupSelector {
const std::vector<const Node*>& q_nodes) const override;
};

// Input: DQ nodes for input, scale, and B
// Output: Q node for output
class InstanceNormalizationNodeGroupSelector : public NodeGroupSelector {
private:
bool Check(const GraphViewer& graph_viewer, const Node& node,
const std::vector<const Node*>& dq_nodes,
const std::vector<const Node*>& q_nodes) const override;
};

/*
* NodeSelector instances for use in the QDQ::SelectorActionTransformer.
*/
@@ -232,6 +241,14 @@ class GemmSelector : public BaseSelector {
void UpdateBuilder(NodesToOptimizeIndicesBuilder&) const override;
};

// Input: DQ nodes for input, scale, and B (bias)
// Output: Q node for output
class InstanceNormalizationSelector : public BaseSelector {
public:
InstanceNormalizationSelector()
: BaseSelector(std::make_unique<InstanceNormalizationNodeGroupSelector>()) {}
};

} // namespace QDQ
} // namespace onnxruntime

@@ -74,6 +74,9 @@ static const OpVersionsAndSelector::OpVersionsMap GetMatMulOpVersionsMap() {
static const OpVersionsAndSelector::OpVersionsMap GetGemmOpVersionsMap() {
return {{"Gemm", {}}};
}
static const OpVersionsAndSelector::OpVersionsMap GetInstanceNormalizationOpVersionsMap() {
return {{"InstanceNormalization", {}}};
}

/* Selector rules registration related */
void RegisterMiscSelectors(Selectors& qdq_selectors) {
@@ -133,6 +136,13 @@ void RegisterGemmSelector(Selectors& qdq_selectors) {
std::move(selector));
}

void RegisterInstanceNormalizationSelector(Selectors& qdq_selectors) {
/* register selector for InstanceNormalization op */
std::unique_ptr<NodeGroupSelector> selector = std::make_unique<InstanceNormalizationNodeGroupSelector>();
qdq_selectors.RegisterSelector(GetInstanceNormalizationOpVersionsMap(),
std::move(selector));
}

void SelectorManager::CreateSelectors() {
RegisterMiscSelectors(qdq_selectors_);
RegisterUnarySelectors(qdq_selectors_);
@@ -142,6 +152,7 @@ void SelectorManager::CreateSelectors() {
RegisterConvTransposeSelector(qdq_selectors_);
RegisterMatMulSelector(qdq_selectors_);
RegisterGemmSelector(qdq_selectors_);
RegisterInstanceNormalizationSelector(qdq_selectors_);
}

void SelectorManager::InitializeSelectorsMap() {
@@ -2043,7 +2043,7 @@ const std::unordered_set<std::string_view>& GetLayoutSensitiveOps() {
"Conv", "QLinearConv", "BatchNormalization",
"AveragePool", "GlobalAveragePool", "MaxPool",
"GlobalMaxPool", "LRN", "GridSample",
"DepthToSpace", "SpaceToDepth", "ConvTranspose", "MaxUnpool"};
"DepthToSpace", "SpaceToDepth", "ConvTranspose", "MaxUnpool", "InstanceNormalization"};

return layout_sensitive_ops;
}
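A short numpy sketch (not ORT code) of why InstanceNormalization belongs in this list: mean and variance are reduced over the spatial axes only, so the reduction axes move when the layout transformer rewrites NCHW as NHWC:

import numpy as np

def instance_norm_nchw(x, scale, bias, eps=1e-5):
    # Reduce over H and W (axes 2 and 3 in NCHW).
    mean = x.mean(axis=(2, 3), keepdims=True)
    var = x.var(axis=(2, 3), keepdims=True)
    return scale[None, :, None, None] * (x - mean) / np.sqrt(var + eps) + bias[None, :, None, None]

def instance_norm_nhwc(x, scale, bias, eps=1e-5):
    # Same reduction, but H and W are now axes 1 and 2.
    mean = x.mean(axis=(1, 2), keepdims=True)
    var = x.var(axis=(1, 2), keepdims=True)
    return scale * (x - mean) / np.sqrt(var + eps) + bias

x = np.random.rand(1, 3, 4, 4).astype(np.float32)
scale = np.random.rand(3).astype(np.float32)
bias = np.random.rand(3).astype(np.float32)
ref = instance_norm_nchw(x, scale, bias)
out = instance_norm_nhwc(x.transpose(0, 2, 3, 1), scale, bias).transpose(0, 3, 1, 2)
assert np.allclose(ref, out, atol=1e-5)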
4 changes: 4 additions & 0 deletions onnxruntime/core/providers/qnn/builder/op_builder_factory.cc
@@ -120,6 +120,10 @@ OpBuilderRegistrations::OpBuilderRegistrations() {
{
CreateTileOpBuilder("Tile", *this);
}

{
CreateInstanceNormOpBuilder("InstanceNormalization", *this);
}
}

const IOpBuilder* GetOpBuilder(const std::string& onnx_op_type) {
2 changes: 2 additions & 0 deletions onnxruntime/core/providers/qnn/builder/op_builder_factory.h
@@ -76,5 +76,7 @@ void CreateTopKOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_

void CreateTileOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations);

void CreateInstanceNormOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations);

} // namespace qnn
} // namespace onnxruntime
@@ -158,7 +158,8 @@ class BaseOpBuilder : public IOpBuilder {
{"ArgMin", "Argmin"},
{"ConvTranspose", "TransposeConv2d"},
{"Tile", "Tile"},
{"TopK", "TopK"}};
{"TopK", "TopK"},
{"InstanceNormalization", "InstanceNorm"}};
auto it = onnx_op_type_to_qnn_op_type.find(onnx_op_type);
ORT_ENFORCE(it != onnx_op_type_to_qnn_op_type.end());
return it->second;
@@ -54,6 +54,7 @@ class ConvOpBuilder : public BaseOpBuilder {
// The nodes from 1st call of GetCapability do not get layout transformer applied, it's still NCHW
// The nodes from 2nd call of GetCapability get layout transformer applied, it's NHWC
// Need to do op validation in 1st call of GetCapability
// TODO: Check if node domain == kMSInternalNHWCDomain to determine if the layout has been transformed.
Status ConvOpBuilder::IsOpSupported(QnnModelWrapper& qnn_model_wrapper,
const NodeUnit& node_unit,
const logging::Logger& logger,
@@ -0,0 +1,126 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

#include "core/providers/common.h"
#include "core/providers/shared/utils/utils.h"
#include "core/framework/tensorprotoutils.h"
#include "core/providers/qnn/builder/qnn_model_wrapper.h"
#include "core/providers/qnn/builder/op_builder_factory.h"
#include "core/common/safeint.h"
#include "onnx/defs/data_type_utils.h"

#include "base_op_builder.h"

namespace onnxruntime {
namespace qnn {

class InstanceNormOpBuilder : public BaseOpBuilder {
public:
InstanceNormOpBuilder() : BaseOpBuilder("InstanceNormOpBuilder") {}
ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(InstanceNormOpBuilder);

Status IsOpSupported(QnnModelWrapper& qnn_model_wrapper,
const NodeUnit& node_unit,
const logging::Logger& logger,
bool is_quantized_model) const override final ORT_MUST_USE_RESULT;

protected:
Status ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_wrapper,
const NodeUnit& node_unit,
std::vector<std::string>&& input_names,
const logging::Logger& logger,
bool is_quantized_model,
bool do_op_validation) const override ORT_MUST_USE_RESULT;
};

// Instance normalization op is sensitive to data layout.
// The nodes from 1st call of GetCapability do not get layout transformer applied, so their shapes are still NCHW.
// The nodes from 2nd call of GetCapability get their layout transformed to NHWC.
// Therefore, we need to check the node domain to determine if the layout has been transformed.
Status InstanceNormOpBuilder::IsOpSupported(QnnModelWrapper& qnn_model_wrapper,
const NodeUnit& node_unit,
const logging::Logger& logger,
bool is_quantized_model) const {
ORT_UNUSED_PARAMETER(logger);

const auto float_elem_type = ONNX_NAMESPACE::Utils::DataTypeUtils::ToType("float");

// Check input type is float for CPU.
const auto& inputs = node_unit.Inputs();
ONNX_NAMESPACE::DataType input_data_type = inputs[0].node_arg.Type();
if (!is_quantized_model && input_data_type != float_elem_type) {
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "QNN InstanceNorm data type " + *input_data_type + " is not supported in CPU backend.");
}

// Also check output type is float for CPU.
const auto& outputs = node_unit.Outputs();
ONNX_NAMESPACE::DataType output_data_type = outputs[0].node_arg.Type();
if (!is_quantized_model && output_data_type != float_elem_type) {
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "QNN InstanceNorm data type " + *output_data_type + " is not supported in CPU backend.");
}

std::vector<uint32_t> input_shape;
ORT_RETURN_IF_NOT(qnn_model_wrapper.GetOnnxShape(inputs[0].node_arg, input_shape), "Cannot get shape of input 0");
const size_t input_rank = input_shape.size();

if (input_rank <= 2 || input_rank > 4) {
Contributor Author: Documentation states that input rank must be 4, but I've tested with ranks 3 & 4 for both cpu and htp backends. This is unit tested as well.

return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "QNN InstanceNorm only supports input ranks of size 3 or 4.");
}

const uint32_t num_channels = (node_unit.Domain() == kMSInternalNHWCDomain) ? input_shape.back() : input_shape[1];

std::vector<uint32_t> scale_shape;
ORT_RETURN_IF_NOT(qnn_model_wrapper.GetOnnxShape(inputs[1].node_arg, scale_shape), "Cannot get shape of input 1 (scale)");
if (scale_shape.size() != 1 || scale_shape[0] != num_channels) {
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "QNN InstanceNorm input 1 (scale) must have 1D shape [channel].");
}

std::vector<uint32_t> bias_shape;
ORT_RETURN_IF_NOT(qnn_model_wrapper.GetOnnxShape(inputs[2].node_arg, bias_shape), "Cannot get shape of input 2 (bias)");
if (bias_shape.size() != 1 || bias_shape[0] != num_channels) {
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "QNN InstanceNorm input 2 (bias) must have 1D shape [channel].");
}

NodeAttrHelper node_helper(node_unit);
const float epsilon = node_helper.Get("epsilon", 1e-05f); // Default is 1e-05 according to ONNX spec.
if (epsilon <= 0.0f) {
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "QNN InstanceNorm epsilon must be greater than 0.0");
}

return Status::OK();
}

Status InstanceNormOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_wrapper,
const NodeUnit& node_unit,
std::vector<std::string>&& input_names,
const logging::Logger& logger,
bool is_quantized_model,
bool do_op_validation) const {
NodeAttrHelper node_helper(node_unit);
std::vector<std::string> param_tensor_names;

const float epsilon = node_helper.Get("epsilon", 1e-05f); // Default is 1e-05 according to ONNX spec.
Qnn_Scalar_t epsilon_param = QNN_SCALAR_INIT;
epsilon_param.dataType = QNN_DATATYPE_FLOAT_32;
epsilon_param.floatValue = epsilon;
QnnParamWrapper epsilon_param_wrapper(node_unit.Index(),
node_unit.Name(),
qnn_def::epsilon,
epsilon_param);
param_tensor_names.push_back(epsilon_param_wrapper.GetParamTensorName());
qnn_model_wrapper.AddParamWrapper(std::move(epsilon_param_wrapper));

ORT_RETURN_IF_ERROR(ProcessOutputs(qnn_model_wrapper, node_unit,
std::move(input_names),
std::move(param_tensor_names),
logger, is_quantized_model, do_op_validation));

return Status::OK();
}

void CreateInstanceNormOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations) {
op_registrations.AddOpBuilder(op_type, std::make_unique<InstanceNormOpBuilder>());
}

} // namespace qnn
} // namespace onnxruntime
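A hedged usage sketch for the new op builder: running a float32 model that contains an InstanceNormalization node with the QNN EP from Python. The provider name and the backend_path option follow the QNN EP added in this PR series; the model file and input name are placeholders:

import numpy as np
import onnxruntime as ort

sess = ort.InferenceSession(
    "instance_norm_model.onnx",  # placeholder model containing InstanceNormalization
    providers=["QNNExecutionProvider", "CPUExecutionProvider"],
    provider_options=[{"backend_path": "QnnCpu.dll"}, {}],  # QnnHtp.dll for the HTP backend
)

# IsOpSupported above requires float32 data on the CPU backend, input rank 3 or 4,
# and 1D scale/bias of shape [channel].
x = np.random.rand(1, 3, 8, 8).astype(np.float32)
outputs = sess.run(None, {"x": x})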
@@ -44,6 +44,7 @@ class PoolOpBuilder : public BaseOpBuilder {
// The nodes from 1st call of GetCapability do not get layout transformer applied, it's still NCHW
// The nodes from 2nd call of GetCapability get layout transformer applied, it's NHWC
// Need to do op validation in 1st call of GetCapability
// TODO: Check if node domain == kMSInternalNHWCDomain to determine if the layout has been transformed.
Status PoolOpBuilder::IsOpSupported(QnnModelWrapper& qnn_model_wrapper,
const NodeUnit& node_unit,
const logging::Logger& logger,
@@ -44,6 +44,7 @@ class ResizeOpBuilder : public BaseOpBuilder {
// The nodes from 1st call of GetCapability do not get layout transformer applied, it's still NCHW
// The nodes from 2nd call of GetCapability get layout transformer applied, it's NHWC
// Need to do op validation in 1st call of GetCapability
// TODO: Check if node domain == kMSInternalNHWCDomain to determine if the layout has been transformed.
Status ResizeOpBuilder::IsOpSupported(QnnModelWrapper& qnn_model_wrapper,
const NodeUnit& node_unit,
const logging::Logger& logger,
@@ -40,7 +40,8 @@ class SimpleOpBuilder : public BaseOpBuilder {
const std::string input_name) const;
Status HandleSingleTransposeNode(QnnModelWrapper& qnn_model_wrapper,
const NodeUnit& node_unit,
-                                   std::vector<std::string>&& input_names) const;
+                                   std::vector<std::string>&& input_names,
+                                   bool is_quantized_model) const;
};

Status SimpleOpBuilder::ExplictOpCheck(const QnnModelWrapper& qnn_model_wrapper, const NodeUnit& node_unit) const {
@@ -153,14 +154,42 @@ Status SimpleOpBuilder::ProcessAlphaAttribute(QnnModelWrapper& qnn_model_wrapper

// Support a single Transpose node in a QDQ model since it just changes the data layout
// A single node doesn't have any quantization parameters
-// Input tensors are created by previous node, output tensors created by next node
+// Input tensors are created by the previous node. Output tensors are created by the next node,
+// unless the output is the graph's final output.
Status SimpleOpBuilder::HandleSingleTransposeNode(QnnModelWrapper& qnn_model_wrapper,
const NodeUnit& node_unit,
-                                                  std::vector<std::string>&& input_names) const {
+                                                  std::vector<std::string>&& input_names,
+                                                  bool is_quantized_model) const {
std::vector<std::string> param_tensor_names;
ORT_RETURN_IF_ERROR(ProcessPermAttribute(qnn_model_wrapper, node_unit, param_tensor_names));
const auto& outputs = node_unit.Outputs();
ORT_ENFORCE(outputs.size() == 1, "QNN Transpose node must have a single output.");
const auto& output = outputs[0];
auto& output_name = output.node_arg.Name();

const bool is_graph_output = qnn_model_wrapper.IsGraphOutput(output_name);

// Need to add output to the QNN model wrapper if this Transpose node's output is also
// the graph's output.
if (is_graph_output) {
const auto* type_proto = output.node_arg.TypeAsProto();
Qnn_DataType_t qnn_data_type = QNN_DATATYPE_UNDEFINED;
ORT_RETURN_IF_ERROR(GetQnnDataType(is_quantized_model, type_proto, qnn_data_type));

Qnn_QuantizeParams_t quantize_param = QNN_QUANTIZE_PARAMS_INIT;
std::vector<uint32_t> output_shape;
ORT_RETURN_IF_NOT(qnn_model_wrapper.GetOnnxShape(output.node_arg, output_shape),
"Cannot get shape for QNN Transpose output");

QnnTensorWrapper output_tensorwrapper(output_name,
QNN_TENSOR_TYPE_APP_READ,
qnn_data_type,
quantize_param,
std::move(output_shape));
ORT_RETURN_IF_NOT(qnn_model_wrapper.AddTensorWrapper(std::move(output_tensorwrapper)),
"Failed to add output tensor for QNN Transpose");
}

-  auto& output_name = node_unit.Outputs()[0].node_arg.Name();
ORT_RETURN_IF_NOT(qnn_model_wrapper.CreateQnnNode(GetNodeName(node_unit),
qnn_def::package_name,
GetQnnOpType(node_unit.OpType()),
Expand All @@ -186,7 +215,7 @@ Status SimpleOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_w
} else if (is_quantized_model && NodeUnit::Type::SingleNode == node_unit.UnitType() &&
node_unit.OpType() == "Transpose") {
LOGS(logger, VERBOSE) << "Add single Transpose node: " << node_unit.Name();
-    return HandleSingleTransposeNode(qnn_model_wrapper, node_unit, std::move(input_names));
+    return HandleSingleTransposeNode(qnn_model_wrapper, node_unit, std::move(input_names), is_quantized_model);
}

std::vector<std::string> param_tensor_names;
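To illustrate the case HandleSingleTransposeNode now covers, a sketch (public onnx helper API, illustrative names) of a QDQ model whose final node is a bare Transpose: it operates directly on quantized data, and its output is also the graph output, so the EP must register the output tensor itself:

import onnx
from onnx import TensorProto, helper

dq = helper.make_node("DequantizeLinear", ["x_q", "scale", "zp"], ["x"])
relu = helper.make_node("Relu", ["x"], ["r"])
q = helper.make_node("QuantizeLinear", ["r", "scale", "zp"], ["r_q"])
# Bare Transpose with no Q/DQ around it; "y" is the graph output.
transpose = helper.make_node("Transpose", ["r_q"], ["y"], perm=[0, 2, 3, 1])

graph = helper.make_graph(
    [dq, relu, q, transpose],
    "trailing_transpose",
    inputs=[helper.make_tensor_value_info("x_q", TensorProto.UINT8, [1, 3, 4, 4])],
    outputs=[helper.make_tensor_value_info("y", TensorProto.UINT8, [1, 4, 4, 3])],
    initializer=[
        helper.make_tensor("scale", TensorProto.FLOAT, [], [0.05]),
        helper.make_tensor("zp", TensorProto.UINT8, [], [0]),
    ],
)
onnx.checker.check_model(helper.make_model(graph))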
1 change: 1 addition & 0 deletions onnxruntime/core/providers/qnn/builder/qnn_def.h
@@ -426,6 +426,7 @@ const std::string nearest_mode = "nearest_mode";
const std::string rounding_mode = "rounding_mode";
const std::string topk = "k";
const std::string multiples = "multiples";
const std::string epsilon = "epsilon";
} // namespace qnn_def

} // namespace qnn
29 changes: 29 additions & 0 deletions onnxruntime/python/tools/quantization/operators/instnorm.py
@@ -0,0 +1,29 @@
# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------

from .qdq_base_operator import QDQOperatorBase


class QDQInstanceNormalization(QDQOperatorBase):
def __init__(self, onnx_quantizer, onnx_node):
super().__init__(onnx_quantizer, onnx_node)

def quantize(self):
node = self.node
assert node.op_type == "InstanceNormalization"

# Input
self.quantizer.quantize_activation_tensor(node.input[0])
if not self.disable_qdq_for_node_output:
self.quantizer.quantize_activation_tensor(node.output[0])

# Scale
if self.quantizer.is_per_channel():
self.quantizer.quantize_weight_tensor_per_channel(node.input[1], axis=1)
else:
self.quantizer.quantize_weight_tensor(node.input[1])

# Bias
self.quantizer.quantize_bias_tensor(node.input[2], node.input[0], node.input[1])
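With QDQInstanceNormalization registered, a hedged end-to-end sketch of producing this QDQ pattern through the quantization tool; the model file names and the random calibration reader are placeholders:

import numpy as np
from onnxruntime.quantization import CalibrationDataReader, QuantFormat, QuantType, quantize_static

class RandomReader(CalibrationDataReader):
    # Feeds a few random NCHW batches for calibration; a real reader would use
    # representative data.
    def __init__(self, n=8):
        self.data = iter([{"x": np.random.rand(1, 3, 8, 8).astype(np.float32)} for _ in range(n)])

    def get_next(self):
        return next(self.data, None)

quantize_static(
    "instance_norm_fp32.onnx",  # placeholder float32 model
    "instance_norm_qdq.onnx",   # placeholder output model
    RandomReader(),
    quant_format=QuantFormat.QDQ,
    activation_type=QuantType.QUInt8,
    weight_type=QuantType.QUInt8,
)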