[Relay] Fix invalid shape function for "copy" operator (#9749)
The 'script' form of the shape function was ill-formed,
resulting in a TIR shape function which did not assign
to its output. That in turn caused either OOMs or
assertion failures as uninitialized dimensions worked their
way downstream. The fix is in python/tvm/relay/op/tensor.py.

Everything else is for testing and debugging as I tracked
this down.

Special thanks to Lily for helping me with the scalar vs
tensor switch in the copy shape function.

[This is CORE-112 in OctoML JIRA.]
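For reference, a minimal repro sketch of the failure mode (hypothetical, not part of this commit; assumes the standard Relay VM Python API, with relay.Any() making the input shape dynamic so the VM must run the "copy" shape function at runtime):

import numpy as np
import tvm
from tvm import relay

# A rank-1 input of unknown extent forces a dynamic shape function for "copy".
x = relay.var("x", shape=(relay.Any(),), dtype="float32")
mod = tvm.IRModule.from_expr(relay.Function([x], relay.copy(x)))
exe = relay.vm.compile(mod, target="llvm")
vm = tvm.runtime.vm.VirtualMachine(exe, tvm.cpu())
# Before the fix the lowered shape function never stored to its output, so
# the result shape was uninitialized memory; after the fix it is (4,).
out = vm.invoke("main", tvm.nd.array(np.ones((4,), dtype="float32")))
print(out.shape)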
mbs-octoml committed Dec 18, 2021
1 parent e1255c9 commit 89b1676
Showing 10 changed files with 364 additions and 113 deletions.
54 changes: 54 additions & 0 deletions include/tvm/runtime/debug.h
@@ -0,0 +1,54 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

/*!
* \file tvm/runtime/debug.h
* \brief Helpers for debugging at runtime.
*/
#ifndef TVM_RUNTIME_DEBUG_H_
#define TVM_RUNTIME_DEBUG_H_

#include <tvm/runtime/container/adt.h>
#include <tvm/runtime/ndarray.h>

#include <ostream>
#include <string>

namespace tvm {
namespace runtime {

/*!
* \brief Helpers to describe runtime objects in human-friendly form. For \p nd_arrays we show their
* shapes and dtypes, but also their contents if 'small' and on the \p host_device (mostly so that
* we can see dynamic shapes as they are computed). For \p adts we show the ADT fields. For
* \p objects we dispatch to one of the above as appropriate.
*/
void AppendNDArray(std::ostream& os, const NDArray& nd_array, const DLDevice& host_device,
                   bool show_content = true);
void AppendADT(std::ostream& os, const ADT& adt, const DLDevice& host_device,
               bool show_content = true);
void AppendRuntimeObject(std::ostream& os, const ObjectRef& object, const DLDevice& host_device,
                         bool show_content = true);
std::string RuntimeObject2String(const ObjectRef& object, const DLDevice& host_device,
                                 bool show_content = true);

} // namespace runtime
} // namespace tvm

#endif // TVM_RUNTIME_DEBUG_H_
19 changes: 16 additions & 3 deletions python/tvm/relay/op/tensor.py
@@ -1178,16 +1178,29 @@ def copy(data):


@script
def _copy_shape_func(data_shape):
    return data_shape
def _copy_shape_func_tensor(data_shape):
    ndim = data_shape.shape[0]
    out = output_tensor((ndim,), "int64")
    for i in const_range(ndim):
        out[i] = data_shape[i]
    return out


@script
def _copy_shape_func_scalar(data_shape):
    out = output_tensor((), "int64")
    return out


@reg.register_shape_func("copy", False)
def copy_shape_func(attrs, inputs, _):
    """
    Shape function for copy op.
    """
    return [_copy_shape_func(inputs[0])]
    input = inputs[0]
    if len(input.shape) == 0:
        return [_copy_shape_func_scalar(input)]
    return [_copy_shape_func_tensor(input)]


def device_copy(data, src_device, dst_device):
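As a quick sanity check on the tensor variant above, hybrid-script functions can also run in software-emulation mode when handed plain numpy arrays (a sketch; assumes emulation mode behaves as documented, not part of this commit):

import numpy as np

data_shape = np.array([2, 3, 4], dtype="int64")
copied = _copy_shape_func_tensor(data_shape)
# The shape is copied element-wise into a fresh output tensor, not aliased.
assert copied.tolist() == [2, 3, 4]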
2 changes: 1 addition & 1 deletion src/relay/backend/te_compiler.cc
@@ -350,7 +350,7 @@ class TECompilerImpl : public TECompilerNode {

  // implement lowered shape func
  CCacheValue LowerShapeFuncInternal(const CCacheKey& key) {
    VLOG(1) << "lowering dynamic shape function:" << std::endl
    VLOG(1) << "lowering dynamic shape function for:" << std::endl
            << PrettyPrint(key->source_func) << std::endl
            << "for target:" << std::endl
            << key->target->ToDebugString();
31 changes: 16 additions & 15 deletions src/relay/backend/te_compiler_cache.cc
@@ -145,7 +145,7 @@ class ScheduleBuilder : public backend::MemoizedExprTranslator<Array<te::Tensor>
      candidate_name = truncated_name.str();
    }

    // TODO(mbs): This should be the definititive global by which the PrimFunc is known and
    // TODO(mbs): This should be the definitive global by which the PrimFunc is known and
    // no other GlobalVar ctors should appear inside the lowering machinery.
    auto prim_fn_var = GlobalVar(renamer(candidate_name));
    prim_fn_var->checked_type_ = relay_func->checked_type();
@@ -371,6 +371,7 @@ class MakeShapeFunc : public backend::MemoizedExprTranslator<Array<te::Tensor>>

  CachedFunc Create(const Function& prim_func, const Target& target,
                    std::function<std::string(std::string)> renamer) {
    VLOG_CONTEXT << "MakeShapeFunc";
    TShapeDataDependent shape_func_param_states;

    for (auto param : prim_func->params) {
@@ -399,11 +400,12 @@ class MakeShapeFunc : public backend::MemoizedExprTranslator<Array<te::Tensor>>
    // Setup the name;
    readable_name_stream_ << "shape_func";

    // Create the `te::Tensor`s which represent the output.
    auto outputs = VisitExpr(prim_func->body);
    // Create the tensor expressions representing the output shapes.
    Array<te::Tensor> outputs = VisitExpr(prim_func->body);

    // Generate a name.
    auto candidate_name = readable_name_stream_.str();

    constexpr static size_t kMaxFuncNameLength = 80;
    // WARNING: Please make sure to also update TVM_CRT_MAX_STRLEN_FUNCTION_NAME
    // whenever the value of kMaxFuncNameLength changes
@@ -463,7 +465,7 @@ class MakeShapeFunc : public backend::MemoizedExprTranslator<Array<te::Tensor>>
    for (auto t : outputs) {
      out_ops.push_back(t->op);
    }
    auto schedule = te::create_schedule(out_ops);
    te::Schedule schedule = te::create_schedule(out_ops);
    tvm::te::AutoInlineInjective(schedule);
    for (const auto& scalar : scalars_) {
      auto scalar_op = scalar->op;
@@ -589,12 +591,15 @@ class MakeShapeFunc : public backend::MemoizedExprTranslator<Array<te::Tensor>>
}

  Array<te::Tensor> VisitExpr_(const CallNode* call_node) final {
    VLOG(1) << "considering call:" << std::endl << PrettyPrint(GetRef<Call>(call_node));
    if (auto* func = call_node->op.as<FunctionNode>()) {
      VLOG(1) << "user function";
      for (size_t i = 0; i < func->params.size(); ++i) {
        param_arg_map_[func->params[i]] = call_node->args[i];
      }
      return VisitExpr(func->body);
    }

    static auto fshape_func = Op::GetAttrMap<FShapeFunc>("FShapeFunc");
    static auto tshape_data_dependent = Op::GetAttrMap<TShapeDataDependent>("TShapeDataDependent");
    ICHECK(call_node->op.as<OpNode>()) << "Primitive function only allows call into primitive ops";
@@ -635,20 +640,16 @@ class MakeShapeFunc : public backend::MemoizedExprTranslator<Array<te::Tensor>>
    // Get output ndims
    auto ret_type = call_node->checked_type();
    Array<IndexExpr> out_ndims;
    if (const auto* ttype = ret_type.as<TensorTypeNode>()) {
    for (const auto& ttype : FlattenTupleType(ret_type)) {
      out_ndims.push_back(IntImm(DataType::Int(32), ttype->shape.size()));
    } else {
      auto rtype = ret_type.as<TupleTypeNode>();
      // TODO(@icemelon): Allow recursive tuple
      ICHECK(rtype);
      for (size_t i = 0; i < rtype->fields.size(); ++i) {
        auto ttype = rtype->fields[i].as<TensorTypeNode>();
        ICHECK(ttype);
        out_ndims.push_back(IntImm(DataType::Int(32), ttype->shape.size()));
      }
    }

    // Call shape function
    auto outputs = fshape_func[op](call_node->attrs, inputs, out_ndims);
    Array<te::Tensor> outputs = fshape_func[op](call_node->attrs, inputs, out_ndims);
    VLOG(1) << "shape function for '" << op->name << "' with inputs:" << std::endl
            << inputs << std::endl
            << "yielded outputs:" << std::endl
            << outputs;
    readable_name_stream_ << "_" << op->name;
    return outputs;
  }
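(For context: FlattenTupleType recursively flattens a possibly nested result type into its tensor leaves, so a return type such as (Tensor[(n), float32], (Tensor[(), int64],)) now yields out_ndims [1, 0] instead of failing the old ICHECK on the nested tuple, retiring the '@icemelon' TODO.)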
128 changes: 128 additions & 0 deletions src/runtime/debug.cc
@@ -0,0 +1,128 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

/*!
* \file src/runtime/debug.cc
* \brief Helpers for debugging at runtime.
*/

#include <tvm/runtime/debug.h>

namespace tvm {
namespace runtime {

template <typename T>
void AppendMembers(std::ostream& os, const NDArray& nd_array, int64_t dim0) {
  os << "=[";
  for (int64_t i = 0; i < dim0; ++i) {
    if (i > 0) {
      os << ",";
    }
    os << reinterpret_cast<T*>(nd_array->data)[i];
  }
  os << "]";
}

void AppendNDArray(std::ostream& os, const NDArray& nd_array, const DLDevice& host_device,
                   bool show_contents) {
  os << "NDArray[";
  os << "(";
  for (int dim = 0; dim < nd_array->ndim; ++dim) {
    if (dim > 0) {
      os << ",";
    }
    os << nd_array->shape[dim];
  }
  std::string basic_type = DLDataType2String(nd_array->dtype);
  os << ")," << basic_type;
  os << ",(" << nd_array->device.device_type;
  os << "," << nd_array->device.device_id;
  os << ")]";
  if (show_contents && nd_array->device.device_type == host_device.device_type &&
      nd_array->device.device_id == host_device.device_id) {
    int64_t dim0;
    if (nd_array->ndim == 0) {
      dim0 = 1;
    } else if (nd_array->ndim == 1) {
      dim0 = nd_array->shape[0];
      if (dim0 > 10) {
        // Too large.
        dim0 = 0;
      }
    } else {
      // Not rank-1.
      dim0 = 0;
    }
    if (dim0 > 0) {
      if (basic_type == "bool") {
        AppendMembers<bool>(os, nd_array, dim0);
      } else if (basic_type == "int8") {
        AppendMembers<int8_t>(os, nd_array, dim0);
      } else if (basic_type == "int16") {
        AppendMembers<int16_t>(os, nd_array, dim0);
      } else if (basic_type == "int32") {
        AppendMembers<int32_t>(os, nd_array, dim0);
      } else if (basic_type == "int64") {
        AppendMembers<int64_t>(os, nd_array, dim0);
      } else if (basic_type == "uint8") {
        AppendMembers<uint8_t>(os, nd_array, dim0);
      } else if (basic_type == "uint16") {
        AppendMembers<uint16_t>(os, nd_array, dim0);
      } else if (basic_type == "uint32") {
        AppendMembers<uint32_t>(os, nd_array, dim0);
      } else if (basic_type == "uint64") {
        AppendMembers<uint64_t>(os, nd_array, dim0);
      } else if (basic_type == "float32") {
        AppendMembers<float>(os, nd_array, dim0);
      } else if (basic_type == "float64") {
        AppendMembers<double>(os, nd_array, dim0);
      }
    }
  }
}

void AppendADT(std::ostream& os, const ADT& adt, const DLDevice& host_device, bool show_contents) {
  os << "ADT(" << adt->tag;
  for (size_t i = 0; i < adt->size; ++i) {
    os << ",";
    AppendRuntimeObject(os, adt[i], host_device, show_contents);
  }
  os << ")";
}

void AppendRuntimeObject(std::ostream& os, const ObjectRef& object, const DLDevice& host_device,
                         bool show_contents) {
  if (const auto* adt_obj = object.as<ADTObj>()) {
    AppendADT(os, GetRef<ADT>(adt_obj), host_device, show_contents);
  } else if (const auto* nd_array_cont = object.as<NDArray::Container>()) {
    AppendNDArray(os, GetRef<NDArray>(nd_array_cont), host_device, show_contents);
  } else {
    os << "?";
  }
}

std::string RuntimeObject2String(const ObjectRef& object, const DLDevice& host_device,
                                 bool show_contents) {
  std::ostringstream os;
  AppendRuntimeObject(os, object, host_device, show_contents);
  return os.str();
}

} // namespace runtime
} // namespace tvm
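For illustration (hypothetical values): a 3-element float32 tensor resident on the host device (device type 1, id 0) renders as NDArray[(3),float32,(1,0)]=[1,2,3]. Contents are shown only for rank-0 or rank-1 arrays of at most 10 elements on the host device; anything else gets shape, dtype, and device only. An ADT with tag 0 wrapping two such tensors renders as ADT(0,NDArray[...],NDArray[...]).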
19 changes: 3 additions & 16 deletions src/runtime/vm/executable.cc
@@ -24,6 +24,7 @@

#include <dmlc/memory_io.h>
#include <tvm/runtime/c_runtime_api.h>
#include <tvm/runtime/debug.h>
#include <tvm/runtime/registry.h>
#include <tvm/runtime/vm/executable.h>
#include <tvm/runtime/vm/vm.h>
@@ -171,27 +172,13 @@ std::string Executable::GetBytecode() const {
  return oss.str();
}

namespace {
String ShapeString(const ShapeTuple& shape_tuple, DLDataType dtype) {
  std::stringstream sizes;
  sizes << DLDataType2String(dtype) << "[";
  for (size_t i = 0; i < shape_tuple.size(); i++) {
    if (i != 0) {
      sizes << ", ";
    }
    sizes << shape_tuple.data()[i];
  }
  sizes << "]";
  return String(sizes.str());
}
}  // namespace

std::string Executable::GetConstants() const {
  std::ostringstream oss;
  for (size_t i = 0; i < constants.size(); ++i) {
    const auto& constant = constants[i];
    auto ndarray = Downcast<NDArray>(constant);
    oss << "VM Const[" << i << "]: has shape " << ShapeString(ndarray.Shape(), ndarray->dtype)
    oss << "VM Const[" << i
        << "]: " << RuntimeObject2String(ndarray, virtual_devices[host_device_index])
        << " on device index " << const_device_indexes[i] << std::endl;
  }
  return oss.str();
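With the new helper, a constants dump line now reads, for a hypothetical 3-element float32 host constant: VM Const[0]: NDArray[(3),float32,(1,0)]=[1,2,3] on device index 0, i.e. the same shape and dtype information as before, plus the contents of small host-resident tensors.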