[DO NOT MERGE] Add test. FIXME: model generation
GuanLuo committed Apr 21, 2023
1 parent fb65173 commit f07b3f9
Showing 2 changed files with 212 additions and 0 deletions.
90 changes: 90 additions & 0 deletions qa/L0_device_memory_tracker/test.py
@@ -0,0 +1,90 @@
#!/usr/bin/env python
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import unittest
import numpy as np
import time

import tritonclient.http as tritonclient
from tritonclient.utils import InferenceServerException

import nvidia_smi
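# nvidia_smi is provided by the nvidia-ml-py3 package installed in test.sh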


class MemoryUsageTest(unittest.TestCase):

def setUp(self):
nvidia_smi.nvmlInit()
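        # device index 0 below matches CUDA_VISIBLE_DEVICES=0 exported in test.sh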
self.gpu_handle_ = nvidia_smi.nvmlDeviceGetHandleByIndex(0)
self.client_ = tritonclient.InferenceServerClient(url="localhost:8000")

def tearDown(self):
nvidia_smi.nvmlShutdown()

def report_used_gpu_memory(self):
info = nvidia_smi.nvmlDeviceGetMemoryInfo(self.gpu_handle_)
return info.used

def is_testing_backend(self, model_name, backend_name):
return self.client_.get_model_config(
model_name)["backend"] == backend_name

def verify_recorded_usage(self, model_stat):
recorded_gpu_usage = 0
for usage in model_stat["memory_usage"]:
if usage["type"] == "GPU":
recorded_gpu_usage += usage["byte_size"]
# unload and verify recorded usage
before_total_usage = self.report_used_gpu_memory()
self.client_.unload_model(model_stat["name"])
        # unload_model() returns once the model has been put into the
        # unloading process; wait for the unload to finish
time.sleep(2)
usage_delta = before_total_usage - self.report_used_gpu_memory()
        # compare with tolerance, as the usage obtained from NVML is the
        # overall GPU memory usage rather than the per-model usage
self.assertTrue(
usage_delta * 0.9 <= recorded_gpu_usage <= usage_delta * 1.1,
msg=
"For model {}, expect recorded usage to be in range [{}, {}], got {}"
.format(model_stat["name"], usage_delta * 0.9, usage_delta * 1.1,
recorded_gpu_usage))

def test_onnx(self):
model_stats = self.client_.get_inference_statistics()["model_stats"]
for model_stat in model_stats:
if self.is_testing_backend(model_stat["name"], "onnxruntime"):
self.verify_recorded_usage(model_stat)

def test_plan(self):
model_stats = self.client_.get_inference_statistics()["model_stats"]
for model_stat in model_stats:
if self.is_testing_backend(model_stat["name"], "tensorrt"):
self.verify_recorded_usage(model_stat)


if __name__ == "__main__":
unittest.main()
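
For reference, a minimal sketch of one model_stats entry that
verify_recorded_usage() consumes (field names are taken from the test above;
the model name and byte count are illustrative):

    example_model_stat = {
        "name": "resnet50_plan",
        "memory_usage": [
            # the test sums byte_size over all entries with type == "GPU"
            {"type": "GPU", "byte_size": 104857600},
        ],
    }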
122 changes: 122 additions & 0 deletions qa/L0_device_memory_tracker/test.sh
@@ -0,0 +1,122 @@
#!/bin/bash
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
if [ "$#" -ge 1 ]; then
REPO_VERSION=$1
fi
if [ -z "$REPO_VERSION" ]; then
echo -e "Repository version must be specified"
echo -e "\n***\n*** Test Failed\n***"
exit 1
fi
if [ ! -z "$TEST_REPO_ARCH" ]; then
REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
fi

export CUDA_VISIBLE_DEVICES=0

TEST_LOG="./test.log"
TEST_PY=test.py

DATADIR=/data/inferenceserver/${REPO_VERSION}
rm -f *.log

TEST_RESULT_FILE='test_results.txt'
SERVER=/opt/tritonserver/bin/tritonserver
SERVER_LOG="./server.log"

source ../common/util.sh

RET=0

# Prepare the model repository; it only contains ONNX and TRT models, as the
# corresponding backends are known to support device memory tracking.
rm -rf models && mkdir models
# ONNX
cp -r ${DATADIR}/onnx_model_store/* models/.

# Convert to get TRT models against the system
CAFFE2PLAN=../common/caffe2plan
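# caffe2plan builds a TensorRT PLAN file from a Caffe prototxt/caffemodel pair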
set +e
mkdir -p models/vgg19_plan/1 && rm -f models/vgg19_plan/1/model.plan && \
$CAFFE2PLAN -b32 -n prob -o models/vgg19_plan/1/model.plan \
$DATADIR/caffe_models/vgg19.prototxt $DATADIR/caffe_models/vgg19.caffemodel
if [ $? -ne 0 ]; then
echo -e "\n***\n*** Failed to generate vgg19 PLAN\n***"
exit 1
fi

mkdir -p models/resnet50_plan/1 && rm -f models/resnet50_plan/1/model.plan && \
$CAFFE2PLAN -b32 -n prob -o models/resnet50_plan/1/model.plan \
$DATADIR/caffe_models/resnet50.prototxt $DATADIR/caffe_models/resnet50.caffemodel
if [ $? -ne 0 ]; then
echo -e "\n***\n*** Failed to generate resnet50 PLAN\n***"
exit 1
fi

mkdir -p models/resnet152_plan/1 && rm -f models/resnet152_plan/1/model.plan && \
$CAFFE2PLAN -h -b32 -n prob -o models/resnet152_plan/1/model.plan \
$DATADIR/caffe_models/resnet152.prototxt $DATADIR/caffe_models/resnet152.caffemodel
if [ $? -ne 0 ]; then
echo -e "\n***\n*** Failed to generate resnet152 PLAN\n***"
exit 1
fi
set -e

# The test uses NVML via the nvidia-ml-py3 Python package to validate the
# reported usage.
pip install nvidia-ml-py3

# Start the server to load all models (in parallel), then gradually unload
# the models and expect the memory usage changes to match what is reported
# in the statistics.
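# Note: --model-control-mode=explicit enables load/unload through the client
# API, and --load-model=* loads every model in the repository at startup.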
SERVER_ARGS="--model-repository=models --model-control-mode=explicit --load-model=*"
run_server
if [ "$SERVER_PID" == "0" ]; then
echo -e "\n***\n*** Failed to start $SERVER\n***"
cat $SERVER_LOG
exit 1
fi

set +e
python $TEST_PY > $TEST_LOG 2>&1
if [ $? -ne 0 ]; then
RET=1
fi
set -e
kill $SERVER_PID
wait $SERVER_PID

if [ $RET -eq 0 ]; then
echo -e "\n***\n*** Test Passed\n***"
else
cat $SERVER_LOG
cat $TEST_LOG
echo -e "\n***\n*** Test FAILED\n***"
fi

exit $RET
