[DO NOT MERGE] Add test. FIXME: model generation
GuanLuo committed Apr 21, 2023
1 parent fb65173 commit f07b3f9
Showing 2 changed files with 212 additions and 0 deletions.
90 changes: 90 additions & 0 deletions qa/L0_device_memory_tracker/test.py
@@ -0,0 +1,90 @@
#!/usr/bin/env python
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import unittest
import numpy as np
import time

import tritonclient.http as tritonclient
from tritonclient.utils import InferenceServerException

import nvidia_smi
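# nvidia_smi is provided by the nvidia-ml-py3 package installed in test.sh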


class MemoryUsageTest(unittest.TestCase):

def setUp(self):
nvidia_smi.nvmlInit()
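        # device index 0 below matches CUDA_VISIBLE_DEVICES=0 exported in test.sh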
self.gpu_handle_ = nvidia_smi.nvmlDeviceGetHandleByIndex(0)
self.client_ = tritonclient.InferenceServerClient(url="localhost:8000")

def tearDown(self):
nvidia_smi.nvmlShutdown()

def report_used_gpu_memory(self):
info = nvidia_smi.nvmlDeviceGetMemoryInfo(self.gpu_handle_)
return info.used

def is_testing_backend(self, model_name, backend_name):
return self.client_.get_model_config(
model_name)["backend"] == backend_name

def verify_recorded_usage(self, model_stat):
recorded_gpu_usage = 0
for usage in model_stat["memory_usage"]:
if usage["type"] == "GPU":
recorded_gpu_usage += usage["byte_size"]
# unload and verify recorded usage
before_total_usage = self.report_used_gpu_memory()
self.client_.unload_model(model_stat["name"])
        # unload_model() returns once the model has been put into the
        # unloading process; wait for the unload to finish
time.sleep(2)
usage_delta = before_total_usage - self.report_used_gpu_memory()
        # compare with tolerance, as the usage obtained from NVML is the
        # overall GPU memory usage rather than the per-model usage
self.assertTrue(
usage_delta * 0.9 <= recorded_gpu_usage <= usage_delta * 1.1,
msg=
"For model {}, expect recorded usage to be in range [{}, {}], got {}"
.format(model_stat["name"], usage_delta * 0.9, usage_delta * 1.1,
recorded_gpu_usage))

def test_onnx(self):
model_stats = self.client_.get_inference_statistics()["model_stats"]
for model_stat in model_stats:
if self.is_testing_backend(model_stat["name"], "onnxruntime"):
self.verify_recorded_usage(model_stat)

def test_plan(self):
model_stats = self.client_.get_inference_statistics()["model_stats"]
for model_stat in model_stats:
if self.is_testing_backend(model_stat["name"], "tensorrt"):
self.verify_recorded_usage(model_stat)


if __name__ == "__main__":
unittest.main()
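
For reference, a minimal sketch of one model_stats entry that
verify_recorded_usage() consumes (field names are taken from the test above;
the model name and byte count are illustrative):

    example_model_stat = {
        "name": "resnet50_plan",
        "memory_usage": [
            # the test sums byte_size over all entries with type == "GPU"
            {"type": "GPU", "byte_size": 104857600},
        ],
    }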
122 changes: 122 additions & 0 deletions qa/L0_device_memory_tracker/test.sh
@@ -0,0 +1,122 @@
#!/bin/bash
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
if [ "$#" -ge 1 ]; then
REPO_VERSION=$1
fi
if [ -z "$REPO_VERSION" ]; then
echo -e "Repository version must be specified"
echo -e "\n***\n*** Test Failed\n***"
exit 1
fi
if [ ! -z "$TEST_REPO_ARCH" ]; then
REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
fi

export CUDA_VISIBLE_DEVICES=0

TEST_LOG="./test.log"
TEST_PY=test.py

DATADIR=/data/inferenceserver/${REPO_VERSION}
rm -f *.log

TEST_RESULT_FILE='test_results.txt'
SERVER=/opt/tritonserver/bin/tritonserver
SERVER_LOG="./server.log"

source ../common/util.sh

RET=0

# Prepare the model repository; it only contains ONNX and TRT models, as the
# corresponding backends are known to support device memory tracking.
rm -rf models && mkdir models
# ONNX
cp -r ${DATADIR}/onnx_model_store/* models/.

# Convert to get TRT models against the system
CAFFE2PLAN=../common/caffe2plan
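# caffe2plan builds a TensorRT PLAN file from a Caffe prototxt/caffemodel pair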
set +e
mkdir -p models/vgg19_plan/1 && rm -f models/vgg19_plan/1/model.plan && \
$CAFFE2PLAN -b32 -n prob -o models/vgg19_plan/1/model.plan \
$DATADIR/caffe_models/vgg19.prototxt $DATADIR/caffe_models/vgg19.caffemodel
if [ $? -ne 0 ]; then
echo -e "\n***\n*** Failed to generate vgg19 PLAN\n***"
exit 1
fi

mkdir -p models/resnet50_plan/1 && rm -f models/resnet50_plan/1/model.plan && \
$CAFFE2PLAN -b32 -n prob -o models/resnet50_plan/1/model.plan \
$DATADIR/caffe_models/resnet50.prototxt $DATADIR/caffe_models/resnet50.caffemodel
if [ $? -ne 0 ]; then
echo -e "\n***\n*** Failed to generate resnet50 PLAN\n***"
exit 1
fi

mkdir -p models/resnet152_plan/1 && rm -f models/resnet152_plan/1/model.plan && \
$CAFFE2PLAN -h -b32 -n prob -o models/resnet152_plan/1/model.plan \
$DATADIR/caffe_models/resnet152.prototxt $DATADIR/caffe_models/resnet152.caffemodel
if [ $? -ne 0 ]; then
echo -e "\n***\n*** Failed to generate resnet152 PLAN\n***"
exit 1
fi
set -e

# The test uses NVML via the nvidia-ml-py3 Python package to validate the
# reported usage.
pip install nvidia-ml-py3

# Start the server to load all models (in parallel), then gradually unload
# the models and expect the memory usage changes to match what is reported
# in the statistics.
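# Note: --model-control-mode=explicit enables load/unload through the client
# API, and --load-model=* loads every model in the repository at startup.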
SERVER_ARGS="--model-repository=models --model-control-mode=explicit --load-model=*"
run_server
if [ "$SERVER_PID" == "0" ]; then
echo -e "\n***\n*** Failed to start $SERVER\n***"
cat $SERVER_LOG
exit 1
fi

set +e
python $TEST_PY > $TEST_LOG 2>&1
if [ $? -ne 0 ]; then
RET=1
fi
set -e
kill $SERVER_PID
wait $SERVER_PID

if [ $RET -eq 0 ]; then
echo -e "\n***\n*** Test Passed\n***"
else
cat $SERVER_LOG
cat $TEST_LOG
echo -e "\n***\n*** Test FAILED\n***"
fi

exit $RET
