Adding Graviton Regression test CI (#3273)
* testing on graviton

* testing on graviton

* testing on graviton

* checking python

* rmv python

* changing back to python

* testing cpu instead

* adding torchtext

* adding torchtext

* testing torchtext

* removing two tests

* removing pytorch test

* adding numpy upgrade

* adding numpy upgrade

* testing full ci

* testing full ci

* testing full ci

* skipping grpc

* adding graviton ci

* adding graviton ci

* adding ci cpu graviton

* adding ci cpu graviton

* adding env

* skipping a test for now

* fixing env variable

* removing scripted 3&4

* small changes

* fixing lint

* fixing lint

* fixing lint

* removing torchtext

---------

Co-authored-by: Ubuntu <ubuntu@ip-172-31-26-170.us-west-2.compute.internal>
Co-authored-by: Ankith Gunapal <agunapal@ischool.Berkeley.edu>
3 people authored Sep 10, 2024
1 parent 640b406 commit 87c9823
Showing 6 changed files with 144 additions and 9 deletions.
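
The two new workflows run the TorchServe sanity and regression suites on a self-hosted Graviton (arm64) runner, and the test changes gate several tests behind an architecture check so they are reported as skipped on aarch64. A minimal sketch of that skip pattern is shown below; the reusable skip_on_aarch64 marker name is hypothetical, since the committed diffs apply pytest.mark.skipif directly on each test.

import platform

import pytest

# Hypothetical helper: a single marker object that could be shared across tests.
# The committed changes inline this decorator on each skipped test instead.
skip_on_aarch64 = pytest.mark.skipif(
    platform.machine() == "aarch64", reason="Test skipped on aarch64 architecture"
)


@skip_on_aarch64
def test_example():
    # Skipped on Graviton (arm64) hosts, runs everywhere else.
    assert True

Keeping the condition on platform.machine() means the same test files run unchanged on the existing x86_64 CI runners.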
48 changes: 48 additions & 0 deletions .github/workflows/ci_graviton_cpu.yml
@@ -0,0 +1,48 @@
name: CI CPU Graviton

on:
  workflow_dispatch:
  push:
    branches:
      - master
  pull_request:
    branches:
      - master
  merge_group:


concurrency:
  group: ci-cpu-${{ github.workflow }}-${{ github.ref == 'refs/heads/master' && github.run_number || github.ref }}
  cancel-in-progress: true

jobs:
  ci-cpu:
    runs-on: [self-hosted, graviton-test]
    steps:
      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.10'
          architecture: arm64
      - name: Setup Java 17
        uses: actions/setup-java@v3
        with:
          distribution: 'zulu'
          java-version: '17'
      - name: Checkout TorchServe
        uses: actions/checkout@v3
        with:
          submodules: recursive
      - name: Install dependencies
        run: |
          python ts_scripts/install_dependencies.py --environment=dev
      - name: Torchserve Sanity
        uses: nick-fields/retry@v3
        env:
          TS_MAC_ARM64_CPU_ONLY: 'True'
        with:
          timeout_minutes: 60
          max_attempts: 3
          retry_on: error
          command: |
            python torchserve_sanity.py
41 changes: 41 additions & 0 deletions .github/workflows/regression_tests_graviton_cpu.yml
@@ -0,0 +1,41 @@
name: Run Regression Tests on CPU for Graviton

on:
  push:
    branches:
      - master
  pull_request:
    branches:
      - master
  merge_group:

concurrency:
  group: ci-cpu-${{ github.workflow }}-${{ github.ref == 'refs/heads/master' && github.run_number || github.ref }}
  cancel-in-progress: true

jobs:
  regression-cpu:
    runs-on: [self-hosted, graviton-test]
    steps:
      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.10'
          architecture: arm64
      - name: Setup Java 17
        uses: actions/setup-java@v3
        with:
          distribution: 'zulu'
          java-version: '17'
      - name: Checkout TorchServe
        uses: actions/checkout@v3
        with:
          submodules: recursive
      - name: Install dependencies
        run: |
          python ts_scripts/install_dependencies.py --environment=dev
      - name: Torchserve Regression Tests
        env:
          TS_MAC_ARM64_CPU_ONLY: 'True'
        run: |
          python test/regression_tests.py
5 changes: 5 additions & 0 deletions test/pytest/test_gRPC_inference_api.py
@@ -1,10 +1,12 @@
import json
import os
import platform
import threading
from ast import literal_eval

import inference_pb2
import management_pb2
import pytest
import test_gRPC_utils
import test_utils

@@ -50,6 +52,9 @@ def __infer(stub, model_name, model_input):
    return prediction


@pytest.mark.skipif(
    platform.machine() == "aarch64", reason="Test skipped on aarch64 architecture"
)
def test_inference_apis():
    with open(os.path.join(os.path.dirname(__file__), inference_data_json), "rb") as f:
        test_data = json.loads(f.read())
5 changes: 5 additions & 0 deletions test/pytest/test_model_custom_dependencies.py
@@ -1,7 +1,9 @@
import os
import pathlib
import platform
import subprocess

import pytest
import requests
import test_utils
from model_archiver import ModelArchiver, ModelArchiverConfig
@@ -140,6 +142,9 @@ def register_model_and_make_inference_request(expect_model_load_failure=False):
        resp.raise_for_status()


@pytest.mark.skipif(
    platform.machine() == "aarch64", reason="Test skipped on aarch64 architecture"
)
def test_install_dependencies_to_target_directory_with_requirements():
    test_utils.torchserve_cleanup()

49 changes: 41 additions & 8 deletions test/pytest/test_pytorch_profiler.py
@@ -6,21 +6,28 @@
import json
import os
import pathlib
import platform
import shutil
import subprocess
from concurrent import futures

import pytest
import requests

import test_utils
from concurrent import futures

REPO_ROOT = os.path.normpath(
    os.path.join(os.path.dirname(os.path.abspath(__file__)), "../../")
)
data_file_mnist = os.path.join(REPO_ROOT, "examples", "image_classifier", "mnist", "test_data", "1.png")
data_file_mnist = os.path.join(
    REPO_ROOT, "examples", "image_classifier", "mnist", "test_data", "1.png"
)
data_file_resnet = os.path.join(
    REPO_ROOT, "examples", "image_classifier", "resnet_152_batch", "images", "kitten.jpg"
    REPO_ROOT,
    "examples",
    "image_classifier",
    "resnet_152_batch",
    "images",
    "kitten.jpg",
)
data_file_resnet_dog = os.path.join(
    REPO_ROOT, "examples", "image_classifier", "resnet_152_batch", "images", "dog.jpg"
@@ -33,6 +40,9 @@


@pytest.fixture
@pytest.mark.skipif(
    platform.machine() == "aarch64", reason="Test skipped on aarch64 architecture"
)
def set_custom_handler(handler_name):
    """
    This method downloads resnet serialized file, creates mar file and sets up a custom handler
@@ -48,7 +58,8 @@ def set_custom_handler(handler_name):
    serialized_file = os.path.join(test_utils.MODEL_STORE, "resnet152-394f9c45.pth")
    if not os.path.exists(serialized_file):
        response = requests.get(
            "https://download.pytorch.org/models/resnet152-394f9c45.pth", allow_redirects=True
            "https://download.pytorch.org/models/resnet152-394f9c45.pth",
            allow_redirects=True,
        )
        assert response.status_code == 200
        with open(serialized_file, "wb") as f:
@@ -58,10 +69,21 @@ def set_custom_handler(handler_name):
    cmd = test_utils.model_archiver_command_builder(
        model_name="resnet-152-batch",
        version="1.0",
        model_file=os.path.join(test_utils.CODEBUILD_WD, "examples", "image_classifier", "resnet_152_batch", "model.py"),
        model_file=os.path.join(
            test_utils.CODEBUILD_WD,
            "examples",
            "image_classifier",
            "resnet_152_batch",
            "model.py",
        ),
        serialized_file=serialized_file,
        handler=handler_name,
        extra_files=os.path.join(test_utils.CODEBUILD_WD, "examples", "image_classifier", "index_to_name.json"),
        extra_files=os.path.join(
            test_utils.CODEBUILD_WD,
            "examples",
            "image_classifier",
            "index_to_name.json",
        ),
        force=True,
    )
    print(cmd)
@@ -94,6 +116,9 @@ def set_custom_handler(handler_name):
"handler_name",
[os.path.join(profiler_utils, "resnet_custom.py"), "image_classifier"],
)
@pytest.mark.skipif(
platform.machine() == "aarch64", reason="Test skipped on aarch64 architecture"
)
def test_profiler_default_and_custom_handler(set_custom_handler, handler_name):
"""
Tests pytorch profiler integration with default and custom handler
@@ -112,6 +137,9 @@ def test_profiler_default_and_custom_handler(set_custom_handler, handler_name):
"handler_name",
[os.path.join(profiler_utils, "resnet_profiler_override.py")],
)
@pytest.mark.skipif(
platform.machine() == "aarch64", reason="Test skipped on aarch64 architecture"
)
def test_profiler_arguments_override(set_custom_handler, handler_name):
"""
Tests pytorch profiler integration when user overrides the profiler arguments
@@ -133,6 +161,9 @@ def test_profiler_arguments_override(set_custom_handler, handler_name):
"handler_name",
[os.path.join(profiler_utils, "resnet_profiler_override.py")],
)
@pytest.mark.skipif(
platform.machine() == "aarch64", reason="Test skipped on aarch64 architecture"
)
def test_batch_input(set_custom_handler, handler_name):
"""
Tests pytorch profiler integration with batch inference
@@ -146,7 +177,9 @@ def test_batch_input(set_custom_handler, handler_name):

    def invoke_batch_input():
        data = open(data_file_resnet, "rb")
        response = requests.post("{}/predictions/resnet152".format(TF_INFERENCE_API), data)
        response = requests.post(
            "{}/predictions/resnet152".format(TF_INFERENCE_API), data
        )
        assert response.status_code == 200
        assert "tiger_cat" in json.loads(response.content)

5 changes: 4 additions & 1 deletion ts/torch_handler/unit_tests/test_object_detector.py
@@ -5,6 +5,7 @@
Ensures it can load and execute an example model
"""

import platform
import sys
from pathlib import Path

@@ -56,7 +57,6 @@ def model_dir(tmp_path_factory, model_name):

@pytest.fixture(scope="module")
def context(model_dir, model_name):

    context = MockContext(
        model_name="mnist",
        model_dir=model_dir.as_posix(),
@@ -73,6 +73,9 @@ def handler(context):
    return handler


@pytest.mark.skipif(
    platform.machine() == "aarch64", reason="Test skipped on aarch64 architecture"
)
def test_handle(handler, context, image_bytes):
    test_data = [{"data": image_bytes}] * 2
    results = handler.handle(test_data, context)
