NVIDIA · YuanTingHsieh · Apr 16, 2024 · Apr 13, 2024 · Apr 15, 2024 · Apr 15, 2024
diff --git a/tests/integration_test/data/jobs/np_loop/app_client/config/config_fed_client.conf b/tests/integration_test/data/jobs/np_loop/app_client/config/config_fed_client.conf
@@ -0,0 +1,43 @@
+{
+  format_version = 2
+  app_script = "train_loop.py"  # appears as the {app_script} placeholder in the launcher script below
+  app_config = ""  # appears as the {app_config} placeholder in the launcher script below; empty here
+  executors = [
+    {
+      tasks = [
+        "train"  # the only task name routed to this executor
+      ]
+      executor {
+        path = "nvflare.app_common.executors.client_api_launcher_executor.ClientAPILauncherExecutor"
+        args {
+          launcher_id = "launcher"  # id of the SubprocessLauncher component declared under components
+          pipe_id = "pipe"  # id of the FilePipe component declared under components
+          heartbeat_timeout = 60  # NOTE(review): presumably seconds - confirm against the executor docs
+          params_exchange_format = "numpy"  # the training scripts read/write params["numpy_key"]
+          params_transfer_type = "FULL"  # train_loop.py sends params_type="FULL"
+          train_with_evaluation = true  # NOTE(review): the scripts attach an "accuracy" metric to each result - confirm semantics
+        }
+      }
+    }
+  ]
+  task_data_filters = []
+  task_result_filters = []
+  components = [
+    {
+      id = "launcher"  # referenced by launcher_id above
+      path = "nvflare.app_common.launchers.subprocess_launcher.SubprocessLauncher"
+      args {
+        script = "python3 custom/{app_script}  {app_config} "
+        launch_once = true  # NOTE(review): presumably one long-lived process, matching the while-loop in train_loop.py - confirm
+      }
+    }
+    {
+      id = "pipe"  # referenced by pipe_id above
+      path = "nvflare.fuel.utils.pipe.file_pipe.FilePipe"
+      args {
+        mode = "PASSIVE"
+        root_path = "{WORKSPACE}/{JOB_ID}/{SITE_NAME}"
+      }
+    }
+  ]
+}
diff --git a/tests/integration_test/data/jobs/np_loop/app_client/custom/train_diff.py b/tests/integration_test/data/jobs/np_loop/app_client/custom/train_diff.py
@@ -0,0 +1,60 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import copy
+
+import nvflare.client as flare
+
+
+def train(input_arr):
+    """Mock training: deep-copy ``input_arr`` and return the copy plus 1."""
+    trained = copy.deepcopy(input_arr)
+    return trained + 1
+
+
+def evaluate(input_arr):
+    """Mock evaluation: ignore ``input_arr`` and report a fixed metric of 100."""
+    return 100
+
+
+def main():
+    """Run one mock round: receive a model, train, and send back the DIFF."""
+    flare.init()
+
+    # Fetch the incoming model and log its parameters.
+    received = flare.receive()
+    print(f"received weights is: {received.params}")
+
+    # Log the system information reported by the client API.
+    print(f"system info is: {flare.system_info()}")
+
+    weights_in = received.params["numpy_key"]
+
+    # Mock training, then mock evaluation on the received weights.
+    weights_out = train(weights_in)
+    accuracy = evaluate(weights_in)
+
+    # Only the change relative to the received weights is sent back.
+    delta = weights_out - weights_in
+    print(f"send back: {delta}")
+    result = flare.FLModel(
+        params={"numpy_key": delta},
+        params_type="DIFF",
+        metrics={"accuracy": accuracy},
+    )
+    flare.send(result)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tests/integration_test/data/jobs/np_loop/app_client/custom/train_full.py b/tests/integration_test/data/jobs/np_loop/app_client/custom/train_full.py
@@ -0,0 +1,59 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import copy
+
+import nvflare.client as flare
+
+
+def train(input_arr):
+    """Mock training: deep-copy ``input_arr`` and return the copy plus 1."""
+    trained = copy.deepcopy(input_arr)
+    return trained + 1
+
+
+def evaluate(input_arr):
+    """Mock evaluation: ignore ``input_arr`` and report a fixed metric of 100."""
+    return 100
+
+
+def main():
+    """Run one mock round: receive a model, train, and send back FULL weights."""
+    flare.init()
+
+    # Fetch the incoming model and log its parameters.
+    received = flare.receive()
+    print(f"received weights is: {received.params}")
+
+    # Log the system information reported by the client API.
+    print(f"system info is: {flare.system_info()}")
+
+    weights_in = received.params["numpy_key"]
+
+    # Mock training, then mock evaluation on the received weights.
+    weights_out = train(weights_in)
+    accuracy = evaluate(weights_in)
+
+    # Report the complete updated weights together with the metric.
+    print(f"send back: {weights_out}")
+    result = flare.FLModel(
+        params={"numpy_key": weights_out},
+        params_type="FULL",
+        metrics={"accuracy": accuracy},
+    )
+    flare.send(result)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tests/integration_test/data/jobs/np_loop/app_client/custom/train_loop.py b/tests/integration_test/data/jobs/np_loop/app_client/custom/train_loop.py
@@ -0,0 +1,68 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import copy
+
+import nvflare.client as flare
+
+
+def train(input_arr):
+    """Mock training: deep-copy ``input_arr`` and return the copy plus 1."""
+    trained = copy.deepcopy(input_arr)
+    return trained + 1
+
+
+def evaluate(input_arr):
+    """Mock evaluation: ignore ``input_arr`` and report a fixed metric of 100."""
+    return 100
+
+
+def main():
+    """Serve mock training rounds for as long as ``flare.is_running()`` is true."""
+    # Initialize the NVFlare client interface.
+    flare.init()
+
+    # Log the system information once before entering the round loop.
+    print(f"system info is: {flare.system_info()}", flush=True)
+
+    while flare.is_running():
+        # Fetch the model for this iteration and log its parameters.
+        received = flare.receive()
+        print(f"received weights is: {received.params}", flush=True)
+
+        # The weights are stored under the "numpy_key" entry.
+        weights_in = received.params["numpy_key"]
+
+        # Mock training, then mock evaluation on the received weights.
+        weights_out = train(weights_in)
+        accuracy = evaluate(weights_in)
+
+        # Log the system information again, then the round just completed.
+        print(f"system info is: {flare.system_info()}", flush=True)
+        round_num = received.current_round
+        print(f"finish round: {round_num}", flush=True)
+
+        # Report the complete updated weights, tagged with the same round.
+        print(f"send back: {weights_out}", flush=True)
+        result = flare.FLModel(
+            params={"numpy_key": weights_out},
+            params_type="FULL",
+            metrics={"accuracy": accuracy},
+            current_round=round_num,
+        )
+        flare.send(result)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tests/integration_test/data/jobs/np_loop/app_client/custom/train_metrics.py b/tests/integration_test/data/jobs/np_loop/app_client/custom/train_metrics.py
@@ -0,0 +1,86 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import copy
+import time
+
+import nvflare.client as flare
+from nvflare.client.tracking import MLflowWriter
+
+
+def train(input_arr, current_round, epochs=3):
+    """Mock training that logs one ``global_step`` metric per simulated batch.
+
+    Returns a deep copy of ``input_arr`` incremented by 1 once per epoch.
+    """
+    writer = MLflowWriter()
+    result = copy.deepcopy(input_arr)
+    num_of_data, batch_size = 2000, 16
+    batches_per_epoch = num_of_data // batch_size
+    for epoch in range(epochs):
+        # First global step of this epoch within the given round.
+        epoch_start = current_round * batches_per_epoch * epochs + epoch * batches_per_epoch
+        for batch in range(batches_per_epoch):
+            step = epoch_start + batch
+            print(f"logging record: {step}")
+            writer.log_metric(key="global_step", value=step, step=step)
+        # Mock one epoch of training: add 1 to the copy, assumed to take 1 second.
+        result += 1
+        time.sleep(1.0)
+    return result
+
+
+def evaluate(input_arr):
+    """Mock evaluation: ignore ``input_arr`` and report a fixed metric of 100."""
+    return 100
+
+
+def main():
+    """Serve mock training rounds, logging metrics, while ``flare.is_running()``."""
+    # Initialize the NVFlare client interface.
+    flare.init()
+
+    # Log the system information once before entering the round loop.
+    print(f"system info is: {flare.system_info()}")
+
+    while flare.is_running():
+        # Fetch the model for this iteration and log its parameters.
+        received = flare.receive()
+        print(f"received weights is: {received.params}")
+
+        # The weights are stored under the "numpy_key" entry.
+        weights_in = received.params["numpy_key"]
+        round_num = received.current_round
+
+        # Mock training (three epochs), then mock evaluation on the received weights.
+        weights_out = train(weights_in, current_round=round_num, epochs=3)
+        accuracy = evaluate(weights_in)
+
+        # Log the system information again, then the round just completed.
+        print(f"system info is: {flare.system_info()}")
+        print(f"finish round: {round_num}")
+
+        # Report the complete updated weights, tagged with the same round.
+        print(f"send back: {weights_out}")
+        result = flare.FLModel(
+            params={"numpy_key": weights_out},
+            params_type="FULL",
+            metrics={"accuracy": accuracy},
+            current_round=round_num,
+        )
+        flare.send(result)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tests/integration_test/data/jobs/np_loop/app_server/config/config_fed_server.conf b/tests/integration_test/data/jobs/np_loop/app_server/config/config_fed_server.conf
@@ -0,0 +1,47 @@
+{
+  format_version = 2
+  task_data_filters = []
+  task_result_filters = []
+  workflows = [
+    {
+      id = "scatter_and_gather"
+      path = "nvflare.app_common.workflows.scatter_and_gather.ScatterAndGather"
+      args {
+        min_clients = 2  # same value as min_clients in this job's meta.conf
+        num_rounds = 5
+        start_round = 0
+        wait_time_after_min_received = 0
+        aggregator_id = "aggregator"  # id of the aggregator component declared under components
+        persistor_id = "persistor"  # id of the persistor component declared under components
+        shareable_generator_id = "shareable_generator"  # id of the shareable generator component declared under components
+        train_task_name = "train"  # same task name listed for the executor in config_fed_client.conf
+        train_timeout = 0
+      }
+    }
+  ]
+  components = [
+    {
+      id = "persistor"  # referenced by persistor_id above
+      path = "nvflare.app_common.np.np_model_persistor.NPModelPersistor"
+    }
+    {
+      id = "shareable_generator"  # referenced by shareable_generator_id above
+      path = "nvflare.app_common.shareablegenerators.full_model_shareable_generator.FullModelShareableGenerator"
+      args {}
+    }
+    {
+      id = "aggregator"  # referenced by aggregator_id above
+      path = "nvflare.app_common.aggregators.intime_accumulate_model_aggregator.InTimeAccumulateWeightedAggregator"
+      args {
+        expected_data_kind = "WEIGHTS"  # NOTE(review): presumably pairs with the params_type="FULL" results sent by train_loop.py - confirm
+      }
+    }
+    {
+      id = "model_selector"  # not referenced by id from the workflow args above
+      path = "nvflare.app_common.widgets.intime_model_selector.IntimeModelSelector"
+      args {
+        key_metric = "accuracy"  # the client scripts report their metric under the "accuracy" key
+      }
+    }
+  ]
+}
diff --git a/tests/integration_test/data/jobs/np_loop/meta.conf b/tests/integration_test/data/jobs/np_loop/meta.conf
@@ -0,0 +1,15 @@
+{
+  name = "np_loop"  # same as the job directory name (jobs/np_loop)
+  resource_spec {}
+  deploy_map {
+    app_server = [  # the app_server folder of this job
+      "server"
+    ],
+    app_client = [  # the app_client folder of this job, listed for both client sites
+      "site-1",
+      "site-2"
+    ]
+  }
+  min_clients = 2  # equals the number of client sites listed in deploy_map
+  mandatory_clients = []
+}