NVIDIA · chesterxgchen · Aug 27, 2024 · Aug 25, 2024 · Aug 25, 2024 · Aug 25, 2024
diff --git a/examples/hello-world/step-by-step/higgs/sklearn-kmeans/code/sklearn_kmeans_job.py b/examples/hello-world/step-by-step/higgs/sklearn-kmeans/code/sklearn_kmeans_job.py
@@ -0,0 +1,63 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from kmeans_assembler import KMeansAssembler
+
+from nvflare import FedJob
+from nvflare.app_common.aggregators.collect_and_assemble_aggregator import CollectAndAssembleAggregator
+from nvflare.app_common.shareablegenerators import FullModelShareableGenerator
+from nvflare.app_common.workflows.scatter_and_gather import ScatterAndGather
+from nvflare.app_opt.sklearn.joblib_model_param_persistor import JoblibModelParamPersistor
+from nvflare.job_config.script_runner import FrameworkType, ScriptRunner
+
+if __name__ == "__main__":  # script entry: build the federated K-Means job, export it, then simulate it
+    n_clients = 3  # number of sites added below; also used as the controller's min_clients
+    num_rounds = 20  # passed to ScatterAndGather as num_rounds
+    train_script = "code/kmeans_fl.py"  # client-side script handed to ScriptRunner (relative path)
+    script_args = "--data_root_dir /tmp/nvflare/dataset/output"  # argument string passed along with the script
+
+    aggregator_id = "aggregator"  # component ids: used both to register components and to reference them
+    persistor_id = "persistor"
+    shareable_generator_id = "shareable_generator"
+    assembler_id = "kmeans_assembler"
+
+    job = FedJob("sklearn_kmeans")
+
+    initial_params = dict(n_clusters=2)  # given to the persistor as its initial_params
+    job.to(JoblibModelParamPersistor(initial_params=initial_params), "server", id=persistor_id)
+    job.to(FullModelShareableGenerator(), "server", id=shareable_generator_id)
+    job.to(CollectAndAssembleAggregator(assembler_id=assembler_id), "server", id=aggregator_id)
+    job.to(KMeansAssembler(), "server", id=assembler_id)  # registered under the id given to the aggregator above
+
+    ctrl = ScatterAndGather(  # server-side workflow; refers to the components registered above by id
+        min_clients=n_clients,  # same value as the number of sites created below
+        num_rounds=num_rounds,
+        start_round=0,
+        wait_time_after_min_received=0,
+        aggregator_id=aggregator_id,
+        persistor_id=persistor_id,
+        shareable_generator_id=shareable_generator_id,
+        train_task_name="train",
+        train_timeout=0,  # NOTE(review): 0 presumably means "no timeout" - confirm against ScatterAndGather docs
+        allow_empty_global_weights=True,  # NOTE(review): presumably needed since only n_clusters is set initially
+    )
+
+    job.to(ctrl, "server")
+
+    # Add clients: one ScriptRunner (FrameworkType.RAW) per site, targets named site-1 .. site-<n_clients>
+    for i in range(n_clients):
+        runner = ScriptRunner(script=train_script, script_args=script_args, framework=FrameworkType.RAW)
+        job.to(runner, f"site-{i + 1}")
+
+    job.export_job("/tmp/nvflare/jobs")  # export the job definition under /tmp/nvflare/jobs
+    job.simulator_run("/tmp/nvflare/sklearn_kmeans", gpu="0")  # run in the simulator; path is presumably the workspace
diff --git a/examples/hello-world/step-by-step/higgs/sklearn-kmeans/sklearn_kmeans.ipynb b/examples/hello-world/step-by-step/higgs/sklearn-kmeans/sklearn_kmeans.ipynb
@@ -21,11 +21,8 @@
     "\n",
     "## Setup NVFLARE\n",
     "\n",
-    "Follow [Getting Started](https://nvflare.readthedocs.io/en/main/getting_started.html) to set up a virtual environment and install NVFLARE.\n",
-    "\n",
-    "You can also follow this [notebook](https://github.com/NVIDIA/NVFlare/blob/main/examples/nvflare_setup.ipynb) to get set up.\n",
-    "\n",
-    "> Make sure you have installed nvflare from **terminal** \n"
+    "Follow [Getting Started](../../../../getting_started/readme.ipynb) to set up a virtual environment and install NVFLARE.\n",
+    "\n"
    ]
   },
   {
@@ -314,77 +311,8 @@
     "\n",
     "## Prepare Job  \n",
     "\n",
-    "Now, we have the code, we need to prepare job folder with configurations to run in NVFLARE. To do this, we can leveage the job template for scikit learn. First look at the the available job templates"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "01fdd962-e63c-4b58-81e7-beeedd05509b",
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "!nvflare config -jt ../../../../../job_templates/"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "a5ed150d-4692-49fe-87a6-1779ec64d9df",
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "!nvflare job list_templates"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "d7602cec-5fb5-4b23-a82a-b7444f2af471",
-   "metadata": {},
-   "source": [
-    "the `sklearn_kmeans` is the one we need. "
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "9899a684-444f-4c04-a388-4bdd9b7dc649",
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "!nvflare job create -j /tmp/nvflare/jobs/sklearn_kmeans -force -w sklearn_kmeans \\\n",
-    "-sd code \\\n",
-    "-f config_fed_client.conf app_script=\"kmeans_fl.py\" app_config=\"--data_root_dir /tmp/nvflare/dataset/output\""
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "17dc5dcc-573e-479e-965d-0cca8b58995a",
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "!cat /tmp/nvflare/jobs/sklearn_kmeans/app/config/config_fed_client.conf"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "7b3086fd-ecf3-4584-a643-5706ae2069b5",
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "!tree /tmp/nvflare/jobs/sklearn_kmeans"
+    "Now that we have the code, we need to prepare a job folder with configurations to run in NVFLARE. To do this, we are going to use the Job API to construct a FedJob, which can be used to run the simulation and export the job config. \n",
+    "\n"
    ]
   },
   {
@@ -410,13 +338,13 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "09800691-46e0-4a95-bea6-d2a4a425052b",
+   "id": "dcb89b46-aad1-433d-b2b1-6b46380cef7c",
    "metadata": {
     "tags": []
    },
    "outputs": [],
    "source": [
-    "!nvflare simulator /tmp/nvflare/jobs/sklearn_kmeans -w /tmp/nvflare/sklearn_kmeans -n 3 -t 3"
+    "!python3 code/sklearn_kmeans_job.py"
    ]
   },
   {
@@ -428,7 +356,7 @@
    "source": [
     "Let's examine the results.\n",
     "\n",
-    "We can notice from the FL training log, at the last round of local training, site-1 reports `site-1: global model homogeneity_score: 0.0068`\n",
+    "We can see from the FL training log that, at the last round of local training, site-1 reports `site-1: global model homogeneity_score: 0.0040`\n",
     "Now let's run a local training to verify if this number makes sense."
    ]
   },
@@ -449,22 +377,14 @@
    "id": "ea7bbacc-b059-4f82-9785-2b22bf840ef9",
    "metadata": {},
    "source": [
-    "HIGGS dataset is challenging for unsupervised clustering, as we can observe from the result. As shown by the local training with same number of iterations, the score is `model homogeneity_score: 0.0049`. As compared with the FL score of `0.0068`, FL in this case still provides some benefit from the collaborative learning.\n",
+    "The HIGGS dataset is challenging for unsupervised clustering, as we can observe from the result. As shown by the local training with the same number of iterations, the score is `model homogeneity_score: 0.0037`. Compared with the FL score of `0.0040`, FL in this case still provides some benefit from collaborative learning.\n",
     "\n",
     "## We are done !\n",
     "Congratulations! you have just completed the federated k-Means clustering for tabular data. \n",
     "\n",
     "Now we will move on from scikit-learn and take a look at how to use federated XGBoost.\n",
     "In the next example [xgboost](../xgboost/xgboost_horizontal.ipynb), we will show a federated horizontal xgboost learning with bagging collaboration."
    ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "b5329a14-1a81-488a-8edc-2b7c85cffd8c",
-   "metadata": {},
-   "outputs": [],
-   "source": []
   }
  ],
  "metadata": {
@@ -483,7 +403,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.16"
+   "version": "3.8.19"
   }
  },
  "nbformat": 4,

diff --git a/examples/hello-world/step-by-step/higgs/sklearn-linear/sklearn_linear.ipynb b/examples/hello-world/step-by-step/higgs/sklearn-linear/sklearn_linear.ipynb
@@ -21,11 +21,7 @@
     "\n",
     "## Setup NVFLARE\n",
     "\n",
-    "Follow [Getting Started](https://nvflare.readthedocs.io/en/main/getting_started.html) to set up a virtual environment and install NVFLARE.\n",
-    "\n",
-    "You can also follow this [notebook](https://github.com/NVIDIA/NVFlare/blob/main/examples/nvflare_setup.ipynb) to get set up.\n",
-    "\n",
-    "> Make sure you have installed nvflare from **terminal** \n"
+    "Follow [Getting Started](../../../../getting_started/readme.ipynb) to set up a virtual environment and install NVFLARE.\n"
    ]
   },
   {
@@ -307,7 +303,7 @@
     "\n",
     "## Prepare Job  \n",
     "\n",
-    "Now, we have the code, we need to prepare job folder with configurations to run in NVFLARE. To do this, we can leveage the job template for scikit learn. First look at the the available job templates"
+    "Now that we have the code, we need to prepare a job folder with configurations to run in NVFLARE. To do this, we are going to use the Job API to construct a FedJob, which can be used to run the simulation and export the job config. "
    ]
   },
   {
@@ -319,65 +315,7 @@
    },
    "outputs": [],
    "source": [
-    "!nvflare config -jt ../../../../../job_templates/"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "a5ed150d-4692-49fe-87a6-1779ec64d9df",
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "!nvflare job list_templates"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "d7602cec-5fb5-4b23-a82a-b7444f2af471",
-   "metadata": {},
-   "source": [
-    "the `sklearn_linear` is the one we need. "
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "9899a684-444f-4c04-a388-4bdd9b7dc649",
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "!nvflare job create -j /tmp/nvflare/jobs/sklearn_sgd -force -w sklearn_linear \\\n",
-    "-sd code \\\n",
-    "-f config_fed_client.conf app_script=\"sgd_fl.py\" app_config=\"--data_root_dir /tmp/nvflare/dataset/output\"\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "17dc5dcc-573e-479e-965d-0cca8b58995a",
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "!cat /tmp/nvflare/jobs/sklearn_sgd/app/config/config_fed_client.conf"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "7b3086fd-ecf3-4584-a643-5706ae2069b5",
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "!tree /tmp/nvflare/jobs/sklearn_sgd"
+    "!cat sklearn_linear_job.py"
    ]
   },
   {
@@ -403,13 +341,28 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "09800691-46e0-4a95-bea6-d2a4a425052b",
+   "id": "6a068316-b40b-407a-b886-b5c1c3082801",
    "metadata": {
     "tags": []
    },
    "outputs": [],
    "source": [
-    "!nvflare simulator /tmp/nvflare/jobs/sklearn_sgd -w /tmp/nvflare/sklearn_sgd -n 3 -t 3"
+    "!python3 sklearn_linear_job.py"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "dad05c48-b60d-4bcd-a13f-6b52056a804f",
+   "metadata": {},
+   "source": [
+    "You can also run from the exported job folder using the **Simulator CLI**:\n",
+    "\n",
+    "``` \n",
+    "\n",
+    "!nvflare simulator /tmp/nvflare/jobs/sklearn_sgd -w /tmp/nvflare/sklearn_sgd -n 3 -t 3\n",
+    "\n",
+    "\n",
+    "```"
    ]
   },
   {
@@ -489,7 +442,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.16"
+   "version": "3.8.19"
   }
  },
  "nbformat": 4,