diff --git a/public-site/docs/src/.vuepress/config.ts b/public-site/docs/src/.vuepress/config.ts index 79172a43..9e02d83e 100644 --- a/public-site/docs/src/.vuepress/config.ts +++ b/public-site/docs/src/.vuepress/config.ts @@ -71,7 +71,18 @@ export default defineUserConfig({ '/guides/enable-and-disable-components/', '/guides/external-alias/', '/guides/component-start-stop-restart/', - '/guides/jobs/', + { + link: '/guides/jobs/', + text: "Jobs", + collapsible: true, + children: [ + '/guides/jobs/configure-jobs', + '/guides/jobs/job-manager-and-job-api', + '/guides/jobs/environment-variables', + '/guides/jobs/jobs-in-web-console', + '/guides/jobs/openapi-swagger' + ] + }, '/guides/deploy-only/', '/guides/build-and-deploy/', '/guides/deployment-promotion/', diff --git a/public-site/docs/src/docs/topic-rollingupdate/index.md b/public-site/docs/src/docs/topic-rollingupdate/index.md index 7ad79cad..59fac75c 100644 --- a/public-site/docs/src/docs/topic-rollingupdate/index.md +++ b/public-site/docs/src/docs/topic-rollingupdate/index.md @@ -20,6 +20,4 @@ Rolling updates ensure that the application is always available at pod level. Ho Radix uses readiness probe to minimize this downtime as close to zero as possible, where TCP socket is utilized. Kubernetes will attempt to open a TCP socket to the application container on the port specified in `radixconfig.yaml` file according to the following two parameters. - Initial delay seconds where Kubernetes will wait before performing the first probe after the container has started (currently set by Radix to 5 seconds) -- Period seconds interval where Kubernetes will perform the probes after the initial probe (currently set by Radix to 10 seconds) - -HTTP probe to the application is planned to be implemented in the future to ensure absolute zero downtime. However, this will require Radix users to provide an endpoint in their applications where Kubernetes will perform the probe. 
+- Period seconds interval where Kubernetes will perform the probes after the initial probe (currently set by Radix to 10 seconds) \ No newline at end of file diff --git a/public-site/docs/src/docs/topic-uptime/index.md b/public-site/docs/src/docs/topic-uptime/index.md index 572cdb58..c240d37f 100644 --- a/public-site/docs/src/docs/topic-uptime/index.md +++ b/public-site/docs/src/docs/topic-uptime/index.md @@ -80,4 +80,10 @@ Use Playground for testing Radix, see if it’s a good fit for your projects, an - **Uptime:** "Best-effort", but no guarantee of uptime. Planned maintenance is announced as early as possible -**Please note:** applications hosted in the Playground cluster may need to be re-registered after maintenance, upgrades or migrations. All such required actions from your part will be communicated in the Radix slack channel. +### Automatic cleanup in Playground cluster + +A stricter lifecycle policy for using Radix Playground is in place, to make sure everybody shares the resources and avoid unattended applications using unnecessary resources. + +Any application which has not been **deployed** or **restarted** in the last `7 days` will be stopped. + +After further `21 days` of inactivity, all stopped applications will be **deleted**. 
diff --git a/public-site/docs/src/guides/deploy-only/example-github-action-to-create-radix-deploy-pipeline-job.md b/public-site/docs/src/guides/deploy-only/example-github-action-to-create-radix-deploy-pipeline-job.md index d5dbf6a2..59ce64ce 100644 --- a/public-site/docs/src/guides/deploy-only/example-github-action-to-create-radix-deploy-pipeline-job.md +++ b/public-site/docs/src/guides/deploy-only/example-github-action-to-create-radix-deploy-pipeline-job.md @@ -78,14 +78,14 @@ jobs: git push origin HEAD:main - name: 'Get environment from branch' # for "deploy only" pipeline workflow id: getEnvironment - uses: equinor/radix-github-actions@main + uses: equinor/radix-github-actions@v1 with: args: > get config branch-environment --from-config -b ${GITHUB_REF##*/} - name: 'Deploy API on Radix' - uses: equinor/radix-github-actions@main + uses: equinor/radix-github-actions@v1 with: args: > create job @@ -99,7 +99,7 @@ jobs: Following are last steps for "Build and deploy" pipeline workflow (e.g. 
when some application components need to be built): ```yaml - name: 'Build and deploy API on Radix' - uses: equinor/radix-github-actions@main + uses: equinor/radix-github-actions@v1 with: args: > create job diff --git a/public-site/docs/src/guides/deploy-only/example-github-action-using-ad-service-principal-access-token.md b/public-site/docs/src/guides/deploy-only/example-github-action-using-ad-service-principal-access-token.md index 24672cf0..5cbdd74e 100644 --- a/public-site/docs/src/guides/deploy-only/example-github-action-using-ad-service-principal-access-token.md +++ b/public-site/docs/src/guides/deploy-only/example-github-action-using-ad-service-principal-access-token.md @@ -47,7 +47,7 @@ jobs: -d '{"secretValue":"new value"}' \ -H 'Authorization: Bearer ${{ env.APP_SERVICE_ACCOUNT_TOKEN }}' - name: Restart qa env - uses: equinor/radix-github-actions@master + uses: equinor/radix-github-actions@v1 with: args: > restart diff --git a/public-site/docs/src/guides/jobs/configure-jobs.md b/public-site/docs/src/guides/jobs/configure-jobs.md index 8b6144c5..21173c22 100644 --- a/public-site/docs/src/guides/jobs/configure-jobs.md +++ b/public-site/docs/src/guides/jobs/configure-jobs.md @@ -1,5 +1,7 @@ --- title: Configuring Jobs +prev: . +next: job-manager-and-job-api --- # Configuring Jobs @@ -100,370 +102,3 @@ The etl job in the example above has `timeLimitSeconds` configured in its [`radi The maximum number of restarts if the job fails can be sent in the request body to the job scheduler as a JSON document with an element named `backoffLimit`. The etl job in the example above has `backoffLimit` configured in its [`radixconfig.yaml`](../../references/reference-radix-config/#backofflimit). If a new job is sent to the job scheduler without an element `backoffLimit`, it will default to the value specified in radixconfig.yaml. - -## Job Manager - -The Job Manager, aka "job-scheduler", is a web API service, that you use to create, delete and monitor the state of jobs. 
-Radix creates one job-scheduler per job defined in [`radixconfig.yaml`](../../references/reference-radix-config/#jobs). A job-scheduler will listen to the port defined by `schedulerPort` and host name equal to the `name` of the job. The job-scheduler API can only be accessed by components running in the same environment, and it is not exposed to the Internet. No authentication is required. - -The job-scheduler exposes the following methods for managing jobs - -### Single job - -- `POST /api/v1/jobs` Create a new job using the Docker image that Radix built for the job. Job-specific arguments can be sent in the request body - -```json -{ - "payload": "Sk9CX1BBUkFNMTogeHl6Cg==", - "timeLimitSeconds": 120, - "backoffLimit": 10, - "resources": { - "limits": { - "memory": "32Mi", - "cpu": "300m" - }, - "requests": { - "memory": "16Mi", - "cpu": "150m" - } - }, - "imageTagName": "1.0.0", - "node": { - "gpu": "gpu1, gpu2, gpu3", - "gpuCount": "6" - } -} -``` - -::: tip - `payload`, `timeLimitSeconds`, `backoffLimit`, `resources`, `node` and `imageTagName` are all optional fields and any of them can be omitted in the request. -::: - -- `GET /api/v1/jobs` Get states (with names and statuses) for all jobs -- `GET /api/v1/jobs/{jobName}` Get state for a named job -- `DELETE /api/v1/jobs/{jobName}` Delete a named job -- `POST /api/v1/jobs/{jobName}/stop` Stop a named job - -::: tip -`imageTagName` field allows to alter specific job image tag. In order to use it, the `{imageTagName}` need to be set as described in the [`radixconfig.yaml`](../../references/reference-radix-config/#imagetagname-2) -::: - -### Batch of jobs - -- `POST /api/v1/batches` Create a new batch of single jobs, using the Docker image, that Radix built for the job component. 
Job-specific arguments can be sent in the request body, specified individually for each item in `JobScheduleDescriptions` - -```json -{ - "jobScheduleDescriptions": [ - { - "payload": "{'data':'value1'}" - }, - { - "payload": "{'data':'value2'}" - }, - { - "payload": "{'data':'value3'}" - } - ] -} -``` - -> `payload`, `timeLimitSeconds`, `resources` and `node` are all optional fields and any of them can be omitted in the request. - -- `GET /api/v1/batches` Get states (with names and statuses) for all batches -- `GET /api/v1/batches/{batchName}` Get state for a named batch and statuses of its jobs -- `DELETE /api/v1/batches/{batchName}` Delete a named batch -- `POST /api/v1/batches/{batchName}/stop` Stop a named batch -- `POST /api/v1/batches/{batchName}/jobs/{jobName}/stop` Stop a named job of a batch - -## Starting a new job - -The example configuration at the top has component named `backend` and two jobs, `compute` and `etl`. Radix creates two job-schedulers, one for each of the two jobs. The job-scheduler for `compute` listens to `http://compute:8000`, and job-scheduler for `etl` listens to `http://etl:9000`. - -To start a new single job, send a `POST` request to `http://compute:8000/api/v1/jobs` with request body set to - -```json -{ - "payload": "{\"x\": 10, \"y\": 20}" -} -``` - -The job-scheduler creates a new job and mounts the payload from the request body to a file named `payload` in the directory `/compute/args`. -Once the job has been created successfully, the `job-scheduler` responds to `backend` with a job state object - -```json -{ - "name": "batch-compute-20230220101417-idwsxncs-rkwaibwe", - "started": "", - "ended": "", - "status": "Running" -} -``` - -- `name` is the unique name for the job. This is the value to be used in the `GET /api/v1/jobs/{jobName}` and `DELETE /api/v1/jobs/{jobName}` methods. It is also the host name to connect to running job's container, with its exposed port, e.g. 
`http://batch-compute-20230220100755-xkoxce5g-mll3kxxh:3000` -- `started` is the date and time the job was started. It is represented in RFC3339 form and is in UTC. -- `ended` is the date and time the job successfully ended. Also represented in RFC3339 form and is in UTC. This value is only set for `Successful` jobs. -- `status` is the current status of the job container. Possible values are `Running`, `Successful` and `Failed`. Status is `Failed` if the container exits with a non-zero exit code, and `Successful` if the exit code is zero. - -## Getting the status of all existing jobs - -Get a list of all single jobs with their states by sending a `GET` request to `http://compute:8000/api/v1/jobs`. The response is an array of job state objects, similar to the response received when creating a new job. Jobs that have been started within a batch are not included in this list - -```json -[ - { - "name": "batch-compute-20230220100755-xkoxce5g-mll3kxxh", - "started": "2021-04-07T09:08:37Z", - "ended": "2021-04-07T09:08:45Z", - "status": "Succeeded" - }, - { - "name": "batch-compute-20230220101417-idwsxncs-rkwaibwe", - "started": "2021-04-07T10:55:56Z", - "ended": "", - "status": "Failed" - } -] -``` - -To get state for a specific job (single or one within a batch), e.g. `batch-compute-20230220100755-xkoxce5g-mll3kxxh`, send a `GET` request to `http://compute:8000/api/v1/jobs/batch-compute-20230220100755-xkoxce5g-mll3kxxh`. The response is a single job state object - -```json -{ - "name": "batch-compute-20230220100755-xkoxce5g-mll3kxxh", - "started": "2021-04-07T09:08:37Z", - "ended": "2021-04-07T09:08:45Z", - "status": "Succeeded" -} -``` - -## Deleting an existing job - -The job list in the example above has a job named `batch-compute-20230220101417-idwsxncs-rkwaibwe`. To delete it, send a `DELETE` request to `http://compute:8000/api/v1/jobs/batch-compute-20230220101417-idwsxncs-rkwaibwe`. A successful deletion will respond with result object. 
Only single job can be deleted with this method - -```json -{ - "status": "Success", - "message": "job batch-compute-20230220101417-idwsxncs-rkwaibwe successfully deleted", - "code": 200 -} -``` - -## Stop a job - -The job list in the example above has a job named `batch-compute-20230220100755-xkoxce5g-mll3kxxh`. To stop it, send a `POST` request to `http://compute:8000/api/v1/jobs/batch-compute-20230220100755-xkoxce5g-mll3kxxh/stop`. A successful stop will respond with result object. Only single job can be stopped with this method. Stop of a job automatically deletes corresponding Kubernetes job and its replica, as well as its log. The job will get the status "Stopped". - -```json -{ - "status": "Success", - "message": "job batch-compute-20230220100755-xkoxce5g-mll3kxxh successfully stopped", - "code": 200 -} -``` - -```json -{ - "status": "Success", - "message": "job batch-compute-20230220101417-idwsxncs-rkwaibwe successfully stopped", - "code": 200 -} -``` - -## Starting a new batch of jobs - -To start a new batch of jobs, send a `POST` request to `http://compute:8000/api/v1/batches` with request body set to - -```json -{ - "jobScheduleDescriptions": [ - { - "payload": "{\"x\": 10, \"y\": 20}" - }, - { - "payload": "{\"x\": 20, \"y\": 30}" - } - ] -} -``` - -Jobs can have `jobId` - -```json -{ - "jobScheduleDescriptions": [ - { - "jobId": "job-1", - "payload": "{\"x\": 10, \"y\": 20}" - }, - { - "jobId": "job-2", - "payload": "{\"x\": 20, \"y\": 30}" - } - ] -} -``` - -Default parameters for jobs can be defined within `DefaultRadixJobComponentConfig`. 
These parameters can be overridden for each job individually in `JobScheduleDescriptions` - -```json -{ - "defaultRadixJobComponentConfig": { - "timeLimitSeconds": 200, - "backoffLimit": 5, - "resources": { - "limits": { - "memory": "200Mi", - "cpu": "200m" - }, - "requests": { - "memory": "100Mi", - "cpu": "100m" - } - }, - "imageTagName": "1.0.0" - }, - "jobScheduleDescriptions": [ - { - "payload": "{'data':'value1'}", - "timeLimitSeconds": 120, - "backoffLimit": 2, - "resources": { - "limits": { - "memory": "32Mi", - "cpu": "300m" - }, - "requests": { - "memory": "16Mi", - "cpu": "150m" - } - }, - "node": { - "gpu": "gpu1, gpu2, gpu3", - "gpuCount": "6" - } - }, - { - "payload": "{'data':'value2'}", - "imageTagName": "2.0.0" - }, - { - "payload": "{'data':'value3'}", - "timeLimitSeconds": 300, - "backoffLimit": 10, - "node": { - "gpu": "gpu3", - "gpuCount": "1" - } - } - ] -} -``` - -The job-scheduler creates a new batch, which will create single jobs for each item in the `JobScheduleDescriptions`. -Once the batch has been created, the `job-scheduler` responds to `backend` with a batch state object - -```json -{ - "batchName": "batch-compute-20220302170647-6ytkltvk", - "name": "batch-compute-20220302170647-6ytkltvk-tlugvgs", - "created": "2022-03-02T17:06:47+01:00", - "status": "Running" -} -``` - -- `batchName` is the unique name for the batch. This is the value to be used in the `GET /api/v1/batches/{batchName}` and `DELETE /api/v1/batches/{batchName}` methods. -- `started` is the date and time the batch was started. The value is represented in RFC3339 form and is in UTC. -- `ended` is the date and time the batch successfully ended (empty when not completed). The value is represented in RFC3339 form and is in UTC. This value is only set for `Successful` batches. Batch is ended when all batched jobs are completed or failed. -- `status` is the current status of the batch. Possible values are `Running`, `Successful` and `Failed`. 
Status is `Failed` if the batch fails for any reason. - -## Get a list of all batches - -Get a list of all batches with their states by sending a `GET` request to `http://compute:8000/api/v1/batches`. The response is an array of batch state objects, similar to the response received when creating a new batch - -```json -[ - { - "name": "batch-compute-20220302155333-hrwl53mw", - "created": "2022-03-02T15:53:33+01:00", - "started": "2022-03-02T15:53:33+01:00", - "ended": "2022-03-02T15:54:00+01:00", - "status": "Succeeded" - }, - { - "name": "batch-compute-20220302170647-6ytkltvk", - "created": "2022-03-02T17:06:47+01:00", - "started": "2022-03-02T17:06:47+01:00", - "status": "Running" - } -] -``` - -## Get a state of a batch - -To get state for a specific batch, e.g. `batch-compute-20220302155333-hrwl53mw`, send a `GET` request to `http://compute:8000/api/v1/batches/batch-compute-20220302155333-hrwl53mw`. The response is a batch state object, with states of its jobs - -```json -{ - "name": "batch-compute-20220302155333-hrwl53mw", - "created": "2022-03-02T15:53:33+01:00", - "started": "2022-03-02T15:53:33+01:00", - "ended": "2022-03-02T15:54:00+01:00", - "status": "Succeeded", - "jobStatuses": [ - { - "jobId": "job1", - "batchName": "batch-compute-20220302155333-hrwl53mw", - "name": "batch-compute-20220302155333-hrwl53mw-fjhcqwj7", - "created": "2022-03-02T15:53:36+01:00", - "started": "2022-03-02T15:53:36+01:00", - "ended": "2022-03-02T15:53:56+01:00", - "status": "Succeeded" - }, - { - "jobId": "job2", - "batchName": "batch-compute-20220302155333-hrwl53mw", - "name": "batch-compute-20220302155333-hrwl53mw-qjzykhrd", - "created": "2022-03-02T15:53:39+01:00", - "started": "2022-03-02T15:53:39+01:00", - "ended": "2022-03-02T15:53:56+01:00", - "status": "Succeeded" - } - ] -} -``` - -## Delete a batch - -The batch list in the example above has a batch named `batch-compute-20220302155333-hrwl53mw`. 
To delete it, send a `DELETE` request to `http://compute:8000/api/v1/batches/batch-compute-20220302155333-hrwl53mw`. A successful deletion will respond with result object. Deleting of a batch job automatically deletes all jobs, belonging to this batch job. - -```json -{ - "status": "Success", - "message": "batch batch-compute-20220302155333-hrwl53mw successfully deleted", - "code": 200 -} -``` - -## Stop an existing batch - -The batch list in the example above has a batch named `batch-compute-20220302155333-hrwl53mw`. To stop it, send a `POST` request to `http://compute:8000/api/v1/batches/batch-compute-20220302155333-hrwl53mw/stop`. A successful stop will respond with result object. Stop of a batch automatically deletes all batch Kubernetes jobs and their replicas, belonging to this batch job, as well as their logs. All not completed jobs will get the status "Stopped". - -```json -{ - "status": "Success", - "message": "batch batch-compute-20220302155333-hrwl53mw successfully stopped", - "code": 200 -} -``` - -## Stop a jobs in a batch - -The batch list in the example above has a batch named `batch-compute-20220302155333-hrwl53mw` and jobs, one of whicvh has name `batch-compute-20220302155333-hrwl53mw-fjhcqwj7`. To stop this job, send a `POST` request to `http://compute:8000/api/v1/batches/batch-compute-20220302155333-hrwl53mw/jobs/batch-compute-20220302155333-hrwl53mw-fjhcqwj7/stop`. A successful stop will respond with result object. Stop of a batch job automatically deletes corresponding Kubernetes job and its replica, as well as its log. The job will get the status "Stopped". 
- -```json -{ - "status": "Success", - "message": "job batch-compute-20220302155333-hrwl53mw-fjhcqwj7 in the batch batch-compute-20220302155333-hrwl53mw successfully stopped", - "code": 200 -} -``` \ No newline at end of file diff --git a/public-site/docs/src/guides/jobs/environment-variables.md b/public-site/docs/src/guides/jobs/environment-variables.md index 563ac697..8afe0599 100644 --- a/public-site/docs/src/guides/jobs/environment-variables.md +++ b/public-site/docs/src/guides/jobs/environment-variables.md @@ -1,5 +1,7 @@ --- title: Environment variables +prev: job-manager-and-job-api +next: jobs-in-web-console --- # Environment variables in jobs diff --git a/public-site/docs/src/guides/jobs/index.md b/public-site/docs/src/guides/jobs/index.md index 70e6af50..fb70f185 100644 --- a/public-site/docs/src/guides/jobs/index.md +++ b/public-site/docs/src/guides/jobs/index.md @@ -1,5 +1,6 @@ --- title: Jobs +next: 'configure-jobs' --- # Jobs diff --git a/public-site/docs/src/guides/jobs/job-manager-and-job-api.md b/public-site/docs/src/guides/jobs/job-manager-and-job-api.md index 3ed9088d..30b2d020 100644 --- a/public-site/docs/src/guides/jobs/job-manager-and-job-api.md +++ b/public-site/docs/src/guides/jobs/job-manager-and-job-api.md @@ -1,5 +1,7 @@ --- title: Job Manager and job API +prev: configure-jobs +next: environment-variables --- ## Job Manager and job API @@ -7,7 +9,19 @@ title: Job Manager and job API The Job Manager, aka "job-scheduler", is a web API service, that you use to create, delete and monitor the state of jobs. Radix creates one job-scheduler per job defined in [`radixconfig.yaml`](../../references/reference-radix-config/#jobs). A job-scheduler will listen to the port defined by `schedulerPort` and host name equal to the `name` of the job. The job-scheduler API can only be accessed by components running in the same environment, and it is not exposed to the Internet. No authentication is required. 
-The job-scheduler exposes the following methods for managing jobs +The Job Manager exposes the following methods for managing jobs: +- `GET /api/v1/jobs` Get states (with names and statuses) for all jobs +- `GET /api/v1/jobs/{jobName}` Get state for a named job +- `DELETE /api/v1/jobs/{jobName}` Delete a named job +- `POST /api/v1/jobs/{jobName}/stop` Stop a named job + + +... and the following methods for managing batches: +- `GET /api/v1/batches` Get states (with names and statuses) for all batches +- `GET /api/v1/batches/{batchName}` Get state for a named batch and statuses of its jobs +- `DELETE /api/v1/batches/{batchName}` Delete a named batch +- `POST /api/v1/batches/{batchName}/stop` Stop a named batch +- `POST /api/v1/batches/{batchName}/jobs/{jobName}/stop` Stop a named job of a batch ### Create a single job @@ -16,6 +30,7 @@ The job-scheduler exposes the following methods for managing jobs ```json { "payload": "Sk9CX1BBUkFNMTogeHl6Cg==", + "imageTagName": "1.0.0", "timeLimitSeconds": 120, "backoffLimit": 10, "resources": { @@ -35,41 +50,68 @@ The job-scheduler exposes the following methods for managing jobs } ``` -> `payload`, `timeLimitSeconds`, `backoffLimit`, `resources` and `node` are all optional fields and any of them can be omitted in the request. + `payload`, `imageTagName`, `timeLimitSeconds`, `backoffLimit`, `resources` and `node` are all optional fields and any of them can be omitted in the request. -- `GET /api/v1/jobs` Get states (with names and statuses) for all jobs -- `GET /api/v1/jobs/{jobName}` Get state for a named job -- `DELETE /api/v1/jobs/{jobName}` Delete a named job -- `POST /api/v1/jobs/{jobName}/stop` Stop a named job +`imageTagName` field allows to alter specific job image tag. 
In order to use it, the `{imageTagName}` needs to be set as described in the [`radixconfig.yaml`](../../references/reference-radix-config/#imagetagname-2) ### Create a batch of jobs -- `POST /api/v1/batches` Create a new batch of single jobs, using the Docker image, that Radix built for the job component. Job-specific arguments can be sent in the request body, specified individually for each item in `JobScheduleDescriptions` +- `POST /api/v1/batches` Create a new batch of single jobs, using the Docker image, that Radix built for the job component. Job-specific arguments can be sent in the request body, specified individually for each item in `jobScheduleDescriptions` with default values defined in `defaultRadixJobComponentConfig`. ```json { + "defaultRadixJobComponentConfig": { + "imageTagName": "1.0.0", + "timeLimitSeconds": 200, + "backoffLimit": 5, + "resources": { + "limits": { + "memory": "200Mi", + "cpu": "200m" + }, + "requests": { + "memory": "100Mi", + "cpu": "100m" + } + }, + "node": { + "gpu": "gpu1", + "gpuCount": "2" + } + }, "jobScheduleDescriptions": [ { - "payload": "{'data':'value1'}" + "payload": "{'data':'value1'}", + "imageTagName": "1.0.0", + "timeLimitSeconds": 120, + "backoffLimit": 10, + "resources": { + "limits": { + "memory": "32Mi", + "cpu": "300m" + }, + "requests": { + "memory": "16Mi", + "cpu": "150m" + } + }, + "node": { + "gpu": "gpu1, gpu2, gpu3", + "gpuCount": "6" + } }, { - "payload": "{'data':'value2'}" + "payload": "{'data':'value2'}", + ... }, { - "payload": "{'data':'value3'}" + "payload": "{'data':'value3'}", + ... } ] } ``` -> `payload`, `timeLimitSeconds`, `resources` and `node` are all optional fields and any of them can be omitted in the request. 
- -- `GET /api/v1/batches` Get states (with names and statuses) for all batches -- `GET /api/v1/batches/{batchName}` Get state for a named batch and statuses of its jobs -- `DELETE /api/v1/batches/{batchName}` Delete a named batch -- `POST /api/v1/batches/{batchName}/stop` Stop a named batch -- `POST /api/v1/batches/{batchName}/jobs/{jobName}/stop` Stop a named job of a batch - ### Starting a new job The example configuration at the top has component named `backend` and two jobs, `compute` and `etl`. Radix creates two job-schedulers, one for each of the two jobs. The job-scheduler for `compute` listens to `http://compute:8000`, and job-scheduler for `etl` listens to `http://etl:9000`. @@ -202,6 +244,7 @@ Default parameters for jobs can be defined within `DefaultRadixJobComponentConfi ```json { "defaultRadixJobComponentConfig": { + "imageTagName": "1.0.0", "timeLimitSeconds": 200, "backoffLimit": 5, "resources": { @@ -236,7 +279,8 @@ Default parameters for jobs can be defined within `DefaultRadixJobComponentConfi } }, { - "payload": "{'data':'value2'}" + "payload": "{'data':'value2'}", + "imageTagName": "2.0.0" }, { "payload": "{'data':'value3'}", diff --git a/public-site/docs/src/guides/jobs/jobs-in-web-console.md b/public-site/docs/src/guides/jobs/jobs-in-web-console.md index 70279407..d51a7559 100644 --- a/public-site/docs/src/guides/jobs/jobs-in-web-console.md +++ b/public-site/docs/src/guides/jobs/jobs-in-web-console.md @@ -1,5 +1,7 @@ --- title: Managing Jobs in Web Console +prev: environment-variables +next: openapi-swagger --- ## Managing Jobs in Web Console diff --git a/public-site/docs/src/guides/jobs/openapi-swagger.md b/public-site/docs/src/guides/jobs/openapi-swagger.md index 34cacc73..f75af226 100644 --- a/public-site/docs/src/guides/jobs/openapi-swagger.md +++ b/public-site/docs/src/guides/jobs/openapi-swagger.md @@ -1,5 +1,6 @@ --- title: OpenAPI/Swagger spec +prev: jobs-in-web-console --- ## OpenAPI/Swagger spec diff --git 
a/public-site/docs/src/guides/volume-mounts/index.md b/public-site/docs/src/guides/volume-mounts/index.md index 638e7d26..b79dddb9 100644 --- a/public-site/docs/src/guides/volume-mounts/index.md +++ b/public-site/docs/src/guides/volume-mounts/index.md @@ -34,7 +34,6 @@ environmentConfig: - name: storage path: /app/image-storage blobfuse2: - protocol: fuse2 container: blobfusevolumetestdata ``` @@ -96,13 +95,11 @@ To add multiple volumes - name: storage1 path: /app/image-storage blobfuse2: - protocol: fuse2 container: blobfusevolumetestdata uid: 1000 - name: storage3 path: /app/image-storage3 blobfuse2: - protocol: fuse2 container: blobfusevolumetestdata3 uid: 1000 ``` diff --git a/public-site/docs/src/other/release/index.md b/public-site/docs/src/other/release/index.md index 4e51713c..26e056cd 100644 --- a/public-site/docs/src/other/release/index.md +++ b/public-site/docs/src/other/release/index.md @@ -13,6 +13,23 @@ A `Radix application reader` role has been added to Radix. This role is a read-o Readers have the privilege to view logs associated with their replicas and jobs. The role is an Azure AD group that can be assigned under `Access control` in the Configuration page of the application. +### 2023-08-01 - Restart batches and *jobs with recent active deployment* + +In addition to restart job with original deployment, Radix now allows to restart scheduled single jobs, entire batch or individual jobs within the batch with latest active deployment (if it is different than for the restarting job). 
[Read more](../../guides/jobs/jobs-in-web-console.md) +Scheduled jobs now can be run with `imageTagName`, specified in [radixconfig](../../references/reference-radix-config/index.md) and altered in [JobDescription](../../guides/jobs/job-manager-and-job-api.md#create-a-single-job) +```` +{ + "payload": "abc", + "imageTagName": "1.0.0" +} +```` + +### 2023-07-13 - Restart scheduled batches and jobs + +Radix now allows restarting scheduled single jobs, entire batch or individual jobs within the batch - completed, failed, stopped or running. +Technically it deletes the corresponding Kubernetes job and starts a new one instead, with the same deployment, job-description and payload. +Use-case - restart jobs, failed due to temporary issues, lack of memory, unavailable external data or API. ### 2023-07-05 - Change in Azure Blob volume-mounts option If your Radix application uses [Azure Blob volume mount](../../guides/volume-mounts/), [radixconfig.yaml](../../references/reference-radix-config/index.md) it is recommended to replace its configuration with BlobFuse2: @@ -115,7 +132,7 @@ The Radix Github Action is used by many Radixians to execute rx CLI commands in ```yaml - name: list-apps - uses: equinor/radix-github-actions@master + uses: equinor/radix-github-actions@v1 with: github-token: ${{ secrets.GITHUB_TOKEN }} args: > @@ -131,7 +148,7 @@ The Radix Github Action is used by many Radixians to execute rx CLI commands in ### 2023-01-31 Radix Playground lifecycle policy -We will now implement a stricter lifecycle policy for using Radix Playground. +We will now implement a stricter [lifecycle policy](../../docs/topic-uptime/#automatic-cleanup-in-playground-cluster) for using Radix Playground. 
Any application which has not been a - deployed - or b - restarted diff --git a/public-site/docs/src/references/reference-radix-config/index.md b/public-site/docs/src/references/reference-radix-config/index.md index 88798a85..c9fa3642 100644 --- a/public-site/docs/src/references/reference-radix-config/index.md +++ b/public-site/docs/src/references/reference-radix-config/index.md @@ -447,7 +447,6 @@ spec: - name: volume-name path: /path/in/container/to/mount/to blobfuse2: - protocol: fuse2 container: container-name uid: 1000 ``` @@ -461,7 +460,7 @@ The `volumeMounts` field configures volume mounts within the running component. - `blobfuse2` - mount a container from blob in [Azure storage account](https://docs.microsoft.com/en-us/azure/storage/common/storage-account-overview). Uses [CSI Azure blob storage driver](https://github.com/kubernetes-sigs/blob-csi-driver). Replaces types `blob` and `azure-blob` for obsolete drivers. _Options for `blobfuse2`_ - - `protocol` - a protocol, supported by the BlobFuse2. Currently, supports `fuse2`. + - `protocol` - (optional) a protocol, supported by the BlobFuse2. Currently, supports `fuse2` (default) and `nfs`. - `container` - name of the blob container. - `uid` and/or `gid` - User ID and/or group ID (numbers) of a [mounted volume owner](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.21/#podsecuritycontext-v1-core). It is a User ID and Group ID of a user in the running container within component replicas. Usually a user, which is a member of one or multiple [groups](https://en.wikipedia.org/wiki/Group_identifier), is specified in the `Dockerfile` for the component with command `USER`. Read [more details](https://www.radix.equinor.com/docs/topic-docker/#running-as-non-root) about specifying user within `Dockerfile`. 
It is recommended to use because Blobfuse driver do [not honor fsGroup securityContext settings](https://github.com/kubernetes-sigs/blob-csi-driver/blob/master/docs/driver-parameters.md). - `useAdls` - (optional) enables blobfuse to access Azure DataLake storage account. When set to false, blobfuse will access Azure Block Blob storage account, hierarchical file system is not supported. Default `false`. This must be set `true` when [HNS enabled account](https://learn.microsoft.com/en-us/azure/storage/blobs/data-lake-storage-namespace) is mounted. @@ -920,7 +919,6 @@ spec: - name: volume-name path: /path/in/container/to/mount/to blobfuse2: - protocol: fuse2 container: container-name uid: 1000 ``` @@ -1177,7 +1175,6 @@ spec: - name: volume-name path: /path/in/container/to/mount/to blobfuse2: - protocol: fuse2 container: container-name uid: 1000 secretRefs: