Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update mlflow and fix terraform issues #2865

Merged
merged 13 commits into from
Nov 23, 2022
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ BUG FIXES:
* Handle unsupported azure subscriptions in cost reporting ([#2823](https://github.com/microsoft/AzureTRE/pull/2823))
* Redact secrets in conditional or nested properties ([#2854](https://github.com/microsoft/AzureTRE/pull/2854))
* Fix missing ID parameter in Certs bundle ([#2841](https://github.com/microsoft/AzureTRE/pull/2841))
* Fix MLflow deployment issues and update version ([#2865](https://github.com/microsoft/AzureTRE/pull/2865))
* Handle 429 TooManyRequests and 503 ServiceUnavailable which might return from Azure Cost Management in TRE Cost API ([#2835](https://github.com/microsoft/AzureTRE/issues/2835))

COMPONENTS:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
FROM python:3.8-buster

# Install MLflow Python Packages
RUN pip install --no-cache-dir psycopg2==2.9.3 mlflow==1.24.0 azure-storage-blob==12.10.0
RUN pip install --no-cache-dir psycopg2==2.9.5 mlflow==2.0.1 azure-storage-blob==12.14.1

RUN apt-get update \
&& apt-get install openssh-server -y --no-install-recommends \
Expand Down
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.3.1"
__version__ = "0.4.0"
43 changes: 27 additions & 16 deletions templates/workspace_services/mlflow/porter.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
---
name: tre-service-mlflow
version: 0.4.0
version: 0.5.6
description: "An Azure TRE service for MLflow machine learning lifecycle"
dockerfile: Dockerfile.tmpl
registry: azuretre
Expand Down Expand Up @@ -55,6 +55,13 @@ parameters:
type: boolean
default: false

outputs:
- name: internal_connection_uri
type: string
applyTo:
- install
- upgrade

mixins:
- exec
- terraform:
Expand All @@ -66,39 +73,43 @@ install:
vars:
workspace_id: "{{ bundle.parameters.workspace_id }}"
tre_id: "{{ bundle.parameters.tre_id }}"
resource_id: "{{ bundle.parameters.id }}"
tre_resource_id: "{{ bundle.parameters.id }}"
mgmt_acr_name: "{{ bundle.parameters.mgmt_acr_name }}"
mgmt_resource_group_name: "{{ bundle.parameters.mgmt_resource_group_name }}"
arm_tenant_id: "{{ bundle.credentials.azure_tenant_id }}"
arm_client_id: "{{ bundle.credentials.azure_client_id }}"
arm_client_secret: "{{ bundle.credentials.azure_client_secret }}"
arm_use_msi: "{{ bundle.parameters.arm_use_msi }}"
backendConfig:
resource_group_name: "{{ bundle.parameters.tfstate_resource_group_name }}"
storage_account_name: "{{ bundle.parameters.tfstate_storage_account_name }}"
container_name: "{{ bundle.parameters.tfstate_container_name }}"
key: "tre-service-mlflow-{{ bundle.parameters.id }}"
outputs:
- name: internal_connection_uri

upgrade:
- exec:
description: "Upgrade workspace service"
command: echo
arguments:
- "This workspace service does not implement upgrade action"
# Upgrade step: runs the Terraform mixin with the same variables and backend
# state key as the install step, and republishes internal_connection_uri.
- terraform:
    # Labelled as an upgrade (not "Deploy") so logs identify which action ran.
    description: "Upgrade workspace service"
    vars:
      workspace_id: "{{ bundle.parameters.workspace_id }}"
      tre_id: "{{ bundle.parameters.tre_id }}"
      tre_resource_id: "{{ bundle.parameters.id }}"
      mgmt_acr_name: "{{ bundle.parameters.mgmt_acr_name }}"
      mgmt_resource_group_name: "{{ bundle.parameters.mgmt_resource_group_name }}"
    backendConfig:
      resource_group_name: "{{ bundle.parameters.tfstate_resource_group_name }}"
      storage_account_name: "{{ bundle.parameters.tfstate_storage_account_name }}"
      container_name: "{{ bundle.parameters.tfstate_container_name }}"
      # Same key as the install step, so the upgrade works on the existing state.
      key: "tre-service-mlflow-{{ bundle.parameters.id }}"
    outputs:
      - name: internal_connection_uri

uninstall:
- terraform:
description: "Tear down workspace service"
vars:
workspace_id: "{{ bundle.parameters.workspace_id }}"
tre_id: "{{ bundle.parameters.tre_id }}"
resource_id: "{{ bundle.parameters.id }}"
tre_resource_id: "{{ bundle.parameters.id }}"
mgmt_acr_name: "{{ bundle.parameters.mgmt_acr_name }}"
mgmt_resource_group_name: "{{ bundle.parameters.mgmt_resource_group_name }}"
arm_tenant_id: "{{ bundle.credentials.azure_tenant_id }}"
arm_client_id: "{{ bundle.credentials.azure_client_id }}"
arm_client_secret: "{{ bundle.credentials.azure_client_secret }}"
arm_use_msi: "{{ bundle.parameters.arm_use_msi }}"
backendConfig:
resource_group_name: "{{ bundle.parameters.tfstate_resource_group_name }}"
storage_account_name: "{{ bundle.parameters.tfstate_storage_account_name }}"
Expand Down
7 changes: 5 additions & 2 deletions templates/workspace_services/mlflow/terraform/locals.tf
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
locals {
short_service_id = substr(var.resource_id, -4, -1)
short_service_id = substr(var.tre_resource_id, -4, -1)
short_workspace_id = substr(var.workspace_id, -4, -1)
core_resource_group_name = "rg-${var.tre_id}"
workspace_resource_name_suffix = "${var.tre_id}-ws-${local.short_workspace_id}"
service_resource_name_suffix = "${var.tre_id}-ws-${local.short_workspace_id}-svc-${local.short_service_id}"
core_vnet = "vnet-${var.tre_id}"
webapp_name = "mlflow-${local.service_resource_name_suffix}"
postgresql_server_name = "mlflow-${local.service_resource_name_suffix}"
keyvault_name = lower("kv-${substr(local.workspace_resource_name_suffix, -20, -1)}")
Expand All @@ -18,4 +17,8 @@ locals {
tre_workspace_id = var.workspace_id
tre_workspace_service_id = var.tre_resource_id
}
# Log categories that the web app diagnostic setting switches on; the
# diagnostic setting checks membership in this list with contains().
web_app_diagnostic_categories_enabled = [
  "AppServiceHTTPLogs",
  "AppServiceConsoleLogs",
  "AppServiceAppLogs",
  "AppServiceFileAuditLogs",
  "AppServiceAuditLogs",
  "AppServiceIPSecAuditLogs",
  "AppServicePlatformLogs",
  "AppServiceAntivirusScanAuditLogs",
]
}
8 changes: 8 additions & 0 deletions templates/workspace_services/mlflow/terraform/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,14 @@ terraform {
source = "hashicorp/random"
version = "=3.4.2"
}
local = {
source = "hashicorp/local"
version = "=2.2.3"
}
# Pinned to an exact version, like the other providers in this block, so that
# provider resolution is reproducible across deployments.
template = {
  source  = "hashicorp/template"
  version = "=2.2.0"
}
}

backend "azurerm" {
Expand Down
3 changes: 3 additions & 0 deletions templates/workspace_services/mlflow/terraform/outputs.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Exposed to the Porter bundle, which declares an output of the same name.
output "internal_connection_uri" {
  description = "HTTPS URL of the MLflow App Service, built from its default site hostname"
  value       = "https://${azurerm_app_service.mlflow.default_site_hostname}"
}
8 changes: 0 additions & 8 deletions templates/workspace_services/mlflow/terraform/variables.tf
Original file line number Diff line number Diff line change
@@ -1,18 +1,10 @@
variable "workspace_id" {}
variable "tre_id" {}
variable "tre_workspace_service_tags" {}
variable "tre_resource_id" {}

variable "resource_id" {}

variable "mgmt_acr_name" {}
variable "mgmt_resource_group_name" {}

variable "arm_use_msi" {}
variable "arm_tenant_id" {}
variable "arm_client_id" {}
variable "arm_client_secret" {}

variable "is_exposed_externally" {
type = bool
description = "Is the webapp available on the public internet"
Expand Down
118 changes: 25 additions & 93 deletions templates/workspace_services/mlflow/terraform/web_app.tf
Original file line number Diff line number Diff line change
Expand Up @@ -3,37 +3,37 @@ data "azurerm_storage_share" "shared_storage" {
storage_account_name = local.storage_name
}

data "template_file" "mlflow-windows-config" {
data "template_file" "mlflow_windows_config" {
template = file("${path.module}/../mlflow-vm-config/windows/template_config.ps1")
vars = {
MLFlow_Connection_String = data.azurerm_storage_account.mlflow.primary_connection_string
}
}

data "template_file" "mlflow-linux-config" {
data "template_file" "mlflow_linux_config" {
template = file("${path.module}/../mlflow-vm-config/linux/template_config.sh")
vars = {
MLFlow_Connection_String = data.azurerm_storage_account.mlflow.primary_connection_string
}
}

resource "local_file" "mlflow-windows-config" {
content = data.template_file.mlflow-windows-config.rendered
resource "local_file" "mlflow_windows_config" {
content = data.template_file.mlflow_windows_config.rendered
filename = "${path.module}/../mlflow-vm-config/windows/config.ps1"
}

resource "local_file" "mlflow-linux-config" {
content = data.template_file.mlflow-linux-config.rendered
resource "local_file" "mlflow_linux_config" {
content = data.template_file.mlflow_linux_config.rendered
filename = "${path.module}/../mlflow-vm-config/linux/config.sh"
}

resource "azurerm_storage_share_file" "mlflow-config-windows" {
resource "azurerm_storage_share_file" "mlflow_config_windows" {
name = "mlflow-windows-config-${local.webapp_name}.ps1"
storage_share_id = data.azurerm_storage_share.shared_storage.id
source = "${path.module}/../mlflow-vm-config/windows/config.ps1"
}

resource "azurerm_storage_share_file" "mlflow-config-linux" {
resource "azurerm_storage_share_file" "mlflow_config_linux" {
name = "mlflow-linux-config-${local.webapp_name}.sh"
storage_share_id = data.azurerm_storage_share.shared_storage.id
source = "${path.module}/../mlflow-vm-config/linux/config.sh"
Expand All @@ -55,6 +55,7 @@ resource "azurerm_app_service" "mlflow" {
resource_group_name = data.azurerm_resource_group.ws.name
app_service_plan_id = data.azurerm_app_service_plan.workspace.id
https_only = true
tags = local.tre_workspace_service_tags

site_config {
linux_fx_version = "DOCKER|${data.azurerm_container_registry.mgmt_acr.login_server}/microsoft/azuretre/${local.image_name}:${local.image_tag}"
Expand Down Expand Up @@ -94,97 +95,27 @@ resource "azurerm_app_service" "mlflow" {
}
}

data "azurerm_monitor_diagnostic_categories" "mlflow" {
resource_id = azurerm_app_service.mlflow.id
depends_on = [
azurerm_app_service.mlflow
]
}
resource "azurerm_monitor_diagnostic_setting" "mlflow" {
name = "diag-${var.tre_id}"
target_resource_id = azurerm_app_service.mlflow.id
log_analytics_workspace_id = data.azurerm_log_analytics_workspace.tre.id

log {
category = "AppServiceHTTPLogs"
enabled = true

retention_policy {
days = 1
enabled = false
}
}

log {
category = "AppServiceConsoleLogs"
enabled = true

retention_policy {
days = 1
enabled = false
}
}

log {
category = "AppServiceAppLogs"
enabled = true

retention_policy {
days = 1
enabled = false
}
}

log {
category = "AppServiceFileAuditLogs"
enabled = true

retention_policy {
days = 1
enabled = false
}
}

log {
category = "AppServiceAuditLogs"
enabled = true
dynamic "log" {
for_each = data.azurerm_monitor_diagnostic_categories.mlflow.logs
content {
category = log.value
enabled = contains(local.web_app_diagnostic_categories_enabled, log.value) ? true : false

retention_policy {
days = 1
enabled = false
}
}

log {
category = "AppServiceIPSecAuditLogs"
enabled = true

retention_policy {
days = 1
enabled = false
}
}

log {
category = "AppServicePlatformLogs"
enabled = true

retention_policy {
days = 1
enabled = false
}
}

log {
category = "AppServiceAntivirusScanAuditLogs"
enabled = true

retention_policy {
days = 1
enabled = false
}
}

metric {
category = "AllMetrics"
enabled = true

retention_policy {
enabled = false
retention_policy {
enabled = contains(local.web_app_diagnostic_categories_enabled, log.value) ? true : false
days = 365
}
}
}
}
Expand All @@ -208,6 +139,7 @@ resource "azurerm_private_endpoint" "mlflow" {
location = data.azurerm_resource_group.ws.location
resource_group_name = data.azurerm_resource_group.ws.name
subnet_id = data.azurerm_subnet.services.id
tags = local.tre_workspace_service_tags

private_service_connection {
private_connection_resource_id = azurerm_app_service.mlflow.id
Expand Down