Skip to content

Commit

Permalink
Switch to Open Telemetry for logging (#3762)
Browse files Browse the repository at this point in the history
  • Loading branch information
marrobi committed Dec 7, 2023
1 parent ea75911 commit c382f3d
Show file tree
Hide file tree
Showing 62 changed files with 785 additions and 889 deletions.
11 changes: 8 additions & 3 deletions .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,10 @@
"console": "integratedTerminal",
"preLaunchTask": "Copy_env_file_for_api_debug",
"cwd": "${workspaceFolder}/api_app",
"envFile": "${workspaceFolder}/api_app/.env"
"envFile": "${workspaceFolder}/api_app/.env",
"env": {
"OTEL_RESOURCE_ATTRIBUTES": "service.name=api,service.instance.id=local_debug,service.version=dev"
}
},
{
"name": "E2E Extended",
Expand Down Expand Up @@ -190,8 +193,10 @@
"cwd": "${workspaceFolder}/resource_processor",
"envFile": "${workspaceFolder}/core/private.env",
"env": {
"PYTHONPATH": "."
}
"PYTHONPATH": ".",
"OTEL_RESOURCE_ATTRIBUTES": "service.name=resource_processor,service.instance.id=local_debug,service.version=dev"
},
"justMyCode": false
},
{
"name": "Debug Python file",
Expand Down
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
FEATURES:

ENHANCEMENTS:
* Switch from OpenCensus to OpenTelemetry for logging ([#3762](https://github.com/microsoft/AzureTRE/pull/3762))

BUG FIXES:

Expand Down
3 changes: 2 additions & 1 deletion api_app/.env.sample
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
# API configuration
# -----------------
# When debug is set to True, debugging information for unhandled exceptions is shown in the swagger UI and logging is more verbose
# DEBUG=True
# LOGGING_LEVEL can be set to DEBUG, INFO, WARNING, ERROR or CRITICAL
LOGGING_LEVEL="INFO"

# OAUTH information - client ids etc. for the AAD Apps
# ----------------------------------------------------
Expand Down
2 changes: 1 addition & 1 deletion api_app/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.16.9"
__version__ = "0.17.1"
12 changes: 6 additions & 6 deletions api_app/api/dependencies/database.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import logging
from typing import Callable, Type

from azure.cosmos.aio import CosmosClient
Expand All @@ -9,10 +8,11 @@
from db.errors import UnableToAccessDatabase
from db.repositories.base import BaseRepository
from resources import strings
from services.logging import logger


async def connect_to_db() -> CosmosClient:
logging.debug(f"Connecting to {config.STATE_STORE_ENDPOINT}")
logger.debug(f"Connecting to {config.STATE_STORE_ENDPOINT}")

try:
async with credentials.get_credential_async() as credential:
Expand All @@ -27,10 +27,10 @@ async def connect_to_db() -> CosmosClient:
cosmos_client = CosmosClient(
config.STATE_STORE_ENDPOINT, primary_master_key, connection_verify=False
)
logging.debug("Connection established")
logger.debug("Connection established")
return cosmos_client
except Exception:
logging.exception("Connection to state store could not be established.")
logger.exception("Connection to state store could not be established.")


async def get_store_key(credential) -> str:
Expand All @@ -53,7 +53,7 @@ async def get_store_key(credential) -> str:


async def get_db_client(app: FastAPI) -> CosmosClient:
if not app.state.cosmos_client:
if not hasattr(app.state, 'cosmos_client') or not app.state.cosmos_client:
app.state.cosmos_client = await connect_to_db()
return app.state.cosmos_client

Expand All @@ -71,7 +71,7 @@ async def _get_repo(
try:
return await repo_type.create(client)
except UnableToAccessDatabase:
logging.exception(strings.STATE_STORE_ENDPOINT_NOT_RESPONDING)
logger.exception(strings.STATE_STORE_ENDPOINT_NOT_RESPONDING)
raise HTTPException(
status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
detail=strings.STATE_STORE_ENDPOINT_NOT_RESPONDING,
Expand Down
9 changes: 5 additions & 4 deletions api_app/api/errors/generic_error.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
import logging
from resources import strings

from fastapi import Request
from fastapi.responses import PlainTextResponse

from services.logging import logger


async def generic_error_handler(_: Request, exception: Exception) -> PlainTextResponse:
logging.debug("=====================================")
logging.exception(exception)
logging.debug("=====================================")
logger.debug("=====================================")
logger.exception(exception)
logger.debug("=====================================")
return PlainTextResponse(strings.UNABLE_TO_PROCESS_REQUEST, status_code=500)
14 changes: 7 additions & 7 deletions api_app/api/routes/airlock.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import logging
from typing import Optional

from fastapi import APIRouter, Depends, HTTPException, status as status_code, Response
Expand Down Expand Up @@ -27,6 +26,7 @@

from services.airlock import create_review_vm, review_airlock_request, get_airlock_container_link, get_allowed_actions, save_and_publish_event_airlock_request, update_and_publish_event_airlock_request, \
enrich_requests_with_allowed_actions, get_airlock_requests_by_user_and_workspace, cancel_request
from services.logging import logger

airlock_workspace_router = APIRouter(dependencies=[Depends(get_current_workspace_owner_or_researcher_user_or_airlock_manager)])

Expand All @@ -46,7 +46,7 @@ async def create_draft_request(airlock_request_input: AirlockRequestInCreate, us
allowed_actions = get_allowed_actions(airlock_request, user, airlock_request_repo)
return AirlockRequestWithAllowedUserActions(airlockRequest=airlock_request, allowedUserActions=allowed_actions)
except (ValidationError, ValueError) as e:
logging.exception("Failed creating airlock request model instance")
logger.exception("Failed creating airlock request model instance")
raise HTTPException(status_code=status_code.HTTP_400_BAD_REQUEST, detail=str(e))


Expand All @@ -69,7 +69,7 @@ async def get_all_airlock_requests_by_workspace(
airlock_requests_with_allowed_user_actions = enrich_requests_with_allowed_actions(airlock_requests, user, airlock_request_repo)
return AirlockRequestWithAllowedUserActionsInList(airlockRequests=airlock_requests_with_allowed_user_actions)
except (ValidationError, ValueError) as e:
logging.exception("Failed retrieving all the airlock requests for a workspace")
logger.exception("Failed retrieving all the airlock requests for a workspace")
raise HTTPException(status_code=status_code.HTTP_400_BAD_REQUEST, detail=str(e))


Expand Down Expand Up @@ -137,17 +137,17 @@ async def create_review_user_resource(
response.headers["Location"] = construct_location_header(operation)
return AirlockRequestAndOperationInResponse(airlockRequest=updated_resource, operation=operation)
except (KeyError, TypeError, EntityDoesNotExist) as e:
logging.exception("Failed to retrieve Airlock Review configuration for workspace %s", workspace.id)
logger.exception("Failed to retrieve Airlock Review configuration for workspace %s", workspace.id)
raise HTTPException(status_code=status_code.HTTP_422_UNPROCESSABLE_ENTITY,
detail=f"Failed to retrieve Airlock Review configuration for workspace {workspace.id}.\
Please ask your TRE administrator to check the configuration. Details: {str(e)}")
except (ValidationError, ValueError) as e:
logging.exception("Failed create user resource model instance due to validation error")
logger.exception("Failed create user resource model instance due to validation error")
raise HTTPException(status_code=status_code.HTTP_500_INTERNAL_SERVER_ERROR,
detail=f"Invalid configuration for creating user resource. Please contact your TRE administrator. \
Details: {str(e)}")
except UserNotAuthorizedToUseTemplate as e:
logging.exception("User not authorized to use template")
logger.exception("User not authorized to use template")
raise HTTPException(status_code=status_code.HTTP_403_FORBIDDEN, detail=str(e))


Expand All @@ -171,7 +171,7 @@ async def create_airlock_review(
allowed_actions = get_allowed_actions(updated_airlock_request, user, airlock_request_repo)
return AirlockRequestWithAllowedUserActions(airlockRequest=updated_airlock_request, allowedUserActions=allowed_actions)
except (ValidationError, ValueError) as e:
logging.exception("Failed creating airlock review model instance")
logger.exception("Failed creating airlock review model instance")
raise HTTPException(status_code=status_code.HTTP_400_BAD_REQUEST, detail=str(e))


Expand Down
7 changes: 4 additions & 3 deletions api_app/api/routes/costs.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
from dateutil.relativedelta import relativedelta
from fastapi import APIRouter, Depends, Query, HTTPException, status
from fastapi.responses import JSONResponse
import logging
from typing import Optional

from pydantic import UUID4
Expand All @@ -18,6 +17,8 @@
from resources import strings
from services.authentication import get_current_admin_user, get_current_workspace_owner_or_tre_admin
from services.cost_service import CostService, ServiceUnavailable, SubscriptionNotSupported, TooManyRequests, WorkspaceDoesNotExist, cost_service_factory
from services.logging import logger


costs_core_router = APIRouter(dependencies=[Depends(get_current_admin_user)])
costs_workspace_router = APIRouter(dependencies=[Depends(get_current_workspace_owner_or_tre_admin)])
Expand Down Expand Up @@ -79,7 +80,7 @@ async def costs(
"retry-after": str(e.retry_after)
}}, status_code=503, headers={"Retry-After": str(e.retry_after)})
except Exception:
logging.exception("Failed to query Azure TRE costs")
logger.exception("Failed to query Azure TRE costs")
raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=strings.API_GET_COSTS_INTERNAL_SERVER_ERROR)


Expand Down Expand Up @@ -117,5 +118,5 @@ async def workspace_costs(workspace_id: UUID4, params: CostsQueryParams = Depend
"retry-after": str(e.retry_after)
}}, status_code=503, headers={"Retry-After": str(e.retry_after)})
except Exception:
logging.exception("Failed to query Azure TRE costs")
logger.exception("Failed to query Azure TRE costs")
raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=strings.API_GET_COSTS_INTERNAL_SERVER_ERROR)
9 changes: 4 additions & 5 deletions api_app/api/routes/health.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
import asyncio
import logging
from fastapi import APIRouter
from core import credentials
from models.schemas.status import HealthCheck, ServiceStatus, StatusEnum
from resources import strings
from services.health_checker import create_resource_processor_status, create_state_store_status, create_service_bus_status

from services.logging import logger

router = APIRouter()

Expand All @@ -25,9 +24,9 @@ async def health_check() -> HealthCheck:
sb_status, sb_message = sb
rp_status, rp_message = rp
if cosmos_status == StatusEnum.not_ok or sb_status == StatusEnum.not_ok or rp_status == StatusEnum.not_ok:
logging.error(f'Cosmos Status: {cosmos_status}, message: {cosmos_message}')
logging.error(f'Service Bus Status: {sb_status}, message: {sb_message}')
logging.error(f'Resource Processor Status: {rp_status}, message: {rp_message}')
logger.error(f'Cosmos Status: {cosmos_status}, message: {cosmos_message}')
logger.error(f'Service Bus Status: {sb_status}, message: {sb_message}')
logger.error(f'Resource Processor Status: {rp_status}, message: {rp_message}')

services = [ServiceStatus(service=strings.COSMOS_DB, status=cosmos_status, message=cosmos_message),
ServiceStatus(service=strings.SERVICE_BUS, status=sb_status, message=sb_message),
Expand Down
28 changes: 13 additions & 15 deletions api_app/api/routes/migrations.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
import logging

from fastapi import APIRouter, Depends, HTTPException, status
from db.migrations.airlock import AirlockMigration
from db.migrations.resources import ResourceMigration
Expand All @@ -12,7 +10,7 @@
from db.migrations.workspaces import WorkspaceMigration
from db.repositories.resources import ResourceRepository
from models.schemas.migrations import MigrationOutList, Migration

from services.logging import logger

migrations_core_router = APIRouter(dependencies=[Depends(get_current_admin_user)])

Expand All @@ -31,59 +29,59 @@ async def migrate_database(resources_repo=Depends(get_repository(ResourceReposit
airlock_migration=Depends(get_repository(AirlockMigration)),):
try:
migrations = list()
logging.info("PR 1030")
logger.info("PR 1030")
await resources_repo.rename_field_name('resourceTemplateName', 'templateName')
await resources_repo.rename_field_name('resourceTemplateVersion', 'templateVersion')
await resources_repo.rename_field_name('resourceTemplateParameters', 'properties')
migrations.append(Migration(issueNumber="PR 1030", status="Executed"))

logging.info("PR 1031")
logger.info("PR 1031")
await resources_repo.rename_field_name('workspaceType', 'templateName')
await resources_repo.rename_field_name('workspaceServiceType', 'templateName')
await resources_repo.rename_field_name('userResourceType', 'templateName')
migrations.append(Migration(issueNumber="PR 1031", status="Executed"))

logging.info("PR 1717 - Shared services")
logger.info("PR 1717 - Shared services")
migration_status = "Executed" if await shared_services_migration.deleteDuplicatedSharedServices() else "Skipped"
migrations.append(Migration(issueNumber="PR 1717", status=migration_status))

logging.info("PR 1726 - Authentication needs to be in properties so we can update them")
logger.info("PR 1726 - Authentication needs to be in properties so we can update them")
migration_status = "Executed" if await workspace_migration.moveAuthInformationToProperties() else "Skipped"
migrations.append(Migration(issueNumber="PR 1726", status=migration_status))

logging.info("PR 1406 - Extra field to support UI")
logger.info("PR 1406 - Extra field to support UI")
num_rows = await resource_migration.add_deployment_status_field(operations_repo)
migrations.append(Migration(issueNumber="1406", status=f'Updated {num_rows} resource objects'))

logging.info("PR 3066 - Archive resources history")
logger.info("PR 3066 - Archive resources history")
num_rows = await resource_migration.archive_history(resource_history_repo)
migrations.append(Migration(issueNumber="3066", status=f'Updated {num_rows} resource objects'))

logging.info("PR 2371 - Validate min firewall version")
logger.info("PR 2371 - Validate min firewall version")
await shared_services_migration.checkMinFirewallVersion()
migrations.append(Migration(issueNumber="2371", status='Firewall version meets requirement'))

logging.info("PR 2779 - Restructure Airlock requests & add createdBy field")
logger.info("PR 2779 - Restructure Airlock requests & add createdBy field")
await airlock_migration.rename_field_name('requestType', 'type')
await airlock_migration.rename_field_name('requestTitle', 'title')
await airlock_migration.rename_field_name('user', 'updatedBy')
await airlock_migration.rename_field_name('creationTime', 'createdWhen')
num_updated = await airlock_migration.add_created_by_and_rename_in_history()
migrations.append(Migration(issueNumber="2779", status=f'Renamed fields & updated {num_updated} airlock requests with createdBy'))

logging.info("PR 2883 - Support multiple reviewer VMs per Airlock request")
logger.info("PR 2883 - Support multiple reviewer VMs per Airlock request")
num_updated = await airlock_migration.change_review_resources_to_dict()
migrations.append(Migration(issueNumber="2883", status=f'Updated {num_updated} airlock requests with new reviewUserResources format'))

logging.info("PR 3152 - Migrate reviewDecision of Airlock Reviews")
logger.info("PR 3152 - Migrate reviewDecision of Airlock Reviews")
num_updated = await airlock_migration.update_review_decision_values()
migrations.append(Migration(issueNumber="3152", status=f'Updated {num_updated} airlock requests with new reviewDecision value'))

logging.info("PR 3358 - Migrate OperationSteps of Operations")
logger.info("PR 3358 - Migrate OperationSteps of Operations")
num_updated = await resource_migration.migrate_step_id_of_operation_steps(operations_repo)
migrations.append(Migration(issueNumber="3358", status=f'Updated {num_updated} operations'))

return MigrationOutList(migrations=migrations)
except Exception as e:
logging.exception("Failed to migrate database")
logger.exception("Failed to migrate database")
raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(e))
12 changes: 6 additions & 6 deletions api_app/api/routes/resource_helpers.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
from datetime import datetime
import logging
import semantic_version
from copy import deepcopy
from typing import Dict, Any, Optional
Expand All @@ -26,6 +25,7 @@
RequestAction,
)
from services.authentication import get_access_service
from services.logging import logger


async def delete_validation(resource: Resource, resource_repo: ResourceRepository):
Expand Down Expand Up @@ -75,7 +75,7 @@ async def save_and_deploy_resource(
)
await resource_repo.save_item(masked_resource)
except Exception:
logging.exception(f"Failed saving resource item {resource.id}")
logger.exception(f"Failed saving resource item {resource.id}")
raise HTTPException(
status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
detail=strings.STATE_STORE_ENDPOINT_NOT_RESPONDING,
Expand All @@ -94,7 +94,7 @@ async def save_and_deploy_resource(
return operation
except Exception:
await resource_repo.delete_item(resource.id)
logging.exception("Failed send resource request message")
logger.exception("Failed send resource request message")
raise HTTPException(
status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
detail=strings.SERVICE_BUS_GENERAL_ERROR_MESSAGE,
Expand Down Expand Up @@ -189,7 +189,7 @@ async def send_uninstall_message(
)
return operation
except Exception:
logging.exception(f"Failed to send {resource_type} resource delete message")
logger.exception(f"Failed to send {resource_type} resource delete message")
raise HTTPException(
status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
detail=strings.SERVICE_BUS_GENERAL_ERROR_MESSAGE,
Expand Down Expand Up @@ -240,7 +240,7 @@ async def send_custom_action_message(
)
return operation
except Exception:
logging.exception(f"Failed to send {resource_type} resource custom action message")
logger.exception(f"Failed to send {resource_type} resource custom action message")
raise HTTPException(
status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
detail=strings.SERVICE_BUS_GENERAL_ERROR_MESSAGE,
Expand Down Expand Up @@ -278,7 +278,7 @@ async def get_template(
detail=strings.NO_UNIQUE_CURRENT_FOR_TEMPLATE,
)
except Exception as e:
logging.debug(e)
logger.debug(e)
raise HTTPException(
status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
detail=strings.STATE_STORE_ENDPOINT_NOT_RESPONDING,
Expand Down
Loading

0 comments on commit c382f3d

Please sign in to comment.