Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Switch to Open Telemetry for logging #3762

Merged
merged 20 commits into from
Dec 7, 2023
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 8 additions & 3 deletions .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,10 @@
"console": "integratedTerminal",
"preLaunchTask": "Copy_env_file_for_api_debug",
"cwd": "${workspaceFolder}/api_app",
"envFile": "${workspaceFolder}/api_app/.env"
"envFile": "${workspaceFolder}/api_app/.env",
"env": {
"OTEL_RESOURCE_ATTRIBUTES": "service.name=api,service.instance.id=local_debug,service.version=dev"
}
},
{
"name": "E2E Extended",
Expand Down Expand Up @@ -190,8 +193,10 @@
"cwd": "${workspaceFolder}/resource_processor",
"envFile": "${workspaceFolder}/core/private.env",
"env": {
"PYTHONPATH": "."
}
"PYTHONPATH": ".",
"OTEL_RESOURCE_ATTRIBUTES": "service.name=resource_processor,service.instance.id=local_debug,service.version=dev"
},
"justMyCode": false
},
{
"name": "Debug Python file",
Expand Down
3 changes: 2 additions & 1 deletion api_app/.env.sample
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
# API configuration
# -----------------
# When debug is set to True, debugging information for unhandled exceptions is shown in the swagger UI and logging is more verbose
# DEBUG=True
# LOGGING_LEVEL can be set to DEBUG, INFO, WARNING, ERROR or CRITICAL
LOGGING_LEVEL="INFO"

# OAUTH information - client ids etc. for the AAD Apps
# ----------------------------------------------------
Expand Down
2 changes: 1 addition & 1 deletion api_app/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.16.8"
__version__ = "0.17.0"
12 changes: 6 additions & 6 deletions api_app/api/dependencies/database.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import logging
from typing import Callable, Type

from azure.cosmos.aio import CosmosClient
Expand All @@ -9,10 +8,11 @@
from db.errors import UnableToAccessDatabase
from db.repositories.base import BaseRepository
from resources import strings
from services.logging import logger


async def connect_to_db() -> CosmosClient:
logging.debug(f"Connecting to {config.STATE_STORE_ENDPOINT}")
logger.debug(f"Connecting to {config.STATE_STORE_ENDPOINT}")

try:
async with credentials.get_credential_async() as credential:
Expand All @@ -27,10 +27,10 @@ async def connect_to_db() -> CosmosClient:
cosmos_client = CosmosClient(
config.STATE_STORE_ENDPOINT, primary_master_key, connection_verify=False
)
logging.debug("Connection established")
logger.debug("Connection established")
return cosmos_client
except Exception:
logging.exception("Connection to state store could not be established.")
logger.exception("Connection to state store could not be established.")


async def get_store_key(credential) -> str:
Expand All @@ -53,7 +53,7 @@ async def get_store_key(credential) -> str:


async def get_db_client(app: FastAPI) -> CosmosClient:
if not app.state.cosmos_client:
if not hasattr(app.state, 'cosmos_client') or not app.state.cosmos_client:
app.state.cosmos_client = await connect_to_db()
return app.state.cosmos_client

Expand All @@ -71,7 +71,7 @@ async def _get_repo(
try:
return await repo_type.create(client)
except UnableToAccessDatabase:
logging.exception(strings.STATE_STORE_ENDPOINT_NOT_RESPONDING)
logger.exception(strings.STATE_STORE_ENDPOINT_NOT_RESPONDING)
raise HTTPException(
status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
detail=strings.STATE_STORE_ENDPOINT_NOT_RESPONDING,
Expand Down
9 changes: 5 additions & 4 deletions api_app/api/errors/generic_error.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
import logging
from resources import strings

from fastapi import Request
from fastapi.responses import PlainTextResponse

from services.logging import logger


async def generic_error_handler(_: Request, exception: Exception) -> PlainTextResponse:
logging.debug("=====================================")
logging.exception(exception)
logging.debug("=====================================")
logger.debug("=====================================")
logger.exception(exception)
logger.debug("=====================================")
return PlainTextResponse(strings.UNABLE_TO_PROCESS_REQUEST, status_code=500)
14 changes: 7 additions & 7 deletions api_app/api/routes/airlock.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import logging
from typing import Optional

from fastapi import APIRouter, Depends, HTTPException, status as status_code, Response
Expand Down Expand Up @@ -27,6 +26,7 @@

from services.airlock import create_review_vm, review_airlock_request, get_airlock_container_link, get_allowed_actions, save_and_publish_event_airlock_request, update_and_publish_event_airlock_request, \
enrich_requests_with_allowed_actions, get_airlock_requests_by_user_and_workspace, cancel_request
from services.logging import logger

airlock_workspace_router = APIRouter(dependencies=[Depends(get_current_workspace_owner_or_researcher_user_or_airlock_manager)])

Expand All @@ -46,7 +46,7 @@ async def create_draft_request(airlock_request_input: AirlockRequestInCreate, us
allowed_actions = get_allowed_actions(airlock_request, user, airlock_request_repo)
return AirlockRequestWithAllowedUserActions(airlockRequest=airlock_request, allowedUserActions=allowed_actions)
except (ValidationError, ValueError) as e:
logging.exception("Failed creating airlock request model instance")
logger.exception("Failed creating airlock request model instance")
raise HTTPException(status_code=status_code.HTTP_400_BAD_REQUEST, detail=str(e))


Expand All @@ -69,7 +69,7 @@ async def get_all_airlock_requests_by_workspace(
airlock_requests_with_allowed_user_actions = enrich_requests_with_allowed_actions(airlock_requests, user, airlock_request_repo)
return AirlockRequestWithAllowedUserActionsInList(airlockRequests=airlock_requests_with_allowed_user_actions)
except (ValidationError, ValueError) as e:
logging.exception("Failed retrieving all the airlock requests for a workspace")
logger.exception("Failed retrieving all the airlock requests for a workspace")
raise HTTPException(status_code=status_code.HTTP_400_BAD_REQUEST, detail=str(e))


Expand Down Expand Up @@ -137,17 +137,17 @@ async def create_review_user_resource(
response.headers["Location"] = construct_location_header(operation)
return AirlockRequestAndOperationInResponse(airlockRequest=updated_resource, operation=operation)
except (KeyError, TypeError, EntityDoesNotExist) as e:
logging.exception("Failed to retrieve Airlock Review configuration for workspace %s", workspace.id)
logger.exception("Failed to retrieve Airlock Review configuration for workspace %s", workspace.id)
raise HTTPException(status_code=status_code.HTTP_422_UNPROCESSABLE_ENTITY,
detail=f"Failed to retrieve Airlock Review configuration for workspace {workspace.id}.\
Please ask your TRE administrator to check the configuration. Details: {str(e)}")
except (ValidationError, ValueError) as e:
logging.exception("Failed create user resource model instance due to validation error")
logger.exception("Failed create user resource model instance due to validation error")
raise HTTPException(status_code=status_code.HTTP_500_INTERNAL_SERVER_ERROR,
detail=f"Invalid configuration for creating user resource. Please contact your TRE administrator. \
Details: {str(e)}")
except UserNotAuthorizedToUseTemplate as e:
logging.exception("User not authorized to use template")
logger.exception("User not authorized to use template")
raise HTTPException(status_code=status_code.HTTP_403_FORBIDDEN, detail=str(e))


Expand All @@ -171,7 +171,7 @@ async def create_airlock_review(
allowed_actions = get_allowed_actions(updated_airlock_request, user, airlock_request_repo)
return AirlockRequestWithAllowedUserActions(airlockRequest=updated_airlock_request, allowedUserActions=allowed_actions)
except (ValidationError, ValueError) as e:
logging.exception("Failed creating airlock review model instance")
logger.exception("Failed creating airlock review model instance")
raise HTTPException(status_code=status_code.HTTP_400_BAD_REQUEST, detail=str(e))


Expand Down
7 changes: 4 additions & 3 deletions api_app/api/routes/costs.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
from dateutil.relativedelta import relativedelta
from fastapi import APIRouter, Depends, Query, HTTPException, status
from fastapi.responses import JSONResponse
import logging
from typing import Optional

from pydantic import UUID4
Expand All @@ -18,6 +17,8 @@
from resources import strings
from services.authentication import get_current_admin_user, get_current_workspace_owner_or_tre_admin
from services.cost_service import CostService, ServiceUnavailable, SubscriptionNotSupported, TooManyRequests, WorkspaceDoesNotExist, cost_service_factory
from services.logging import logger


costs_core_router = APIRouter(dependencies=[Depends(get_current_admin_user)])
costs_workspace_router = APIRouter(dependencies=[Depends(get_current_workspace_owner_or_tre_admin)])
Expand Down Expand Up @@ -79,7 +80,7 @@ async def costs(
"retry-after": str(e.retry_after)
}}, status_code=503, headers={"Retry-After": str(e.retry_after)})
except Exception:
logging.exception("Failed to query Azure TRE costs")
logger.exception("Failed to query Azure TRE costs")
raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=strings.API_GET_COSTS_INTERNAL_SERVER_ERROR)


Expand Down Expand Up @@ -117,5 +118,5 @@ async def workspace_costs(workspace_id: UUID4, params: CostsQueryParams = Depend
"retry-after": str(e.retry_after)
}}, status_code=503, headers={"Retry-After": str(e.retry_after)})
except Exception:
logging.exception("Failed to query Azure TRE costs")
logger.exception("Failed to query Azure TRE costs")
raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=strings.API_GET_COSTS_INTERNAL_SERVER_ERROR)
9 changes: 4 additions & 5 deletions api_app/api/routes/health.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
import asyncio
import logging
from fastapi import APIRouter
from core import credentials
from models.schemas.status import HealthCheck, ServiceStatus, StatusEnum
from resources import strings
from services.health_checker import create_resource_processor_status, create_state_store_status, create_service_bus_status

from services.logging import logger

router = APIRouter()

Expand All @@ -25,9 +24,9 @@ async def health_check() -> HealthCheck:
sb_status, sb_message = sb
rp_status, rp_message = rp
if cosmos_status == StatusEnum.not_ok or sb_status == StatusEnum.not_ok or rp_status == StatusEnum.not_ok:
logging.error(f'Cosmos Status: {cosmos_status}, message: {cosmos_message}')
logging.error(f'Service Bus Status: {sb_status}, message: {sb_message}')
logging.error(f'Resource Processor Status: {rp_status}, message: {rp_message}')
logger.error(f'Cosmos Status: {cosmos_status}, message: {cosmos_message}')
logger.error(f'Service Bus Status: {sb_status}, message: {sb_message}')
logger.error(f'Resource Processor Status: {rp_status}, message: {rp_message}')

services = [ServiceStatus(service=strings.COSMOS_DB, status=cosmos_status, message=cosmos_message),
ServiceStatus(service=strings.SERVICE_BUS, status=sb_status, message=sb_message),
Expand Down
28 changes: 13 additions & 15 deletions api_app/api/routes/migrations.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
import logging

from fastapi import APIRouter, Depends, HTTPException, status
from db.migrations.airlock import AirlockMigration
from db.migrations.resources import ResourceMigration
Expand All @@ -12,7 +10,7 @@
from db.migrations.workspaces import WorkspaceMigration
from db.repositories.resources import ResourceRepository
from models.schemas.migrations import MigrationOutList, Migration

from services.logging import logger

migrations_core_router = APIRouter(dependencies=[Depends(get_current_admin_user)])

Expand All @@ -31,59 +29,59 @@ async def migrate_database(resources_repo=Depends(get_repository(ResourceReposit
airlock_migration=Depends(get_repository(AirlockMigration)),):
try:
migrations = list()
logging.info("PR 1030")
logger.info("PR 1030")
await resources_repo.rename_field_name('resourceTemplateName', 'templateName')
await resources_repo.rename_field_name('resourceTemplateVersion', 'templateVersion')
await resources_repo.rename_field_name('resourceTemplateParameters', 'properties')
migrations.append(Migration(issueNumber="PR 1030", status="Executed"))

logging.info("PR 1031")
logger.info("PR 1031")
await resources_repo.rename_field_name('workspaceType', 'templateName')
await resources_repo.rename_field_name('workspaceServiceType', 'templateName')
await resources_repo.rename_field_name('userResourceType', 'templateName')
migrations.append(Migration(issueNumber="PR 1031", status="Executed"))

logging.info("PR 1717 - Shared services")
logger.info("PR 1717 - Shared services")
migration_status = "Executed" if await shared_services_migration.deleteDuplicatedSharedServices() else "Skipped"
migrations.append(Migration(issueNumber="PR 1717", status=migration_status))

logging.info("PR 1726 - Authentication needs to be in properties so we can update them")
logger.info("PR 1726 - Authentication needs to be in properties so we can update them")
migration_status = "Executed" if await workspace_migration.moveAuthInformationToProperties() else "Skipped"
migrations.append(Migration(issueNumber="PR 1726", status=migration_status))

logging.info("PR 1406 - Extra field to support UI")
logger.info("PR 1406 - Extra field to support UI")
num_rows = await resource_migration.add_deployment_status_field(operations_repo)
migrations.append(Migration(issueNumber="1406", status=f'Updated {num_rows} resource objects'))

logging.info("PR 3066 - Archive resources history")
logger.info("PR 3066 - Archive resources history")
num_rows = await resource_migration.archive_history(resource_history_repo)
migrations.append(Migration(issueNumber="3066", status=f'Updated {num_rows} resource objects'))

logging.info("PR 2371 - Validate min firewall version")
logger.info("PR 2371 - Validate min firewall version")
await shared_services_migration.checkMinFirewallVersion()
migrations.append(Migration(issueNumber="2371", status='Firewall version meets requirement'))

logging.info("PR 2779 - Restructure Airlock requests & add createdBy field")
logger.info("PR 2779 - Restructure Airlock requests & add createdBy field")
await airlock_migration.rename_field_name('requestType', 'type')
await airlock_migration.rename_field_name('requestTitle', 'title')
await airlock_migration.rename_field_name('user', 'updatedBy')
await airlock_migration.rename_field_name('creationTime', 'createdWhen')
num_updated = await airlock_migration.add_created_by_and_rename_in_history()
migrations.append(Migration(issueNumber="2779", status=f'Renamed fields & updated {num_updated} airlock requests with createdBy'))

logging.info("PR 2883 - Support multiple reviewer VMs per Airlock request")
logger.info("PR 2883 - Support multiple reviewer VMs per Airlock request")
num_updated = await airlock_migration.change_review_resources_to_dict()
migrations.append(Migration(issueNumber="2883", status=f'Updated {num_updated} airlock requests with new reviewUserResources format'))

logging.info("PR 3152 - Migrate reviewDecision of Airlock Reviews")
logger.info("PR 3152 - Migrate reviewDecision of Airlock Reviews")
num_updated = await airlock_migration.update_review_decision_values()
migrations.append(Migration(issueNumber="3152", status=f'Updated {num_updated} airlock requests with new reviewDecision value'))

logging.info("PR 3358 - Migrate OperationSteps of Operations")
logger.info("PR 3358 - Migrate OperationSteps of Operations")
num_updated = await resource_migration.migrate_step_id_of_operation_steps(operations_repo)
migrations.append(Migration(issueNumber="3358", status=f'Updated {num_updated} operations'))

return MigrationOutList(migrations=migrations)
except Exception as e:
logging.exception("Failed to migrate database")
logger.exception("Failed to migrate database")
raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(e))
12 changes: 6 additions & 6 deletions api_app/api/routes/resource_helpers.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
from datetime import datetime
import logging
import semantic_version
from copy import deepcopy
from typing import Dict, Any, Optional
Expand All @@ -26,6 +25,7 @@
RequestAction,
)
from services.authentication import get_access_service
from services.logging import logger


async def delete_validation(resource: Resource, resource_repo: ResourceRepository):
Expand Down Expand Up @@ -75,7 +75,7 @@ async def save_and_deploy_resource(
)
await resource_repo.save_item(masked_resource)
except Exception:
logging.exception(f"Failed saving resource item {resource.id}")
logger.exception(f"Failed saving resource item {resource.id}")
raise HTTPException(
status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
detail=strings.STATE_STORE_ENDPOINT_NOT_RESPONDING,
Expand All @@ -94,7 +94,7 @@ async def save_and_deploy_resource(
return operation
except Exception:
await resource_repo.delete_item(resource.id)
logging.exception("Failed send resource request message")
logger.exception("Failed send resource request message")
raise HTTPException(
status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
detail=strings.SERVICE_BUS_GENERAL_ERROR_MESSAGE,
Expand Down Expand Up @@ -189,7 +189,7 @@ async def send_uninstall_message(
)
return operation
except Exception:
logging.exception(f"Failed to send {resource_type} resource delete message")
logger.exception(f"Failed to send {resource_type} resource delete message")
raise HTTPException(
status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
detail=strings.SERVICE_BUS_GENERAL_ERROR_MESSAGE,
Expand Down Expand Up @@ -240,7 +240,7 @@ async def send_custom_action_message(
)
return operation
except Exception:
logging.exception(f"Failed to send {resource_type} resource custom action message")
logger.exception(f"Failed to send {resource_type} resource custom action message")
raise HTTPException(
status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
detail=strings.SERVICE_BUS_GENERAL_ERROR_MESSAGE,
Expand Down Expand Up @@ -278,7 +278,7 @@ async def get_template(
detail=strings.NO_UNIQUE_CURRENT_FOR_TEMPLATE,
)
except Exception as e:
logging.debug(e)
logger.debug(e)
raise HTTPException(
status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
detail=strings.STATE_STORE_ENDPOINT_NOT_RESPONDING,
Expand Down
Loading
Loading