diff --git a/covid_api/api/api_v1/endpoints/detections.py b/covid_api/api/api_v1/endpoints/detections.py
index 8170db1..f649916 100644
--- a/covid_api/api/api_v1/endpoints/detections.py
+++ b/covid_api/api/api_v1/endpoints/detections.py
@@ -33,7 +33,5 @@ def get_detection(ml_type: MLTypes, site: SiteNames, date: str):
                 key=f"detections-{ml_type.value}/{site.value}/{date}.geojson",
             )
         )
-    # TODO: catch the specific exception that corresponds to a missing file
-    # and raise 404, otherwise raise a generic 500 error.
     except Exception:
         raise HTTPException(status_code=404, detail="Detections not found")
diff --git a/covid_api/api/api_v1/endpoints/timelapse.py b/covid_api/api/api_v1/endpoints/timelapse.py
index b2457a4..d050b0c 100644
--- a/covid_api/api/api_v1/endpoints/timelapse.py
+++ b/covid_api/api/api_v1/endpoints/timelapse.py
@@ -1,182 +1,23 @@
 """API metadata."""
-import re
-from concurrent import futures
-from datetime import datetime, timedelta
-from typing import List, Union
-
-from dateutil.relativedelta import relativedelta
 
 from covid_api.api.utils import get_zonal_stat
-from covid_api.core.config import API_VERSION_STR
-from covid_api.db.static.datasets import datasets as _datasets
-from covid_api.db.static.errors import InvalidIdentifier
-from covid_api.db.static.sites import sites
-from covid_api.models.static import Dataset
 from covid_api.models.timelapse import TimelapseRequest, TimelapseValue
 
-from fastapi import APIRouter, HTTPException
-
-from starlette.requests import Request
+from fastapi import APIRouter
 
 router = APIRouter()
 
 
-# TODO: validate inputs with typing/pydantic models
-def _get_mean_median(query, url, dataset):
-
-    # format S3 URL template with spotlightId, if dataset is
-    # spotlight specific
-    if "{spotlightId}" in url:
-        if not query.spotlight_id:
-            raise HTTPException(
-                status_code=400,
-                detail=f"Must provide a `spotlight_id` for dataset: {dataset.id}",
-            )
-        url = _insert_spotlight_id(url, query.spotlight_id)
-    try:
-        mean, median = get_zonal_stat(query.geojson, url)
-        return dict(mean=mean, median=median)
-
-    except Exception:
-        raise HTTPException(
-            status_code=400,
-            detail=(
-                "Unable to calculate mean/median values. This either due to a bounding box "
-                "extending beyond the edges of the COG or there are no COGs available for the "
-                "requested date range."
-            ),
-        )
-
-
 @router.post(
     "/timelapse",
     responses={200: {"description": "Return timelapse values for a given geometry"}},
-    response_model=Union[List[TimelapseValue], TimelapseValue],
-    response_model_exclude_none=True,
+    response_model=TimelapseValue,
 )
-def timelapse(request: Request, query: TimelapseRequest):
+def timelapse(query: TimelapseRequest):
     """Handle /timelapse requests."""
-
-    # get dataset metadata for the requested dataset
-    # will be used to validate other parts of the query
-    dataset = _get_dataset_metadata(request, query)
-
-    # extract S3 URL template from dataset metadata info
-    url = _extract_s3_url(dataset)
-
-    if query.date:
-
-        # format S3 URL template with date object
-        url = _insert_date(url, dataset, query.date)
-        return _get_mean_median(query, url, dataset)
-
-    # Gather a list of dates to query
-    if query.date_range:
-
-        if dataset.time_unit == "day":
-            # Get start and end dates
-            start = datetime.strptime(query.date_range[0], "%Y_%m_%d")
-            end = datetime.strptime(query.date_range[1], "%Y_%m_%d")
-
-            # Populated all days in between Add 1 to days to ensure it contains the end date as well
-            dates = [
-                datetime.strftime((start + timedelta(days=x)), "%Y_%m_%d")
-                for x in range(0, (end - start).days + 1)
-            ]
-
-        if dataset.time_unit == "month":
-            # Get start and end dates, as a
-            start = datetime.strptime(query.date_range[0], "%Y%m")
-            end = datetime.strptime(query.date_range[1], "%Y%m")
-            num_months = (end.year - start.year) * 12 + (end.month - start.month)
-            dates = [
-                datetime.strftime((start + relativedelta(months=+x)), "%Y%m")
-                for x in range(0, num_months + 1)
-            ]
-
-        with futures.ThreadPoolExecutor(max_workers=15) as executor:
-            future_stats_queries = {
-                executor.submit(
-                    _get_mean_median, query, _insert_date(url, dataset, date), dataset
-                ): date
-                for date in dates
-            }
-
-            stats = []
-
-            for future in futures.as_completed(future_stats_queries):
-                date = future_stats_queries[future]
-                try:
-                    stats.append({"date": date, **future.result()})
-                except HTTPException as e:
-
-                    stats.append({"date": date, "error": e.detail})
-        return stats
-
-
-def _get_dataset_metadata(request: Request, query: TimelapseRequest):
-
-    scheme = request.url.scheme
-    host = request.headers["host"]
-
-    if API_VERSION_STR:
-        host += API_VERSION_STR
-
-    dataset = list(
-        filter(
-            lambda d: d.id == query.dataset_id,
-            _datasets.get_all(api_url=f"{scheme}://{host}").datasets,
-        )
-    )
-
-    if not dataset:
-        raise HTTPException(
-            status_code=404, detail=f"No dataset found for id: {query.dataset_id}"
-        )
-
-    dataset = dataset[0]
-
-    if dataset.source.type != "raster":
-        raise HTTPException(
-            status_code=400,
-            detail=f"Dataset {query.dataset_id} is not a raster-type dataset",
-        )
-
-    return dataset
-
-
-def _extract_s3_url(dataset: Dataset):
-    url_search = re.search(r"url=([^&\s]*)", dataset.source.tiles[0])
-    if not url_search:
-        raise HTTPException(status_code=500)
-
-    return url_search.group(1)
-
-
-def _insert_date(url: str, dataset: Dataset, date: str):
-    _validate_query_date(dataset, date)
-    return url.replace("{date}", date)
-
-
-def _validate_query_date(dataset: Dataset, date: str):
-    date_format = "%Y_%m_%d" if dataset.time_unit == "day" else "%Y%m"
-    try:
-        return datetime.strptime(date, date_format)
-    except ValueError:
-        raise HTTPException(
-            status_code=400,
-            detail=f"Invalid date format. {date} should be either YYYY_MM_DD or YYYYMM",
-        )
-
-
-def _insert_spotlight_id(url: str, spotlight_id: str):
-    if not spotlight_id:
-        raise HTTPException(status_code=400, detail="Missing spotlightId")
-    try:
-        sites.get(spotlight_id)
-    except InvalidIdentifier:
-        raise HTTPException(
-            status_code=404, detail=f"No spotlight found for id: {spotlight_id}"
-        )
-
-    return url.replace("{spotlightId}", spotlight_id)
+    if query.type == "no2":
+        url = f"s3://covid-eo-data/OMNO2d_HRM/OMI_trno2_0.10x0.10_{query.month}_Col3_V4.nc.tif"
+    else:
+        url = f"s3://covid-eo-data/xco2-mean/xco2_16day_mean.{query.month}.tif"
+    mean, median = get_zonal_stat(query.geojson, url)
+    return dict(mean=mean, median=median)
diff --git a/covid_api/api/utils.py b/covid_api/api/utils.py
index 8c2d406..1267ea0 100644
--- a/covid_api/api/utils.py
+++ b/covid_api/api/utils.py
@@ -223,13 +223,8 @@ def get_zonal_stat(geojson: Feature, raster: str) -> Tuple[float, float]:
 
     # calculate the coverage of pixels for weighting
     pctcover = rasterize_pctcover(geom, atrans=window_affine, shape=data.shape[1:])
 
-    # Create a mask of the data that filters out the tile's `nodata` value. In order
-    # to ensure the average calculation isn't incorrectly affected by large, negative,
-    # `nodata` values.
-    masked_data = np.ma.masked_equal(data[0], src.nodata)
-
     return (
-        np.average(masked_data, weights=pctcover),
+        np.average(data[0], weights=pctcover),
         np.nanmedian(data),
     )
diff --git a/covid_api/db/static/datasets/no2-diff.json b/covid_api/db/static/datasets/no2-diff.json
index d69f09e..987f70c 100644
--- a/covid_api/db/static/datasets/no2-diff.json
+++ b/covid_api/db/static/datasets/no2-diff.json
@@ -3,7 +3,7 @@
   "name": "NO\u2082 (Diff)",
   "type": "raster-timeseries",
   "time_unit": "month",
-  "is_periodic": true,
+  "is_periodic": false,
   "s3_location": "OMNO2d_HRMDifference",
   "source": {
     "type": "raster",
diff --git a/covid_api/models/timelapse.py b/covid_api/models/timelapse.py
index 1e7dfa6..8275a1d 100644
--- a/covid_api/models/timelapse.py
+++ b/covid_api/models/timelapse.py
@@ -1,15 +1,8 @@
 """Tilelapse models."""
-import re
-from typing import List, Optional
 
 from geojson_pydantic.features import Feature
 from geojson_pydantic.geometries import Polygon
-from pydantic import BaseModel, validator
-
-
-def to_camel(s):
-    """Convert string s from `snake_case` to `camelCase`"""
-    return re.sub(r"(?!^)_([a-zA-Z])", lambda m: m.group(1).upper(), s)
+from pydantic import BaseModel
 
 
 class PolygonFeature(Feature):
@@ -19,35 +12,15 @@ class PolygonFeature(Feature):
 
 
 class TimelapseValue(BaseModel):
-    """ "Timelapse values model."""
+    """"Timelapse values model."""
 
-    mean: Optional[float]
-    median: Optional[float]
-    date: Optional[str]
-    error: Optional[str]
+    mean: float
+    median: float
 
 
 class TimelapseRequest(BaseModel):
-    """ "Timelapse request model."""
+    """"Timelapse request model."""
 
-    # TODO: parse this into a python `datetime` object (maybe using a validator?)
-    # TODO: validate that exactly one of `date` or `date_range` is supplied
-    date: Optional[str]
-    date_range: Optional[List[str]]
+    month: str
     geojson: PolygonFeature
-    dataset_id: str
-    spotlight_id: Optional[str]
-
-    @validator("date_range")
-    def validate_date_objects(cls, v):
-
-        """Validator"""
-        if not len(v) == 2:
-            raise ValueError("Field `dateRange` must contain exactly 2 dates")
-        return v
-
-    class Config:
-        """Generate alias to convert `camelCase` requests to `snake_case` fields to be used
-        within the code"""
-
-        alias_generator = to_camel
+    type: str
diff --git a/lambda/dataset_metadata_generator/src/main.py b/lambda/dataset_metadata_generator/src/main.py
index fb6a010..49abb49 100644
--- a/lambda/dataset_metadata_generator/src/main.py
+++ b/lambda/dataset_metadata_generator/src/main.py
@@ -27,6 +27,10 @@
 ).Bucket(BUCKET_NAME)
 
 
+DT_FORMAT = "%Y-%m-%d"
+MT_FORMAT = "%Y%m"
+
+
 def handler(event, context):
     """
     Params:
diff --git a/setup.py b/setup.py
index 4c8d413..1d32a67 100644
--- a/setup.py
+++ b/setup.py
@@ -18,7 +18,6 @@
     "geojson-pydantic",
     "requests",
     "mercantile",
-    "python-dateutil",
 ]
 extra_reqs = {
     "dev": ["pytest", "pytest-cov", "pytest-asyncio", "pre-commit"],