From a9ba359f15f5cf73f51104fc780d2f396f88148c Mon Sep 17 00:00:00 2001 From: Leo Thomas Date: Tue, 26 Jan 2021 16:17:09 -0500 Subject: [PATCH 1/9] implemented timelapse for arbitrary dataset + no datavalue filtering when calculating mean/median" --- covid_api/api/api_v1/endpoints/exceptions.py | 27 ++++++ covid_api/api/api_v1/endpoints/timelapse.py | 96 +++++++++++++++++++- covid_api/api/utils.py | 13 ++- covid_api/db/static/datasets/no2-diff.json | 2 +- covid_api/models/timelapse.py | 18 +++- 5 files changed, 146 insertions(+), 10 deletions(-) create mode 100644 covid_api/api/api_v1/endpoints/exceptions.py diff --git a/covid_api/api/api_v1/endpoints/exceptions.py b/covid_api/api/api_v1/endpoints/exceptions.py new file mode 100644 index 0000000..d5a1c5f --- /dev/null +++ b/covid_api/api/api_v1/endpoints/exceptions.py @@ -0,0 +1,27 @@ +"""Exceptions for the endpoints classes""" + + +class NonRasterDataset(Exception): + """Thrown if timelapse requested for a non-raster dataset""" + + pass + + +class UnableToExtractS3Url(Exception): + """Thrown if code is not ale to extract the S3 URL of the dataset """ + + pass + + +class InvalidDateFormat(Exception): + """Thrown if the timelapse request query contains a date that is not correctly + formatted for the given dataset """ + + pass + + +class MissingSpotlightId(Exception): + """Thrown if the timelapse request query is for a spotlight specific dataset, + but no spotlightId was supplied in the query """ + + pass diff --git a/covid_api/api/api_v1/endpoints/timelapse.py b/covid_api/api/api_v1/endpoints/timelapse.py index d050b0c..37fc314 100644 --- a/covid_api/api/api_v1/endpoints/timelapse.py +++ b/covid_api/api/api_v1/endpoints/timelapse.py @@ -1,10 +1,25 @@ """API metadata.""" +import re +from datetime import datetime +from covid_api.api.api_v1.endpoints.exceptions import ( + InvalidDateFormat, + MissingSpotlightId, + NonRasterDataset, + UnableToExtractS3Url, +) from covid_api.api.utils import get_zonal_stat +from covid_api.core.config import API_VERSION_STR, DT_FORMAT, MT_FORMAT +from covid_api.db.static.datasets import datasets as _datasets +from covid_api.db.static.errors import InvalidIdentifier +from covid_api.db.static.sites import sites +from covid_api.models.static import Dataset from covid_api.models.timelapse import TimelapseRequest, TimelapseValue from fastapi import APIRouter +from starlette.requests import Request + router = APIRouter() @@ -13,11 +28,82 @@ responses={200: {"description": "Return timelapse values for a given geometry"}}, response_model=TimelapseValue, ) -def timelapse(query: TimelapseRequest): +def timelapse(request: Request, query: TimelapseRequest): """Handle /timelapse requests.""" - if query.type == "no2": - url = f"s3://covid-eo-data/OMNO2d_HRM/OMI_trno2_0.10x0.10_{query.month}_Col3_V4.nc.tif" - else: - url = f"s3://covid-eo-data/xco2-mean/xco2_16day_mean.{query.month}.tif" + + # get dataset metadata for the requested dataset + # will be used to validate other parts of the query + dataset = _get_dataset_metadata(request, query) + + # extract S3 URL template from dataset metadata info + url = _extract_s3_url(dataset) + + # format S3 URL template with date object + url = _insert_date(url, dataset, query.date) + + # format S3 URL template with spotlightId, if dataset is + # spotlight specific + if "{spotlightId}" in url: + url = _insert_spotlight_id(url, query.spotlight_id) + mean, median = get_zonal_stat(query.geojson, url) return dict(mean=mean, median=median) + + +def _get_dataset_metadata(request: Request, query: TimelapseRequest): + + scheme = request.url.scheme + host = request.headers["host"] + + if API_VERSION_STR: + host += API_VERSION_STR + + dataset = list( + filter( + lambda d: d.id == query.dataset_id, + _datasets.get_all(api_url=f"{scheme}://{host}").datasets, + ) + ) + + if not dataset: + raise InvalidIdentifier + + dataset = dataset[0] + + if dataset.source.type != "raster": + raise NonRasterDataset + + return dataset + + +def _extract_s3_url(dataset: Dataset): + url_search = re.search(r"url=([^&\s]*)", dataset.source.tiles[0]) + if not url_search: + raise UnableToExtractS3Url + + return url_search.group(1) + + +def _insert_date(url: str, dataset: Dataset, date: str): + _validate_query_date(dataset, date) + + url = url.replace("{date}", date) + + +def _validate_query_date(dataset: Dataset, date: str): + date_format = DT_FORMAT if dataset.time_unit == "day" else MT_FORMAT + try: + datetime.strptime(date, date_format) + except ValueError: + raise InvalidDateFormat + + +def _insert_spotlight_id(url: str, spotlight_id: str): + if not spotlight_id: + raise MissingSpotlightId + try: + sites.get(spotlight_id) + except InvalidIdentifier: + raise + + url = url.replace("{spotlightId}", spotlight_id) diff --git a/covid_api/api/utils.py b/covid_api/api/utils.py index 177164d..8e7986c 100644 --- a/covid_api/api/utils.py +++ b/covid_api/api/utils.py @@ -213,17 +213,26 @@ def get_zonal_stat(geojson: Feature, raster: str) -> Tuple[float, float]: """Return zonal statistics.""" geom = shape(geojson.geometry.dict()) with rasterio.open(raster) as src: + # read the raster data matching the geometry bounds window = bounds_window(geom.bounds, src.transform) # store our window information & read window_affine = src.window_transform(window) data = src.read(window=window) - # calculate the coverage of pixels for weighting pctcover = rasterize_pctcover(geom, atrans=window_affine, shape=data.shape[1:]) + print("Data: ", data) + print("PctCover: ", pctcover) + print("Nodata val: ", src.nodata) + + # Create a mask of the data that filters out the tile's `nodata` value. In order + # to ensure the average calculation isn't incorrectly affected by large, negative, + # `nodata` values. + masked_data = np.ma.masked_not_equal(data[0], src.nodata) + return ( - np.average(data[0], weights=pctcover), + np.average(masked_data, weights=pctcover), np.nanmedian(data), ) diff --git a/covid_api/db/static/datasets/no2-diff.json b/covid_api/db/static/datasets/no2-diff.json index 62eb16d..32882bb 100644 --- a/covid_api/db/static/datasets/no2-diff.json +++ b/covid_api/db/static/datasets/no2-diff.json @@ -3,7 +3,7 @@ "name": "NO\u2082 (Diff)", "type": "raster-timeseries", "time_unit": "month", - "is_periodic": false, + "is_periodic": true, "s3_location": "OMNO2d_HRMDifference", "source": { "type": "raster", diff --git a/covid_api/models/timelapse.py b/covid_api/models/timelapse.py index 8275a1d..32093fb 100644 --- a/covid_api/models/timelapse.py +++ b/covid_api/models/timelapse.py @@ -1,10 +1,17 @@ """Tilelapse models.""" +import re +from typing import Optional from geojson_pydantic.features import Feature from geojson_pydantic.geometries import Polygon from pydantic import BaseModel +def to_camel(s): + """ Convert string s from `snake_case` to `camelCase` """ + return re.sub(r"(?!^)_([a-zA-Z])", lambda m: m.group(1).upper(), s) + + class PolygonFeature(Feature): """Feature model.""" @@ -21,6 +28,13 @@ class TimelapseValue(BaseModel): class TimelapseRequest(BaseModel): """"Timelapse request model.""" - month: str + date: str geojson: PolygonFeature - type: str + dataset_id: str + spotlight_id: Optional[str] + + class Config: + """Generate alias to convert `camelCase` requests to `snake_case` fields to be used + within the code """ + + alias_generator = to_camel From f3e5dc6f7b1822fda1f998a1303ddeb9bc8071cd Mon Sep 17 00:00:00 2001 From: Leo Thomas Date: Tue, 26 Jan 2021 16:44:10 -0500 Subject: [PATCH 2/9] minor fixes to masking function and helper functions in timelapse --- covid_api/api/api_v1/endpoints/timelapse.py | 5 ++--- covid_api/api/utils.py | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/covid_api/api/api_v1/endpoints/timelapse.py b/covid_api/api/api_v1/endpoints/timelapse.py index 37fc314..bfd8c56 100644 --- a/covid_api/api/api_v1/endpoints/timelapse.py +++ b/covid_api/api/api_v1/endpoints/timelapse.py @@ -86,8 +86,7 @@ def _extract_s3_url(dataset: Dataset): def _insert_date(url: str, dataset: Dataset, date: str): _validate_query_date(dataset, date) - - url = url.replace("{date}", date) + return url.replace("{date}", date) def _validate_query_date(dataset: Dataset, date: str): @@ -106,4 +105,4 @@ def _insert_spotlight_id(url: str, spotlight_id: str): except InvalidIdentifier: raise - url = url.replace("{spotlightId}", spotlight_id) + return url.replace("{spotlightId}", spotlight_id) diff --git a/covid_api/api/utils.py b/covid_api/api/utils.py index 8e7986c..356cdfe 100644 --- a/covid_api/api/utils.py +++ b/covid_api/api/utils.py @@ -229,7 +229,7 @@ def get_zonal_stat(geojson: Feature, raster: str) -> Tuple[float, float]: # Create a mask of the data that filters out the tile's `nodata` value. In order # to ensure the average calculation isn't incorrectly affected by large, negative, # `nodata` values. - masked_data = np.ma.masked_not_equal(data[0], src.nodata) + masked_data = np.ma.masked_equal(data[0], src.nodata) return ( np.average(masked_data, weights=pctcover), From ef9557afd946944214562f63d4155ce7b80d9d10 Mon Sep 17 00:00:00 2001 From: Leo Thomas Date: Wed, 27 Jan 2021 10:34:29 -0500 Subject: [PATCH 3/9] modified reserved concurrency to be 50 in non-prod stacks --- stack/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stack/config.py b/stack/config.py index 81c73f1..31934c9 100644 --- a/stack/config.py +++ b/stack/config.py @@ -41,7 +41,7 @@ ################################################################################ TIMEOUT: int = 10 MEMORY: int = 1536 -MAX_CONCURRENT: int = 500 +MAX_CONCURRENT: int = 500 if STAGE == "prod" else 50 # Cache CACHE_NODE_TYPE = "cache.m5.large" From 131dc38cd5a1974c4c33e05ca36c82553dfbf610 Mon Sep 17 00:00:00 2001 From: Leo Thomas Date: Wed, 27 Jan 2021 16:48:02 -0500 Subject: [PATCH 4/9] WIP --- lambda/dataset_metadata_generator/src/main.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/lambda/dataset_metadata_generator/src/main.py b/lambda/dataset_metadata_generator/src/main.py index 3cb95ff..b76fdfa 100644 --- a/lambda/dataset_metadata_generator/src/main.py +++ b/lambda/dataset_metadata_generator/src/main.py @@ -19,9 +19,6 @@ s3 = boto3.resource("s3") bucket = s3.Bucket(BUCKET_NAME) -DT_FORMAT = "%Y-%m-%d" -MT_FORMAT = "%Y%m" - def handler(event, context): """ From 0e826d3197db196ba3bff303c3374f5b0c5725ed Mon Sep 17 00:00:00 2001 From: Leo Thomas Date: Wed, 27 Jan 2021 21:00:25 -0500 Subject: [PATCH 5/9] fixed invalide `day` format in timelapse endpoint --- covid_api/api/api_v1/endpoints/timelapse.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/covid_api/api/api_v1/endpoints/timelapse.py b/covid_api/api/api_v1/endpoints/timelapse.py index bfd8c56..f9ce789 100644 --- a/covid_api/api/api_v1/endpoints/timelapse.py +++ b/covid_api/api/api_v1/endpoints/timelapse.py @@ -9,7 +9,7 @@ UnableToExtractS3Url, ) from covid_api.api.utils import get_zonal_stat -from covid_api.core.config import API_VERSION_STR, DT_FORMAT, MT_FORMAT +from covid_api.core.config import API_VERSION_STR from covid_api.db.static.datasets import datasets as _datasets from covid_api.db.static.errors import InvalidIdentifier from covid_api.db.static.sites import sites @@ -90,7 +90,7 @@ def _insert_date(url: str, dataset: Dataset, date: str): def _validate_query_date(dataset: Dataset, date: str): - date_format = DT_FORMAT if dataset.time_unit == "day" else MT_FORMAT + date_format = "%Y_%m_%d" if dataset.time_unit == "day" else "%Y%m" try: datetime.strptime(date, date_format) except ValueError: From 2630cc76aac3b009e147a129db86aec6d11169ef Mon Sep 17 00:00:00 2001 From: Leo Thomas Date: Fri, 5 Feb 2021 15:30:30 -0500 Subject: [PATCH 6/9] added error handling --- covid_api/api/api_v1/endpoints/detections.py | 2 ++ covid_api/api/api_v1/endpoints/exceptions.py | 27 -------------- covid_api/api/api_v1/endpoints/timelapse.py | 38 ++++++++++++-------- covid_api/api/utils.py | 5 +-- 4 files changed, 27 insertions(+), 45 deletions(-) delete mode 100644 covid_api/api/api_v1/endpoints/exceptions.py diff --git a/covid_api/api/api_v1/endpoints/detections.py b/covid_api/api/api_v1/endpoints/detections.py index 669b29e..c6409cf 100644 --- a/covid_api/api/api_v1/endpoints/detections.py +++ b/covid_api/api/api_v1/endpoints/detections.py @@ -32,5 +32,7 @@ def get_detection(ml_type: MLTypes, site: SiteNames, date: str): key=f"detections-{ml_type.value}/{site.value}/{date}.geojson", ) ) + # TODO: catch the specific exception that corresponds to a missing file + # and raise 404, otherwise raise a generic 500 error. except Exception: raise HTTPException(status_code=404, detail="Detections not found") diff --git a/covid_api/api/api_v1/endpoints/exceptions.py b/covid_api/api/api_v1/endpoints/exceptions.py deleted file mode 100644 index d5a1c5f..0000000 --- a/covid_api/api/api_v1/endpoints/exceptions.py +++ /dev/null @@ -1,27 +0,0 @@ -"""Exceptions for the endpoints classes""" - - -class NonRasterDataset(Exception): - """Thrown if timelapse requested for a non-raster dataset""" - - pass - - -class UnableToExtractS3Url(Exception): - """Thrown if code is not ale to extract the S3 URL of the dataset """ - - pass - - -class InvalidDateFormat(Exception): - """Thrown if the timelapse request query contains a date that is not correctly - formatted for the given dataset """ - - pass - - -class MissingSpotlightId(Exception): - """Thrown if the timelapse request query is for a spotlight specific dataset, - but no spotlightId was supplied in the query """ - - pass diff --git a/covid_api/api/api_v1/endpoints/timelapse.py b/covid_api/api/api_v1/endpoints/timelapse.py index f9ce789..fd72883 100644 --- a/covid_api/api/api_v1/endpoints/timelapse.py +++ b/covid_api/api/api_v1/endpoints/timelapse.py @@ -2,12 +2,6 @@ import re from datetime import datetime -from covid_api.api.api_v1.endpoints.exceptions import ( - InvalidDateFormat, - MissingSpotlightId, - NonRasterDataset, - UnableToExtractS3Url, -) from covid_api.api.utils import get_zonal_stat from covid_api.core.config import API_VERSION_STR from covid_api.db.static.datasets import datasets as _datasets @@ -16,7 +10,7 @@ from covid_api.models.static import Dataset from covid_api.models.timelapse import TimelapseRequest, TimelapseValue -from fastapi import APIRouter +from fastapi import APIRouter, HTTPException from starlette.requests import Request @@ -45,8 +39,14 @@ def timelapse(request: Request, query: TimelapseRequest): # spotlight specific if "{spotlightId}" in url: url = _insert_spotlight_id(url, query.spotlight_id) + try: + mean, median = get_zonal_stat(query.geojson, url) + except ValueError: + raise HTTPException( + status_code=400, + detail="Unable to calculate mean/median values. This is likely due to a bounding box extending beyond the borders of the tile.", + ) - mean, median = get_zonal_stat(query.geojson, url) return dict(mean=mean, median=median) @@ -66,12 +66,17 @@ def _get_dataset_metadata(request: Request, query: TimelapseRequest): ) if not dataset: - raise InvalidIdentifier + raise HTTPException( + status_code=404, detail=f"No dataset found for id: {query.dataset_id}" + ) dataset = dataset[0] if dataset.source.type != "raster": - raise NonRasterDataset + raise HTTPException( + status_code=400, + detail=f"Dataset {query.dataset_id} is not a raster-type dataset", + ) return dataset @@ -79,7 +84,7 @@ def _get_dataset_metadata(request: Request, query: TimelapseRequest): def _extract_s3_url(dataset: Dataset): url_search = re.search(r"url=([^&\s]*)", dataset.source.tiles[0]) if not url_search: - raise UnableToExtractS3Url + raise HTTPException(status_code=500) return url_search.group(1) @@ -94,15 +99,20 @@ def _validate_query_date(dataset: Dataset, date: str): try: datetime.strptime(date, date_format) except ValueError: - raise InvalidDateFormat + raise HTTPException( + status_code=400, + detail=f"Invalid date format. {date} should be either YYYY_MM_DD or YYYYMM", + ) def _insert_spotlight_id(url: str, spotlight_id: str): if not spotlight_id: - raise MissingSpotlightId + raise HTTPException(status_code=400, detail="Missing spotlightId") try: sites.get(spotlight_id) except InvalidIdentifier: - raise + raise HTTPException( + status_code=404, detail=f"No spotlight found for id: {spotlight_id}" + ) return url.replace("{spotlightId}", spotlight_id) diff --git a/covid_api/api/utils.py b/covid_api/api/utils.py index 356cdfe..645fa2e 100644 --- a/covid_api/api/utils.py +++ b/covid_api/api/utils.py @@ -219,13 +219,10 @@ def get_zonal_stat(geojson: Feature, raster: str) -> Tuple[float, float]: # store our window information & read window_affine = src.window_transform(window) data = src.read(window=window) + # calculate the coverage of pixels for weighting pctcover = rasterize_pctcover(geom, atrans=window_affine, shape=data.shape[1:]) - print("Data: ", data) - print("PctCover: ", pctcover) - print("Nodata val: ", src.nodata) - # Create a mask of the data that filters out the tile's `nodata` value. In order # to ensure the average calculation isn't incorrectly affected by large, negative, # `nodata` values. From 6f7617133059c5bf0aec12e4787db7e9acb8224f Mon Sep 17 00:00:00 2001 From: Leo Thomas Date: Fri, 21 May 2021 13:55:11 -0700 Subject: [PATCH 7/9] WIP - working on date range timelapse endpoint --- covid_api/api/api_v1/endpoints/timelapse.py | 99 +++++++++++++++++---- covid_api/api/utils.py | 1 - covid_api/models/timelapse.py | 26 ++++-- setup.py | 1 + 4 files changed, 100 insertions(+), 27 deletions(-) diff --git a/covid_api/api/api_v1/endpoints/timelapse.py b/covid_api/api/api_v1/endpoints/timelapse.py index fd72883..e20e3e0 100644 --- a/covid_api/api/api_v1/endpoints/timelapse.py +++ b/covid_api/api/api_v1/endpoints/timelapse.py @@ -1,6 +1,10 @@ """API metadata.""" import re -from datetime import datetime +from concurrent import futures +from datetime import datetime, timedelta +from typing import List, Union + +from dateutil.relativedelta import relativedelta from covid_api.api.utils import get_zonal_stat from covid_api.core.config import API_VERSION_STR @@ -17,30 +21,22 @@ router = APIRouter() -@router.post( - "/timelapse", - responses={200: {"description": "Return timelapse values for a given geometry"}}, - response_model=TimelapseValue, -) -def timelapse(request: Request, query: TimelapseRequest): - """Handle /timelapse requests.""" - - # get dataset metadata for the requested dataset - # will be used to validate other parts of the query - dataset = _get_dataset_metadata(request, query) - - # extract S3 URL template from dataset metadata info - url = _extract_s3_url(dataset) - - # format S3 URL template with date object - url = _insert_date(url, dataset, query.date) +# TODO: validate inputs with typing/pydantic models +def _get_mean_median(query, url, dataset): # format S3 URL template with spotlightId, if dataset is # spotlight specific if "{spotlightId}" in url: + if not query.spotlight_id: + raise HTTPException( + status_code=400, + detail=f"Must provide a `spotlight_id` for dataset: {dataset.id}", + ) url = _insert_spotlight_id(url, query.spotlight_id) try: + print("REQUESTING ZONAL STATS for URL", url) mean, median = get_zonal_stat(query.geojson, url) + print("DONE! ", mean, median) except ValueError: raise HTTPException( status_code=400, @@ -50,6 +46,71 @@ def timelapse(request: Request, query: TimelapseRequest): return dict(mean=mean, median=median) +@router.post( + "/timelapse", + responses={200: {"description": "Return timelapse values for a given geometry"}}, + response_model=Union[TimelapseValue, List[TimelapseValue]], +) +def timelapse(request: Request, query: TimelapseRequest): + """Handle /timelapse requests.""" + + # get dataset metadata for the requested dataset + # will be used to validate other parts of the query + dataset = _get_dataset_metadata(request, query) + + # extract S3 URL template from dataset metadata info + url = _extract_s3_url(dataset) + + if query.date: + print("SINGE DATE IN QUERY - calculating") + + # format S3 URL template with date object + url = _insert_date(url, dataset, query.date) + print("URL: ", url) + return _get_mean_median(query, url, dataset) + + if query.date_range: + + start = _validate_query_date(dataset, query.date_range[0]) + + end = _validate_query_date(dataset, query.date_range[1]) + + if dataset.time_unit == "day": + # Add 1 to days to ensure it contains the end date as well + dates = [ + datetime.strftime((start + timedelta(days=x)), "%Y_%m_%d") + for x in range(0, (end - start).days + 1) + ] + + if dataset.time_unit == "month": + num_months = (end.year - start.year) * 12 + (end.month - start.month) + dates = [ + datetime.strftime((start + relativedelta(months=+x)), "%Y%m") + for x in range(0, num_months + 1) + ] + print("DATES TO QUERY: ", dates) + + stats = [] + with futures.ThreadPoolExecutor(max_workers=15) as executor: + future_stats_queries = { + executor.submit( + _get_mean_median, query, _insert_date(url, dataset, date), dataset + ): date + for date in dates + } + print("FUTURE stats queries: ", future_stats_queries) + for future in futures.as_completed(future_stats_queries): + date = future_stats_queries[future] + print("FROM FUTURE: ", date) + try: + print("RESULT: ", future.result()) + stats.append({"date": date, **future.result()}) + except HTTPException as e: + stats.append({"date": date, "error": e.detail}) + print("STATS TO BE RETURNED: ", stats) + return stats + + def _get_dataset_metadata(request: Request, query: TimelapseRequest): scheme = request.url.scheme @@ -97,7 +158,7 @@ def _insert_date(url: str, dataset: Dataset, date: str): def _validate_query_date(dataset: Dataset, date: str): date_format = "%Y_%m_%d" if dataset.time_unit == "day" else "%Y%m" try: - datetime.strptime(date, date_format) + return datetime.strptime(date, date_format) except ValueError: raise HTTPException( status_code=400, diff --git a/covid_api/api/utils.py b/covid_api/api/utils.py index 1c48d6e..8c2d406 100644 --- a/covid_api/api/utils.py +++ b/covid_api/api/utils.py @@ -214,7 +214,6 @@ def get_zonal_stat(geojson: Feature, raster: str) -> Tuple[float, float]: """Return zonal statistics.""" geom = shape(geojson.geometry.dict()) with rasterio.open(raster) as src: - # read the raster data matching the geometry bounds window = bounds_window(geom.bounds, src.transform) # store our window information & read diff --git a/covid_api/models/timelapse.py b/covid_api/models/timelapse.py index 32093fb..22f5518 100644 --- a/covid_api/models/timelapse.py +++ b/covid_api/models/timelapse.py @@ -1,14 +1,14 @@ """Tilelapse models.""" import re -from typing import Optional +from typing import List, Optional from geojson_pydantic.features import Feature from geojson_pydantic.geometries import Polygon -from pydantic import BaseModel +from pydantic import BaseModel, validator def to_camel(s): - """ Convert string s from `snake_case` to `camelCase` """ + """Convert string s from `snake_case` to `camelCase`""" return re.sub(r"(?!^)_([a-zA-Z])", lambda m: m.group(1).upper(), s) @@ -19,22 +19,34 @@ class PolygonFeature(Feature): class TimelapseValue(BaseModel): - """"Timelapse values model.""" + """ "Timelapse values model.""" mean: float median: float + date: Optional[str] + error: Optional[str] class TimelapseRequest(BaseModel): - """"Timelapse request model.""" + """ "Timelapse request model.""" - date: str + # TODO: parse this into a python `datetime` object (maybe using a validator? ) + # TODO: validate that exactly one of `date` or `date_range` is supplied + date: Optional[str] + date_range: Optional[List[str]] geojson: PolygonFeature dataset_id: str spotlight_id: Optional[str] + @validator("date_range") + def validate_date_objects(cls, v): + """Validator""" + if not len(v) == 2: + raise ValueError("Field `dateRange` must contain exactly 2 dates") + return v + class Config: """Generate alias to convert `camelCase` requests to `snake_case` fields to be used - within the code """ + within the code""" alias_generator = to_camel diff --git a/setup.py b/setup.py index 1d32a67..4c8d413 100644 --- a/setup.py +++ b/setup.py @@ -18,6 +18,7 @@ "geojson-pydantic", "requests", "mercantile", + "python-dateutil", ] extra_reqs = { "dev": ["pytest", "pytest-cov", "pytest-asyncio", "pre-commit"], From 700198424ed599bfb58b4b84ebaa44ffbb93ab40 Mon Sep 17 00:00:00 2001 From: Leo Thomas Date: Fri, 21 May 2021 14:05:21 -0700 Subject: [PATCH 8/9] works with No2 - very slow with nightlights --- covid_api/api/api_v1/endpoints/timelapse.py | 5 +---- covid_api/models/timelapse.py | 1 + 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/covid_api/api/api_v1/endpoints/timelapse.py b/covid_api/api/api_v1/endpoints/timelapse.py index e20e3e0..b9fd4b6 100644 --- a/covid_api/api/api_v1/endpoints/timelapse.py +++ b/covid_api/api/api_v1/endpoints/timelapse.py @@ -62,17 +62,14 @@ def timelapse(request: Request, query: TimelapseRequest): url = _extract_s3_url(dataset) if query.date: - print("SINGE DATE IN QUERY - calculating") # format S3 URL template with date object url = _insert_date(url, dataset, query.date) - print("URL: ", url) return _get_mean_median(query, url, dataset) if query.date_range: start = _validate_query_date(dataset, query.date_range[0]) - end = _validate_query_date(dataset, query.date_range[1]) if dataset.time_unit == "day": @@ -91,7 +88,7 @@ def timelapse(request: Request, query: TimelapseRequest): print("DATES TO QUERY: ", dates) stats = [] - with futures.ThreadPoolExecutor(max_workers=15) as executor: + with futures.ThreadPoolExecutor(max_workers=10) as executor: future_stats_queries = { executor.submit( _get_mean_median, query, _insert_date(url, dataset, date), dataset diff --git a/covid_api/models/timelapse.py b/covid_api/models/timelapse.py index 22f5518..9237cda 100644 --- a/covid_api/models/timelapse.py +++ b/covid_api/models/timelapse.py @@ -40,6 +40,7 @@ class TimelapseRequest(BaseModel): @validator("date_range") def validate_date_objects(cls, v): + """Validator""" if not len(v) == 2: raise ValueError("Field `dateRange` must contain exactly 2 dates") From bccd41f70f43b03c63916f04d512514429585c19 Mon Sep 17 00:00:00 2001 From: Leo Thomas Date: Mon, 24 May 2021 17:40:14 -0700 Subject: [PATCH 9/9] WIP - added threaded timelapse for date-range enabled request --- covid_api/api/api_v1/endpoints/timelapse.py | 44 ++++++++++++--------- covid_api/models/timelapse.py | 4 +- 2 files changed, 27 insertions(+), 21 deletions(-) diff --git a/covid_api/api/api_v1/endpoints/timelapse.py b/covid_api/api/api_v1/endpoints/timelapse.py index b9fd4b6..b2457a4 100644 --- a/covid_api/api/api_v1/endpoints/timelapse.py +++ b/covid_api/api/api_v1/endpoints/timelapse.py @@ -34,22 +34,25 @@ def _get_mean_median(query, url, dataset): ) url = _insert_spotlight_id(url, query.spotlight_id) try: - print("REQUESTING ZONAL STATS for URL", url) mean, median = get_zonal_stat(query.geojson, url) - print("DONE! ", mean, median) - except ValueError: + return dict(mean=mean, median=median) + + except Exception: raise HTTPException( status_code=400, - detail="Unable to calculate mean/median values. This is likely due to a bounding box extending beyond the borders of the tile.", + detail=( + "Unable to calculate mean/median values. This either due to a bounding box " + "extending beyond the edges of the COG or there are no COGs available for the " + "requested date range." + ), ) - return dict(mean=mean, median=median) - @router.post( "/timelapse", responses={200: {"description": "Return timelapse values for a given geometry"}}, - response_model=Union[TimelapseValue, List[TimelapseValue]], + response_model=Union[List[TimelapseValue], TimelapseValue], + response_model_exclude_none=True, ) def timelapse(request: Request, query: TimelapseRequest): """Handle /timelapse requests.""" @@ -67,45 +70,48 @@ def timelapse(request: Request, query: TimelapseRequest): url = _insert_date(url, dataset, query.date) return _get_mean_median(query, url, dataset) + # Gather a list of dates to query if query.date_range: - start = _validate_query_date(dataset, query.date_range[0]) - end = _validate_query_date(dataset, query.date_range[1]) - if dataset.time_unit == "day": - # Add 1 to days to ensure it contains the end date as well + # Get start and end dates + start = datetime.strptime(query.date_range[0], "%Y_%m_%d") + end = datetime.strptime(query.date_range[1], "%Y_%m_%d") + + # Populated all days in between Add 1 to days to ensure it contains the end date as well dates = [ datetime.strftime((start + timedelta(days=x)), "%Y_%m_%d") for x in range(0, (end - start).days + 1) ] if dataset.time_unit == "month": + # Get start and end dates, as a + start = datetime.strptime(query.date_range[0], "%Y%m") + end = datetime.strptime(query.date_range[1], "%Y%m") num_months = (end.year - start.year) * 12 + (end.month - start.month) dates = [ datetime.strftime((start + relativedelta(months=+x)), "%Y%m") for x in range(0, num_months + 1) ] - print("DATES TO QUERY: ", dates) - stats = [] - with futures.ThreadPoolExecutor(max_workers=10) as executor: + with futures.ThreadPoolExecutor(max_workers=15) as executor: future_stats_queries = { executor.submit( _get_mean_median, query, _insert_date(url, dataset, date), dataset ): date for date in dates } - print("FUTURE stats queries: ", future_stats_queries) + + stats = [] + for future in futures.as_completed(future_stats_queries): date = future_stats_queries[future] - print("FROM FUTURE: ", date) try: - print("RESULT: ", future.result()) stats.append({"date": date, **future.result()}) except HTTPException as e: + stats.append({"date": date, "error": e.detail}) - print("STATS TO BE RETURNED: ", stats) - return stats + return stats def _get_dataset_metadata(request: Request, query: TimelapseRequest): diff --git a/covid_api/models/timelapse.py b/covid_api/models/timelapse.py index 9237cda..1e7dfa6 100644 --- a/covid_api/models/timelapse.py +++ b/covid_api/models/timelapse.py @@ -21,8 +21,8 @@ class PolygonFeature(Feature): class TimelapseValue(BaseModel): """ "Timelapse values model.""" - mean: float - median: float + mean: Optional[float] + median: Optional[float] date: Optional[str] error: Optional[str]