Merge pull request #129 from NASA-IMPACT/revert-113-feature/abstract-timelapse-endpoint

Revert "Feature: abstract timelapse endpoint"
leothomas authored Jun 10, 2021
2 parents 19f7d88 + 0458aaa commit ad136d7
Showing 7 changed files with 22 additions and 212 deletions.
2 changes: 0 additions & 2 deletions covid_api/api/api_v1/endpoints/detections.py
@@ -33,7 +33,5 @@ def get_detection(ml_type: MLTypes, site: SiteNames, date: str):
                 key=f"detections-{ml_type.value}/{site.value}/{date}.geojson",
             )
         )
-    # TODO: catch the specific exception that corresponds to a missing file
-    # and raise 404, otherwise raise a generic 500 error.
     except Exception:
         raise HTTPException(status_code=404, detail="Detections not found")
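
Note: the deleted TODO describes distinguishing a missing file (404) from any other failure (500) instead of mapping every exception to 404. A minimal sketch of that idea, assuming `s3_get` propagates botocore's `ClientError` (`s3_get`, `INDICATOR_BUCKET`, and the path variables come from detections.py; the error-code check itself is an assumption, not code from this repository):

    from botocore.exceptions import ClientError
    from fastapi import HTTPException

    try:
        geojson = s3_get(
            bucket=INDICATOR_BUCKET,
            key=f"detections-{ml_type.value}/{site.value}/{date}.geojson",
        )
    except ClientError as e:
        # botocore reports a missing S3 object as "NoSuchKey" (or "404" from head_object)
        if e.response["Error"]["Code"] in ("NoSuchKey", "404"):
            raise HTTPException(status_code=404, detail="Detections not found")
        raise HTTPException(status_code=500, detail="Error fetching detections")
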
177 changes: 9 additions & 168 deletions covid_api/api/api_v1/endpoints/timelapse.py
@@ -1,182 +1,23 @@
 """API metadata."""
-import re
-from concurrent import futures
-from datetime import datetime, timedelta
-from typing import List, Union
-
-from dateutil.relativedelta import relativedelta
-
 from covid_api.api.utils import get_zonal_stat
-from covid_api.core.config import API_VERSION_STR
-from covid_api.db.static.datasets import datasets as _datasets
-from covid_api.db.static.errors import InvalidIdentifier
-from covid_api.db.static.sites import sites
-from covid_api.models.static import Dataset
 from covid_api.models.timelapse import TimelapseRequest, TimelapseValue
 
-from fastapi import APIRouter, HTTPException
-
-from starlette.requests import Request
+from fastapi import APIRouter
 
 router = APIRouter()
 
 
-# TODO: validate inputs with typing/pydantic models
-def _get_mean_median(query, url, dataset):
-
-    # format S3 URL template with spotlightId, if dataset is
-    # spotlight specific
-    if "{spotlightId}" in url:
-        if not query.spotlight_id:
-            raise HTTPException(
-                status_code=400,
-                detail=f"Must provide a `spotlight_id` for dataset: {dataset.id}",
-            )
-        url = _insert_spotlight_id(url, query.spotlight_id)
-    try:
-        mean, median = get_zonal_stat(query.geojson, url)
-        return dict(mean=mean, median=median)
-
-    except Exception:
-        raise HTTPException(
-            status_code=400,
-            detail=(
-                "Unable to calculate mean/median values. This either due to a bounding box "
-                "extending beyond the edges of the COG or there are no COGs available for the "
-                "requested date range."
-            ),
-        )
-
-
 @router.post(
     "/timelapse",
     responses={200: {"description": "Return timelapse values for a given geometry"}},
-    response_model=Union[List[TimelapseValue], TimelapseValue],
-    response_model_exclude_none=True,
+    response_model=TimelapseValue,
 )
-def timelapse(request: Request, query: TimelapseRequest):
+def timelapse(query: TimelapseRequest):
     """Handle /timelapse requests."""
-
-    # get dataset metadata for the requested dataset
-    # will be used to validate other parts of the query
-    dataset = _get_dataset_metadata(request, query)
-
-    # extract S3 URL template from dataset metadata info
-    url = _extract_s3_url(dataset)
-
-    if query.date:
-
-        # format S3 URL template with date object
-        url = _insert_date(url, dataset, query.date)
-        return _get_mean_median(query, url, dataset)
-
-    # Gather a list of dates to query
-    if query.date_range:
-
-        if dataset.time_unit == "day":
-            # Get start and end dates
-            start = datetime.strptime(query.date_range[0], "%Y_%m_%d")
-            end = datetime.strptime(query.date_range[1], "%Y_%m_%d")
-
-            # Populated all days in between Add 1 to days to ensure it contains the end date as well
-            dates = [
-                datetime.strftime((start + timedelta(days=x)), "%Y_%m_%d")
-                for x in range(0, (end - start).days + 1)
-            ]
-
-        if dataset.time_unit == "month":
-            # Get start and end dates, as a
-            start = datetime.strptime(query.date_range[0], "%Y%m")
-            end = datetime.strptime(query.date_range[1], "%Y%m")
-            num_months = (end.year - start.year) * 12 + (end.month - start.month)
-            dates = [
-                datetime.strftime((start + relativedelta(months=+x)), "%Y%m")
-                for x in range(0, num_months + 1)
-            ]
-
-        with futures.ThreadPoolExecutor(max_workers=15) as executor:
-            future_stats_queries = {
-                executor.submit(
-                    _get_mean_median, query, _insert_date(url, dataset, date), dataset
-                ): date
-                for date in dates
-            }
-
-            stats = []
-
-            for future in futures.as_completed(future_stats_queries):
-                date = future_stats_queries[future]
-                try:
-                    stats.append({"date": date, **future.result()})
-                except HTTPException as e:
-
-                    stats.append({"date": date, "error": e.detail})
-        return stats
-
-
-def _get_dataset_metadata(request: Request, query: TimelapseRequest):
-
-    scheme = request.url.scheme
-    host = request.headers["host"]
-
-    if API_VERSION_STR:
-        host += API_VERSION_STR
-
-    dataset = list(
-        filter(
-            lambda d: d.id == query.dataset_id,
-            _datasets.get_all(api_url=f"{scheme}://{host}").datasets,
-        )
-    )
-
-    if not dataset:
-        raise HTTPException(
-            status_code=404, detail=f"No dataset found for id: {query.dataset_id}"
-        )
-
-    dataset = dataset[0]
-
-    if dataset.source.type != "raster":
-        raise HTTPException(
-            status_code=400,
-            detail=f"Dataset {query.dataset_id} is not a raster-type dataset",
-        )
-
-    return dataset
-
-
-def _extract_s3_url(dataset: Dataset):
-    url_search = re.search(r"url=([^&\s]*)", dataset.source.tiles[0])
-    if not url_search:
-        raise HTTPException(status_code=500)
-
-    return url_search.group(1)
-
-
-def _insert_date(url: str, dataset: Dataset, date: str):
-    _validate_query_date(dataset, date)
-    return url.replace("{date}", date)
-
-
-def _validate_query_date(dataset: Dataset, date: str):
-    date_format = "%Y_%m_%d" if dataset.time_unit == "day" else "%Y%m"
-    try:
-        return datetime.strptime(date, date_format)
-    except ValueError:
-        raise HTTPException(
-            status_code=400,
-            detail=f"Invalid date format. {date} should be either YYYY_MM_DD or YYYYMM",
-        )
-
-
-def _insert_spotlight_id(url: str, spotlight_id: str):
-    if not spotlight_id:
-        raise HTTPException(status_code=400, detail="Missing spotlightId")
-    try:
-        sites.get(spotlight_id)
-    except InvalidIdentifier:
-        raise HTTPException(
-            status_code=404, detail=f"No spotlight found for id: {spotlight_id}"
-        )
-
-    return url.replace("{spotlightId}", spotlight_id)
+    if query.type == "no2":
+        url = f"s3://covid-eo-data/OMNO2d_HRM/OMI_trno2_0.10x0.10_{query.month}_Col3_V4.nc.tif"
+    else:
+        url = f"s3://covid-eo-data/xco2-mean/xco2_16day_mean.{query.month}.tif"
+    mean, median = get_zonal_stat(query.geojson, url)
+    return dict(mean=mean, median=median)
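
Note: for reference, a hedged example of calling the restored endpoint. The host, version prefix, month value, and polygon coordinates are illustrative assumptions; only the field names (`type`, `month`, `geojson`) and the response shape come from the code above:

    import requests

    body = {
        "type": "no2",  # anything other than "no2" falls through to the xco2 URL template
        "month": "202005",  # assumed to match the {query.month} slot in the S3 key
        "geojson": {
            "type": "Feature",
            "properties": {},
            "geometry": {
                "type": "Polygon",
                "coordinates": [
                    [[-74.3, 40.5], [-73.7, 40.5], [-73.7, 40.9], [-74.3, 40.9], [-74.3, 40.5]]
                ],
            },
        },
    }

    # assumed local deployment; the real host and API prefix may differ
    resp = requests.post("http://localhost:8000/v1/timelapse", json=body)
    print(resp.json())  # e.g. {"mean": ..., "median": ...}
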
7 changes: 1 addition & 6 deletions covid_api/api/utils.py
@@ -223,13 +223,8 @@ def get_zonal_stat(geojson: Feature, raster: str) -> Tuple[float, float]:
     # calculate the coverage of pixels for weighting
     pctcover = rasterize_pctcover(geom, atrans=window_affine, shape=data.shape[1:])
 
-    # Create a mask of the data that filters out the tile's `nodata` value. In order
-    # to ensure the average calculation isn't incorrectly affected by large, negative,
-    # `nodata` values.
-    masked_data = np.ma.masked_equal(data[0], src.nodata)
-
     return (
-        np.average(masked_data, weights=pctcover),
+        np.average(data[0], weights=pctcover),
         np.nanmedian(data),
     )

2 changes: 1 addition & 1 deletion covid_api/db/static/datasets/no2-diff.json
@@ -3,7 +3,7 @@
   "name": "NO\u2082 (Diff)",
   "type": "raster-timeseries",
   "time_unit": "month",
-  "is_periodic": true,
+  "is_periodic": false,
   "s3_location": "OMNO2d_HRMDifference",
   "source": {
     "type": "raster",
41 changes: 7 additions & 34 deletions covid_api/models/timelapse.py
@@ -1,15 +1,8 @@
 """Tilelapse models."""
-import re
-from typing import List, Optional
 
 from geojson_pydantic.features import Feature
 from geojson_pydantic.geometries import Polygon
-from pydantic import BaseModel, validator
-
-
-def to_camel(s):
-    """Convert string s from `snake_case` to `camelCase`"""
-    return re.sub(r"(?!^)_([a-zA-Z])", lambda m: m.group(1).upper(), s)
+from pydantic import BaseModel
 
 
 class PolygonFeature(Feature):
@@ -19,35 +12,15 @@ class PolygonFeature(Feature):
 
 
 class TimelapseValue(BaseModel):
-    """ "Timelapse values model."""
+    """"Timelapse values model."""
 
-    mean: Optional[float]
-    median: Optional[float]
-    date: Optional[str]
-    error: Optional[str]
+    mean: float
+    median: float
 
 
 class TimelapseRequest(BaseModel):
-    """ "Timelapse request model."""
+    """"Timelapse request model."""
 
-    # TODO: parse this into a python `datetime` object (maybe using a validator? )
-    # TODO: validate that exactly one of `date` or `date_range` is supplied
-    date: Optional[str]
-    date_range: Optional[List[str]]
+    month: str
     geojson: PolygonFeature
-    dataset_id: str
-    spotlight_id: Optional[str]
-
-    @validator("date_range")
-    def validate_date_objects(cls, v):
-
-        """Validator"""
-        if not len(v) == 2:
-            raise ValueError("Field `dateRange` must contain exactly 2 dates")
-        return v
-
-    class Config:
-        """Generate alias to convert `camelCase` requests to `snake_case` fields to be used
-        within the code"""
-
-        alias_generator = to_camel
+    type: str
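
Note: the deleted `to_camel` helper and `Config.alias_generator` let clients send camelCase keys (`dateRange`, `spotlightId`) while the code used snake_case fields. A self-contained sketch of that behaviour, in pydantic v1 style to match the era of this code; the model and values here are illustrative, not the repository's:

    import re
    from typing import List, Optional

    from pydantic import BaseModel

    def to_camel(s):
        """Convert string s from `snake_case` to `camelCase` (same regex as the deleted helper)."""
        return re.sub(r"(?!^)_([a-zA-Z])", lambda m: m.group(1).upper(), s)

    class ExampleQuery(BaseModel):
        date_range: Optional[List[str]]
        spotlight_id: Optional[str]

        class Config:
            # every field gains a camelCase alias, so requests use camelCase keys
            alias_generator = to_camel

    q = ExampleQuery(**{"dateRange": ["2020_01_01", "2020_01_31"], "spotlightId": "ny"})
    print(q.date_range, q.spotlight_id)  # ['2020_01_01', '2020_01_31'] ny
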
4 changes: 4 additions & 0 deletions lambda/dataset_metadata_generator/src/main.py
@@ -27,6 +27,10 @@
 ).Bucket(BUCKET_NAME)
 
 
+DT_FORMAT = "%Y-%m-%d"
+MT_FORMAT = "%Y%m"
+
+
 def handler(event, context):
     """
     Params:
1 change: 0 additions & 1 deletion setup.py
@@ -18,7 +18,6 @@
     "geojson-pydantic",
     "requests",
     "mercantile",
-    "python-dateutil",
 ]
 extra_reqs = {
     "dev": ["pytest", "pytest-cov", "pytest-asyncio", "pre-commit"],