Skip to content

Commit

Permalink
update to most recent COG validate script
Browse files Browse the repository at this point in the history
  • Loading branch information
dionhaefner committed Jan 17, 2022
1 parent f455edd commit da2bc5b
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 17 deletions.
37 changes: 23 additions & 14 deletions terracotta/cog.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def validate(src_path: str, strict: bool = True) -> bool:
def check_raster_file(src_path: str) -> ValidationInfo: # pragma: no cover
"""
Implementation from
https://github.com/cogeotiff/rio-cogeo/blob/0f00a6ee1eff602014fbc88178a069bd9f4a10da/rio_cogeo/cogeo.py
https://github.com/cogeotiff/rio-cogeo/blob/a07d914e2d898878417638bbc089179f01eb5b28/rio_cogeo/cogeo.py#L385
This function is the rasterio equivalent of
https://svn.osgeo.org/gdal/trunk/gdal/swig/python/samples/validate_cloud_optimized_geotiff.py
Expand All @@ -44,15 +44,13 @@ def check_raster_file(src_path: str) -> ValidationInfo: # pragma: no cover
errors.append('The file is not a GeoTIFF')
return errors, warnings, details

filelist = [os.path.basename(f) for f in src.files]
src_bname = os.path.basename(src_path)
if len(filelist) > 1 and src_bname + '.ovr' in filelist:
if any(os.path.splitext(x)[-1] == '.ovr' for x in src.files):
errors.append(
'Overviews found in external .ovr file. They should be internal'
)

overviews = src.overviews(1)
if src.width >= 512 or src.height >= 512:
if src.width > 512 and src.height > 512:
if not src.is_tiled:
errors.append(
'The file is greater than 512xH or 512xW, but is not tiled'
Expand All @@ -65,16 +63,28 @@ def check_raster_file(src_path: str) -> ValidationInfo: # pragma: no cover
)

ifd_offset = int(src.get_tag_item('IFD_OFFSET', 'TIFF', bidx=1))
ifd_offsets = [ifd_offset]
# Starting from GDAL 3.1, GeoTIFF and COG have ghost headers
# e.g:
# """
# GDAL_STRUCTURAL_METADATA_SIZE=000140 bytes
# LAYOUT=IFDS_BEFORE_DATA
# BLOCK_ORDER=ROW_MAJOR
# BLOCK_LEADER=SIZE_AS_UINT4
# BLOCK_TRAILER=LAST_4_BYTES_REPEATED
# KNOWN_INCOMPATIBLE_EDITION=NO
# """
#
# This header should be < 200 bytes
if ifd_offset > 300:
errors.append(
f'The offset of the main IFD should be < 300. It is {ifd_offset} instead'
)

ifd_offsets = [ifd_offset]
details['ifd_offsets'] = {}
details['ifd_offsets']['main'] = ifd_offset

if not overviews == sorted(overviews):
if overviews and overviews != sorted(overviews):
errors.append('Overviews should be sorted')

for ix, dec in enumerate(overviews):
Expand Down Expand Up @@ -111,23 +121,22 @@ def check_raster_file(src_path: str) -> ValidationInfo: # pragma: no cover
)
)

block_offset = int(src.get_tag_item('BLOCK_OFFSET_0_0', 'TIFF', bidx=1))
if not block_offset:
errors.append('Missing BLOCK_OFFSET_0_0')
block_offset = src.get_tag_item('BLOCK_OFFSET_0_0', 'TIFF', bidx=1)

data_offset = int(block_offset) if block_offset else 0
data_offsets = [data_offset]
details['data_offsets'] = {}
details['data_offsets']['main'] = data_offset

for ix, dec in enumerate(overviews):
data_offset = int(
src.get_tag_item('BLOCK_OFFSET_0_0', 'TIFF', bidx=1, ovr=ix)
block_offset = src.get_tag_item(
'BLOCK_OFFSET_0_0', 'TIFF', bidx=1, ovr=ix
)
data_offset = int(block_offset) if block_offset else 0
data_offsets.append(data_offset)
details['data_offsets']['overview_{}'.format(ix)] = data_offset

if data_offsets[-1] < ifd_offsets[-1]:
if data_offsets[-1] != 0 and data_offsets[-1] < ifd_offsets[-1]:
if len(overviews) > 0:
errors.append(
'The offset of the first block of the smallest overview '
Expand Down Expand Up @@ -156,7 +165,7 @@ def check_raster_file(src_path: str) -> ValidationInfo: # pragma: no cover

for ix, dec in enumerate(overviews):
with rasterio.open(src_path, OVERVIEW_LEVEL=ix) as ovr_dst:
if ovr_dst.width >= 512 or ovr_dst.height >= 512:
if ovr_dst.width > 512 and ovr_dst.height > 512:
if not ovr_dst.is_tiled:
errors.append('Overview of index {} is not tiled'.format(ix))

Expand Down
6 changes: 3 additions & 3 deletions tests/test_cog.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,12 +69,12 @@ def test_validate_unoptimized(tmpdir):
from terracotta import cog

outfile = str(tmpdir / 'raster.tif')
raster_data = 1000 * np.random.rand(512, 512).astype(np.uint16)
raster_data = 1000 * np.random.rand(1024, 1024).astype(np.uint16)

profile = BASE_PROFILE.copy()
profile.update(
height=raster_data.shape[0],
width=raster_data.shape[1]
width=raster_data.shape[1],
)

with rasterio.open(outfile, 'w', **profile) as dst:
Expand All @@ -87,7 +87,7 @@ def test_validate_no_overviews(tmpdir):
from terracotta import cog

outfile = str(tmpdir / 'raster.tif')
raster_data = 1000 * np.random.rand(512, 512).astype(np.uint16)
raster_data = 1000 * np.random.rand(1024, 1024).astype(np.uint16)

profile = BASE_PROFILE.copy()
profile.update(
Expand Down

0 comments on commit da2bc5b

Please sign in to comment.