From da2bc5bd63ca84fa36f64a7de1af357be0dc670f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dion=20H=C3=A4fner?= Date: Mon, 17 Jan 2022 21:11:44 +0100 Subject: [PATCH] update to most recent COG validate script --- terracotta/cog.py | 37 +++++++++++++++++++++++-------------- tests/test_cog.py | 6 +++--- 2 files changed, 26 insertions(+), 17 deletions(-) diff --git a/terracotta/cog.py b/terracotta/cog.py index 14470604..08d612e4 100644 --- a/terracotta/cog.py +++ b/terracotta/cog.py @@ -25,7 +25,7 @@ def validate(src_path: str, strict: bool = True) -> bool: def check_raster_file(src_path: str) -> ValidationInfo: # pragma: no cover """ Implementation from - https://github.com/cogeotiff/rio-cogeo/blob/0f00a6ee1eff602014fbc88178a069bd9f4a10da/rio_cogeo/cogeo.py + https://github.com/cogeotiff/rio-cogeo/blob/a07d914e2d898878417638bbc089179f01eb5b28/rio_cogeo/cogeo.py#L385 This function is the rasterio equivalent of https://svn.osgeo.org/gdal/trunk/gdal/swig/python/samples/validate_cloud_optimized_geotiff.py @@ -44,15 +44,13 @@ def check_raster_file(src_path: str) -> ValidationInfo: # pragma: no cover errors.append('The file is not a GeoTIFF') return errors, warnings, details - filelist = [os.path.basename(f) for f in src.files] - src_bname = os.path.basename(src_path) - if len(filelist) > 1 and src_bname + '.ovr' in filelist: + if any(os.path.splitext(x)[-1] == '.ovr' for x in src.files): errors.append( 'Overviews found in external .ovr file. They should be internal' ) overviews = src.overviews(1) - if src.width >= 512 or src.height >= 512: + if src.width > 512 and src.height > 512: if not src.is_tiled: errors.append( 'The file is greater than 512xH or 512xW, but is not tiled' @@ -65,16 +63,28 @@ def check_raster_file(src_path: str) -> ValidationInfo: # pragma: no cover ) ifd_offset = int(src.get_tag_item('IFD_OFFSET', 'TIFF', bidx=1)) - ifd_offsets = [ifd_offset] + # Starting from GDAL 3.1, GeoTIFF and COG have ghost headers + # e.g: + # """ + # GDAL_STRUCTURAL_METADATA_SIZE=000140 bytes + # LAYOUT=IFDS_BEFORE_DATA + # BLOCK_ORDER=ROW_MAJOR + # BLOCK_LEADER=SIZE_AS_UINT4 + # BLOCK_TRAILER=LAST_4_BYTES_REPEATED + # KNOWN_INCOMPATIBLE_EDITION=NO + # """ + # + # This header should be < 200bytes if ifd_offset > 300: errors.append( f'The offset of the main IFD should be < 300. It is {ifd_offset} instead' ) + ifd_offsets = [ifd_offset] details['ifd_offsets'] = {} details['ifd_offsets']['main'] = ifd_offset - if not overviews == sorted(overviews): + if overviews and overviews != sorted(overviews): errors.append('Overviews should be sorted') for ix, dec in enumerate(overviews): @@ -111,9 +121,7 @@ def check_raster_file(src_path: str) -> ValidationInfo: # pragma: no cover ) ) - block_offset = int(src.get_tag_item('BLOCK_OFFSET_0_0', 'TIFF', bidx=1)) - if not block_offset: - errors.append('Missing BLOCK_OFFSET_0_0') + block_offset = src.get_tag_item('BLOCK_OFFSET_0_0', 'TIFF', bidx=1) data_offset = int(block_offset) if block_offset else 0 data_offsets = [data_offset] @@ -121,13 +129,14 @@ def check_raster_file(src_path: str) -> ValidationInfo: # pragma: no cover details['data_offsets']['main'] = data_offset for ix, dec in enumerate(overviews): - data_offset = int( - src.get_tag_item('BLOCK_OFFSET_0_0', 'TIFF', bidx=1, ovr=ix) + block_offset = src.get_tag_item( + 'BLOCK_OFFSET_0_0', 'TIFF', bidx=1, ovr=ix ) + data_offset = int(block_offset) if block_offset else 0 data_offsets.append(data_offset) details['data_offsets']['overview_{}'.format(ix)] = data_offset - if data_offsets[-1] < ifd_offsets[-1]: + if data_offsets[-1] != 0 and data_offsets[-1] < ifd_offsets[-1]: if len(overviews) > 0: errors.append( 'The offset of the first block of the smallest overview ' @@ -156,7 +165,7 @@ def check_raster_file(src_path: str) -> ValidationInfo: # pragma: no cover for ix, dec in enumerate(overviews): with rasterio.open(src_path, OVERVIEW_LEVEL=ix) as ovr_dst: - if ovr_dst.width >= 512 or ovr_dst.height >= 512: + if ovr_dst.width > 512 and ovr_dst.height > 512: if not ovr_dst.is_tiled: errors.append('Overview of index {} is not tiled'.format(ix)) diff --git a/tests/test_cog.py b/tests/test_cog.py index 448b301e..75f6e6aa 100644 --- a/tests/test_cog.py +++ b/tests/test_cog.py @@ -69,12 +69,12 @@ def test_validate_unoptimized(tmpdir): from terracotta import cog outfile = str(tmpdir / 'raster.tif') - raster_data = 1000 * np.random.rand(512, 512).astype(np.uint16) + raster_data = 1000 * np.random.rand(1024, 1024).astype(np.uint16) profile = BASE_PROFILE.copy() profile.update( height=raster_data.shape[0], - width=raster_data.shape[1] + width=raster_data.shape[1], ) with rasterio.open(outfile, 'w', **profile) as dst: @@ -87,7 +87,7 @@ def test_validate_no_overviews(tmpdir): from terracotta import cog outfile = str(tmpdir / 'raster.tif') - raster_data = 1000 * np.random.rand(512, 512).astype(np.uint16) + raster_data = 1000 * np.random.rand(1024, 1024).astype(np.uint16) profile = BASE_PROFILE.copy() profile.update(