Skip to content

Commit

Permalink
Merge pull request #2466 from DennisHeimbigner/fixedstring.dmh
Browse files Browse the repository at this point in the history
Fix support for reading arrays of HDF5 fixed size strings
  • Loading branch information
WardF committed Aug 2, 2022
2 parents 5ebc855 + 2b45c7e commit b03988e
Show file tree
Hide file tree
Showing 10 changed files with 172 additions and 24 deletions.
3 changes: 2 additions & 1 deletion RELEASE_NOTES.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,9 @@ This file contains a high-level description of this package's evolution. Release

## 4.9.1 - T.B.D.

* [Bug Fix] Fix support for reading arrays of HDF5 fixed size strings. See [Github #????](https://github.com/Unidata/netcdf-c/pull/????).
* [Bug Fix] Provide a default enum const when fill value does not match any enum constant for the value zero. See [Github #2462](https://github.com/Unidata/netcdf-c/pull/2462).
* [Bug Fi] Fix the json submodule symbol conflicts between libnetcdf and the plugin specific netcdf_json.h. See [Github #2448](https://github.com/Unidata/netcdf-c/pull/2448).
* [Bug Fix] Fix the json submodule symbol conflicts between libnetcdf and the plugin specific netcdf_json.h. See [Github #2448](https://github.com/Unidata/netcdf-c/pull/2448).
* [Bug Fix] Fix quantize with CLASSIC_MODEL files. See [Github #2405](https://github.com/Unidata/netcdf-c/pull/2445).
* [Enhancement] Add `--disable-quantize` option to `configure`.
* [Bug Fix] Fix CMakeLists.txt to handle all acceptable boolean values for -DPLUGIN_INSTALL_DIR. See [Github #2430](https://github.com/Unidata/netcdf-c/pull/2430).
Expand Down
39 changes: 34 additions & 5 deletions libhdf5/hdf5var.c
Original file line number Diff line number Diff line change
Expand Up @@ -1880,6 +1880,9 @@ NC4_get_vars(int ncid, int varid, const size_t *startp, const size_t *countp,
void *bufr = NULL;
int need_to_convert = 0;
size_t len = 1;
int fixedlengthstring = 0;
hsize_t fstring_len = 0;
size_t fstring_count = 1;

/* Find info for this file, group, and var. */
if ((retval = nc4_hdf5_find_grp_h5_var(ncid, varid, &h5, &grp, &var)))
Expand Down Expand Up @@ -2068,14 +2071,21 @@ NC4_get_vars(int ncid, int varid, const size_t *startp, const size_t *countp,
H5Tget_size(hdf5_type->hdf_typeid) > 1 &&
!H5Tis_variable_str(hdf5_type->hdf_typeid))
{
hsize_t fstring_len;
size_t k;

if ((fstring_len = H5Tget_size(hdf5_type->hdf_typeid)) == 0)
BAIL(NC_EHDFERR);
if (!(*(char **)data = malloc(1 + fstring_len)))
BAIL(NC_ENOMEM);
bufr = *(char **)data;
}
/* Compute the total number of strings to read */
if (var->ndims) {
for (k = 0; k < var->ndims; k++) {
fstring_count *= countp[k];
}
}
/* Allocate space for the all the strings */
if (!(bufr = malloc(fstring_len*fstring_count)))
BAIL(NC_ENOMEM);
fixedlengthstring = 1;
}

/* Create the data transfer property list. */
if ((xfer_plistid = H5Pcreate(H5P_DATASET_XFER)) < 0)
Expand Down Expand Up @@ -2128,6 +2138,24 @@ NC4_get_vars(int ncid, int varid, const size_t *startp, const size_t *countp,
}
#endif /* USE_PARALLEL4 */
}

/* If we read a sequence of fixed length strings, then we need to convert to char* in memory */
if(fixedlengthstring) {
size_t k;
char** strvec = (char**)data;
char* p;
for(k=0;k < fstring_count;k++) {
char* eol;
if((p = (char*)malloc(1+fstring_len))==NULL) BAIL(NC_ENOMEM);
memcpy(p,((char*)bufr)+(k*fstring_len),fstring_len);
eol = p + fstring_len;
*eol = '\0';
strvec[k] = p; p = NULL;
}
free(bufr);
bufr = NULL;
}

/* Now we need to fake up any further data that was asked for,
using the fill values instead. First skip past the data we
just read, if any. */
Expand Down Expand Up @@ -2207,6 +2235,7 @@ NC4_get_vars(int ncid, int varid, const size_t *startp, const size_t *countp,
}

exit:
if(fixedlengthstring && bufr) free(bufr);
if (file_spaceid > 0)
if (H5Sclose(file_spaceid) < 0)
BAIL2(NC_EHDFERR);
Expand Down
6 changes: 3 additions & 3 deletions libsrc4/nc4internal.c
Original file line number Diff line number Diff line change
Expand Up @@ -2106,11 +2106,11 @@ NC_createglobalstate(void)
}
/* Initialize struct pointers */
if((nc_globalstate->rcinfo = calloc(1,sizeof(struct NCRCinfo)))==NULL)
{stat = NC_ENOMEM; goto done;}
{stat = NC_ENOMEM; goto done;}
if((nc_globalstate->rcinfo->entries = nclistnew())==NULL)
{stat = NC_ENOMEM; goto done;}
{stat = NC_ENOMEM; goto done;}
if((nc_globalstate->rcinfo->s3profiles = nclistnew())==NULL)
{stat = NC_ENOMEM; goto done;}
{stat = NC_ENOMEM; goto done;}

/* Get environment variables */
if(getenv(NCRCENVIGNORE) != NULL)
Expand Down
1 change: 1 addition & 0 deletions nc_test4/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ IF(BUILD_UTILITIES)
ADD_SH_TEST(nc_test4 tst_misc)
build_bin_test(tst_fillonly)
ADD_SH_TEST(nc_test4 test_fillonly)
ADD_SH_TEST(nc_test4 tst_fixedstring)
IF(USE_HDF5 AND ENABLE_FILTER_TESTING)
build_bin_test(tst_filterparser)
build_bin_test(test_filter)
Expand Down
18 changes: 16 additions & 2 deletions nc_test4/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,9 @@ TESTS += run_grp_rename.sh tst_misc.sh
check_PROGRAMS += tst_fillonly
TESTS += test_fillonly.sh

# H5 and nczarr Fixed string support
TESTS += tst_fixedstring.sh

# Szip Tests (requires ncdump)
if HAVE_H5Z_SZIP
check_PROGRAMS += test_szip h5testszip
Expand Down Expand Up @@ -112,7 +115,8 @@ ref_filter_repeat.txt ref_fillonly.cdl test_fillonly.sh \
ref_filter_order_create.txt ref_filter_order_read.txt \
ref_any.cdl tst_specific_filters.sh tst_unknown.sh \
tst_virtual_datasets.c noop1.cdl unknown.cdl \
tst_broken_files.c
tst_broken_files.c \
tst_fixedstring.sh ref_fixedstring.h5 ref_fixedstring.cdl

# The tst_filterinstall test can only be run after an install
# occurred with --with-plugin-dir enabled. So there is no point
Expand All @@ -131,9 +135,19 @@ floats*.nc floats*.cdl shorts*.nc shorts*.cdl ints*.nc ints*.cdl \
testfilter_reg.nc filterrepeat.txt tmp_fillonly.nc \
testfilter_order.nc crfilterorder.txt rdfilterorder.txt 1 \
tmp_*.txt tmp_*.nc tmp*.dump tmp*.cdl tmp*.txt tmp*.tmp \
tmp_bzip2.c bzip2.nc noop.nc tmp_*.dmp
tmp_bzip2.c bzip2.nc noop.nc tmp_*.dmp tmp_*.cdl

DISTCLEANFILES = findplugin.sh run_par_test.sh

# If valgrind is present, add valgrind targets.
@VALGRIND_CHECK_RULES@

# The (otherwise unused) program build_fixedstring.c
# is used to generate the test file ref_fixedstring.h5.
# That test file is build and included as part of the distribution,
# so the build_fixedstring.c program generally does not need to
# be executed unless the test file needs to be modified..

check_PROGRAMS += build_fixedstring
ref_fixedstring.h5: build_fixedstring.c
${srcdir}/buildfixedstring
81 changes: 81 additions & 0 deletions nc_test4/build_fixedstring.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
/* This is part of the netCDF package. Copyright 2018 University
Corporation for Atmospheric Research/Unidata See COPYRIGHT file for
conditions of use.
This program does HDF5 string stuff.
Here's a HDF5 sample programs:
http://hdf.ncsa.uiuc.edu/training/other-ex5/sample-programs/strings.c
*/
#include <string.h>
#include "err_macros.h"
#include <hdf5.h>

#define FILE_NAME "ref_fixedstring.h5"
#define DIM1_LEN 4
#define ATT_NAME1 "att1"
#define ATT_NAMEN "attn"
#define GRP_NAME "group"

int
main()
{
hid_t fileid, typeid;
hid_t spaceid1, attid1;
hid_t spaceidn, attidn;
hid_t dataspaceid1, dataset1;
hid_t dataspaceidn, datasetn;
size_t type_size;

const char data1[4] = "abcd";
const char datan[16] = "abcdefghijklmnop";
const hsize_t dims[1] = {4};

printf("\n*** Checking HDF5 fixed length string types.\n");

printf("*** Create HDF5 Dataset ...");

/* Open file. */
if ((fileid = H5Fcreate(FILE_NAME, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT)) < 0) ERR;

/* Create fixed string type. */
if ((typeid = H5Tcopy(H5T_C_S1)) < 0) ERR;
type_size = 4 * sizeof(char);
if (H5Tset_size (typeid, type_size) < 0) ERR;
if (H5Tset_strpad (typeid, H5T_STR_NULLPAD) < 0) ERR;

/* Write a scalar attribute of this type. */
if ((spaceid1 = H5Screate(H5S_SCALAR)) < 0) ERR;
if ((attid1 = H5Acreate1(fileid, ATT_NAME1, typeid, spaceid1, H5P_DEFAULT)) < 0) ERR;
if (H5Awrite(attid1, typeid, &data1) < 0) ERR;

/* Write a vector attribute of this type. */
if ((spaceidn = H5Screate_simple(1,dims,NULL)) < 0) ERR;
if ((attidn = H5Acreate1(fileid, ATT_NAMEN, typeid, spaceidn, H5P_DEFAULT)) < 0) ERR;
if (H5Awrite(attidn, typeid, &datan) < 0) ERR;

/* Write a scalar variable of this type. */
if ((dataspaceid1 = H5Screate(H5S_SCALAR)) < 0) ERR;
dataset1 = H5Dcreate1(fileid, "v1", typeid, dataspaceid1, H5P_DEFAULT);
if (H5Dwrite (dataset1, typeid, H5S_ALL, H5S_ALL, H5P_DEFAULT, data1) < 0) ERR;

/* Write a vector variable of this type. */
if ((dataspaceidn = H5Screate_simple(1,dims,NULL)) < 0) ERR;
datasetn = H5Dcreate1(fileid, "vn", typeid, dataspaceidn, H5P_DEFAULT);
if (H5Dwrite (datasetn, typeid, H5S_ALL, H5S_ALL, H5P_DEFAULT, datan) < 0) ERR;

/* Close everything up. */
if (H5Dclose(datasetn) < 0) ERR;
if (H5Sclose(dataspaceidn) < 0) ERR;
if (H5Dclose(dataset1) < 0) ERR;
if (H5Sclose(dataspaceid1) < 0) ERR;
if (H5Aclose(attidn) < 0) ERR;
if (H5Sclose(spaceidn) < 0) ERR;
if (H5Aclose(attid1) < 0) ERR;
if (H5Sclose(spaceid1) < 0) ERR;
if (H5Tclose(typeid) < 0) ERR;
if (H5Fclose(fileid) < 0) ERR;

SUMMARIZE_ERR;
FINAL_RESULTS;
}
9 changes: 9 additions & 0 deletions nc_test4/ref_fixedstring.cdl
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
netcdf ref_fixedstring {
dimensions:
phony_dim_0 = 3 ;
variables:
string test(phony_dim_0) ;
data:

test = "foo", "bar", "baz" ;
}
Binary file added nc_test4/ref_fixedstring.h5
Binary file not shown.
15 changes: 15 additions & 0 deletions nc_test4/tst_fixedstring.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#!/bin/sh

if test "x$srcdir" = x ; then srcdir=`pwd`; fi
. ../test_common.sh

set -e

# Note, the test file for this is ref_fixedstring.h5
# But is is generated by the (otherwise unused) program
# ../h5_test/tst_h_fixedstrings.c.

echo "*** Test reading a file with HDF5 fixed length strings"
rm -f ./tmp_fixedstring.cdl
$NCDUMP ${srcdir}/ref_fixedstring.h5 > ./tmp_fixedstring.cdl
diff -b -w ${srcdir}/ref_fixedstring.cdl ./tmp_fixedstring.cdl
24 changes: 11 additions & 13 deletions nc_test4/tst_vars3.c
Original file line number Diff line number Diff line change
Expand Up @@ -418,7 +418,7 @@ main(int argc, char **argv)
if (nc_close(ncid)) ERR;
}
SUMMARIZE_ERR;
#ifdef HAVE_H5Z_SZIP
#ifdef HAVE_H5Z_SZIP
printf("**** testing simple szip filter setup...");
{
int ncid;
Expand Down Expand Up @@ -727,15 +727,18 @@ main(int argc, char **argv)
} /* next mask */
}
SUMMARIZE_ERR;
#else
#else /*!HAVE_H5Z_SZIP*/
/* This code is run if szip is not present in HDF5. It checks that
* nc_def_var_szip() returns NC_ENOFILTER in that case. */
/* WARNING: This code no longer works if plugins is enabled. This is
because the plugin szip wrapper will be found, thus confusing HDF5.
*/
#ifdef HAVE_SZIP
printf("**** testing szip handling when szip not built...");
{
int ncid;
int dimid;
int varid;
unsigned int params[NUM_PARAMS_IN];

/* Create a netcdf-4 file with one dimensions. */
if (nc_create(FILE_NAME, NC_NETCDF4, &ncid)) ERR;
Expand All @@ -744,19 +747,14 @@ main(int argc, char **argv)
/* Add a var. Try to turn on szip filter, but it will return
* error. */
if (nc_def_var(ncid, V_SMALL, NC_INT64, NDIMS1, &dimid, &varid)) ERR;
params[0] = NC_SZIP_NN; /* options_mask */
params[1] = NC_SZIP_EC_BPP_IN; /* pixels_per_block */
if (nc_def_var_chunking(ncid, varid, NC_CHUNKED, NULL)) ERR;
{ int stat;
stat = nc_def_var_filter(ncid, varid, H5_FILTER_SZIP, NUM_PARAMS_IN, params);
if(stat != NC_ENOFILTER)
ERR;
}
if (nc_def_var_szip(ncid, varid, NC_SZIP_NN,
NC_SZIP_EC_BPP_IN) != NC_ENOFILTER) ERR;
int stat;
if ((stat=nc_def_var_szip(ncid, varid, NC_SZIP_NN, NC_SZIP_EC_BPP_IN)) != NC_ENOFILTER)
ERR;
if (nc_close(ncid)) ERR;
}
SUMMARIZE_ERR;
#endif /* HAVE_SZ */
#endif /*HAVE_SZIP*/
#endif /*!HAVE_H5Z_SZIP*/
FINAL_RESULTS;
}

0 comments on commit b03988e

Please sign in to comment.