WIP Initial scoping for upgrade to Python 3 #3

Draft · wants to merge 6 commits into base: tests_framework
5 changes: 3 additions & 2 deletions oneflux/partition/auxiliary.py
@@ -28,6 +28,7 @@
 #FLOAT_PREC = 'f8'
 FLOAT_PREC = 'f4'
 DOUBLE_PREC = 'f8'
+STRING_VARIABLE_LENGTH = 'U12'


 _log = logging.getLogger(__name__)
@@ -111,7 +112,7 @@ def compare_col_to_pvwave(py_array, filename, label=None, diff=False, show_plot=
     s_string = s_string.replace(' ', '')
     s_string = s_string.replace('-1.#IND000', '-9999')
     s_string = s_string.replace('\r', '')
-    u_string = unicode(s_string)
+    u_string = str(s_string)
     pw_array = numpy.genfromtxt(StringIO(u_string), dtype=FLOAT_PREC, delimiter=',', skip_header=0, missing_values='-9999,-9999.0,-6999,-6999.0, ', usemask=True)
     pw_array = numpy.ma.filled(pw_array, numpy.NaN)
     # **************************************************************************************************************************************************
@@ -204,7 +205,7 @@ def compare_col_to_pvwave(py_array, filename, label=None, diff=False, show_plot=
     figure_basename = figure_basename.replace('_PW', '') # remove _PW from PW data source filename

     record_interval = (timedelta(minutes=30) if resolution == 'hh' else timedelta(minutes=60))
-    timestamp_list = [datetime(year, 1, 1, 0, 0) + (record_interval * i) for i in xrange(1, py_array.size + 1)]
+    timestamp_list = [datetime(year, 1, 1, 0, 0) + (record_interval * i) for i in range(1, py_array.size + 1)]

     _log.debug("Using year={y}, resolution={r}, first timestamp={f}, last timestamp={l}".format(y=year, r=resolution, f=timestamp_list[0], l=timestamp_list[-1]))

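Note on the two renames above: Python 3 drops the unicode builtin (every str is already unicode) and renames xrange to range. A minimal standalone sketch of the equivalences, not code from the PR:

    s = '1.5,2.5,-9999'
    # str() is the identity for text in Python 3; unicode() no longer exists
    assert str(s) == s
    # range() is lazy like Python 2's xrange(), so the comprehension above
    # iterates it without materialising a list first
    assert list(range(1, 4)) == [1, 2, 3]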
10 changes: 5 additions & 5 deletions oneflux/partition/daytime.py
@@ -909,7 +909,7 @@ def estimate_parasets(data, winsize, fguess, trimperc, name_out, dt_output_dir,
     ###############################################

     #### Creating the arrays we're going to use
-    n_parasets = long(365 / winsize) * 2
+    n_parasets = int(365 / winsize) * 2
     params = numpy.zeros((3, 2 * len(fguess), n_parasets), dtype=FLOAT_PREC)
     params_ok = numpy.zeros((2 * len(fguess), n_parasets), dtype=FLOAT_PREC)
     params_nok = numpy.zeros((2 * len(fguess), n_parasets), dtype=FLOAT_PREC)
@@ -1041,7 +1041,7 @@ def estimate_parasets(data, winsize, fguess, trimperc, name_out, dt_output_dir,
         #ind[i, :, :] = long((day_begin + winsize / 2.0) * 48.0)

         #### Calculate the first index of the window we're using now
-        ind[:, :, i] = long((day_begin + winsize / 2.0) * 48.0)
+        ind[:, :, i] = int((day_begin + winsize / 2.0) * 48.0)

         '''
         #print("ind[:, :, i]")
@@ -1938,12 +1938,12 @@ def percentiles_fn(data, columns, values=[0.0, 0.25, 0.5, 0.75, 1.0], remove_mis

         #### Setting ind to the percentile wanted
         if values[i] <= 0.5:
-            ind = long(values[i] * n_elements)
+            ind = int(values[i] * n_elements)
         else:
-            ind = long(values[i] * (n_elements + 1))
+            ind = int(values[i] * (n_elements + 1))

         if ind >= n_elements:
-            ind = n_elements - long(1)
+            ind = n_elements - int(1)

         if i == 0:
             result = data[columns[0]][sorted_index_arr[ind]]
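Note: long no longer exists in Python 3 (int is arbitrary precision), and / is now true division. For the positive operands used here, truncating with int(a / b) matches Python 2's integer division, though a // b would be the more idiomatic spelling. A quick standalone check with an illustrative winsize (not a value from the PR):

    winsize = 10  # hypothetical window size in days
    # int() truncates the true-division result; // floors it.
    # For positive operands the two agree, preserving Python 2 behaviour.
    assert int(365 / winsize) * 2 == (365 // winsize) * 2 == 72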
6 changes: 3 additions & 3 deletions oneflux/partition/library.py
@@ -23,7 +23,7 @@
 from oneflux import ONEFluxError
 from oneflux.partition.ecogeo import lloyd_taylor, lloyd_taylor_dt, hlrc_lloyd, hlrc_lloydvpd
 from oneflux.partition.ecogeo import hlrc_lloyd_afix, hlrc_lloydvpd_afix, lloydt_e0fix
-from oneflux.partition.auxiliary import FLOAT_PREC, DOUBLE_PREC, NAN, nan, not_nan
+from oneflux.partition.auxiliary import FLOAT_PREC, DOUBLE_PREC, STRING_VARIABLE_LENGTH, NAN, nan, not_nan

 from oneflux.graph.compare import plot_comparison
 from oneflux.utils.files import file_exists_not_empty
@@ -76,7 +76,7 @@ def load_output(filename, delimiter=',', skip_header=1):
     _log.debug("Finished loading headers: {h}".format(h=headers))

     _log.debug("Started loading data")
-    dtype = [(i, ('a25' if i.lower() in STRING_HEADERS else FLOAT_PREC)) for i in headers]
+    dtype = [(i, (STRING_VARIABLE_LENGTH if i.lower() in STRING_HEADERS else FLOAT_PREC)) for i in headers]
     vfill = [('' if i.lower() in STRING_HEADERS else numpy.NaN) for i in headers]
     data = numpy.genfromtxt(fname=filename, dtype=dtype, names=headers, delimiter=delimiter, skip_header=skip_header, missing_values='-9999,-9999.0,-6999,-6999.0, ', usemask=True)
     data = numpy.ma.filled(data, vfill)
@@ -1299,7 +1299,7 @@ def load_outputs(filename, delimiter=',', skip_header=1, is_not_hourly=True, is_
     _log.debug("Loaded headers: {h}".format(h=headers))

     _log.debug("Started loading data")
-    dtype = [(i, ('a25' if i.lower() in STRING_HEADERS else FLOAT_PREC)) for i in headers]
+    dtype = [(i, (STRING_VARIABLE_LENGTH if i.lower() in STRING_HEADERS else FLOAT_PREC)) for i in headers]
     vfill = [('' if i.lower() in STRING_HEADERS else numpy.NaN) for i in headers]
     data = numpy.genfromtxt(fname=filename, dtype=dtype, names=headers, delimiter=delimiter, skip_header=skip_header, missing_values='-9999,-9999.0,-6999,-6999.0, ', usemask=True)
     data = numpy.ma.filled(data, vfill)
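Note: the dtype swap matters because numpy's 'a25' (an alias for 'S25') yields byte strings, which under Python 3 never compare equal to str values. The new 'U12' dtype yields native unicode strings, but it is also narrower (12 characters vs 25), so longer labels would be silently truncated; this may be worth confirming against the expected header values. A sketch of the difference, with illustrative values not taken from the PR:

    import numpy
    b = numpy.array(['GPP'], dtype='a25')  # old dtype: zero-padded bytes
    u = numpy.array(['GPP'], dtype='U12')  # STRING_VARIABLE_LENGTH: unicode
    print(b[0] == 'GPP')  # False under Python 3: b[0] is b'GPP'
    print(u[0] == 'GPP')  # True
    print(numpy.array(['thirteen_chars'], dtype='U12')[0])  # silently cut to 12 chars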
2 changes: 1 addition & 1 deletion oneflux/partition/nighttime.py
@@ -246,7 +246,7 @@ def flux_partition(data, lat, tempvar='tair', nomsg=False, temp_output_filename=
     julmin, julmax = int(juldays[0]), int(numpy.max(juldays)) ### first/last day of year
     n_regr = 0 ### counter of number of regressions/optimizations

-    window_steps = range(julmin, julmax + 1, STEP_SIZE)
+    window_steps = list(range(julmin, julmax + 1, STEP_SIZE))

     # TODO: (potential) add e0_1_list, e0_2_list, e0_3_list, and corresponding se and idx to track individual

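Note: the list() wrapper is needed because Python 3's range is a lazy sequence rather than a list, so code that concatenates, mutates, or repeatedly reuses the window steps would otherwise fail. A standalone sketch with illustrative values:

    steps = range(0, 10, 5)
    try:
        steps + [99]                    # TypeError: cannot concatenate range and list
    except TypeError:
        steps = list(range(0, 10, 5))   # the fix applied in the hunk above
    print(steps + [99])                 # [0, 5, 99]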
5 changes: 3 additions & 2 deletions requirements.txt
@@ -1,4 +1,5 @@
-numpy>=1.11.0,<1.16.0
+numpy<2,>=1.18
 scipy>=0.17.0
 matplotlib>=1.5.1
-statsmodels>=0.8.0,<0.11.0
+statsmodels==0.14.1
+pytest==8.2.2
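Note on the pins: numpy's floor moves to a Python 3 era release while the <2 ceiling excludes NumPy 2.0, which removed the numpy.NaN alias this codebase uses; statsmodels 0.14.1 and pytest 8.2.2 are Python 3 only releases. A quick sanity check (editor's sketch, not part of the PR):

    import numpy
    # numpy.NaN was removed in NumPy 2.0 (use numpy.nan there); on a
    # compliant 1.x install this prints the version and nan rather than
    # raising AttributeError.
    print(numpy.__version__, numpy.NaN)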
2 changes: 1 addition & 1 deletion runoneflux.py
@@ -81,7 +81,7 @@
 # start execution
 try:
     # check arguments
-    print os.path.join(args.datadir, args.sitedir)
+    print(os.path.join(args.datadir, args.sitedir))
     if not os.path.isdir(os.path.join(args.datadir, args.sitedir)):
         raise ONEFluxError("Site dir not found: {d}".format(d=args.sitedir))
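Note: print is a function in Python 3, so the old statement form is a SyntaxError. The parenthesised single-argument call also runs on Python 2, keeping this line compatible with both. For example:

    import os
    print(os.path.join('datadir', 'sitedir'))  # datadir/sitedir on POSIX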
75 changes: 42 additions & 33 deletions tests/python/integration/test_partitioning.py
@@ -1,31 +1,38 @@
 import pytest
 import os, glob
 import errno
 import shutil
-import urllib
-from distutils.dir_util import copy_tree
+import urllib.request
+from shutil import copytree
 import logging
 import time

 _log = logging.getLogger(__name__)


 @pytest.fixture(scope="module")
 def get_data():
     '''
     Utilising python to obtain sample test data. Used as
     a fixture in this module.
     '''
+    if os.path.isdir('tests/data'):
+        _log.info('Skipping sample data retrieval as sample test data directory '
+                  'already exists: ./tests/data')
+        return
+
     from zipfile import ZipFile
-    urllib.urlopen('ftp://ftp.fluxdata.org/.ameriflux_downloads/.test/US-ARc_sample_output.zip')
-    urllib.urlopen('ftp://ftp.fluxdata.org/.ameriflux_downloads/.test/US-ARc_sample_input.zip')
-
-    input_zip = "US-ARc_sample_input.zip"
-    output_zip = "US-ARc_sample_output.zip"
-
-    with ZipFile(input_zip) as zi, ZipFile(output_zip) as zo:
+    input_zip_name, headers = urllib.request.urlretrieve('ftp://ftp.fluxdata.org/.ameriflux_downloads/.test/US-ARc_sample_input.zip')
+    output_zip_name, headers = urllib.request.urlretrieve('ftp://ftp.fluxdata.org/.ameriflux_downloads/.test/US-ARc_sample_output.zip')
+
+    _log.info('successfully downloaded sample data zip files. Extracting...')
+
+    with ZipFile(input_zip_name) as zi, ZipFile(output_zip_name) as zo:
         zi.extractall(path='tests/data/test_input')
         zo.extractall(path='tests/data/test_output')

+    _log.info('sample data successfully extracted from zip files')
+

 def equal_csv(csv_1, csv_2):
     '''
     Check equality of two csv files.
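Note on the download fixture above: Python 3 moved urlopen into urllib.request, and the rewrite also fixes a latent bug, since the old code opened the FTP URLs but never saved the payload, whereas urlretrieve downloads to a local temporary file and returns its path. A standalone sketch of the call, using the same URL as the fixture:

    import urllib.request
    # urlretrieve returns (local_filename, headers); the temp file path is
    # what the fixture hands to ZipFile.
    filename, headers = urllib.request.urlretrieve(
        'ftp://ftp.fluxdata.org/.ameriflux_downloads/.test/US-ARc_sample_input.zip')
    print(filename)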
@@ -45,7 +52,7 @@ def equal_csv(csv_1, csv_2):


 @pytest.fixture
-def setup_data():
+def setup_data(get_data):
     '''
     Set up input data for run_partition_nt test.
@@ -58,21 +65,21 @@ def setup_data():
     except OSError as e:
         if e.errno == errno.EEXIST:
             print("directory exists")

     testdata = 'tests/python/integration/input/step_10/US-ARc_sample_input'
-    copy_tree('tests/data/test_input/', testdata)
+    copytree('tests/data/test_input/', testdata, dirs_exist_ok=True)

     refoutdir = 'tests/data/test_output/US-ARc_sample_output'

-    copy_tree(os.path.join(refoutdir, '07_meteo_proc'), \
-              os.path.join(testdata, '07_meteo_proc'))
-    copy_tree(os.path.join(refoutdir, '08_nee_proc'), \
-              os.path.join(testdata, '08_nee_proc/'))
-    copy_tree(os.path.join(refoutdir, '02_qc_auto'), \
-              os.path.join(testdata, '02_qc_auto/'))
+    copytree(os.path.join(refoutdir, '07_meteo_proc'), \
+             os.path.join(testdata, '07_meteo_proc'), dirs_exist_ok=True)
+    copytree(os.path.join(refoutdir, '08_nee_proc'), \
+             os.path.join(testdata, '08_nee_proc/'), dirs_exist_ok=True)
+    copytree(os.path.join(refoutdir, '02_qc_auto'), \
+             os.path.join(testdata, '02_qc_auto/'), dirs_exist_ok=True)


 def test_run_partition_nt(setup_data):
     '''
     Run partition_nt on single percentile.
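Note: distutils.dir_util.copy_tree is deprecated and removed along with distutils in Python 3.12; shutil.copytree is the replacement, and its dirs_exist_ok=True flag (added in Python 3.8, which presumably becomes the minimum supported version here) restores copy_tree's merge-into-existing-directory behaviour. A sketch with a hypothetical destination path:

    from shutil import copytree
    # Without dirs_exist_ok=True, copytree raises FileExistsError when the
    # target directory already exists; with it, contents are merged in.
    copytree('tests/data/test_input/', 'tests/tmp/sample_input',  # hypothetical dest
             dirs_exist_ok=True)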
@@ -81,33 +88,35 @@
     refoutdir = "./tests/data/test_output/"
     siteid = "US-ARc"
     sitedir = "US-ARc_sample_input"
-    years = [2005] # years = [2005, 2006]
+    years = [2005]  # years = [2005, 2006]
     # PROD_TO_COMPARE = ['c', 'y']
-    PROD_TO_COMPARE = ['y',]
+    PROD_TO_COMPARE = ['y', ]
     # PERC_TO_COMPARE = ['1.25', '3.75',]
-    PERC_TO_COMPARE = ['1.25',]
+    PERC_TO_COMPARE = ['1.25', ]

     from oneflux.tools.partition_nt import remove_previous_run, run_python
-    remove_previous_run(datadir=datadir, siteid=siteid, sitedir=sitedir, python=True,
-                        prod_to_compare=PROD_TO_COMPARE, perc_to_compare=PERC_TO_COMPARE,
+    remove_previous_run(datadir=datadir, siteid=siteid, sitedir=sitedir, python=True,
+                        prod_to_compare=PROD_TO_COMPARE,
+                        perc_to_compare=PERC_TO_COMPARE,
                         years_to_compare=years)

-    run_python(datadir=datadir, siteid=siteid, sitedir=sitedir, prod_to_compare=PROD_TO_COMPARE,
+    run_python(datadir=datadir, siteid=siteid, sitedir=sitedir,
+               prod_to_compare=PROD_TO_COMPARE,
                perc_to_compare=PERC_TO_COMPARE, years_to_compare=years)

     # check whether csv of "output" is same as csv of reference

     # the generated output is actually in the "input" directory.
     rootdir = os.path.join(datadir, sitedir, "10_nee_partition_nt")
     nee_y_files = glob.glob(os.path.join(rootdir, "nee_y_1.25_US-ARc_2005*"))
+    nee_y_files = list(filter(lambda x: not x.endswith('_orig.csv'), nee_y_files))

     # paths to the "reference" output data
     refoutdir = os.path.join(refoutdir, "US-ARc_sample_output", "10_nee_partition_nt")
     ref_nee_y_files = glob.glob(os.path.join(refoutdir, "nee_y_1.25_US-ARc_2005*"))

     assert len(nee_y_files) == len(ref_nee_y_files)
-    retval = True
+    retval = True
     for f, b in zip(nee_y_files, ref_nee_y_files):
         print(f, b)
         assert equal_csv(f, b) == True
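Note: the filter(...) result is materialised with list(...) because Python 3's filter returns a lazy iterator, which has no len() and is exhausted after a single pass; without the wrapper, the len(nee_y_files) assertion would raise TypeError. A standalone sketch with illustrative filenames:

    files = filter(lambda x: not x.endswith('_orig.csv'), ['a.csv', 'b_orig.csv'])
    try:
        len(files)              # TypeError: object of type 'filter' has no len()
    except TypeError:
        files = list(files)     # materialise once, then reuse freely
    print(files)                # ['a.csv']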