WIP Initial scoping for upgrade to Python 3 #3

Draft · wants to merge 6 commits into base: tests_framework
5 changes: 3 additions & 2 deletions oneflux/partition/auxiliary.py
@@ -28,6 +28,7 @@
 #FLOAT_PREC = 'f8'
 FLOAT_PREC = 'f4'
 DOUBLE_PREC = 'f8'
+STRING_VARIABLE_LENGTH = 'U12'


 _log = logging.getLogger(__name__)
@@ -111,7 +112,7 @@ def compare_col_to_pvwave(py_array, filename, label=None, diff=False, show_plot=
     s_string = s_string.replace(' ', '')
     s_string = s_string.replace('-1.#IND000', '-9999')
     s_string = s_string.replace('\r', '')
-    u_string = unicode(s_string)
+    u_string = str(s_string)
     pw_array = numpy.genfromtxt(StringIO(u_string), dtype=FLOAT_PREC, delimiter=',', skip_header=0, missing_values='-9999,-9999.0,-6999,-6999.0, ', usemask=True)
     pw_array = numpy.ma.filled(pw_array, numpy.NaN)
     # **************************************************************************************************************************************************
@@ -204,7 +205,7 @@ def compare_col_to_pvwave(py_array, filename, label=None, diff=False, show_plot=
     figure_basename = figure_basename.replace('_PW', '') # remove _PW from PW data source filename

     record_interval = (timedelta(minutes=30) if resolution == 'hh' else timedelta(minutes=60))
-    timestamp_list = [datetime(year, 1, 1, 0, 0) + (record_interval * i) for i in xrange(1, py_array.size + 1)]
+    timestamp_list = [datetime(year, 1, 1, 0, 0) + (record_interval * i) for i in range(1, py_array.size + 1)]

     _log.debug("Using year={y}, resolution={r}, first timestamp={f}, last timestamp={l}".format(y=year, r=resolution, f=timestamp_list[0], l=timestamp_list[-1]))

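Note on the two renames above: Python 3 drops the unicode builtin (every str is already unicode) and renames xrange to range. A minimal standalone sketch of the equivalences, not code from the PR:

    s = '1.5,2.5,-9999'
    # str() is the identity for text in Python 3; unicode() no longer exists
    assert str(s) == s
    # range() is lazy like Python 2's xrange(), so the comprehension above
    # iterates it without materialising a list first
    assert list(range(1, 4)) == [1, 2, 3]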
10 changes: 5 additions & 5 deletions oneflux/partition/daytime.py
@@ -909,7 +909,7 @@ def estimate_parasets(data, winsize, fguess, trimperc, name_out, dt_output_dir,
     ###############################################

     #### Creating the arrays we're going to use
-    n_parasets = long(365 / winsize) * 2
+    n_parasets = int(365 / winsize) * 2
     params = numpy.zeros((3, 2 * len(fguess), n_parasets), dtype=FLOAT_PREC)
     params_ok = numpy.zeros((2 * len(fguess), n_parasets), dtype=FLOAT_PREC)
     params_nok = numpy.zeros((2 * len(fguess), n_parasets), dtype=FLOAT_PREC)
@@ -1041,7 +1041,7 @@ def estimate_parasets(data, winsize, fguess, trimperc, name_out, dt_output_dir,
         #ind[i, :, :] = long((day_begin + winsize / 2.0) * 48.0)

         #### Calculate the first index of the window we're using now
-        ind[:, :, i] = long((day_begin + winsize / 2.0) * 48.0)
+        ind[:, :, i] = int((day_begin + winsize / 2.0) * 48.0)

         '''
         #print("ind[:, :, i]")
@@ -1938,12 +1938,12 @@ def percentiles_fn(data, columns, values=[0.0, 0.25, 0.5, 0.75, 1.0], remove_mis

         #### Setting ind to the percentile wanted
         if values[i] <= 0.5:
-            ind = long(values[i] * n_elements)
+            ind = int(values[i] * n_elements)
         else:
-            ind = long(values[i] * (n_elements + 1))
+            ind = int(values[i] * (n_elements + 1))

         if ind >= n_elements:
-            ind = n_elements - long(1)
+            ind = n_elements - int(1)

         if i == 0:
             result = data[columns[0]][sorted_index_arr[ind]]
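Note: long no longer exists in Python 3 (int is arbitrary precision), and / is now true division. For the positive operands used here, truncating with int(a / b) matches Python 2's integer division, though a // b would be the more idiomatic spelling. A quick standalone check with an illustrative winsize (not a value from the PR):

    winsize = 10  # hypothetical window size in days
    # int() truncates the true-division result; // floors it.
    # For positive operands the two agree, preserving Python 2 behaviour.
    assert int(365 / winsize) * 2 == (365 // winsize) * 2 == 72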
6 changes: 3 additions & 3 deletions oneflux/partition/library.py
@@ -23,7 +23,7 @@
 from oneflux import ONEFluxError
 from oneflux.partition.ecogeo import lloyd_taylor, lloyd_taylor_dt, hlrc_lloyd, hlrc_lloydvpd
 from oneflux.partition.ecogeo import hlrc_lloyd_afix, hlrc_lloydvpd_afix, lloydt_e0fix
-from oneflux.partition.auxiliary import FLOAT_PREC, DOUBLE_PREC, NAN, nan, not_nan
+from oneflux.partition.auxiliary import FLOAT_PREC, DOUBLE_PREC, STRING_VARIABLE_LENGTH, NAN, nan, not_nan

 from oneflux.graph.compare import plot_comparison
 from oneflux.utils.files import file_exists_not_empty
@@ -76,7 +76,7 @@ def load_output(filename, delimiter=',', skip_header=1):
     _log.debug("Finished loading headers: {h}".format(h=headers))

     _log.debug("Started loading data")
-    dtype = [(i, ('a25' if i.lower() in STRING_HEADERS else FLOAT_PREC)) for i in headers]
+    dtype = [(i, (STRING_VARIABLE_LENGTH if i.lower() in STRING_HEADERS else FLOAT_PREC)) for i in headers]
     vfill = [('' if i.lower() in STRING_HEADERS else numpy.NaN) for i in headers]
     data = numpy.genfromtxt(fname=filename, dtype=dtype, names=headers, delimiter=delimiter, skip_header=skip_header, missing_values='-9999,-9999.0,-6999,-6999.0, ', usemask=True)
     data = numpy.ma.filled(data, vfill)
@@ -1299,7 +1299,7 @@ def load_outputs(filename, delimiter=',', skip_header=1, is_not_hourly=True, is_
     _log.debug("Loaded headers: {h}".format(h=headers))

     _log.debug("Started loading data")
-    dtype = [(i, ('a25' if i.lower() in STRING_HEADERS else FLOAT_PREC)) for i in headers]
+    dtype = [(i, (STRING_VARIABLE_LENGTH if i.lower() in STRING_HEADERS else FLOAT_PREC)) for i in headers]
     vfill = [('' if i.lower() in STRING_HEADERS else numpy.NaN) for i in headers]
     data = numpy.genfromtxt(fname=filename, dtype=dtype, names=headers, delimiter=delimiter, skip_header=skip_header, missing_values='-9999,-9999.0,-6999,-6999.0, ', usemask=True)
     data = numpy.ma.filled(data, vfill)
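Note: the dtype swap matters because numpy's 'a25' (an alias for 'S25') yields byte strings, which under Python 3 never compare equal to str values. The new 'U12' dtype yields native unicode strings, but it is also narrower (12 characters vs 25), so longer labels would be silently truncated; this may be worth confirming against the expected header values. A sketch of the difference, with illustrative values not taken from the PR:

    import numpy
    b = numpy.array(['GPP'], dtype='a25')  # old dtype: zero-padded bytes
    u = numpy.array(['GPP'], dtype='U12')  # STRING_VARIABLE_LENGTH: unicode
    print(b[0] == 'GPP')  # False under Python 3: b[0] is b'GPP'
    print(u[0] == 'GPP')  # True
    print(numpy.array(['thirteen_chars'], dtype='U12')[0])  # silently cut to 12 chars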
2 changes: 1 addition & 1 deletion oneflux/partition/nighttime.py
@@ -246,7 +246,7 @@ def flux_partition(data, lat, tempvar='tair', nomsg=False, temp_output_filename=
     julmin, julmax = int(juldays[0]), int(numpy.max(juldays)) ### first/last day of year
     n_regr = 0 ### counter of number of regressions/optimizations

-    window_steps = range(julmin, julmax + 1, STEP_SIZE)
+    window_steps = list(range(julmin, julmax + 1, STEP_SIZE))

     # TODO: (potential) add e0_1_list, e0_2_list, e0_3_list, and corresponding se and idx to track individual

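Note: the list() wrapper is needed because Python 3's range is a lazy sequence rather than a list, so code that concatenates, mutates, or repeatedly reuses the window steps would otherwise fail. A standalone sketch with illustrative values:

    steps = range(0, 10, 5)
    try:
        steps + [99]                    # TypeError: cannot concatenate range and list
    except TypeError:
        steps = list(range(0, 10, 5))   # the fix applied in the hunk above
    print(steps + [99])                 # [0, 5, 99]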
5 changes: 3 additions & 2 deletions requirements.txt
@@ -1,4 +1,5 @@
-numpy>=1.11.0,<1.16.0
+numpy<2,>=1.18
 scipy>=0.17.0
 matplotlib>=1.5.1
-statsmodels>=0.8.0,<0.11.0
+statsmodels==0.14.1
+pytest==8.2.2
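Note on the pins: numpy's floor moves to a Python 3 era release while the <2 ceiling excludes NumPy 2.0, which removed the numpy.NaN alias this codebase uses; statsmodels 0.14.1 and pytest 8.2.2 are Python 3 only releases. A quick sanity check (editor's sketch, not part of the PR):

    import numpy
    # numpy.NaN was removed in NumPy 2.0 (use numpy.nan there); on a
    # compliant 1.x install this prints the version and nan rather than
    # raising AttributeError.
    print(numpy.__version__, numpy.NaN)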
2 changes: 1 addition & 1 deletion runoneflux.py
@@ -81,7 +81,7 @@
 # start execution
 try:
     # check arguments
-    print os.path.join(args.datadir, args.sitedir)
+    print(os.path.join(args.datadir, args.sitedir))
     if not os.path.isdir(os.path.join(args.datadir, args.sitedir)):
         raise ONEFluxError("Site dir not found: {d}".format(d=args.sitedir))
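Note: print is a function in Python 3, so the old statement form is a SyntaxError. The parenthesised single-argument call also runs on Python 2, keeping this line compatible with both. For example:

    import os
    print(os.path.join('datadir', 'sitedir'))  # datadir/sitedir on POSIX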
75 changes: 42 additions & 33 deletions tests/python/integration/test_partitioning.py
@@ -1,31 +1,38 @@
 import pytest
 import os, glob
 import errno
 import shutil
-import urllib
-from distutils.dir_util import copy_tree
+import urllib.request
+from shutil import copytree
 import logging
 import time

 _log = logging.getLogger(__name__)


 @pytest.fixture(scope="module")
 def get_data():
     '''
     Utilising python to obtain sample test data. Used as
     a fixture in this module.
     '''
+    if os.path.isdir('tests/data'):
+        _log.info('Skipping sample data retrieval as sample test data directory '
+                  'already exists: ./tests/data')
+        return
+
     from zipfile import ZipFile
-    urllib.urlopen('ftp://ftp.fluxdata.org/.ameriflux_downloads/.test/US-ARc_sample_output.zip')
-    urllib.urlopen('ftp://ftp.fluxdata.org/.ameriflux_downloads/.test/US-ARc_sample_input.zip')
-
-    input_zip = "US-ARc_sample_input.zip"
-    output_zip = "US-ARc_sample_output.zip"
-
-    with ZipFile(input_zip) as zi, ZipFile(output_zip) as zo:
+    input_zip_name, headers = urllib.request.urlretrieve('ftp://ftp.fluxdata.org/.ameriflux_downloads/.test/US-ARc_sample_input.zip')
+    output_zip_name, headers = urllib.request.urlretrieve('ftp://ftp.fluxdata.org/.ameriflux_downloads/.test/US-ARc_sample_output.zip')
+
+    _log.info('successfully downloaded sample data zip files. Extracting...')
+
+    with ZipFile(input_zip_name) as zi, ZipFile(output_zip_name) as zo:
         zi.extractall(path='tests/data/test_input')
         zo.extractall(path='tests/data/test_output')

+    _log.info('sample data successfully extracted from zip files')
+

 def equal_csv(csv_1, csv_2):
     '''
     Check equality of two csv files.
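Note on the download fixture above: Python 3 moved urlopen into urllib.request, and the rewrite also fixes a latent bug, since the old code opened the FTP URLs but never saved the payload, whereas urlretrieve downloads to a local temporary file and returns its path. A standalone sketch of the call, using the same URL as the fixture:

    import urllib.request
    # urlretrieve returns (local_filename, headers); the temp file path is
    # what the fixture hands to ZipFile.
    filename, headers = urllib.request.urlretrieve(
        'ftp://ftp.fluxdata.org/.ameriflux_downloads/.test/US-ARc_sample_input.zip')
    print(filename)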
@@ -45,7 +52,7 @@ def equal_csv(csv_1, csv_2):


 @pytest.fixture
-def setup_data():
+def setup_data(get_data):
     '''
     Set up input data for run_partition_nt test.
@@ -58,21 +65,21 @@ def setup_data():
     except OSError as e:
         if e.errno == errno.EEXIST:
             print("directory exists")

     testdata = 'tests/python/integration/input/step_10/US-ARc_sample_input'
-    copy_tree('tests/data/test_input/', testdata)
+    copytree('tests/data/test_input/', testdata, dirs_exist_ok=True)

     refoutdir = 'tests/data/test_output/US-ARc_sample_output'

-    copy_tree(os.path.join(refoutdir, '07_meteo_proc'), \
-              os.path.join(testdata, '07_meteo_proc'))
-    copy_tree(os.path.join(refoutdir, '08_nee_proc'), \
-              os.path.join(testdata, '08_nee_proc/'))
-    copy_tree(os.path.join(refoutdir, '02_qc_auto'), \
-              os.path.join(testdata, '02_qc_auto/'))
+    copytree(os.path.join(refoutdir, '07_meteo_proc'), \
+             os.path.join(testdata, '07_meteo_proc'), dirs_exist_ok=True)
+    copytree(os.path.join(refoutdir, '08_nee_proc'), \
+             os.path.join(testdata, '08_nee_proc/'), dirs_exist_ok=True)
+    copytree(os.path.join(refoutdir, '02_qc_auto'), \
+             os.path.join(testdata, '02_qc_auto/'), dirs_exist_ok=True)


 def test_run_partition_nt(setup_data):
     '''
     Run partition_nt on single percentile.
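Note: distutils.dir_util.copy_tree is deprecated and removed along with distutils in Python 3.12; shutil.copytree is the replacement, and its dirs_exist_ok=True flag (added in Python 3.8, which presumably becomes the minimum supported version here) restores copy_tree's merge-into-existing-directory behaviour. A sketch with a hypothetical destination path:

    from shutil import copytree
    # Without dirs_exist_ok=True, copytree raises FileExistsError when the
    # target directory already exists; with it, contents are merged in.
    copytree('tests/data/test_input/', 'tests/tmp/sample_input',  # hypothetical dest
             dirs_exist_ok=True)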
@@ -81,33 +88,35 @@
     refoutdir = "./tests/data/test_output/"
     siteid = "US-ARc"
     sitedir = "US-ARc_sample_input"
-    years = [2005] # years = [2005, 2006]
+    years = [2005]  # years = [2005, 2006]
     # PROD_TO_COMPARE = ['c', 'y']
-    PROD_TO_COMPARE = ['y',]
+    PROD_TO_COMPARE = ['y', ]
     # PERC_TO_COMPARE = ['1.25', '3.75',]
-    PERC_TO_COMPARE = ['1.25',]
+    PERC_TO_COMPARE = ['1.25', ]

     from oneflux.tools.partition_nt import remove_previous_run, run_python
-    remove_previous_run(datadir=datadir, siteid=siteid, sitedir=sitedir, python=True,
-                        prod_to_compare=PROD_TO_COMPARE, perc_to_compare=PERC_TO_COMPARE,
+    remove_previous_run(datadir=datadir, siteid=siteid, sitedir=sitedir, python=True,
+                        prod_to_compare=PROD_TO_COMPARE,
+                        perc_to_compare=PERC_TO_COMPARE,
                         years_to_compare=years)

-    run_python(datadir=datadir, siteid=siteid, sitedir=sitedir, prod_to_compare=PROD_TO_COMPARE,
+    run_python(datadir=datadir, siteid=siteid, sitedir=sitedir,
+               prod_to_compare=PROD_TO_COMPARE,
                perc_to_compare=PERC_TO_COMPARE, years_to_compare=years)

     # check whether csv of "output" is same as csv of reference

     # the generated output is actually in the "input" directory.
     rootdir = os.path.join(datadir, sitedir, "10_nee_partition_nt")
     nee_y_files = glob.glob(os.path.join(rootdir, "nee_y_1.25_US-ARc_2005*"))
+    nee_y_files = list(filter(lambda x: not x.endswith('_orig.csv'), nee_y_files))

     # paths to the "reference" output data
     refoutdir = os.path.join(refoutdir, "US-ARc_sample_output", "10_nee_partition_nt")
     ref_nee_y_files = glob.glob(os.path.join(refoutdir, "nee_y_1.25_US-ARc_2005*"))

     assert len(nee_y_files) == len(ref_nee_y_files)
-    retval = True
+    retval = True
     for f, b in zip(nee_y_files, ref_nee_y_files):
         print(f, b)
         assert equal_csv(f, b) == True
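Note: the filter(...) result is materialised with list(...) because Python 3's filter returns a lazy iterator, which has no len() and is exhausted after a single pass; without the wrapper, the len(nee_y_files) assertion would raise TypeError. A standalone sketch with illustrative filenames:

    files = filter(lambda x: not x.endswith('_orig.csv'), ['a.csv', 'b_orig.csv'])
    try:
        len(files)              # TypeError: object of type 'filter' has no len()
    except TypeError:
        files = list(files)     # materialise once, then reuse freely
    print(files)                # ['a.csv']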