From 291d581fea7801cca35d5796460cd9dae3fb0476 Mon Sep 17 00:00:00 2001 From: Haakon Ludvig Langeland Ervik <45243236+haakon-e@users.noreply.github.com> Date: Thu, 4 Nov 2021 23:19:45 -0700 Subject: [PATCH] refactor: run short simulation for reference SCM --- src/Pipeline.jl | 5 +- src/ReferenceStats.jl | 14 ++++- src/TurbulenceConvectionUtils.jl | 100 +++++++++++++++---------------- src/helper_funcs.jl | 24 +++++--- test/NetCDFIO/runtests.jl | 2 +- test/Pipeline/config.jl | 6 +- test/Pipeline/runtests.jl | 12 ++++ test/ReferenceModels/runtests.jl | 2 +- test/ReferenceStats/runtests.jl | 2 +- 9 files changed, 99 insertions(+), 68 deletions(-) diff --git a/src/Pipeline.jl b/src/Pipeline.jl index a7af7def6..4a67010d3 100644 --- a/src/Pipeline.jl +++ b/src/Pipeline.jl @@ -86,13 +86,14 @@ function init_calibration(config::Dict{Any, Any}; mode::String = "hpc", job_id:: :Σ_t_start => Σ_t_start, :Σ_t_end => Σ_t_end, ) + # Minibatch mode if !isnothing(batch_size) @info "Training using mini-batches." ref_model_batch = construct_ref_model_batch(kwargs_ref_model) global_ref_models = deepcopy(ref_model_batch.ref_models) # Create input scm stats and namelist file if files don't already exist - run_SCM(global_ref_models, overwrite = overwrite_scm_file) + run_reference_SCM.(global_ref_models, overwrite = overwrite_scm_file) # Generate global reference statistics global_ref_stats = ReferenceStatistics( global_ref_models, @@ -109,7 +110,7 @@ function init_calibration(config::Dict{Any, Any}; mode::String = "hpc", job_id:: else ref_models = construct_reference_models(kwargs_ref_model) # Create input scm stats and namelist file if files don't already exist - run_SCM(ref_models, overwrite = overwrite_scm_file) + run_reference_SCM.(ref_models, overwrite = overwrite_scm_file) end # Generate reference statistics ref_stats = ReferenceStatistics( diff --git a/src/ReferenceStats.jl b/src/ReferenceStats.jl index d11eec8e5..d8f5b549d 100644 --- a/src/ReferenceStats.jl +++ b/src/ReferenceStats.jl @@ -327,16 +327,24 @@ Inputs: function get_time_covariance( m::ReferenceModel, var_names::Vector{String}; - z_scm::Union{Array{Float64, 1}, Nothing} = nothing, -) + z_scm::Vector{FT}, +) where FT <: Real sim_dir = Σ_dir(m) t = nc_fetch(sim_dir, "t") # Find closest interval in data ti_index = argmin(broadcast(abs, t .- get_t_start_Σ(m))) tf_index = argmin(broadcast(abs, t .- get_t_end_Σ(m))) - ts_vec = zeros(0, length(ti_index:tf_index)) + N_samples = length(ti_index:tf_index) + ts_vec = zeros(0, N_samples) num_outputs = length(var_names) pool_var = zeros(num_outputs) + Nz = length(z_scm) + if N_samples < Nz * num_outputs + @error ( + "At least $(Nz * num_outputs) samples are needed to construct a stable empirical covariance vector. "* + "$(N_samples) samples given." + ) + end for (i, var_name) in enumerate(var_names) var_ = fetch_interpolate_transform(var_name, sim_dir, z_scm) diff --git a/src/TurbulenceConvectionUtils.jl b/src/TurbulenceConvectionUtils.jl index 4d0775627..b070c3cb8 100644 --- a/src/TurbulenceConvectionUtils.jl +++ b/src/TurbulenceConvectionUtils.jl @@ -13,7 +13,7 @@ using TurbulenceConvection include(joinpath(@__DIR__, "helper_funcs.jl")) export ModelEvaluator -export run_SCM, run_SCM_handler +export run_SCM, run_SCM_handler, get_scm_namelist, run_reference_SCM export generate_scm_input, get_gcm_les_uuid export save_full_ensemble_data export precondition @@ -128,26 +128,62 @@ function run_SCM(ME::ModelEvaluator; error_check::Bool = false, namelist_args = end """ - run_SCM( - RM::Vector{ReferenceModel}; - overwrite::Bool, - ) where FT<:Real + get_scm_namelist(m::ReferenceModel; overwrite::Bool = false)::Dict + +Fetch the namelist stored in `scm_dir(m)`. +Generate a new namelist if it doesn't exist or `overwrite=true`. +""" +function get_scm_namelist(m::ReferenceModel; overwrite::Bool = false)::Dict + namelist_path = namelist_directory(scm_dir(m), m) + namelist = if ~isfile(namelist_path) | overwrite + NameList.default_namelist(m.case_name, root = scm_dir(m)) + else + JSON.parsefile(namelist_path) + end + return namelist +end + +""" + run_reference_SCM(m::ReferenceModel; overwrite::Bool = false) -Run the single-column model (SCM) for each reference model object +Run the single-column model (SCM) for a reference model object using default parameters. Inputs: - - RM :: Vector of `ReferenceModel`s - - overwrite :: if true, overwrite existing simulation files + - m :: A `ReferenceModel` + - overwrite :: if true, overwrite existing simulation files + - run_single_timestep :: if true, run only one time step Outputs: - Nothing """ -function run_SCM(RM::Vector{ReferenceModel}; overwrite::Bool = false) where {FT <: Real} - - for ref_model in RM - output_dir = scm_dir(ref_model) - if ~isdir(output_dir) | overwrite - run_SCM_handler(ref_model, dirname(output_dir)) +function run_reference_SCM(m::ReferenceModel; overwrite::Bool = false, run_single_timestep = true) + namelist = get_scm_namelist(m, overwrite = overwrite) + # prepare and run simulation + output_dir = scm_dir(m) + if ~isfile(get_stats_path(output_dir)) | overwrite + default_t_max = namelist["time_stepping"]["t_max"] + if run_single_timestep + # Run only 1 timestep -- since we don't need output data, only simulation config + namelist["time_stepping"]["t_max"] = namelist["time_stepping"]["dt"] + end + namelist["meta"]["uuid"] = uuid(m) + namelist["output"]["output_root"] = dirname(output_dir) + # if `LES_driven_SCM` case, provide input LES stats file + if m.case_name == "LES_driven_SCM" + namelist["meta"]["lesfile"] = get_stats_path(y_dir(m)) + end + # run TurbulenceConvection.jl + try + main(namelist) + catch + @warn "Default TurbulenceConvection.jl simulation failed. Verify default setup." + end + if run_single_timestep + # reset t_max to default and overwrite stored namelist file + namelist["time_stepping"]["t_max"] = default_t_max + open(namelist_directory(output_dir, m), "w") do io + JSON.print(io, namelist, 4) + end end end end @@ -257,42 +293,6 @@ function create_parameter_vectors(u_names::Vector{String}, u::Vector{FT}) where return (u_names_out, u_out) end -""" - run_SCM_handler( - m::ReferenceModel, - output_dir::String; - ) where {FT<:AbstractFloat} - -Run a case with default SCM parameters and return data -directory pointing to where data is stored for simulation run. - -Inputs: - - m :: Reference model - - output_dir :: Directory to store simulation results in -Outputs: - - output_dirs :: directory containing output data from the SCM run. -""" -function run_SCM_handler(m::ReferenceModel, output_dir::String) where {FT <: AbstractFloat} - - namelist = NameList.default_namelist(m.case_name) - # calling NameList.default_namelist writes namelist to pwd - rm("namelist_" * namelist["meta"]["casename"] * ".in") - namelist["meta"]["uuid"] = uuid(m) - # set output dir to `output_dir` - namelist["output"]["output_root"] = output_dir - # if `LES_driven_SCM` case, provide input LES stats file - if m.case_name == "LES_driven_SCM" - namelist["meta"]["lesfile"] = get_stats_path(y_dir(m)) - end - # run TurbulenceConvection.jl - try - main(namelist) - catch - @warn "Default TurbulenceConvection.jl simulation failed. Verify default setup." - end - return data_directory(output_dir, m.case_name, namelist["meta"]["uuid"]) -end - """ generate_scm_input(model_evaluator::ModelEvaluator, outdir_path::String = pwd()) diff --git a/src/helper_funcs.jl b/src/helper_funcs.jl index c2e620d49..021eb8ec4 100644 --- a/src/helper_funcs.jl +++ b/src/helper_funcs.jl @@ -215,15 +215,25 @@ function get_stats_path(dir) return stat_files[1] catch e if isa(e, AssertionError) - @warn "No unique stats netCDF file found at $dir. Extending search to other files." - stat_files = readdir(stats, join = true) # WindowsOS/julia 1.6.0 relpath bug - if length(stat_files) == 1 - return stat_files[1] - else - @error "No unique stats file found at $dir." + @warn "No unique stats netCDF file found in $stats. Extending search to other files." + try + stat_files = readdir(stats, join = true) # WindowsOS/julia 1.6.0 relpath bug + if length(stat_files) == 1 + return stat_files[1] + else + @error "No unique stats file found at $dir." + end + catch f + if isa(f, Base.IOError) + @warn "Extended search errored with: $f" + return "" + else + throw(f) + end end else - @error "An error occurred retrieving the stats path at $dir." + @warn "An error occurred retrieving the stats path at $dir. Throwing..." + throw(e) end end end diff --git a/test/NetCDFIO/runtests.jl b/test/NetCDFIO/runtests.jl index 7b2bf81cf..f56b55ff6 100644 --- a/test/NetCDFIO/runtests.jl +++ b/test/NetCDFIO/runtests.jl @@ -27,7 +27,7 @@ using EnsembleKalmanProcesses.EnsembleKalmanProcessModule ) # Generate ref_stats ref_models = construct_reference_models(kwargs_ref_model) - run_SCM(ref_models, overwrite = false) + run_reference_SCM.(ref_models, overwrite = false, run_single_timestep = false) ref_stats = ReferenceStatistics(ref_models, true, true; y_type = SCM(), Σ_type = SCM()) # Generate config config = Dict() diff --git a/test/Pipeline/config.jl b/test/Pipeline/config.jl index f77f0b67d..81909aaab 100644 --- a/test/Pipeline/config.jl +++ b/test/Pipeline/config.jl @@ -75,10 +75,10 @@ function get_reference_config(::Bomex) config["Σ_reference_type"] = SCM() config["y_names"] = [["thetal_mean", "ql_mean", "qt_mean"]] ref_root_dir = mktempdir() - config["y_dir"] = [joinpath(ref_root_dir, "Output.Bomex.000000")] - config["scm_suffix"] = ["000000"] + config["y_dir"] = [joinpath(ref_root_dir, "Output.Bomex.ref")] + config["scm_suffix"] = ["scm"] config["scm_parent_dir"] = [ref_root_dir] - config["t_start"] = [4.0 * 3600] + config["t_start"] = [2.0 * 3600] config["t_end"] = [6.0 * 3600] return config end diff --git a/test/Pipeline/runtests.jl b/test/Pipeline/runtests.jl index b66a2ae28..c83c70b30 100644 --- a/test/Pipeline/runtests.jl +++ b/test/Pipeline/runtests.jl @@ -14,6 +14,18 @@ include("config.jl") @testset "Pipeline" begin config = get_config() + # Generate reference data + ref_config = config["reference"] + ref_model = ReferenceModel( + ref_config["y_names"][1], + ref_config["y_dir"][1], + ref_config["y_dir"][1], + ref_config["case_name"][1], + ref_config["t_start"][1], + ref_config["t_end"][1], + ) + run_reference_SCM(ref_model, run_single_timestep=false) + # Initialize calibration setup init_calibration(config; mode = "hpc") res_dir_list = glob("results_*_SCM*", config["output"]["outdir_root"]) res_dir = res_dir_list[1] diff --git a/test/ReferenceModels/runtests.jl b/test/ReferenceModels/runtests.jl index 6488b5bcd..694a70156 100644 --- a/test/ReferenceModels/runtests.jl +++ b/test/ReferenceModels/runtests.jl @@ -38,7 +38,7 @@ end :Σ_t_end => [4.5 * 3600, 4.5 * 3600], ) ref_models = construct_reference_models(kwargs_ref_model) - run_SCM(ref_models, overwrite = false) + run_reference_SCM.(ref_models, overwrite = false) Δt = 5.0 * 3600 ref_model = time_shift_reference_model(ref_models[1], Δt) diff --git a/test/ReferenceStats/runtests.jl b/test/ReferenceStats/runtests.jl index fd7299a2d..8a2bb9245 100644 --- a/test/ReferenceStats/runtests.jl +++ b/test/ReferenceStats/runtests.jl @@ -18,7 +18,7 @@ using CalibrateEDMF.TurbulenceConvectionUtils :t_end => repeat([6.0 * 3600], 2), ) ref_models = construct_reference_models(kwargs_ref_model) - run_SCM(ref_models, overwrite = false) + run_reference_SCM.(ref_models, overwrite = false, run_single_timestep = false) # Test only tikhonov vs PCA and tikhonov pca_list = [false, true]