From c5f2dbd1a1af91f41a1299f0ea479b30fa545571 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Wed, 24 Apr 2024 22:26:56 +0100 Subject: [PATCH 1/9] refactor: multi-threaded copy of population --- src/Population.jl | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/Population.jl b/src/Population.jl index d61cee7f..4b776e82 100644 --- a/src/Population.jl +++ b/src/Population.jl @@ -90,8 +90,12 @@ function Population( ) end -function Base.copy(pop::P)::P where {P<:Population} - return Population([copy(pm) for pm in pop.members]) +function Base.copy(pop::P)::P where {T,L,N,P<:Population{T,L,N}} + copied_members = Vector{PopMember{T,L,N}}(undef, pop.n) + Threads.@threads for i in 1:(pop.n) + copied_members[i] = copy(pop.members[i]) + end + return Population(copied_members) end # Sample random members of the population, and make a new one From 282a255ea0361380065180b4d28c5a8657bcf0c1 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Wed, 24 Apr 2024 22:33:06 +0100 Subject: [PATCH 2/9] refactor: multi-threaded hall of fame summary --- src/SymbolicRegression.jl | 36 +++++++++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/src/SymbolicRegression.jl b/src/SymbolicRegression.jl index 87fed315..47e2bcf6 100644 --- a/src/SymbolicRegression.jl +++ b/src/SymbolicRegression.jl @@ -921,15 +921,33 @@ function _main_search_loop!( output_file = output_file * ".out$j" end # Write file twice in case exit in middle of filewrite - for out_file in (output_file, output_file * ".bkup") - open(out_file, "w") do io - println(io, "Complexity,Loss,Equation") - for member in dominating - println( - io, - "$(compute_complexity(member, options)),$(member.loss),\"" * - "$(string_tree(member.tree, options, variable_names=dataset.variable_names))\"", - ) + let + dominating_n = length(dominating) + + complexities = Vector{Int}(undef, dominating_n) + losses = Vector{L}(undef, dominating_n) + strings = Vector{String}(undef, dominating_n) + + Threads.@threads for i in 1:dominating_n + member = dominating[i] + complexities[i] = compute_complexity(member, options) + losses[i] = member.loss + strings[i] = string_tree( + member.tree, options; variable_names=dataset.variable_names + ) + end + + s = let tmp_io = IOBuffer() + println(tmp_io, "Complexity,Loss,Equation") + for i in 1:dominating_n + println(tmp_io, "$complexities[i],$losses[i],\"$strings[i]\"") + end + + String(take!(tmp_io)) + end + for out_file in (output_file, output_file * ".bkup") + open(out_file, "w") do io + write(io, s) end end end From 2ce5df7027e275f9535f9219a71353008f7c91b4 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Wed, 24 Apr 2024 23:03:25 +0100 Subject: [PATCH 3/9] refactor: multi-threaded population simplify --- src/SingleIteration.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/SingleIteration.jl b/src/SingleIteration.jl index 20911694..be523e5a 100644 --- a/src/SingleIteration.jl +++ b/src/SingleIteration.jl @@ -108,7 +108,7 @@ function optimize_and_simplify_population( )::Tuple{P,Float64} where {T,L,D<:Dataset{T,L},P<:Population{T,L}} array_num_evals = zeros(Float64, pop.n) do_optimization = rand(pop.n) .< options.optimizer_probability - for j in 1:(pop.n) + Threads.@threads for j in 1:(pop.n) if options.should_simplify tree = pop.members[j].tree tree = simplify_tree!(tree, options.operators) From 78a695dc0e0e6e1cceb3dc7cf1258ad1207ffdae Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Wed, 24 Apr 2024 23:07:22 +0100 Subject: [PATCH 4/9] docs: raise level for warning about node occupation --- src/SearchUtils.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/SearchUtils.jl b/src/SearchUtils.jl index a336cb29..e08aca3f 100644 --- a/src/SearchUtils.jl +++ b/src/SearchUtils.jl @@ -270,7 +270,7 @@ function get_load_string(; head_node_occupation::Float64, parallelism=:serial) parallelism == :serial && return "" out = @sprintf("Head worker occupation: %.1f%%", head_node_occupation * 100) - raise_usage_warning = head_node_occupation > 0.2 + raise_usage_warning = head_node_occupation > 0.4 if raise_usage_warning out *= "." out *= " This is high, and will prevent efficient resource usage." From 1eeff15417f0dad47c21622b3b126c60cff6af6b Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Wed, 24 Apr 2024 23:12:44 +0100 Subject: [PATCH 5/9] ci: remove macOS 1.6 test --- .github/workflows/CI.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 958a4e06..5911adea 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -37,10 +37,13 @@ jobs: - "1" os: - ubuntu-latest - - macOS-latest include: - os: windows-latest julia-version: "1" + - os: macOS-latest + julia-version: "1" + - os: macOS-latest + julia-version: "1.8" - os: ubuntu-latest julia-version: "~1.11.0-0" From 1babc81758fa24e8d46f0cb62bf9a951c491f3cf Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Thu, 25 Apr 2024 01:10:23 +0100 Subject: [PATCH 6/9] fix: avoid parallelism on deterministic mode --- src/SingleIteration.jl | 3 ++- src/Utils.jl | 10 ++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/src/SingleIteration.jl b/src/SingleIteration.jl index be523e5a..e99b4bcd 100644 --- a/src/SingleIteration.jl +++ b/src/SingleIteration.jl @@ -7,6 +7,7 @@ using DynamicExpressions: string_tree, simplify_tree!, combine_operators +using ..UtilsModule: @threads_if using ..CoreModule: Options, Dataset, RecordType, DATA_TYPE, LOSS_TYPE using ..ComplexityModule: compute_complexity using ..PopMemberModule: PopMember, generate_reference @@ -108,7 +109,7 @@ function optimize_and_simplify_population( )::Tuple{P,Float64} where {T,L,D<:Dataset{T,L},P<:Population{T,L}} array_num_evals = zeros(Float64, pop.n) do_optimization = rand(pop.n) .< options.optimizer_probability - Threads.@threads for j in 1:(pop.n) + @threads_if !(options.deterministic) for j in 1:(pop.n) if options.should_simplify tree = pop.members[j].tree tree = simplify_tree!(tree, options.operators) diff --git a/src/Utils.jl b/src/Utils.jl index 8cbeb2ac..3b0292e0 100644 --- a/src/Utils.jl +++ b/src/Utils.jl @@ -148,6 +148,16 @@ function poisson_sample(λ::T) where {T} return k - 1 end +macro threads_if(flag, ex) + return quote + if $flag + Threads.@threads $ex + else + $ex + end + end |> esc +end + """ @save_kwargs variable function ... end From 6690ee38381c05237afe6eb681b4ec1dce7fcdf9 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Thu, 25 Apr 2024 01:15:56 +0100 Subject: [PATCH 7/9] refactor: move file saving to SearchUtils.jl --- src/SearchUtils.jl | 44 ++++++++++++++++++++++++++++++++++++++- src/SymbolicRegression.jl | 37 ++------------------------------ 2 files changed, 45 insertions(+), 36 deletions(-) diff --git a/src/SearchUtils.jl b/src/SearchUtils.jl index e08aca3f..0698df98 100644 --- a/src/SearchUtils.jl +++ b/src/SearchUtils.jl @@ -7,7 +7,7 @@ using Printf: @printf, @sprintf using Distributed using StatsBase: mean -using DynamicExpressions: AbstractExpressionNode +using DynamicExpressions: AbstractExpressionNode, string_tree using ..UtilsModule: subscriptify using ..CoreModule: Dataset, Options, MAX_DEGREE, RecordType using ..ComplexityModule: compute_complexity @@ -405,6 +405,48 @@ Base.@kwdef struct SearchState{ record::Base.RefValue{RecordType} end +function save_to_file( + dominating, j::Integer, dataset::Dataset{T,L}, options::Options +) where {T,L} + output_file = options.output_file + if nout > 1 + output_file = output_file * ".out$j" + end + dominating_n = length(dominating) + + complexities = Vector{Int}(undef, dominating_n) + losses = Vector{L}(undef, dominating_n) + strings = Vector{String}(undef, dominating_n) + + Threads.@threads for i in 1:dominating_n + member = dominating[i] + complexities[i] = compute_complexity(member, options) + losses[i] = member.loss + strings[i] = string_tree( + member.tree, options; variable_names=dataset.variable_names + ) + end + + s = let + tmp_io = IOBuffer() + + println(tmp_io, "Complexity,Loss,Equation") + for i in 1:dominating_n + println(tmp_io, "$(complexities[i]),$(losses[i]),\"$(strings[i])\"") + end + + String(take!(tmp_io)) + end + + # Write file twice in case exit in middle of filewrite + for out_file in (output_file, output_file * ".bkup") + open(out_file, "w") do io + write(io, s) + end + end + return nothing +end + """ get_cur_maxsize(; options, total_cycles, cycles_remaining) diff --git a/src/SymbolicRegression.jl b/src/SymbolicRegression.jl index 47e2bcf6..26132e95 100644 --- a/src/SymbolicRegression.jl +++ b/src/SymbolicRegression.jl @@ -251,6 +251,7 @@ using .SearchUtilsModule: load_saved_hall_of_fame, load_saved_population, construct_datasets, + save_to_file, get_cur_maxsize, update_hall_of_fame! @@ -916,41 +917,7 @@ function _main_search_loop!( dominating = calculate_pareto_frontier(state.halls_of_fame[j]) if options.save_to_file - output_file = options.output_file - if nout > 1 - output_file = output_file * ".out$j" - end - # Write file twice in case exit in middle of filewrite - let - dominating_n = length(dominating) - - complexities = Vector{Int}(undef, dominating_n) - losses = Vector{L}(undef, dominating_n) - strings = Vector{String}(undef, dominating_n) - - Threads.@threads for i in 1:dominating_n - member = dominating[i] - complexities[i] = compute_complexity(member, options) - losses[i] = member.loss - strings[i] = string_tree( - member.tree, options; variable_names=dataset.variable_names - ) - end - - s = let tmp_io = IOBuffer() - println(tmp_io, "Complexity,Loss,Equation") - for i in 1:dominating_n - println(tmp_io, "$complexities[i],$losses[i],\"$strings[i]\"") - end - - String(take!(tmp_io)) - end - for out_file in (output_file, output_file * ".bkup") - open(out_file, "w") do io - write(io, s) - end - end - end + save_to_file(dominating, j, dataset, options) end ################################################################### # Migration ####################################################### From 606a7f8a757b0a3aae9827d2355b8f3bf201b970 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Thu, 25 Apr 2024 01:51:04 +0100 Subject: [PATCH 8/9] fix: add missing `nout` in save_to_file --- src/SearchUtils.jl | 2 +- src/SymbolicRegression.jl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/SearchUtils.jl b/src/SearchUtils.jl index 0698df98..ad3d905f 100644 --- a/src/SearchUtils.jl +++ b/src/SearchUtils.jl @@ -406,7 +406,7 @@ Base.@kwdef struct SearchState{ end function save_to_file( - dominating, j::Integer, dataset::Dataset{T,L}, options::Options + dominating, nout::Integer, j::Integer, dataset::Dataset{T,L}, options::Options ) where {T,L} output_file = options.output_file if nout > 1 diff --git a/src/SymbolicRegression.jl b/src/SymbolicRegression.jl index 26132e95..84ab4063 100644 --- a/src/SymbolicRegression.jl +++ b/src/SymbolicRegression.jl @@ -917,7 +917,7 @@ function _main_search_loop!( dominating = calculate_pareto_frontier(state.halls_of_fame[j]) if options.save_to_file - save_to_file(dominating, j, dataset, options) + save_to_file(dominating, nout, j, dataset, options) end ################################################################### # Migration ####################################################### From a34f40af0bca1797efadf7ae5773fad3dea54db3 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Thu, 25 Apr 2024 21:00:47 +0100 Subject: [PATCH 9/9] ci: remove macOS 1.8 test --- .github/workflows/CI.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 5911adea..263e37c7 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -42,8 +42,6 @@ jobs: julia-version: "1" - os: macOS-latest julia-version: "1" - - os: macOS-latest - julia-version: "1.8" - os: ubuntu-latest julia-version: "~1.11.0-0"