diff --git a/docs/src/examples.md b/docs/src/examples.md index 6d4dfe4a..d6f785f0 100644 --- a/docs/src/examples.md +++ b/docs/src/examples.md @@ -225,6 +225,9 @@ which can cancel out other units in the expression.) For example, would indicate that the expression is dimensionally consistent, with a constant `"2.6353e-22[m s⁻²]"`. +Note that you can also restrict the search to dimensionless constants by setting +`dimensionless_constants_only` to `true`. + ## 7. Additional features For the many other features available in SymbolicRegression.jl, diff --git a/src/DimensionalAnalysis.jl b/src/DimensionalAnalysis.jl index a748f432..511492f8 100644 --- a/src/DimensionalAnalysis.jl +++ b/src/DimensionalAnalysis.jl @@ -116,12 +116,18 @@ end # Define dimensionally-aware evaluation routine: @inline function deg0_eval( - x::AbstractVector{T}, x_units::Vector{Q}, t::AbstractExpressionNode{T} + x::AbstractVector{T}, + x_units::Vector{Q}, + t::AbstractExpressionNode{T}, + allow_wildcards::Bool, ) where {T,R,Q<:AbstractQuantity{T,R}} - t.constant && return WildcardQuantity{Q}(Quantity(t.val, R), true, false) - return WildcardQuantity{Q}( - (@inbounds x[t.feature]) * (@inbounds x_units[t.feature]), false, false - ) + if t.constant + return WildcardQuantity{Q}(Quantity(t.val, R), allow_wildcards, false) + else + return WildcardQuantity{Q}( + (@inbounds x[t.feature]) * (@inbounds x_units[t.feature]), false, false + ) + end end @inline function deg1_eval( op::F, l::W @@ -149,16 +155,26 @@ end end function violates_dimensional_constraints_dispatch( - tree::AbstractExpressionNode{T}, x_units::Vector{Q}, x::AbstractVector{T}, operators + tree::AbstractExpressionNode{T}, + x_units::Vector{Q}, + x::AbstractVector{T}, + operators, + allow_wildcards, ) where {T,Q<:AbstractQuantity{T}} if tree.degree == 0 - return deg0_eval(x, x_units, tree)::WildcardQuantity{Q} + return deg0_eval(x, x_units, tree, allow_wildcards)::WildcardQuantity{Q} elseif tree.degree == 1 - l = 
violates_dimensional_constraints_dispatch(tree.l, x_units, x, operators) + l = violates_dimensional_constraints_dispatch( + tree.l, x_units, x, operators, allow_wildcards + ) return deg1_eval((@inbounds operators.unaops[tree.op]), l)::WildcardQuantity{Q} else - l = violates_dimensional_constraints_dispatch(tree.l, x_units, x, operators) - r = violates_dimensional_constraints_dispatch(tree.r, x_units, x, operators) + l = violates_dimensional_constraints_dispatch( + tree.l, x_units, x, operators, allow_wildcards + ) + r = violates_dimensional_constraints_dispatch( + tree.r, x_units, x, operators, allow_wildcards + ) return deg2_eval((@inbounds operators.binops[tree.op]), l, r)::WildcardQuantity{Q} end end @@ -186,8 +202,9 @@ function violates_dimensional_constraints( if X_units === nothing && y_units === nothing return false end + allow_wildcards = !(options.dimensionless_constants_only) dimensional_output = violates_dimensional_constraints_dispatch( - tree, X_units, x, options.operators + tree, X_units, x, options.operators, allow_wildcards ) # ^ Eventually do this with map_treereduce. However, right now it seems # like we are passing around too many arguments, which slows things down. diff --git a/src/InterfaceDynamicExpressions.jl b/src/InterfaceDynamicExpressions.jl index ac10c67b..20e62f48 100644 --- a/src/InterfaceDynamicExpressions.jl +++ b/src/InterfaceDynamicExpressions.jl @@ -170,7 +170,11 @@ Convert an equation to a string. tree, options.operators; f_variable=(feature, vname) -> string_variable(feature, vname, X_sym_units), - f_constant=(val,) -> string_constant(val, vprecision, WILDCARD_UNIT_STRING), + f_constant=let + unit_placeholder = + options.dimensionless_constants_only ? 
"" : WILDCARD_UNIT_STRING + (val,) -> string_constant(val, vprecision, unit_placeholder) + end, variable_names=display_variable_names, kws..., ) diff --git a/src/Options.jl b/src/Options.jl index bb409acd..239b32ad 100644 --- a/src/Options.jl +++ b/src/Options.jl @@ -277,6 +277,8 @@ const OPTION_DESCRIPTIONS = """- `binary_operators`: Vector of binary operators punished. - `dimensional_constraint_penalty`: An additive factor if the dimensional constraint is violated. +- `dimensionless_constants_only`: Whether to only allow dimensionless + constants. - `use_frequency`: Whether to use a parsimony that adapts to the relative proportion of equations at each complexity; this will ensure that there are a balanced number of equations considered @@ -387,6 +389,7 @@ function Options end complexity_of_variables::Union{Nothing,Real}=nothing, parsimony::Real=0.0032, dimensional_constraint_penalty::Union{Nothing,Real}=nothing, + dimensionless_constants_only::Bool=false, alpha::Real=0.100000, maxsize::Integer=20, maxdepth::Union{Nothing,Integer}=nothing, @@ -780,6 +783,7 @@ function Options end tournament_selection_weights, parsimony, dimensional_constraint_penalty, + dimensionless_constants_only, alpha, maxsize, maxdepth, diff --git a/src/OptionsStruct.jl b/src/OptionsStruct.jl index 01216cf7..ba73d42b 100644 --- a/src/OptionsStruct.jl +++ b/src/OptionsStruct.jl @@ -59,6 +59,7 @@ struct Options{ tournament_selection_weights::W parsimony::Float32 dimensional_constraint_penalty::Union{Float32,Nothing} + dimensionless_constants_only::Bool alpha::Float32 maxsize::Int maxdepth::Int diff --git a/test/test_units.jl b/test/test_units.jl index 0a677812..ae0855a3 100644 --- a/test/test_units.jl +++ b/test/test_units.jl @@ -33,6 +33,7 @@ using DynamicQuantities: using Test using MLJBase: MLJBase as MLJ using MLJModelInterface: MLJModelInterface as MMI +include("utils.jl") custom_op(x, y) = x + y @@ -369,6 +370,57 @@ end X_sym_units=dataset2.X_sym_units, y_sym_units=dataset2.y_sym_units, ) 
== "x₅[5.0 m] * 3.2[?]" + + # With dimensionless_constants_only, it will not print the [?]: + options = Options(; + binary_operators=[+, -, *, /], + unary_operators=[cos, sin], + dimensionless_constants_only=true, + ) + @test string_tree( + x5 * 3.2, + options; + raw=false, + display_variable_names=dataset2.display_variable_names, + X_sym_units=dataset2.X_sym_units, + y_sym_units=dataset2.y_sym_units, + ) == "x₅[5.0 m] * 3.2" +end + +@testset "Dimensionless constants" begin + options = Options(; + binary_operators=[+, -, *, /, square, cube], + unary_operators=[cos, sin], + dimensionless_constants_only=true, + ) + X = randn(5, 64) + y = randn(64) + dataset = Dataset(X, y; X_units=["m^3", "km/s", "kg", "hr", "1"], y_units="kg") + x1, x2, x3, x4, x5 = [Node(Float64; feature=i) for i in 1:5] + + dimensionally_valid_equations = [ + 1.5 * x1 / (cube(x2) * cube(x4)) * x3, x3, (square(x3) / x3) + x3 + ] + for tree in dimensionally_valid_equations + onfail(@test !violates_dimensional_constraints(tree, dataset, options)) do + @warn "Failed on" tree + end + end + dimensionally_invalid_equations = [Node(Float64; val=1.5), 1.5 * x1, x3 - 1.0 * x1] + for tree in dimensionally_invalid_equations + onfail(@test violates_dimensional_constraints(tree, dataset, options)) do + @warn "Failed on" tree + end + end + # But, all of these would be fine if we allow constants to carry units: + let + options = Options(; binary_operators=[+, -, *, /], unary_operators=[cos, sin]) + for tree in dimensionally_invalid_equations + onfail(@test !violates_dimensional_constraints(tree, dataset, options)) do + @warn "Failed on" tree + end + end + end end @testset "Miscellaneous" begin diff --git a/test/utils.jl b/test/utils.jl new file mode 100644 index 00000000..35da7639 --- /dev/null +++ b/test/utils.jl @@ -0,0 +1,2 @@ +onfail(f, ::Test.Fail) = f() +onfail(_, ::Test.Pass) = nothing