JuliaDynamics · kahaaga · Jan 16, 2024 · Jan 15, 2024 · Jan 16, 2024 · Jan 16, 2024
diff --git a/docs/refs.bib b/docs/refs.bib
@@ -18,6 +18,17 @@ @article{Li2019
   publisher={MDPI}
 }
 
+@article{Manis2017,
+  title={Bubble entropy: An entropy almost free of parameters},
+  author={Manis, George and Aktaruzzaman, MD and Sassi, Roberto},
+  journal={IEEE Transactions on Biomedical Engineering},
+  volume={64},
+  number={11},
+  pages={2711--2718},
+  year={2017},
+  publisher={IEEE}
+}
+
 @article{Zhou2023,
   title={Using missing dispersion patterns to detect determinism and nonlinearity in time series data},
   author={Zhou, Qin and Shang, Pengjian and Zhang, Boyi},

diff --git a/docs/src/complexity.md b/docs/src/complexity.md
@@ -50,3 +50,9 @@ entropy_complexity_curves
 ```@docs
 LempelZiv76
 ```
+
+## Bubble entropy
+
+```@docs
+BubbleEntropy
+```
diff --git a/docs/src/probabilities.md b/docs/src/probabilities.md
@@ -86,6 +86,12 @@ CosineSimilarityBinning
 Diversity
 ```
 
+### Bubble sort swaps
+
+```@docs
+BubbleSortSwaps
+```
+
 ### Spatial outcome spaces
 
 ```@docs
@@ -154,5 +160,6 @@ RectangularBinEncoding
 RelativeMeanEncoding
 RelativeFirstDifferenceEncoding
 UniqueElementsEncoding
+BubbleSortSwapsEncoding
 CombinationEncoding
 ```
diff --git a/src/complexity_measures/bubble_entropy.jl b/src/complexity_measures/bubble_entropy.jl
@@ -0,0 +1,72 @@
+export BubbleEntropy
+
+"""
+    BubbleEntropy <: ComplexityEstimator
+    BubbleEntropy(; m = 3, τ = 1, definition = Renyi(q = 2))
+
+The `BubbleEntropy` complexity estimator [Manis2017](@cite) is just a scaled difference
+between two entropies, each computed with the [`BubbleSortSwaps`](@ref) outcome space, for
+embedding dimensions `m + 1` and `m`, respectively. 
+
+[Manis2017](@citet) use the [`Renyi`](@ref) entropy of order `q = 2` as the 
+information measure `definition`, but here you can use any [`InformationMeasure`](@ref).
+
+## Definition
+
+For input data `x`, the "bubble entropy" is computed by embedding `x` twice, using
+embedding dimensions `m` and `m + 1` (both with embedding delay `τ`), and then computing
+the difference between the two resulting entropies:
+
+```math
+BubbleEn_T(τ) = H_T(x, m + 1) - H_T(x, m)
+```
+
+where ``H_T(x, k)`` is the entropy of type ``T`` (e.g. [`Renyi`](@ref)) computed with
+the input data `x` embedded to dimension ``k``. Use [`complexity`](@ref)
+to compute this non-normalized version. Use [`complexity_normalized`](@ref) to
+compute the normalized difference (as in [Manis2017](@citet)):
+
+```math
+BubbleEn_H(τ) = \\dfrac{H_T(x, m + 1) - H_T(x, m)}{\\max(H_T(x, m + 1)) - \\max(H_T(x, m))},
+```
+
+where the maximum of the entropies for dimensions `m` and `m + 1` are computed using
+[`information_maximum`](@ref).
+
+## Example
+
+```julia
+using ComplexityMeasures
+x = rand(1000)
+est = BubbleEntropy(m = 5, τ = 3)
+complexity(est, x)
+```
+"""
+Base.@kwdef struct BubbleEntropy{M, T, D} <: ComplexityEstimator
+    m::M = 3 # embedding dimension; entropies are computed for `m` and `m + 1`
+    τ::T = 1 # embedding delay
+    definition::D = Renyi(q = 2) # information measure whose difference is taken
+end
+
+# Compute the (non-normalized) bubble entropy of `x`: the difference between the
+# entropies (of type `est.definition`) over the `BubbleSortSwaps` outcome space for
+# embedding dimensions `est.m + 1` and `est.m`.
+function complexity(est::BubbleEntropy, x)
+    # Forward the embedding delay too; otherwise a user-provided `τ` is silently ignored
+    # and the default delay of `BubbleSortSwaps` is used instead.
+    o_m = BubbleSortSwaps(m = est.m, τ = est.τ)
+    o_m⁺¹ = BubbleSortSwaps(m = est.m + 1, τ = est.τ)
+    h_m = information(est.definition, o_m, x)
+    h_m⁺¹ = information(est.definition, o_m⁺¹, x)
+    return h_m⁺¹ - h_m
+end
+
+# Compute the normalized bubble entropy of `x`: the entropy difference between embedding
+# dimensions `est.m + 1` and `est.m`, divided by the difference between the corresponding
+# maximum entropies.
+function complexity_normalized(est::BubbleEntropy, x)
+    # Forward the embedding delay too; otherwise a user-provided `τ` is silently ignored
+    # and the default delay of `BubbleSortSwaps` is used instead.
+    o_m = BubbleSortSwaps(m = est.m, τ = est.τ)
+    o_m⁺¹ = BubbleSortSwaps(m = est.m + 1, τ = est.τ)
+    h_m = information(est.definition, o_m, x)
+    h_m⁺¹ = information(est.definition, o_m⁺¹, x)
+
+    # The normalization factor as (presumably) described in Manis et al. (2017); their
+    # description is not entirely clear. It is the difference between the maximum
+    # entropies for dimensions `m + 1` and `m`.
+    h_max_m = information_maximum(est.definition, o_m, x)
+    h_max_m⁺¹ = information_maximum(est.definition, o_m⁺¹, x)
+    norm_factor = h_max_m⁺¹ - h_max_m
+
+    return (h_m⁺¹ - h_m) / norm_factor
+end
diff --git a/src/complexity_measures/complexity_measures.jl b/src/complexity_measures/complexity_measures.jl
@@ -4,3 +4,4 @@ include("reverse_dispersion_entropy.jl")
 include("missing_dispersion.jl")
 include("statistical_complexity.jl")
 include("lempel_ziv.jl")
+include("bubble_entropy.jl")
diff --git a/src/encoding_implementations/bubble_sort_swaps_encoding.jl b/src/encoding_implementations/bubble_sort_swaps_encoding.jl
@@ -0,0 +1,66 @@
+using StaticArrays
+export BubbleSortSwapsEncoding
+"""
+    BubbleSortSwapsEncoding <: Encoding
+    BubbleSortSwapsEncoding{m}()
+
+`BubbleSortSwapsEncoding` is used with [`encode`](@ref) to encode a length-`m` input
+vector `x` into an integer in the range `ω ∈ 0:((m*(m-1)) ÷ 2)`, by counting the number
+of swaps required for the bubble sort algorithm to sort `x` in ascending order.
+
+[`decode`](@ref) is not implemented for this encoding.
+
+## Example
+
+```julia
+using ComplexityMeasures
+x = [1, 5, 3, 1, 2]
+e = BubbleSortSwapsEncoding{5}() # constructor type argument must match length of vector 
+encode(e, x)
+```
+"""
+struct BubbleSortSwapsEncoding{m, V <: AbstractVector} <: Encoding
+    # Scratch vector: `n_swaps_for_bubblesort` copies each input into `x` and sorts it
+    # there while counting swaps.
+    x::V
+end
+
+# Build an encoding for length-`m` input vectors, preallocating the scratch vector in
+# which the swap counting sorts.
+function BubbleSortSwapsEncoding{m}() where {m}
+    # An `MVector` for `m < 100`, a plain `Vector{Float64}` otherwise.
+    buffer = m < 100 ? zero(MVector{m, Float64}) : zeros(m)
+    return BubbleSortSwapsEncoding{m, typeof(buffer)}(buffer)
+end
+
+# Encode `x` as the number of swaps bubble sort needs to sort it in ascending order.
+encode(encoding::BubbleSortSwapsEncoding, x::AbstractVector) =
+    n_swaps_for_bubblesort(encoding, x)
+
+# Count the swaps a naive bubble sort performs when sorting `state_vector` in ascending
+# order. The values are first copied into the scratch vector `encoding.x`, and the
+# sorting happens there. Returns the total number of swaps as an integer.
+function n_swaps_for_bubblesort(encoding::BubbleSortSwapsEncoding, state_vector)
+    (; x) = encoding
+    x .= state_vector
+    L = length(state_vector)
+    n = 0
+    while true
+        n_swaps = 0
+        for j in 1:(L - 1)
+            if x[j] > x[j + 1]
+                # Bubble the larger element one position to the right.
+                x[j], x[j + 1] = x[j + 1], x[j]
+                n_swaps += 1
+            end
+        end
+        # A full pass without any swap means `x` is sorted.
+        iszero(n_swaps) && return n
+        n += n_swaps
+    end
+end
+
+# there's no meaningful way to define `decode`, so it is not implemented.
diff --git a/src/encoding_implementations/encoding_implementations.jl b/src/encoding_implementations/encoding_implementations.jl
@@ -5,4 +5,5 @@ include("ordinal_pattern.jl")
 include("relative_mean_encoding.jl")
 include("relative_first_difference_encoding.jl")
 include("unique_elements_encoding.jl")
+include("bubble_sort_swaps_encoding.jl")
 include("combination_encoding.jl")
diff --git a/src/outcome_spaces/bubble_sort_swaps.jl b/src/outcome_spaces/bubble_sort_swaps.jl
@@ -0,0 +1,76 @@
+export BubbleSortSwaps
+
+"""
+    BubbleSortSwaps <: CountBasedOutcomeSpace
+    BubbleSortSwaps(; m = 3, τ = 1)
+
+The `BubbleSortSwaps` outcome space is based on [Manis2017](@citet)'s 
+paper on "bubble entropy". 
+
+## Description
+
+`BubbleSortSwaps` does the following:
+
+- Embeds the input data using embedding dimension `m` and embedding lag `τ`.
+- For each state vector in the embedding, counts how many swaps are necessary for
+    the bubble sort algorithm to sort the state vector.
+
+For [`counts_and_outcomes`](@ref), we then define a distribution over the number of 
+necessary swaps. This distribution can then be used to estimate probabilities using 
+[`probabilities_and_outcomes`](@ref), which again can be used to estimate any 
+[`InformationMeasure`](@ref). An example of how to compute the "Shannon bubble entropy"
+is given below.
+
+## Outcome space
+
+The [`outcome_space`](@ref) for `BubbleSortSwaps` are the integers
+`0:N`, where `N = (m * (m - 1)) / 2` (the worst-case number of swaps). Hence,
+the number of [`total_outcomes`](@ref) is `N + 1`.
+
+## Implements 
+
+- [`codify`](@ref). Returns the number of swaps required for each embedded state vector.
+
+## Examples
+
+With the `BubbleSortSwaps` outcome space, we can easily compute a "bubble entropy"
+inspired by [Manis2017](@cite). Note: this is not actually a new entropy - it is just 
+a new way of discretizing the input data. To reproduce the bubble entropy measure
+from [Manis2017](@cite), see [`BubbleEntropy`](@ref).
+
+```julia
+using ComplexityMeasures
+x = rand(100000)
+o = BubbleSortSwaps(; m = 5) # 5-dimensional embedding vectors
+probs, outs = probabilities_and_outcomes(o, x)
+information(Shannon(; base = 2), probs)
+
+# Equivalent, without the extra step of storing the probabilities
+information(Shannon(; base = 2), o, x)
+
+# We can also compute any other "bubble quantity", for example the 
+# "Tsallis bubble extropy", with arbitrary probabilities estimators:
+information(TsallisExtropy(), BayesianRegularization(), o, x)
+```
+"""
+Base.@kwdef struct BubbleSortSwaps{M, T} <: CountBasedOutcomeSpace
+    m::M = 3 # embedding dimension, i.e. the length of each state vector that is sorted
+    τ::T = 1 # embedding lag
+end
+
+# At most `m * (m - 1) / 2` swaps are possible. Needing no swap at all is an outcome
+# too, hence the `+ 1`.
+function total_outcomes(o::BubbleSortSwaps)
+    max_swaps = round(Int, (o.m * (o.m - 1)) / 2)
+    return max_swaps + 1
+end
+total_outcomes(o::BubbleSortSwaps, x) = total_outcomes(o)
+outcome_space(o::BubbleSortSwaps) = collect(0:(total_outcomes(o) - 1))
+
+# Count how often each number of swaps occurs among the embedded state vectors of `x`.
+counts_and_outcomes(o::BubbleSortSwaps, x) =
+    counts_and_outcomes(UniqueElements(), codify(o, x))
+
+# Return, for every state vector in the embedding of `x`, its number of bubble sort swaps.
+function codify(o::BubbleSortSwaps, x)
+    (; m, τ) = o
+    points = embed(x, m, τ).data
+    encoding = BubbleSortSwapsEncoding{m}()
+    return map(pt -> encode(encoding, pt), points)
+end
diff --git a/src/outcome_spaces/outcome_spaces.jl b/src/outcome_spaces/outcome_spaces.jl
@@ -7,4 +7,5 @@ include("wavelet_overlap.jl")
 include("transfer_operator/transfer_operator.jl")
 include("dispersion.jl")
 include("cosine_similarity_binning.jl")
+include("bubble_sort_swaps.jl")
 include("spatial/spatial.jl")
diff --git a/test/complexity/complexity.jl b/test/complexity/complexity.jl
@@ -6,6 +6,7 @@
     testfile("measures/entropy_sample.jl")
     testfile("measures/statistical_complexity.jl")
     testfile("measures/lempel_ziv.jl")
+    testfile("measures/entropy_bubble.jl")
 
     testfile("missing_outcomes.jl")
 end
diff --git a/test/complexity/measures/entropy_bubble.jl b/test/complexity/measures/entropy_bubble.jl
@@ -0,0 +1,7 @@
+using Test, ComplexityMeasures
+using Random; rng = MersenneTwister(1234)
+
+x = rand(rng, 1000)
+est = BubbleEntropy(m = 5)
+# The non-normalized measure is a difference of entropies; only its type is checked.
+@test complexity(est, x) isa Real
+# The normalized measure is expected to lie in the unit interval for this input.
+@test 0.0 <= complexity_normalized(est, x) <= 1.0
diff --git a/test/complexity/measures/statistical_complexity.jl b/test/complexity/measures/statistical_complexity.jl
@@ -99,6 +99,7 @@ end
     x = randn(rng, 10000)
 
     # As with regular entropy, for extropy, the edge case of noise should be close to zeros
+    m, τ = 6, 1
     c = StatisticalComplexity(
         dist=JSDivergence(),
         est=OrdinalPatterns(; m, τ),

diff --git a/test/encodings/encodings.jl b/test/encodings/encodings.jl
@@ -5,4 +5,5 @@ testfile("encodings/gaussian_cdf_encoding.jl")
 testfile("encodings/ordinal_pattern_encoding.jl")
 testfile("encodings/rectangular_bin_encoding.jl")
 testfile("encodings/unique_elements_encoding.jl")
+testfile("encodings/bubble_sort_swaps_encoding.jl")
 testfile("encodings/combination_encoding.jl")
diff --git a/test/encodings/encodings/bubble_sort_swaps_encoding.jl b/test/encodings/encodings/bubble_sort_swaps_encoding.jl
@@ -0,0 +1,10 @@
+using Test, ComplexityMeasures
+using Random; rng = MersenneTwister(1234)
+using DelayEmbeddings
+
+# Draw from the seeded `rng` so that the test is reproducible.
+x = rand(rng, 10000)
+m = 5
+x_embed = embed(x, m, 1)
+encoding = BubbleSortSwapsEncoding{m}()
+symbols = encode.(Ref(encoding), x_embed.data)
+# The number of swaps can never exceed the worst case `m * (m - 1) / 2`.
+@test all(0 .<= symbols .<= (m * (m - 1)) ÷ 2)
+
+# Deterministic edge cases: sorted input needs no swaps, reversed input needs the most.
+@test encode(encoding, [1, 2, 3, 4, 5]) == 0
+@test encode(encoding, [5, 4, 3, 2, 1]) == (m * (m - 1)) ÷ 2
diff --git a/test/outcome_spaces/implementations/bubble_sort_swaps.jl b/test/outcome_spaces/implementations/bubble_sort_swaps.jl
@@ -0,0 +1,21 @@
+using ComplexityMeasures, Test
+using Random; rng = MersenneTwister(1234)
+
+# Constructor
+@test BubbleSortSwaps(; m = 3, τ = 1) isa BubbleSortSwaps
+@test BubbleSortSwaps(; m = 3, τ = 1) isa ComplexityMeasures.CountBasedOutcomeSpace
+
+# Codify
+x = rand(rng, 100000) # enough points to cover the outcome space for small `m`
+m = 3
+o = BubbleSortSwaps(; m = m, τ = 1)
+observed_outs = codify(o, x)
+# One code per embedded state vector: `m - 1` fewer than there are input points.
+@test length(observed_outs) == length(x) - (m - 1)
+
+# Outcomes
+# Same parameters as the outcome space above (`m` is still 3 here).
+o = BubbleSortSwaps(; m = 3, τ = 1)
+cts, outs = counts_and_outcomes(o, x)
+# Worst-case number of swaps, plus one for the "no swaps needed" outcome.
+@test total_outcomes(o) ==  (m * (m - 1) / 2) + 1
+@test total_outcomes(o, x) ==  total_outcomes(o)
+@test outcome_space(o) == collect(0:(total_outcomes(o) - 1)) # 0 included, so 1 less
+@test outs == outcome_space(o) # should be enough points in `x` to be true
diff --git a/test/runtests.jl b/test/runtests.jl
@@ -19,6 +19,7 @@ testfile(file, testname=defaultname(file)) = @testset "$testname" begin; include
     testfile("outcome_spaces/implementations/timescales.jl")
     testfile("outcome_spaces/implementations/dispersion.jl")
     testfile("outcome_spaces/implementations/cosine_similarity_binning.jl")
+    testfile("outcome_spaces/implementations/bubble_sort_swaps.jl")
     testfile("outcome_spaces/implementations/spatial/spatial_ordinal_patterns.jl")
     testfile("outcome_spaces/implementations/spatial/spatial_dispersion.jl")