diff --git a/example/Burgers/src/Burgers_deeponet.jl b/example/Burgers/src/Burgers_deeponet.jl index 8688d250..4ef3adb5 100644 --- a/example/Burgers/src/Burgers_deeponet.jl +++ b/example/Burgers/src/Burgers_deeponet.jl @@ -1,32 +1,34 @@ -function train_don() - # if has_cuda() - # @info "CUDA is on" - # device = gpu - # CUDA.allowscalar(false) - # else +function train_don(; n=300, cuda=true, learning_rate=0.001, epochs=400) + if cuda && has_cuda() + @info "Training on GPU" + device = gpu + else + @info "Training on CPU" device = cpu - # end + end - x, y = get_data_don(n=300) - xtrain = x[1:280, :]' |> device - xval = x[end-19:end, :]' |> device + x, y = get_data_don(n=n) + + xtrain = x[1:280, :]' + ytrain = y[1:280, :] - ytrain = y[1:280, :] |> device + xval = x[end-19:end, :]' |> device yval = y[end-19:end, :] |> device - grid = collect(range(0, 1, length=1024))' |> device + grid = collect(range(0, 1, length=1024)') |> device - learning_rate = 0.001 opt = ADAM(learning_rate) - m = DeepONet((1024,1024,1024),(1,1024,1024),gelu,gelu) - loss(xtrain,ytrain,sensor) = Flux.Losses.mse(m(xtrain,sensor),ytrain) - evalcb() = @show(loss(xval,yval,grid)) + m = DeepONet((1024,1024,1024), (1,1024,1024), gelu, gelu) |> device + + loss(X, y, sensor) = Flux.Losses.mse(m(X, sensor), y) + evalcb() = @show(loss(xval, yval, grid)) - Flux.@epochs 400 Flux.train!(loss, params(m), [(xtrain,ytrain,grid)], opt, cb = evalcb) - ỹ = m(xval, grid) + data = [(xtrain, ytrain, grid)] |> device + Flux.@epochs epochs Flux.train!(loss, params(m), data, opt, cb=evalcb) + ỹ = m(xval |> device, grid |> device) - diffvec = vec(abs.((yval .- ỹ))) + diffvec = vec(abs.(cpu(yval) .- cpu(ỹ))) mean_diff = sum(diffvec)/length(diffvec) return mean_diff end diff --git a/src/DeepONet.jl b/src/DeepONet.jl index cbe5323b..0211f41a 100644 --- a/src/DeepONet.jl +++ b/src/DeepONet.jl @@ -116,7 +116,7 @@ function (a::DeepONet)(x::AbstractArray, y::AbstractVecOrMat) However, we perform the transformations by the NNs always in the first dim so we need to adjust (i.e. transpose) one of the inputs, which we do on the branch input here =# - return Array(branch(x)') * trunk(y) + return branch(x)' * trunk(y) end # Sensors stay the same and shouldn't be batched diff --git a/test/cuda.jl b/test/cuda.jl new file mode 100644 index 00000000..1e924b70 --- /dev/null +++ b/test/cuda.jl @@ -0,0 +1,18 @@ +@testset "CUDA" begin + @testset "DeepONet" begin + batch_size = 2 + a = [0.83541104, 0.83479851, 0.83404712, 0.83315711, 0.83212979, 0.83096755, + 0.82967374, 0.82825263, 0.82670928, 0.82504949, 0.82327962, 0.82140651, + 0.81943734, 0.81737952, 0.8152405, 0.81302771] + a = repeat(a, outer=(1, batch_size)) |> gpu + sensors = collect(range(0, 1, length=16)') + sensors = repeat(sensors, outer=(batch_size, 1)) |> gpu + model = DeepONet((16, 22, 30), (2, 16, 24, 30), σ, tanh; + init_branch=Flux.glorot_normal, bias_trunk=false) |> gpu + y = model(a, sensors) + @test size(y) == (batch_size, 16) + + mgrad = Flux.Zygote.gradient(() -> sum(model(a, sensors)), Flux.params(model)) + @test length(mgrad.grads) == 9 + end +end diff --git a/test/deeponet.jl b/test/deeponet.jl index 6da43369..c2d50f66 100644 --- a/test/deeponet.jl +++ b/test/deeponet.jl @@ -1,41 +1,34 @@ -using Test, Flux - @testset "DeepONet" begin - @testset "dimensions" begin - # Test the proper construction + @testset "proper construction" begin + deeponet = DeepONet((32,64,72), (24,48,72), σ, tanh) # Branch net - @test size(DeepONet((32,64,72), (24,48,72), σ, tanh).branch_net.layers[end].weight) == (72,64) - @test size(DeepONet((32,64,72), (24,48,72), σ, tanh).branch_net.layers[end].bias) == (72,) + @test size(deeponet.branch_net.layers[end].weight) == (72,64) + @test size(deeponet.branch_net.layers[end].bias) == (72,) # Trunk net - @test size(DeepONet((32,64,72), (24,48,72), σ, tanh).trunk_net.layers[end].weight) == (72,48) - @test size(DeepONet((32,64,72), (24,48,72), σ, tanh).trunk_net.layers[end].bias) == (72,) + @test size(deeponet.trunk_net.layers[end].weight) == (72,48) + @test size(deeponet.trunk_net.layers[end].bias) == (72,) end # Accept only Int as architecture parameters @test_throws MethodError DeepONet((32.5,64,72), (24,48,72), σ, tanh) @test_throws MethodError DeepONet((32,64,72), (24.1,48,72)) -end - -#Just the first 16 datapoints from the Burgers' equation dataset -a = [0.83541104, 0.83479851, 0.83404712, 0.83315711, 0.83212979, 0.83096755, 0.82967374, 0.82825263, 0.82670928, 0.82504949, 0.82327962, 0.82140651, 0.81943734, 0.81737952, 0.8152405, 0.81302771] -sensors = collect(range(0, 1, length=16))' - -model = DeepONet((16, 22, 30), (1, 16, 24, 30), σ, tanh; init_branch=Flux.glorot_normal, bias_trunk=false) - -model(a,sensors) -#forward pass -@test size(model(a, sensors)) == (1, 16) - -mgrad = Flux.Zygote.gradient((x,p)->sum(model(x,p)),a,sensors) - -#gradients -@test !iszero(Flux.Zygote.gradient((x,p)->sum(model(x,p)),a,sensors)[1]) -@test !iszero(Flux.Zygote.gradient((x,p)->sum(model(x,p)),a,sensors)[2]) - -#Output size of branch and trunk subnets should be same -branch = Chain(Dense(16, 22), Dense(22, 30)) -trunk = Chain(Dense(1, 16), Dense(16, 24), Dense(24, 32)) -m = DeepONet(branch, trunk) -@test_throws AssertionError DeepONet((32,64,70), (24,48,72), σ, tanh) -@test_throws DimensionMismatch m(a, sensors) + # Just the first 16 datapoints from the Burgers' equation dataset + a = [0.83541104, 0.83479851, 0.83404712, 0.83315711, 0.83212979, 0.83096755, + 0.82967374, 0.82825263, 0.82670928, 0.82504949, 0.82327962, 0.82140651, + 0.81943734, 0.81737952, 0.8152405, 0.81302771] + sensors = collect(range(0, 1, length=16)') + model = DeepONet((16, 22, 30), (1, 16, 24, 30), σ, tanh; init_branch=Flux.glorot_normal, bias_trunk=false) + y = model(a, sensors) + @test size(y) == (1, 16) + + mgrad = Flux.Zygote.gradient(() -> sum(model(a, sensors)), Flux.params(model)) + @test length(mgrad.grads) == 7 + + # Output size of branch and trunk subnets should be same + branch = Chain(Dense(16, 22), Dense(22, 30)) + trunk = Chain(Dense(1, 16), Dense(16, 24), Dense(24, 32)) + m = DeepONet(branch, trunk) + @test_throws AssertionError DeepONet((32,64,70), (24,48,72), σ, tanh) + @test_throws DimensionMismatch m(a, sensors) +end diff --git a/test/runtests.jl b/test/runtests.jl index 120a2107..90f03af3 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,12 +1,31 @@ using NeuralOperators using Test using Flux +using CUDA + +CUDA.allowscalar(false) + +cuda_tests = [ + "cuda", +] + +tests = [ + "Transform/Transform", + "operator_kernel", + "model", + "deeponet", +] + +if CUDA.functional() + append!(tests, cuda_tests) +else + @warn "CUDA unavailable, not testing GPU support" +end @testset "NeuralOperators.jl" begin - include("Transform/Transform.jl") - include("operator_kernel.jl") - include("model.jl") - include("deeponet.jl") + for t in tests + include("$(t).jl") + end end #=