From a88201586e0f99df19a987d3f26019ae1ab099d8 Mon Sep 17 00:00:00 2001 From: Savithru Jayasinghe <43912545+savithru-j@users.noreply.github.com> Date: Fri, 10 Nov 2023 18:26:19 -0500 Subject: [PATCH] Made solution data memory layout consistent between cpu and gpu for MultimodeNLSE. Moved initial pulse generation code to separate file. Updated unit test. --- examples/CMakeLists.txt | 4 + examples/fibre_2mode.cpp | 35 ++-- examples/fibre_2mode_gpu.cu | 23 ++- src/ode/GPUMultimodeNLSE.hpp | 24 +-- src/ode/InitialCondition.hpp | 36 ++++ src/ode/MultimodeNLSE.cpp | 266 +++++++----------------------- src/ode/MultimodeNLSE.hpp | 27 +-- test/CMakeLists.txt | 10 ++ test/ode/MultimodeNLSE_gputest.cu | 24 ++- 9 files changed, 156 insertions(+), 293 deletions(-) create mode 100644 src/ode/InitialCondition.hpp diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 1fc0d5b..d52bb03 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -24,6 +24,10 @@ foreach(exampleSrc ${EXAMPLE_SRCS}) if (exampleExt STREQUAL ".cu") target_compile_options(${exampleName} PRIVATE $<$:--ptxas-options=-v>) endif() + + if (OpenMP_CXX_FOUND) + target_compile_options(${exampleName} PRIVATE $<$: -Xcompiler=-fopenmp>) + endif() endif() #link to libraries and dependencies diff --git a/examples/fibre_2mode.cpp b/examples/fibre_2mode.cpp index a55558a..1cf39a3 100644 --- a/examples/fibre_2mode.cpp +++ b/examples/fibre_2mode.cpp @@ -3,6 +3,15 @@ // Licensed under the MIT License (http://opensource.org/licenses/MIT) // Copyright (c) 2023, Savithru Jayasinghe +#include +#include +#include +#include + +#ifdef ENABLE_OPENMP +#include +#endif + #include #include #include @@ -10,23 +19,15 @@ #include #include #include -#include -#include -#include -#include #include "ode/MultimodeNLSE.hpp" - -#ifdef ENABLE_OPENMP -#include -#endif +#include "ode/InitialCondition.hpp" using namespace autodiffeq; int main() { using Complex = complex; - // using ComplexAD = ADVar; using clock = std::chrono::high_resolution_clock; #ifdef ENABLE_OPENMP @@ -37,7 +38,7 @@ int main() std::cout << "No. of CPU threads available: " << num_threads << std::endl; const int num_modes = 2; - const int num_time_points = 8193; //8192; + const int num_time_points = 8193; Array2D beta_mat_5x8 = {{ 0.00000000e+00, -5.31830434e+03, -5.31830434e+03, -1.06910098e+04, -1.06923559e+04, -1.07426928e+04, -2.16527479e+04, -3.26533894e+04}, @@ -69,10 +70,8 @@ int main() Array1D Et = {9.0, 8.0}; //nJ (in range [6,30] nJ) Array1D t_FWHM = {0.1, 0.2}; //ps (in range [0.05, 0.5] ps) Array1D t_center = {0.0, 0.0}; //ps - Array1D sol0 = ode.GetInitialSolutionGaussian(Et, t_FWHM, t_center); - - // for (int i = 0; i < num_time_points; ++i) - // std::cout << std::abs(sol0(i)) << ", " << std::abs(sol0(num_time_points + i)) << std::endl; + Array1D sol0; + ComputeGaussianPulse(Et, t_FWHM, t_center, ode.GetTimeVector(), sol0); double z_start = 0, z_end = 7.5; //[m] int nz = 15000*20; @@ -85,6 +84,7 @@ int main() std::cout << "Problem parameters:\n" << " Time range : [" << tmin << ", " << tmax << "] ps\n" << " Z max : " << z_end << " m\n" + << " No. of time points : " << num_time_points << "\n" << " No. of z-steps : " << nz << "\n" << " Solution storage freq.: " << "Every " << storage_stride << " steps\n" << std::endl; @@ -108,14 +108,13 @@ int main() std::cout << "Writing solution file: " << filename << std::endl; std::ofstream f(filename, std::ios_base::out | std::ios::binary); f << std::setprecision(6) << std::scientific; - const int offset = mode*num_time_points; for (int i = 0; i < sol_hist.GetNumSteps(); ++i) { if (i % storage_stride == 0) { - for (int j = 0; j < num_time_points-1; ++j) - f << abs(sol_hist(i, offset + j)) << ", "; - f << abs(sol_hist(i, offset + num_time_points-1)) << std::endl; + for (int j = 0; j < num_time_points-1; ++j) + f << abs(sol_hist(i, j*num_modes + mode)) << ", "; + f << abs(sol_hist(i, (num_time_points-1)*num_modes + mode)) << std::endl; } } f.close(); diff --git a/examples/fibre_2mode_gpu.cu b/examples/fibre_2mode_gpu.cu index c07ffd8..7d74955 100644 --- a/examples/fibre_2mode_gpu.cu +++ b/examples/fibre_2mode_gpu.cu @@ -3,17 +3,18 @@ // Licensed under the MIT License (http://opensource.org/licenses/MIT) // Copyright (c) 2023, Savithru Jayasinghe +#include +#include +#include +#include #include #include #include #include #include -#include -#include -#include -#include #include "ode/GPUMultimodeNLSE.hpp" +#include "ode/InitialCondition.hpp" using namespace autodiffeq; @@ -52,15 +53,12 @@ int main() GPUMultimodeNLSE ode(num_modes, num_time_points, tmin, tmax, beta_mat, n2, omega0, Sk, is_self_steepening, is_nonlinear); - // MultimodeNLSE ode(num_modes, num_time_points, tmin, tmax, beta_mat); Array1D Et = {9.0, 8.0}; //nJ (in range [6,30] nJ) Array1D t_FWHM = {0.1, 0.2}; //ps (in range [0.05, 0.5] ps) Array1D t_center = {0.0, 0.0}; //ps - Array1D sol0 = ode.GetInitialSolutionGaussian(Et, t_FWHM, t_center); - - // for (int i = 0; i < num_time_points; ++i) - // std::cout << std::abs(sol0(i)) << ", " << std::abs(sol0(num_time_points + i)) << std::endl; + Array1D sol0; + ComputeGaussianPulse(Et, t_FWHM, t_center, ode.GetTimeVector(), sol0); double z_start = 0, z_end = 7.5; //[m] int nz = 15000*20; @@ -73,6 +71,7 @@ int main() std::cout << "Problem parameters:\n" << " Time range : [" << tmin << ", " << tmax << "] ps\n" << " Z max : " << z_end << " m\n" + << " No. of time points : " << num_time_points << "\n" << " No. of z-steps : " << nz << "\n" << " Solution storage freq.: Every " << storage_stride << " steps\n" << std::endl; @@ -101,9 +100,9 @@ int main() { if (i % storage_stride == 0) { - for (int j = 0; j < num_time_points-1; ++j) - f << abs(sol_hist(i, j*num_modes + mode)) << ", "; - f << abs(sol_hist(i, (num_time_points-1)*num_modes + mode)) << std::endl; + for (int j = 0; j < num_time_points-1; ++j) + f << abs(sol_hist(i, j*num_modes + mode)) << ", "; + f << abs(sol_hist(i, (num_time_points-1)*num_modes + mode)) << std::endl; } } f.close(); diff --git a/src/ode/GPUMultimodeNLSE.hpp b/src/ode/GPUMultimodeNLSE.hpp index 6affbc3..ab48672 100644 --- a/src/ode/GPUMultimodeNLSE.hpp +++ b/src/ode/GPUMultimodeNLSE.hpp @@ -78,8 +78,8 @@ void AddDispersionStencil5(const int num_modes, const int num_time_pts, //Calculate solution time-derivatives using stencil data T sol_tderiv1 = 0.5*(sol_ip1 - sol_im1) * inv_dt; T sol_tderiv2 = (sol_ip1 - 2.0*sol_i + sol_im1) * inv_dt2; - T sol_tderiv3 = (0.5*sol_ip2 - sol_ip1 + sol_im1 - 0.5*sol_im2) * inv_dt3; - T sol_tderiv4 = (sol_ip2 - 4.0*sol_ip1 + 6.0*sol_i - 4.0*sol_im1 + sol_im2) * inv_dt4; + T sol_tderiv3 = (0.5*(sol_ip2 - sol_im2) - sol_ip1 + sol_im1) * inv_dt3; + T sol_tderiv4 = (sol_ip2 - 4.0*(sol_ip1 + sol_im1) + 6.0*sol_i + sol_im2) * inv_dt4; #if 0 if (t == 128 && p == 0) { @@ -292,6 +292,7 @@ class GPUMultimodeNLSE : public ODE } int GetSolutionSize() const { return num_modes_*num_time_points_; } + const Array1D& GetTimeVector() const { return tvec_; } void EvalRHS(const GPUArray1D& sol, int step, double z, GPUArray1D& rhs) { @@ -365,25 +366,6 @@ class GPUMultimodeNLSE : public ODE #endif } - Array1D GetInitialSolutionGaussian(const Array1D& Et, const Array1D& t_FWHM, const Array1D& t_center) - { - assert(num_modes_ == (int) Et.size()); - assert(num_modes_ == (int) t_FWHM.size()); - assert(num_modes_ == (int) t_center.size()); - Array1D sol(GetSolutionSize()); - - for (int mode = 0; mode < num_modes_; ++mode) - { - const double A = std::sqrt(1665.0*Et(mode) / ((double)num_modes_ * t_FWHM(mode) * std::sqrt(M_PI))); - const double k = -1.665*1.665/(2.0*t_FWHM(mode)*t_FWHM(mode)); - const double& tc = t_center(mode); - - for (int j = 0; j < num_time_points_; ++j) - sol(j*num_modes_ + mode) = A * std::exp(k*(tvec_(j)-tc)*(tvec_(j)-tc)); - } - return sol; - } - protected: const int num_modes_; const int num_time_points_; diff --git a/src/ode/InitialCondition.hpp b/src/ode/InitialCondition.hpp new file mode 100644 index 0000000..57a66ec --- /dev/null +++ b/src/ode/InitialCondition.hpp @@ -0,0 +1,36 @@ +// nlse++ +// C++ library for solving the nonlinear Schrödinger equation +// Licensed under the MIT License (http://opensource.org/licenses/MIT) +// Copyright (c) 2023, Savithru Jayasinghe + +#define _USE_MATH_DEFINES //For MSVC +#include + +#include + +namespace autodiffeq +{ + +template +inline void ComputeGaussianPulse(const Array1D& Et, const Array1D& t_FWHM, const Array1D& t_center, + const Array1D& time_vec, Array1D& sol) +{ + const int num_modes = (int) Et.size(); + const int num_time_points = (int) time_vec.size(); + assert(num_modes == (int) t_FWHM.size()); + assert(num_modes == (int) t_center.size()); + sol.resize(num_modes * num_time_points); + + for (int mode = 0; mode < num_modes; ++mode) + { + const double A = std::sqrt(1665.0*Et(mode) / ((double)num_modes * t_FWHM(mode) * std::sqrt(M_PI))); + const double k = -1.665*1.665/(2.0*t_FWHM(mode)*t_FWHM(mode)); + const double& tc = t_center(mode); + + #pragma omp parallel for + for (int j = 0; j < num_time_points; ++j) + sol(j*num_modes + mode) = A * std::exp(k*(time_vec(j)-tc)*(time_vec(j)-tc)); + } +} + +} \ No newline at end of file diff --git a/src/ode/MultimodeNLSE.cpp b/src/ode/MultimodeNLSE.cpp index 682573f..ff65052 100644 --- a/src/ode/MultimodeNLSE.cpp +++ b/src/ode/MultimodeNLSE.cpp @@ -33,14 +33,7 @@ MultimodeNLSE::MultimodeNLSE( assert(Sk.GetDim(d) == num_modes); const int max_Ap_tderiv = beta_mat_.GetNumRows()-1; - sol_tderiv_.resize(max_Ap_tderiv, num_time_points_); //Stores the time-derivatives (e.g. d/dt, d^2/dt^2 ...) of a particular solution mode - - if (is_nonlinear_) - { - kerr_.resize(num_time_points_); - if (is_self_steepening_) - kerr_tderiv_.resize(num_time_points_); - } + sol_tderiv_.resize(max_Ap_tderiv, num_time_points_, T(0)); //Stores the time-derivatives (e.g. d/dt, d^2/dt^2 ...) of a particular solution mode } template @@ -56,7 +49,6 @@ MultimodeNLSE::EvalRHS(const Array1D& sol, int step, double z, Array1D& for (int p = 0; p < num_modes_; ++p) { - const int offset = p*num_time_points_; const auto beta0p = beta_mat_(0,p); const auto beta1p = beta_mat_(1,p); const auto beta2p = beta_mat_(2,p); @@ -65,132 +57,72 @@ MultimodeNLSE::EvalRHS(const Array1D& sol, int step, double z, Array1D& ComputeTimeDerivativesOrder4(p, sol, sol_tderiv_); #pragma omp for - for (int i = 0; i < num_time_points_; ++i) + for (int t = 0; t < num_time_points_; ++t) { - rhs(offset+i) = imag*(beta0p - beta00)*sol(offset+i) - -(beta1p - beta10)*sol_tderiv_(0,i) //d/dt - - imag* beta2p*0.5 *sol_tderiv_(1,i) //d^2/dt^2 - + beta3p*inv6 *sol_tderiv_(2,i) //d^3/dt^3 - + imag* beta4p*inv24 *sol_tderiv_(3,i); //d^4/dt^4 + const int offset = num_modes_*t; + rhs(offset+p) = imag*(beta0p - beta00)*sol(offset+p) + -(beta1p - beta10)*sol_tderiv_(0,t) //d/dt + - imag* beta2p*0.5 *sol_tderiv_(1,t) //d^2/dt^2 + + beta3p*inv6 *sol_tderiv_(2,t) //d^3/dt^3 + + imag* beta4p*inv24 *sol_tderiv_(3,t); //d^4/dt^4 } if (is_nonlinear_) { - ComputeKerrNonlinearity(p, sol); - if (is_self_steepening_) - { - - } - else + #pragma omp for + for (int t = 0; t < num_time_points_; ++t) { - #pragma omp for - for (int i = 0; i < num_time_points_; ++i) + const int offset = num_modes_*t; + const T* sol_modes = sol.data() + offset; + T kerr = T(0); + for (int q = 0; q < num_modes_; ++q) { - rhs(offset+i) += j_n_omega0_invc*kerr_(i); + const auto& Aq = sol_modes[q]; + for (int r = 0; r < num_modes_; ++r) + { + const auto& Ar = sol_modes[r]; + for (int s = 0; s < num_modes_; ++s) + { + const auto& As = sol_modes[s]; + kerr += Sk_(p,q,r,s) * Aq * Ar * conj(As); + } + } } + rhs(offset+p) += j_n_omega0_invc*kerr; } } } } -template -void -MultimodeNLSE::ComputeKerrNonlinearity(const int p, const Array1D& sol) -{ - #pragma omp for - for (int i = 0; i < num_time_points_; ++i) - { - T sum = 0.0; - for (int q = 0; q < num_modes_; ++q) - { - const auto& Aq = sol(q*num_time_points_ + i); - for (int r = 0; r < num_modes_; ++r) - { - const auto& Ar = sol(r*num_time_points_ + i); - for (int s = 0; s < num_modes_; ++s) - { - const auto& As = sol(s*num_time_points_ + i); - sum += Sk_(p,q,r,s)*Aq*Ar*conj(As); - } - } - } - kerr_(i) = sum; - } -} - template void MultimodeNLSE::ComputeTimeDerivativesOrder2(const int mode, const Array1D& sol, Array2D& tderiv) { - const int offset = mode*num_time_points_; const int max_deriv = tderiv.GetNumRows(); assert(max_deriv >= 2 && max_deriv <= 4); - const double inv_dt2 = 1.0 / (dt_*dt_); - - #pragma omp single - { - //First derivative d/dt - tderiv(0,0) = 0.0; - tderiv(0,num_time_points_-1) = 0.0; - - //Second derivative d^2/dt^2 - tderiv(1,0) = 2.0*(sol(offset+1) - sol(offset))*inv_dt2; - tderiv(1,num_time_points_-1) = 2.0*(sol(offset+num_time_points_-2) - - sol(offset+num_time_points_-1))*inv_dt2; - } + const double inv_dt = 1.0/dt_; + const double inv_dt2 = inv_dt*inv_dt; + const double inv_dt3 = inv_dt2*inv_dt; + const double inv_dt4 = inv_dt3*inv_dt; #pragma omp for - for (int i = 1; i < num_time_points_-1; ++i) - { - tderiv(0,i) = (sol(offset+i+1) - sol(offset+i-1))/(2.0*dt_); //d/dt - tderiv(1,i) = (sol(offset+i+1) - 2.0*sol(offset+i) + sol(offset+i-1))*inv_dt2; //d^2/dt^2 - } - - if (max_deriv >= 3) - { - const double inv_dt3 = 1.0 / (dt_*dt_*dt_); - - #pragma omp single - { - //Third derivative d^3/dt^3 - tderiv(2,0) = 0.0; - tderiv(2,1) = (0.5*sol(offset+3) - sol(offset+2) - + sol(offset) - 0.5*sol(offset+1))*inv_dt3; - tderiv(2,num_time_points_-2) = (0.5*sol(offset+num_time_points_-2) - sol(offset+num_time_points_-1) - + sol(offset+num_time_points_-3) - 0.5*sol(offset+num_time_points_-4))*inv_dt3; - tderiv(2,num_time_points_-1) = 0.0; - } - - #pragma omp for - for (int i = 2; i < num_time_points_-2; ++i) - { - tderiv(2,i) = (0.5*sol(offset+i+2) - sol(offset+i+1) - + sol(offset+i-1) - 0.5*sol(offset+i-2))*inv_dt3; //d^3/dt^3 - } - } - - if (max_deriv >= 4) + for (int t = 0; t < num_time_points_; ++t) { - const double inv_dt4 = 1.0 / (dt_*dt_*dt_*dt_); - - #pragma omp single - { - //Fourth derivative d^4/dt^4 - tderiv(3,0) = (2.0*sol(offset+2) - 8.0*sol(offset+1) + 6.0*sol(offset))*inv_dt4; - tderiv(3,1) = (sol(offset+3) - 4.0*sol(offset+2) + 7.0*sol(offset+1) - 4.0*sol(offset))*inv_dt4; - tderiv(3,num_time_points_-2) = (- 4.0*sol(offset+num_time_points_-1) + 7.0*sol(offset+num_time_points_-2) - - 4.0*sol(offset+num_time_points_-3) + sol(offset+num_time_points_-4))*inv_dt4; - tderiv(3,num_time_points_-1) = (2.0*sol(offset+num_time_points_-3) - 8.0*sol(offset+num_time_points_-2) - + 6.0*sol(offset+num_time_points_-1))*inv_dt4; - } - - #pragma omp for - for (int i = 2; i < num_time_points_-2; ++i) - { - tderiv(3,i) = (sol(offset+i+2) - 4.0*sol(offset+i+1) + 6.0*sol(offset+i) - - 4.0*sol(offset+i-1) + sol(offset+i-2))*inv_dt4; //d^4/dt^4 - } + //Get solutions at 5 stencil points: mirror data at left and right boundaries + T sol_im2 = (t >= 2) ? sol(num_modes_*(t-2) + mode) : sol(num_modes_*(2-t) + mode); + T sol_im1 = (t >= 1) ? sol(num_modes_*(t-1) + mode) : sol(num_modes_*(1-t) + mode); + const T& sol_i = sol(num_modes_*t + mode); + T sol_ip1 = (t < num_time_points_-1) ? sol(num_modes_*(t+1) + mode) + : sol(num_modes_*(2*num_time_points_ - t - 3) + mode); + T sol_ip2 = (t < num_time_points_-2) ? sol(num_modes_*(t+2) + mode) + : sol(num_modes_*(2*num_time_points_ - t - 4) + mode); + + //Calculate solution time-derivatives using stencil data + tderiv(0,t) = 0.5*(sol_ip1 - sol_im1) * inv_dt; //d/dt + tderiv(1,t) = (sol_ip1 - 2.0*sol_i + sol_im1) * inv_dt2; + tderiv(2,t) = (0.5*(sol_ip2 - sol_im2) - sol_ip1 + sol_im1) * inv_dt3; + tderiv(3,t) = (sol_ip2 - 4.0*(sol_ip1 + sol_im1) + 6.0*sol_i + sol_im2) * inv_dt4; } } @@ -198,104 +130,34 @@ template void MultimodeNLSE::ComputeTimeDerivativesOrder4(const int mode, const Array1D& sol, Array2D& tderiv) { - const int offset = mode*num_time_points_; const int max_deriv = tderiv.GetNumRows(); assert(max_deriv >= 2 && max_deriv <= 4); const double inv_12dt = 1.0/(12.0*dt_); const double inv_12dt2 = inv_12dt / dt_; - - #pragma omp single - { - //First derivative d/dt - tderiv(0,0) = 0.0; - tderiv(0,1) = (sol(offset+1) - 8.0*(sol(offset) - sol(offset+2)) - sol(offset+3))*inv_12dt; - tderiv(0,num_time_points_-2) = (sol(offset+num_time_points_-4) - 8.0*(sol(offset+num_time_points_-3) - sol(offset+num_time_points_-1)) - -sol(offset+num_time_points_-2))*inv_12dt; - tderiv(0,num_time_points_-1) = 0.0; - - //Second derivative d^2/dt^2 - tderiv(1,0) = (-2.0*sol(offset+2) + 32.0*sol(offset+1) - 30.0*sol(offset))*inv_12dt2; - tderiv(1,1) = (-31.0*sol(offset+1) + 16.0*(sol(offset) + sol(offset+2)) - sol(offset+3))*inv_12dt2; - tderiv(1,num_time_points_-2) = (-sol(offset+num_time_points_-4) + 16.0*(sol(offset+num_time_points_-3) + sol(offset+num_time_points_-1)) - - 31.0*sol(offset+num_time_points_-2))*inv_12dt2; - tderiv(1,num_time_points_-1) = (-2.0*sol(offset+num_time_points_-3) + 32.0*sol(offset+num_time_points_-2) - - 30.0*sol(offset+num_time_points_-1))*inv_12dt2; - } + const double inv_8dt3 = 1.0 / (8.0*dt_*dt_*dt_); + const double inv_6dt4 = 1.0 / (6.0*dt_*dt_*dt_*dt_); #pragma omp for - for (int i = 2; i < num_time_points_-2; ++i) - { - tderiv(0,i) = (sol(offset+i-2) - 8.0*(sol(offset+i-1) - sol(offset+i+1)) - sol(offset+i+2))*inv_12dt; //d/dt - tderiv(1,i) = (-sol(offset+i-2) + 16.0*(sol(offset+i-1) + sol(offset+i+1)) - - 30.0*sol(offset+i) - sol(offset+i+2))*inv_12dt2; //d^2/dt^2 - } - - if (max_deriv >= 3) + for (int t = 0; t < num_time_points_; ++t) { - const double inv_8dt3 = 1.0 / (8.0*dt_*dt_*dt_); - - //Third derivative d^3/dt^3 - #pragma omp single - { - tderiv(2,0) = 0.0; - tderiv(2,1) = (sol(offset+2) - 8.0*(sol(offset+1) - sol(offset+3)) - + 13.0*(sol(offset) - sol(offset+2)) - sol(offset+4))*inv_8dt3; - tderiv(2,2) = (sol(offset+1) - 8.0*(sol(offset) - sol(offset+4)) - + 13.0*(sol(offset+1) - sol(offset+3)) - sol(offset+5))*inv_8dt3; - } - - #pragma omp for - for (int i = 3; i < num_time_points_-3; ++i) - { - tderiv(2,i) = (sol(offset+i-3) - 8.0*(sol(offset+i-2) - sol(offset+i+2)) - + 13.0*(sol(offset+i-1) - sol(offset+i+1)) - sol(offset+i+3))*inv_8dt3; //d^3/dt^3 - } - - #pragma omp single - { - tderiv(2,num_time_points_-3) = (sol(offset+num_time_points_-6) - 8.0*(sol(offset+num_time_points_-5) - sol(offset+num_time_points_-1)) - + 13.0*(sol(offset+num_time_points_-4) - sol(offset+num_time_points_-2)) - sol(offset+num_time_points_-2))*inv_8dt3; - tderiv(2,num_time_points_-2) = (sol(offset+num_time_points_-5) - 8.0*(sol(offset+num_time_points_-4) - sol(offset+num_time_points_-2)) - + 13.0*(sol(offset+num_time_points_-3) - sol(offset+num_time_points_-1)) - sol(offset+num_time_points_-3))*inv_8dt3; - tderiv(2,num_time_points_-1) = 0.0; - } - } - - if (max_deriv >= 4) - { - const double inv_6dt4 = 1.0 / (6.0*dt_*dt_*dt_*dt_); - - //Fourth derivative d^4/dt^4 - #pragma omp single - { - tderiv(3,0) = (- 2.0*sol(offset+3) + 24.0*sol(offset+2) - 78.0*sol(offset+1) + 56.0*sol(offset))*inv_6dt4; - tderiv(3,1) = (- (sol(offset+2) + sol(offset+4)) + 12.0*(sol(offset+1) + sol(offset+3)) - - 39.0*(sol(offset) + sol(offset+2)) + 56.0* sol(offset+1))*inv_6dt4; - tderiv(3,2) = (- (sol(offset+1) + sol(offset+5)) + 12.0*(sol(offset) + sol(offset+4)) - - 39.0*(sol(offset+1) + sol(offset+3)) + 56.0* sol(offset+2))*inv_6dt4; - } - - #pragma omp for - for (int i = 3; i < num_time_points_-3; ++i) - { - tderiv(3,i) = (- (sol(offset+i-3) + sol(offset+i+3)) + 12.0*(sol(offset+i-2) + sol(offset+i+2)) - - 39.0*(sol(offset+i-1) + sol(offset+i+1)) + 56.0* sol(offset+i))*inv_6dt4; //d^4/dt^4 - } - - #pragma omp single - { - tderiv(3,num_time_points_-3) = (- (sol(offset+num_time_points_-6) + sol(offset+num_time_points_-2)) - + 12.0*(sol(offset+num_time_points_-5) + sol(offset+num_time_points_-1)) - - 39.0*(sol(offset+num_time_points_-4) + sol(offset+num_time_points_-2)) - + 56.0* sol(offset+num_time_points_-3))*inv_6dt4; - tderiv(3,num_time_points_-2) = (- (sol(offset+num_time_points_-5) + sol(offset+num_time_points_-3)) - + 12.0*(sol(offset+num_time_points_-4) + sol(offset+num_time_points_-2)) - - 39.0*(sol(offset+num_time_points_-3) + sol(offset+num_time_points_-1)) - + 56.0* sol(offset+num_time_points_-2))*inv_6dt4; - tderiv(3,num_time_points_-1) = (- 2.0*sol(offset+num_time_points_-4) + 24.0*sol(offset+num_time_points_-3) - - 78.0*sol(offset+num_time_points_-2) + 56.0*sol(offset+num_time_points_-1))*inv_6dt4; - } + //Get solutions at 7 stencil points: mirror data at left and right boundaries + T sol_im3 = (t >= 3) ? sol(num_modes_*(t-3) + mode) : sol(num_modes_*(3-t) + mode); + T sol_im2 = (t >= 2) ? sol(num_modes_*(t-2) + mode) : sol(num_modes_*(2-t) + mode); + T sol_im1 = (t >= 1) ? sol(num_modes_*(t-1) + mode) : sol(num_modes_*(1-t) + mode); + const T& sol_i = sol(num_modes_*t + mode); + T sol_ip1 = (t < num_time_points_-1) ? sol(num_modes_*(t+1) + mode) + : sol(num_modes_*(2*num_time_points_ - t - 3) + mode); + T sol_ip2 = (t < num_time_points_-2) ? sol(num_modes_*(t+2) + mode) + : sol(num_modes_*(2*num_time_points_ - t - 4) + mode); + T sol_ip3 = (t < num_time_points_-3) ? sol(num_modes_*(t+3) + mode) + : sol(num_modes_*(2*num_time_points_ - t - 5) + mode); + + //Calculate solution time-derivatives using stencil data + tderiv(0,t) = (sol_im2 - 8.0*(sol_im1 - sol_ip1) - sol_ip2) * inv_12dt; //d/dt + tderiv(1,t) = (-sol_im2 + 16.0*(sol_im1 + sol_ip1) - 30.0*sol_i - sol_ip2) * inv_12dt2; + tderiv(2,t) = (sol_im3 - 8.0*(sol_im2 - sol_ip2) + 13.0*(sol_im1 - sol_ip1) - sol_ip3) * inv_8dt3; + tderiv(3,t) = (-(sol_im3 + sol_ip3) + 12.0*(sol_im2 + sol_ip2) - 39.0*(sol_im1 + sol_ip1) + 56.0*sol_i) * inv_6dt4; } } diff --git a/src/ode/MultimodeNLSE.hpp b/src/ode/MultimodeNLSE.hpp index 03e00ee..6e7d081 100644 --- a/src/ode/MultimodeNLSE.hpp +++ b/src/ode/MultimodeNLSE.hpp @@ -34,36 +34,13 @@ class MultimodeNLSE : public ODE const bool is_self_steepening, const bool is_nonlinear = true); inline int GetSolutionSize() const { return num_modes_*num_time_points_; } + const Array1D& GetTimeVector() const { return tvec_; } void EvalRHS(const Array1D& sol, int step, double z, Array1D& rhs) override; - void ComputeKerrNonlinearity(const int p, const Array1D& sol); - void ComputeTimeDerivativesOrder2(const int mode, const Array1D& sol, Array2D& tderiv); - void ComputeTimeDerivativesOrder4(const int mode, const Array1D& sol, Array2D& tderiv); - Array1D GetInitialSolutionGaussian(const Array1D& Et, const Array1D& t_FWHM, const Array1D& t_center) - { - assert(num_modes_ == (int) Et.size()); - assert(num_modes_ == (int) t_FWHM.size()); - assert(num_modes_ == (int) t_center.size()); - Array1D sol(GetSolutionSize()); - - for (int mode = 0; mode < num_modes_; ++mode) - { - const int offset = mode*num_time_points_; - const double A = std::sqrt(1665.0*Et(mode) / ((double)num_modes_ * t_FWHM(mode) * std::sqrt(M_PI))); - const double k = -1.665*1.665/(2.0*t_FWHM(mode)*t_FWHM(mode)); - const double& tc = t_center(mode); - - #pragma omp parallel for - for (int j = 0; j < num_time_points_; ++j) - sol(offset + j) = A * std::exp(k*(tvec_(j)-tc)*(tvec_(j)-tc)); - } - return sol; - } - protected: const int num_modes_; const int num_time_points_; @@ -77,8 +54,6 @@ class MultimodeNLSE : public ODE const bool is_self_steepening_ = false; const bool is_nonlinear_ = false; Array2D sol_tderiv_; - Array1D kerr_; - Array1D kerr_tderiv_; }; } \ No newline at end of file diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 056c383..a0dc541 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -21,6 +21,16 @@ foreach(testSrc ${TEST_SRCS}) #Add compile target add_executable(${testName}_build ${testSrc}) + if (AUTODIFFEQ_ENABLE_CUDA) + # if (exampleExt STREQUAL ".cu") + # target_compile_options(${testName}_build PRIVATE $<$:--ptxas-options=-v>) + # endif() + + if (OpenMP_CXX_FOUND) + target_compile_options(${testName}_build PRIVATE $<$: -Xcompiler=-fopenmp>) + endif() + endif() + #link to libraries and dependencies target_link_libraries(${testName}_build PRIVATE ${NLSECPP_LIBS} gtest gtest_main) diff --git a/test/ode/MultimodeNLSE_gputest.cu b/test/ode/MultimodeNLSE_gputest.cu index ae4c562..0de9fcb 100644 --- a/test/ode/MultimodeNLSE_gputest.cu +++ b/test/ode/MultimodeNLSE_gputest.cu @@ -23,10 +23,10 @@ TEST( MultimodeNLSE_2mode, EvalRHS_CPU_GPU_Consistency ) { 1.51681819e-04, 1.52043264e-04}, {-4.95686317e-07, -4.97023237e-07}}; - Array4D Sk(2,2,2,2, {4.9840660e+09, 0.0000000e+00, 0.0000000e+00, 2.5202004e+09, - 0.0000000e+00, 2.5202004e+09, 2.5202004e+09, 0.0000000e+00, - 0.0000000e+00, 2.5202004e+09, 2.5202004e+09, 0.0000000e+00, - 2.5202004e+09, 0.0000000e+00, 0.0000000e+00, 3.7860385e+09}); + Array4D Sk(2,2,2,2, { 1.0, 2.0, 3.0, 4.0, + 5.0, 6.0, 7.0, 8.0, + -2.0, -3.0, -4.0, -5.0, + 0.5, 0.7, -0.3, 0.1}); double tmin = -20, tmax = 20; double n2 = 2.3e-20; @@ -50,24 +50,20 @@ TEST( MultimodeNLSE_2mode, EvalRHS_CPU_GPU_Consistency ) GPUMultimodeNLSE ode_gpu(num_modes, num_time_points, tmin, tmax, beta_mat, n2, omega0, Sk, is_self_steepening, is_nonlinear); - //TODO: CPU and GPU have different data orderings, so need to copy elements to transformed indices - Array1D sol_cpu_colmajor(sol_size); - for (int t = 0; t < num_time_points; ++t) - for (int p = 0; p < num_modes; ++p) - sol_cpu_colmajor(t*num_modes + p) = sol_cpu(p*num_time_points + t); - - GPUArray1D sol_gpu(sol_cpu_colmajor); + GPUArray1D sol_gpu(sol_cpu); GPUArray1D rhs_gpu(sol_size); ode_gpu.EvalRHS(sol_gpu, step, z, rhs_gpu); auto rhs_gpu_host = rhs_gpu.CopyToHost(); //Check the consistency of CPU and GPU residuals + const double tol = 1e-13; for (int t = 0; t < num_time_points; ++t) for (int p = 0; p < num_modes; ++p) { - const auto& r_cpu = rhs_cpu(p*num_time_points + t); + const auto& r_cpu = rhs_cpu(t*num_modes + p); const auto& r_gpu = rhs_gpu_host(t*num_modes + p); - EXPECT_DOUBLE_EQ(r_cpu.real(), r_gpu.real()); - EXPECT_DOUBLE_EQ(r_cpu.imag(), r_gpu.imag()); + // std::cout << "t: " << t << ", p: " << p << ", val: " << r_cpu.real() << ", " << r_gpu.real() << std::endl; + EXPECT_NEAR(r_cpu.real(), r_gpu.real(), tol); + EXPECT_NEAR(r_cpu.imag(), r_gpu.imag(), tol); } }