Skip to content

Commit

Permalink
Made solution data memory layout consistent between cpu and gpu for M…
Browse files Browse the repository at this point in the history
…ultimodeNLSE. Moved initial pulse generation code to separate file. Updated unit test.
  • Loading branch information
savithru-j committed Nov 10, 2023
1 parent c539427 commit a882015
Show file tree
Hide file tree
Showing 9 changed files with 156 additions and 293 deletions.
4 changes: 4 additions & 0 deletions examples/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@ foreach(exampleSrc ${EXAMPLE_SRCS})
if (exampleExt STREQUAL ".cu")
target_compile_options(${exampleName} PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:--ptxas-options=-v>)
endif()

if (OpenMP_CXX_FOUND)
target_compile_options(${exampleName} PRIVATE $<$<COMPILE_LANGUAGE:CUDA>: -Xcompiler=-fopenmp>)
endif()
endif()

#link to libraries and dependencies
Expand Down
35 changes: 17 additions & 18 deletions examples/fibre_2mode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,30 +3,31 @@
// Licensed under the MIT License (http://opensource.org/licenses/MIT)
// Copyright (c) 2023, Savithru Jayasinghe

#include <iostream>
#include <iomanip>
#include <fstream>
#include <chrono>

#ifdef ENABLE_OPENMP
#include <omp.h>
#endif

#include <autodiffeq/numerics/ADVar.hpp>
#include <autodiffeq/numerics/Complex.hpp>
#include <autodiffeq/solver/ForwardEuler.hpp>
#include <autodiffeq/solver/RungeKutta.hpp>
#include <autodiffeq/linearalgebra/Array2D.hpp>
#include <autodiffeq/linearalgebra/Array3D.hpp>
#include <autodiffeq/linearalgebra/Array4D.hpp>
#include <iostream>
#include <iomanip>
#include <fstream>
#include <chrono>

#include "ode/MultimodeNLSE.hpp"

#ifdef ENABLE_OPENMP
#include <omp.h>
#endif
#include "ode/InitialCondition.hpp"

using namespace autodiffeq;

int main()
{
using Complex = complex<double>;
// using ComplexAD = ADVar<Complex>;
using clock = std::chrono::high_resolution_clock;

#ifdef ENABLE_OPENMP
Expand All @@ -37,7 +38,7 @@ int main()
std::cout << "No. of CPU threads available: " << num_threads << std::endl;

const int num_modes = 2;
const int num_time_points = 8193; //8192;
const int num_time_points = 8193;

Array2D<double> beta_mat_5x8 =
{{ 0.00000000e+00, -5.31830434e+03, -5.31830434e+03, -1.06910098e+04, -1.06923559e+04, -1.07426928e+04, -2.16527479e+04, -3.26533894e+04},
Expand Down Expand Up @@ -69,10 +70,8 @@ int main()
Array1D<double> Et = {9.0, 8.0}; //nJ (in range [6,30] nJ)
Array1D<double> t_FWHM = {0.1, 0.2}; //ps (in range [0.05, 0.5] ps)
Array1D<double> t_center = {0.0, 0.0}; //ps
Array1D<Complex> sol0 = ode.GetInitialSolutionGaussian(Et, t_FWHM, t_center);

// for (int i = 0; i < num_time_points; ++i)
// std::cout << std::abs(sol0(i)) << ", " << std::abs(sol0(num_time_points + i)) << std::endl;
Array1D<Complex> sol0;
ComputeGaussianPulse(Et, t_FWHM, t_center, ode.GetTimeVector(), sol0);

double z_start = 0, z_end = 7.5; //[m]
int nz = 15000*20;
Expand All @@ -85,6 +84,7 @@ int main()
std::cout << "Problem parameters:\n"
<< " Time range : [" << tmin << ", " << tmax << "] ps\n"
<< " Z max : " << z_end << " m\n"
<< " No. of time points : " << num_time_points << "\n"
<< " No. of z-steps : " << nz << "\n"
<< " Solution storage freq.: " << "Every " << storage_stride << " steps\n"
<< std::endl;
Expand All @@ -108,14 +108,13 @@ int main()
std::cout << "Writing solution file: " << filename << std::endl;
std::ofstream f(filename, std::ios_base::out | std::ios::binary);
f << std::setprecision(6) << std::scientific;
const int offset = mode*num_time_points;
for (int i = 0; i < sol_hist.GetNumSteps(); ++i)
{
if (i % storage_stride == 0)
{
for (int j = 0; j < num_time_points-1; ++j)
f << abs(sol_hist(i, offset + j)) << ", ";
f << abs(sol_hist(i, offset + num_time_points-1)) << std::endl;
for (int j = 0; j < num_time_points-1; ++j)
f << abs(sol_hist(i, j*num_modes + mode)) << ", ";
f << abs(sol_hist(i, (num_time_points-1)*num_modes + mode)) << std::endl;
}
}
f.close();
Expand Down
23 changes: 11 additions & 12 deletions examples/fibre_2mode_gpu.cu
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,18 @@
// Licensed under the MIT License (http://opensource.org/licenses/MIT)
// Copyright (c) 2023, Savithru Jayasinghe

#include <iostream>
#include <iomanip>
#include <fstream>
#include <chrono>
#include <autodiffeq/numerics/ADVar.hpp>
#include <autodiffeq/numerics/Complex.hpp>
#include <autodiffeq/solver/RungeKutta.hpp>
#include <autodiffeq/linearalgebra/Array2D.hpp>
#include <autodiffeq/linearalgebra/Array4D.hpp>
#include <iostream>
#include <iomanip>
#include <fstream>
#include <chrono>

#include "ode/GPUMultimodeNLSE.hpp"
#include "ode/InitialCondition.hpp"

using namespace autodiffeq;

Expand Down Expand Up @@ -52,15 +53,12 @@ int main()

GPUMultimodeNLSE<Complex> ode(num_modes, num_time_points, tmin, tmax, beta_mat,
n2, omega0, Sk, is_self_steepening, is_nonlinear);
// MultimodeNLSE<Complex> ode(num_modes, num_time_points, tmin, tmax, beta_mat);

Array1D<double> Et = {9.0, 8.0}; //nJ (in range [6,30] nJ)
Array1D<double> t_FWHM = {0.1, 0.2}; //ps (in range [0.05, 0.5] ps)
Array1D<double> t_center = {0.0, 0.0}; //ps
Array1D<Complex> sol0 = ode.GetInitialSolutionGaussian(Et, t_FWHM, t_center);

// for (int i = 0; i < num_time_points; ++i)
// std::cout << std::abs(sol0(i)) << ", " << std::abs(sol0(num_time_points + i)) << std::endl;
Array1D<Complex> sol0;
ComputeGaussianPulse(Et, t_FWHM, t_center, ode.GetTimeVector(), sol0);

double z_start = 0, z_end = 7.5; //[m]
int nz = 15000*20;
Expand All @@ -73,6 +71,7 @@ int main()
std::cout << "Problem parameters:\n"
<< " Time range : [" << tmin << ", " << tmax << "] ps\n"
<< " Z max : " << z_end << " m\n"
<< " No. of time points : " << num_time_points << "\n"
<< " No. of z-steps : " << nz << "\n"
<< " Solution storage freq.: Every " << storage_stride << " steps\n"
<< std::endl;
Expand Down Expand Up @@ -101,9 +100,9 @@ int main()
{
if (i % storage_stride == 0)
{
for (int j = 0; j < num_time_points-1; ++j)
f << abs(sol_hist(i, j*num_modes + mode)) << ", ";
f << abs(sol_hist(i, (num_time_points-1)*num_modes + mode)) << std::endl;
for (int j = 0; j < num_time_points-1; ++j)
f << abs(sol_hist(i, j*num_modes + mode)) << ", ";
f << abs(sol_hist(i, (num_time_points-1)*num_modes + mode)) << std::endl;
}
}
f.close();
Expand Down
24 changes: 3 additions & 21 deletions src/ode/GPUMultimodeNLSE.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,8 @@ void AddDispersionStencil5(const int num_modes, const int num_time_pts,
//Calculate solution time-derivatives using stencil data
T sol_tderiv1 = 0.5*(sol_ip1 - sol_im1) * inv_dt;
T sol_tderiv2 = (sol_ip1 - 2.0*sol_i + sol_im1) * inv_dt2;
T sol_tderiv3 = (0.5*sol_ip2 - sol_ip1 + sol_im1 - 0.5*sol_im2) * inv_dt3;
T sol_tderiv4 = (sol_ip2 - 4.0*sol_ip1 + 6.0*sol_i - 4.0*sol_im1 + sol_im2) * inv_dt4;
T sol_tderiv3 = (0.5*(sol_ip2 - sol_im2) - sol_ip1 + sol_im1) * inv_dt3;
T sol_tderiv4 = (sol_ip2 - 4.0*(sol_ip1 + sol_im1) + 6.0*sol_i + sol_im2) * inv_dt4;

#if 0
if (t == 128 && p == 0) {
Expand Down Expand Up @@ -292,6 +292,7 @@ class GPUMultimodeNLSE : public ODE<T>
}

int GetSolutionSize() const { return num_modes_*num_time_points_; }
const Array1D<double>& GetTimeVector() const { return tvec_; }

void EvalRHS(const GPUArray1D<T>& sol, int step, double z, GPUArray1D<T>& rhs)
{
Expand Down Expand Up @@ -365,25 +366,6 @@ class GPUMultimodeNLSE : public ODE<T>
#endif
}

Array1D<T> GetInitialSolutionGaussian(const Array1D<double>& Et, const Array1D<double>& t_FWHM, const Array1D<double>& t_center)
{
assert(num_modes_ == (int) Et.size());
assert(num_modes_ == (int) t_FWHM.size());
assert(num_modes_ == (int) t_center.size());
Array1D<T> sol(GetSolutionSize());

for (int mode = 0; mode < num_modes_; ++mode)
{
const double A = std::sqrt(1665.0*Et(mode) / ((double)num_modes_ * t_FWHM(mode) * std::sqrt(M_PI)));
const double k = -1.665*1.665/(2.0*t_FWHM(mode)*t_FWHM(mode));
const double& tc = t_center(mode);

for (int j = 0; j < num_time_points_; ++j)
sol(j*num_modes_ + mode) = A * std::exp(k*(tvec_(j)-tc)*(tvec_(j)-tc));
}
return sol;
}

protected:
const int num_modes_;
const int num_time_points_;
Expand Down
36 changes: 36 additions & 0 deletions src/ode/InitialCondition.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
// nlse++
// C++ library for solving the nonlinear Schrödinger equation
// Licensed under the MIT License (http://opensource.org/licenses/MIT)
// Copyright (c) 2023, Savithru Jayasinghe

#define _USE_MATH_DEFINES //For MSVC
#include <cmath>

#include <autodiffeq/linearalgebra/Array1D.hpp>

namespace autodiffeq
{

template<typename T, typename Ts>
inline void ComputeGaussianPulse(const Array1D<T>& Et, const Array1D<T>& t_FWHM, const Array1D<T>& t_center,
const Array1D<double>& time_vec, Array1D<Ts>& sol)
{
const int num_modes = (int) Et.size();
const int num_time_points = (int) time_vec.size();
assert(num_modes == (int) t_FWHM.size());
assert(num_modes == (int) t_center.size());
sol.resize(num_modes * num_time_points);

for (int mode = 0; mode < num_modes; ++mode)
{
const double A = std::sqrt(1665.0*Et(mode) / ((double)num_modes * t_FWHM(mode) * std::sqrt(M_PI)));
const double k = -1.665*1.665/(2.0*t_FWHM(mode)*t_FWHM(mode));
const double& tc = t_center(mode);

#pragma omp parallel for
for (int j = 0; j < num_time_points; ++j)
sol(j*num_modes + mode) = A * std::exp(k*(time_vec(j)-tc)*(time_vec(j)-tc));
}
}

}
Loading

0 comments on commit a882015

Please sign in to comment.