WIP adding documentation
Tobias Meyer Andersen committed Nov 20, 2023
1 parent 7c75791 commit b3c2288
Showing 4 changed files with 42 additions and 32 deletions.
16 changes: 8 additions & 8 deletions opm/simulators/linalg/ParallelOverlappingILU0_impl.hpp
@@ -289,7 +289,7 @@ template<class Matrix, class Domain, class Range, class ParallelInfoT>
void ParallelOverlappingILU0<Matrix,Domain,Range,ParallelInfoT>::
apply (Domain& v, const Range& d)
{
auto start_time = std::chrono::high_resolution_clock::now();
// auto start_time = std::chrono::high_resolution_clock::now();

OPM_TIMEBLOCK(apply);
Range& md = reorderD(d);
@@ -347,10 +347,10 @@ apply (Domain& v, const Range& d)
reorderBack(mv, v);


auto end_time = std::chrono::high_resolution_clock::now();
auto duration = std::chrono::duration_cast<std::chrono::microseconds>(end_time - start_time);
// auto end_time = std::chrono::high_resolution_clock::now();
// auto duration = std::chrono::duration_cast<std::chrono::microseconds>(end_time - start_time);

std::cout << "Apply: " << duration.count() << " microseconds." << std::endl;
// std::cout << "Apply: " << duration.count() << " microseconds." << std::endl;
}

template<class Matrix, class Domain, class Range, class ParallelInfoT>
@@ -367,7 +367,7 @@ template<class Matrix, class Domain, class Range, class ParallelInfoT>
void ParallelOverlappingILU0<Matrix,Domain,Range,ParallelInfoT>::
update()
{
auto start_time = std::chrono::high_resolution_clock::now();
// auto start_time = std::chrono::high_resolution_clock::now();

OPM_TIMEBLOCK(update);
// (For older DUNE versions the communicator might be
@@ -535,10 +535,10 @@ update()
// store ILU in simple CRS format
detail::convertToCRS(*ILU_, lower_, upper_, inv_);

auto end_time = std::chrono::high_resolution_clock::now();
auto duration = std::chrono::duration_cast<std::chrono::microseconds>(end_time - start_time);
// auto end_time = std::chrono::high_resolution_clock::now();
// auto duration = std::chrono::duration_cast<std::chrono::microseconds>(end_time - start_time);

std::cout << "Update: " << duration.count() << " microseconds." << std::endl;
// std::cout << "Update: " << duration.count() << " microseconds." << std::endl;
}

template<class Matrix, class Domain, class Range, class ParallelInfoT>
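
Note: the change above comments out the ad-hoc std::chrono timing around apply() and update(); the OPM_TIMEBLOCK macro already times these scopes. For reference, a scoped (RAII) timer removes the need for matching start/end bookkeeping altogether. The sketch below is a minimal, hypothetical illustration of that pattern (names are made up; this is not the OPM_TIMEBLOCK implementation):

#include <chrono>
#include <iostream>
#include <string>

// Hypothetical RAII timer: prints the elapsed time for the enclosing scope
// when it is destroyed, mirroring the commented-out start/end pattern above.
class ScopedTimer
{
public:
    explicit ScopedTimer(std::string label)
        : m_label(std::move(label))
        , m_start(std::chrono::high_resolution_clock::now())
    {
    }

    ~ScopedTimer()
    {
        const auto end = std::chrono::high_resolution_clock::now();
        const auto us = std::chrono::duration_cast<std::chrono::microseconds>(end - m_start);
        std::cout << m_label << ": " << us.count() << " microseconds." << std::endl;
    }

private:
    std::string m_label;
    std::chrono::high_resolution_clock::time_point m_start;
};

// Usage inside e.g. apply():
//     ScopedTimer timer("Apply");
//     ... body of apply() ...
// The measurement is emitted automatically when the scope ends.
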
40 changes: 20 additions & 20 deletions opm/simulators/linalg/cuistl/CuDILU.cpp
@@ -113,60 +113,60 @@ printDuneBlockVec(V v, int blocksize)

// TODO: I think sending size is excessive
std::vector<int>
createReorderedToNatural(int size, Opm::SparseTable<size_t> level_sets)
createReorderedToNatural(int size, Opm::SparseTable<size_t> levelSets)
{
auto res = std::vector<int>(size);
int globCnt = 0;
for (int i = 0; i < level_sets.size(); i++) {
for (size_t j = 0; j < level_sets[i].size(); j++) {
res[globCnt++] = (int)level_sets[i][j];
for (int i = 0; i < levelSets.size(); i++) {
for (size_t j = 0; j < levelSets[i].size(); j++) {
res[globCnt++] = (int)levelSets[i][j];
}
}
return res;
}

std::vector<int>
createNaturalToReordered(int size, Opm::SparseTable<size_t> level_sets)
createNaturalToReordered(int size, Opm::SparseTable<size_t> levelSets)
{
auto res = std::vector<int>(size);
int globCnt = 0;
for (int i = 0; i < level_sets.size(); i++) {
for (size_t j = 0; j < level_sets[i].size(); j++) {
res[level_sets[i][j]] = globCnt++;
for (int i = 0; i < levelSets.size(); i++) {
for (size_t j = 0; j < levelSets[i].size(); j++) {
res[levelSets[i][j]] = globCnt++;
}
}
return res;
}

// TODO: When this function is called we already have the natural ordered matrix on the GPU
// TODO: could it be possible to create the reordered one in a kernel to speed up the constructor?
template <class M, class field_type>
Opm::cuistl::CuSparseMatrix<field_type>
createReorderedMatrix(M naturalMatrix, std::vector<int> reordered_to_natural)
createReorderedMatrix(M naturalMatrix, std::vector<int> reorderedToNatural)
{
M reorderedMatrix(naturalMatrix.N(), naturalMatrix.N(), naturalMatrix.nonzeroes(), M::row_wise);
for (auto dst_row_it = reorderedMatrix.createbegin(); dst_row_it != reorderedMatrix.createend(); ++dst_row_it) {
auto src_row = naturalMatrix.begin() + reordered_to_natural[dst_row_it.index()];
for (auto dstRowIt = reorderedMatrix.createbegin(); dstRowIt != reorderedMatrix.createend(); ++dstRowIt) {
auto srcRow = naturalMatrix.begin() + reorderedToNatural[dstRowIt.index()];
// For elements in A
for (auto elem = src_row->begin(); elem != src_row->end(); elem++) {
dst_row_it.insert(elem.index());
for (auto elem = srcRow->begin(); elem != srcRow->end(); elem++) {
dstRowIt.insert(elem.index());
}
}

for (auto dst_row_it = reorderedMatrix.begin(); dst_row_it != reorderedMatrix.end(); ++dst_row_it) {
auto src_row = naturalMatrix.begin() + reordered_to_natural[dst_row_it.index()];
for (auto elem = src_row->begin(); elem != src_row->end(); elem++) {
reorderedMatrix[dst_row_it.index()][elem.index()] = *elem;
// TODO: There is probably a faster way to copy by copying whole rows at a time
for (auto dstRowIt = reorderedMatrix.begin(); dstRowIt != reorderedMatrix.end(); ++dstRowIt) {
auto srcRow = naturalMatrix.begin() + reorderedToNatural[dstRowIt.index()];
for (auto elem = srcRow->begin(); elem != srcRow->end(); elem++) {
reorderedMatrix[dstRowIt.index()][elem.index()] = *elem;
}
}

return Opm::cuistl::CuSparseMatrix<field_type>::fromMatrix(reorderedMatrix, true);
}


// TODO change variables from snake_case to camelCase
namespace Opm::cuistl
{

// TODO: optimize construction by omitting unused variable [m_cpuMatrixReordered]
template <class M, class X, class Y, int l>
CuDILU<M, X, Y, l>::CuDILU(const M& A, field_type w)
: m_cpuMatrix(A)
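
Note: the two helpers renamed above build a pair of inverse permutations from the level sets. createReorderedToNatural lists natural row indices level set by level set, and createNaturalToReordered maps each natural row back to its position in that listing. A small self-contained sketch with a made-up level-set layout shows the relationship (values are hypothetical and only for illustration; std::vector stands in for Opm::SparseTable):

#include <cstddef>
#include <iostream>
#include <vector>

int main()
{
    // Hypothetical level sets for a 5-row matrix:
    // level 0 = {1, 3}, level 1 = {0, 4}, level 2 = {2}.
    std::vector<std::vector<std::size_t>> levelSets = {{1, 3}, {0, 4}, {2}};

    const int size = 5;
    std::vector<int> reorderedToNatural(size);
    std::vector<int> naturalToReordered(size);

    // Same double loop as createReorderedToNatural/createNaturalToReordered,
    // just filling both maps in one pass.
    int globCnt = 0;
    for (const auto& level : levelSets) {
        for (std::size_t naturalRow : level) {
            reorderedToNatural[globCnt] = static_cast<int>(naturalRow);
            naturalToReordered[naturalRow] = globCnt;
            ++globCnt;
        }
    }

    // reorderedToNatural = {1, 3, 0, 4, 2}
    // naturalToReordered = {2, 0, 4, 1, 3}
    for (int r = 0; r < size; ++r) {
        std::cout << "reordered " << r << " -> natural " << reorderedToNatural[r]
                  << ", natural " << r << " -> reordered " << naturalToReordered[r] << "\n";
    }
    return 0;
}
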
1 change: 0 additions & 1 deletion opm/simulators/linalg/cuistl/CuJac.cpp
@@ -74,7 +74,6 @@ CuJac<M, X, Y, l>::CuJac(const M& A, field_type w)
m_gpuMatrix.N(),
m_gpuMatrix.blockSize(),
m_diagInvFlattened.data());
printf("gpu code should be run");
}

template <class M, class X, class Y, int l>
17 changes: 14 additions & 3 deletions opm/simulators/linalg/cuistl/detail/cusparse_matrix_operations.hpp
@@ -35,10 +35,21 @@ namespace Opm::cuistl::detail
template <class T>
void invertDiagonalAndFlatten(T* mat, int* rowIndices, int* colIndices, size_t numberOfRows, size_t blocksize, T* vec);

// TODO: document this version when it is stable
/**
 * @brief Perform a lower solve on the rows of the matrix that can safely be computed in parallel (one level set)
 * @param reorderedMat pointer to GPU memory containing the nonzero values of the sparse matrix. The matrix is reordered such that rows in the same level set are contiguous
 * @param rowIndices Pointer to vector on GPU containing row indices compliant with the BSR format
 * @param colIndices Pointer to vector on GPU containing column indices compliant with the BSR format
 * @param numberOfRows Integer describing the number of rows in the matrix
 * @param indexConversion Integer array mapping an index in the reordered matrix to its corresponding index in the naturally ordered matrix
 * @param startIdx Index of the first row of the matrix to be solved
 * @param rowsInLevelSet Number of rows in this level set, which equals the number of rows solved in parallel by this call
 * @param dInv The diagonal matrix used by the diagonal ILU (DILU) preconditioner
 * @param d Stores the defect
 * @param [out] v Will store the result of the lower solve
*/
template <class T, int blocksize>
void computeLowerSolveLevelSet(T* mat, int* rowIndices, int* colIndices, size_t numberOfRows, int* indexConversion, const int startIdx, int rowsInLevelSet, T* dInv, const T* d, T* v);

void computeLowerSolveLevelSet(T* reorderedMat, int* rowIndices, int* colIndices, size_t numberOfRows, int* indexConversion, const int startIdx, int rowsInLevelSet, T* dInv, const T* d, T* v);

// TODO: document this version when it is stable
template <class T, int blocksize>
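
Note: to make the parameter documentation above concrete, the following is a serial CPU sketch of a lower solve over one level set. It assumes scalar entries (blocksize 1), a CSR layout for reorderedMat/rowIndices/colIndices, and column indices plus the vectors v, d, dInv given in the natural ordering; these are assumptions for illustration only, and the actual CUDA kernel may differ in its indexing details:

// Illustrative serial version of the level-set lower solve described above.
template <class T>
void lowerSolveLevelSetReference(const T* reorderedMat,
                                 const int* rowIndices,
                                 const int* colIndices,
                                 const int* indexConversion,
                                 int startIdx,
                                 int rowsInLevelSet,
                                 const T* dInv,
                                 const T* d,
                                 T* v)
{
    // Every row in one level set depends only on rows from earlier level sets,
    // so this loop is what the GPU version distributes across threads.
    for (int reorderedRow = startIdx; reorderedRow < startIdx + rowsInLevelSet; ++reorderedRow) {
        const int naturalRow = indexConversion[reorderedRow];

        // Accumulate d_i minus the contributions from the strictly lower part of the row.
        T rhs = d[naturalRow];
        for (int nz = rowIndices[reorderedRow]; nz < rowIndices[reorderedRow + 1]; ++nz) {
            const int col = colIndices[nz];
            if (col < naturalRow) {
                rhs -= reorderedMat[nz] * v[col];
            }
        }

        // Scale by the (inverted) diagonal of the DILU factorization.
        v[naturalRow] = dInv[naturalRow] * rhs;
    }
}
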
