From e3b354569eb9b876995ae71caaa49377b81723ee Mon Sep 17 00:00:00 2001 From: Jonathan Hogg Date: Thu, 1 Sep 2016 16:14:03 +0000 Subject: [PATCH] Make assemble_pre map RAII git-svn-id: https://ccpforge.cse.rl.ac.uk/svn/spral/branches/cpu_ssids@1125 33810a50-c353-450f-be6c-41f7241c649d --- src/ssids/cpu/kernels/assemble.hxx | 143 +++++++++++++++-------------- 1 file changed, 72 insertions(+), 71 deletions(-) diff --git a/src/ssids/cpu/kernels/assemble.hxx b/src/ssids/cpu/kernels/assemble.hxx index 450e1257..78162c2a 100644 --- a/src/ssids/cpu/kernels/assemble.hxx +++ b/src/ssids/cpu/kernels/assemble.hxx @@ -59,8 +59,7 @@ void assemble_pre( typename FADoubleTraits::allocator_type factor_alloc_double(factor_alloc); typedef typename std::allocator_traits::template rebind_traits FAIntTraits; typename FAIntTraits::allocator_type factor_alloc_int(factor_alloc); - typedef typename std::allocator_traits::template rebind_traits PAIntTraits; - typename PAIntTraits::allocator_type pool_alloc_int(pool_alloc); + typedef typename std::allocator_traits::template rebind_alloc PoolAllocInt; /* Count incoming delays and determine size of node */ node.ndelay_in = 0; @@ -138,88 +137,91 @@ void assemble_pre( if(!node.first_child) task_asm_pre.done(); #endif - /* Add children */ - int* map = nullptr; + /* If we have no children, we're done. */ + if(node.first_child == nullptr && snode.contrib.size() == 0) return; + + /* + * Add children + */ int delay_col = snode.ncol; - if(node.first_child != NULL || snode.contrib.size() > 0) { - /* Build lookup vector, allowing for insertion of delayed vars */ - /* Note that while rlist[] is 1-indexed this is fine so long as lookup - * is also 1-indexed (which it is as it is another node's rlist[] */ - if(!map) map = PAIntTraits::allocate(pool_alloc_int, n+1); - for(int i=0; i map(n+1, PoolAllocInt(pool_alloc)); + for(int i=0; inext_child) { + for(auto* child=node.first_child; child!=NULL; child=child->next_child) { #ifdef PROFILE - Profile::Task task_asm_pre("TA_ASM_PRE"); + Profile::Task task_asm_pre("TA_ASM_PRE"); #endif - SymbolicNode const& csnode = child->symb; - /* Handle delays - go to back of node - * (i.e. become the last rows as in lower triangular format) */ - for(int i=0; indelay_out; i++) { - // Add delayed rows (from delayed cols) - T *dest = &node.lcol[delay_col*(ldl+1)]; - int lds = align_lda(csnode.nrow + child->ndelay_in); - T *src = &child->lcol[(child->nelim+i)*(lds+1)]; - node.perm[delay_col] = child->perm[child->nelim+i]; - for(int j=0; jndelay_out-i; j++) { - dest[j] = src[j]; - } - // Add child's non-fully summed rows (from delayed cols) - dest = node.lcol; - src = &child->lcol[child->nelim*lds + child->ndelay_in +i*lds]; - for(int j=csnode.ncol; jsymb; + /* Handle delays - go to back of node + * (i.e. become the last rows as in lower triangular format) */ + for(int i=0; indelay_out; i++) { + // Add delayed rows (from delayed cols) + T *dest = &node.lcol[delay_col*(ldl+1)]; + int lds = align_lda(csnode.nrow + child->ndelay_in); + T *src = &child->lcol[(child->nelim+i)*(lds+1)]; + node.perm[delay_col] = child->perm[child->nelim+i]; + for(int j=0; jndelay_out-i; j++) { + dest[j] = src[j]; } + // Add child's non-fully summed rows (from delayed cols) + dest = node.lcol; + src = &child->lcol[child->nelim*lds + child->ndelay_in +i*lds]; + for(int j=csnode.ncol; jcontrib) { - int cm = csnode.nrow - csnode.ncol; - int const block_size = 256; // FIXME: make configurable? - for(int iblk=0; iblkcontrib) { + int cm = csnode.nrow - csnode.ncol; + int const block_size = 256; // FIXME: make configurable? + for(int iblk=0; iblk(cm); - for(int j=iblk; jcontrib[i*cm]; - // NB: we handle contribution to contrib in assemble_post() - if(c < snode.ncol) { - // Contribution added to lcol - int ldd = align_lda(nrow); - T *dest = &node.lcol[c*ldd]; - asm_col(cm-i, &cache[i], &src[i], dest); - } + int* cache = work[omp_get_thread_num()].get_ptr(cm); + for(int j=iblk; jcontrib[i*cm]; + // NB: we handle contribution to contrib in assemble_post() + if(c < snode.ncol) { + // Contribution added to lcol + int ldd = align_lda(nrow); + T *dest = &node.lcol[c*ldd]; + asm_col(cm-i, &cache[i], &src[i], dest); } + } #ifdef PROFILE - task_asm_pre.done(); + task_asm_pre.done(); #endif - } /* task */ - } - if(cm > block_size) { - // only wait if we've actually created tasks - #pragma omp taskwait - } + } /* task */ + } + if(cm > block_size) { + // only wait if we've actually created tasks + #pragma omp taskwait } } } @@ -269,7 +271,6 @@ void assemble_pre( } } } - if(map) PAIntTraits::deallocate(pool_alloc_int, map, n+1); } template