Skip to content

Commit

Permalink
Update runq.c
Browse files Browse the repository at this point in the history
runq - speed up rmsnorm with OpenMP / OpenACC
  • Loading branch information
trholding committed Jul 20, 2024
1 parent 16e223f commit 1c47da5
Showing 1 changed file with 12 additions and 0 deletions.
12 changes: 12 additions & 0 deletions runq.c
Original file line number Diff line number Diff line change
Expand Up @@ -135,9 +135,11 @@ __static_yoink("zipos");
// Loop-acceleration pragma helpers. At most one backend is selected at build
// time: OPENMP takes precedence, otherwise OPENACC.
// NOTE(review): MK_PRAGMA is defined outside this view -- presumably it wraps
// the _Pragma() operator; confirm against its definition.
// If neither OPENMP nor OPENACC is defined, none of these macros exist; the
// call sites visible in rmsnorm() guard their use with `#ifdef ACCEL`.
#ifdef OPENMP
// ACCELS(): parallel for-loop directive with no extra clauses.
#define ACCELS() MK_PRAGMA(omp parallel for)
// ACCEL(...): parallel for-loop directive; the arguments are placed in a
// private() clause.
#define ACCEL(...) MK_PRAGMA(omp parallel for private(__VA_ARGS__))
// ACCELRD(VAR): parallel for-loop directive with a sum reduction over VAR,
// i.e. reduction(+:VAR).
#define ACCELRD(VAR) MK_PRAGMA(omp parallel for reduction(+:VAR))
#elif defined(OPENACC)
// OpenACC variants: same macro names and arguments, emitting
// `acc parallel loop` directives instead of `omp parallel for`.
#define ACCELS() MK_PRAGMA(acc parallel loop)
#define ACCEL(...) MK_PRAGMA(acc parallel loop private(__VA_ARGS__))
#define ACCELRD(VAR) MK_PRAGMA(acc parallel loop reduction(+:VAR))
#endif

// ----------------------------------------------------------------------------
Expand Down Expand Up @@ -504,6 +506,11 @@ void rmsnorm(float* o, float* x, float* weight, int size) {
#ifdef BLAS
ss = cblas_sdot(size, x, 1.0f, x, 1.0f);
#else
// END L2E Addition
// L2E Addition
#ifdef ACCEL
ACCELRD(ss) // OMP/OACC Macro
#endif
// END L2E Addition
for (int j = 0; j < size; j++) {
ss += x[j] * x[j];
Expand All @@ -515,6 +522,11 @@ void rmsnorm(float* o, float* x, float* weight, int size) {
ss += 1e-5f;
ss = 1.0f / sqrtf(ss);
// normalize and scale
// L2E Addition
#ifdef ACCEL
ACCELS() // OMP/OACC Macro
#endif
// END L2E Addition
for (int j = 0; j < size; j++) {
o[j] = weight[j] * (ss * x[j]);
}
Expand Down

0 comments on commit 1c47da5

Please sign in to comment.