Skip to content

Commit e842bf7

Browse files
committed
Update runq.c
runq - moarrr openmp/openacc parallel loops
1 parent 1c47da5 commit e842bf7

File tree

1 file changed

+10
-2
lines changed

1 file changed

+10
-2
lines changed

runq.c

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -506,8 +506,6 @@ void rmsnorm(float* o, float* x, float* weight, int size) {
506 506   #ifdef BLAS
507 507   ss = cblas_sdot(size, x, 1.0f, x, 1.0f);
508 508   #else
509     - // END L2E Addition
510     - // L2E Addition
511 509   #ifdef ACCEL
512 510   ACCELRD(ss) // OMP/OACC Macro
513 511   #endif
@@ -727,6 +725,11 @@ float* forward(Transformer* transformer, int token, int pos) {
727 725   matmul(s->xb2, &s->xq, w->wo + l, dim, dim);
728 726
729 727   // residual connection back into x
    728 + // L2E Addition
    729 + #ifdef ACCEL
    730 + ACCELS() // OMP/OACC Macro
    731 + #endif
    732 + // END L2E Addition
730 733   for (int i = 0; i < dim; i++) {
731 734   x[i] += s->xb2[i];
732 735   }
@@ -741,6 +744,11 @@ float* forward(Transformer* transformer, int token, int pos) {
741 744   matmul(s->hb2, &s->xq, w->w3 + l, dim, hidden_dim);
742 745
743 746   // SwiGLU non-linearity
    747 + // L2E Addition
    748 + #ifdef ACCEL
    749 + ACCELS() // OMP/OACC Macro
    750 + #endif
    751 + // END L2E Addition
744 752   for (int i = 0; i < hidden_dim; i++) {
745 753   float val = s->hb[i];
746 754   // silu(x)=x*σ(x), where σ(x) is the logistic sigmoid

0 commit comments

Comments
 (0)