jsrc/ct.c  (+2 -2)

@@ -533,7 +533,7 @@ static I jtthreadcreate(J jt,I n){
  R1;
 }

-// execute the user's task. Result is an ordinary array or a pyx. Bivalent
+// execute the user's task. Result is an ordinary array or a pyx. Bivalent (a,w,self) or (w,self,self) called from unquote or parse
 static A jttaskrun(J jt,A arg1, A arg2, A arg3){A pyx;
  ARGCHK2(arg1,arg2); // the verb is not the issue
  RZ(pyx=jtcreatepyx(jt,-2,inf));
@@ -549,7 +549,7 @@ static A jttaskrun(J jt,A arg1, A arg2, A arg3){A pyx;
 // or if it is UNINCORPABLE (in which case we only need to clone the nonrecursive block). After that, ra() the arguments to protect them until the task completes.
 // It would be nice to be able to free the virtual before the task completes, but we don't have a way to (we could realize/fa in the worker, but why?). The virtual backer will be tied up during the task, but we
 // won't have to copy the data here and then transfer it in the task
- if(dyad){ra(arg3);} // arg3 is x/self, so never virtual; just ra
+ ASSERT(ISDENSE(AT(arg1)),EVNONCE) if(dyad){ASSERT(ISDENSE(AT(arg2)),EVNONCE) ra(arg3);} // Don't allow sparse args since we can't box them; arg3 is self, so never virtual; just ra

 // we do that we have to remove any virtual blocks created here so that they don't raise y
  rifv(uz); // if uz is a virtual, realize it in case it is backed by y
  RZ(uz=EPILOGNORET(uz)); // free any virtual blocks we created
- if(negifipw<0)ACRESET(w,origacw) // usecount of y has been restored; restore inplaceability
+ if((origacw&negifipw&(AC(w)-2))<0)ACRESET(w,origacw) // usecount of y has been restored; restore inplaceability. The use of origacw is subtle. In a multithreaded system you mustn't reset the usecount lest another thread
+  // has raised it. So, we reset AC to ACINPLACE only in the case where it was originally inplaceable, because then we can be sure the same block is not in use in another thread.
+  // Also, if AC(w) is above 1, it has escaped and must no longer be inplaced. If it isn't above 1, it must be confined to here
  // do the inverse
  if(FAV(v)->id==CCOMMA){RZ(z=reshape(shape(w),uz)); // inv for , is ($w)&($,)
  }else{RZ(z=jtamendn2(jtinplace,uz,w,FAV(v)->fgh[0],ds(CAMEND))); // inv for m&{ is m}&w
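
The sign test added above is compact, so here is a minimal sketch of the idea with illustrative names (not the engine's AC/ACRESET macros), assuming the usual convention that the sign bit of a usecount marks a block as inplaceable:

    #include <stdint.h>
    typedef int64_t I;

    /* Restore the inplaceable usecount only when it is provably safe to do so. */
    static void restore_inplace(I *usecount, I orig_usecount, I neg_if_inplace_on_entry)
    {I current = *usecount;
     /* The AND is negative only if all three values are negative:
        - orig_usecount < 0           : the block was inplaceable before we raised it
        - neg_if_inplace_on_entry < 0 : the caller requested inplace execution
        - current-2 < 0               : the usecount has come back down to 1, so the block
                                        cannot have escaped to, or been raised by, another thread */
     if((orig_usecount & neg_if_inplace_on_entry & (current-2)) < 0) *usecount = orig_usecount;
    }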

 // perhaps something like an lru cache of threads recently freed to? Do a linear scan of the first k entries (maybe w/short simd if the first is a miss), and if they all miss, then fall back to--snmalloc trick, or sort buffer, or something else
 // Or maybe a fixed-size cache, and anything that falls out of it gets immediately flushed? I like that, because it helps prevent singleton allocations from getting lost
  DC sitop; /* pointer to top of SI stack */
- A *pmttop; // tstack top to free to when releasing the postmortem stack. Non0 indicates pm debugging session is active
+ A *pmttop; // tstack top to free to when releasing the postmortem stack. Non0 indicates pm debugging session is active. Could move to JST
  I mfreegenallo; // Amount allocated through malloc, biased; modified only by owning thread
  I malloctotal; // net total of malloc/free performed in m.c only; modified only by owning thread
  UI cstackinit; // C stack pointer at beginning of execution
- I filler7[4];
+ I mfreegenalloremote; // Amount allocated through malloc but freed by other threads (frees only, so always negative)
+ I malloctotalremote; // net total of malloc/free performed in m.c only but freed by other threads (frees only, so always negative)
+ I malloctotalhwmk; // highest value since most recent 7!:1
+ I filler7[1];
  // end of cacheline 7
  C _cl8[0];
@@ -343,8 +346,10 @@ typedef struct JSTstruct {
 #if MEMAUDIT&2
  C audittstackdisabled; // set to 1 to disable auditing
 #endif
- // 2-3 byte free
+ // 0-1 byte free
  // rest of cacheline used only in exceptional paths
+ C oleop; /* com flag to capture output */
+ UC cstacktype; /* cstackmin set during 0: jt init 1: passed in JSM 2: set in JDo */
  void *smpoll; /* re-used in wd */
  void *opbstr; /* com ptr to BSTR for captured output */
  I filler3[4];
@@ -411,14 +416,11 @@ typedef struct JSTstruct {
  US cachesizes[3]; // [0]: size of fastest cache [1]: size of largest cache private to each core [2]: size of largest cache shared by all cores, in multiples of 4KB
  C bx[11]; /* box drawing characters */
  UC disp[7]; // # different verb displays, followed by list thereof in order of display  could be 15 bits
- C oleop; /* com flag to capture output */
- UC cstacktype; /* cstackmin set during 0: jt init 1: passed in JSM 2: set in JDo */
- // 6 bytes free
 #if PYXES||1
  JOBQ (*jobqueue)[MAXTHREADPOOLS]; // one JOBQ block for each threadpool

jsrc/m.c  (+39 -20)

@@ -364,6 +364,10 @@ B jtspfree(J jt){I i;A p;
 // adding the bytes for those blocks to mfreebgenallo
  jt->mfreegenallo-=SBFREEB- (jt->memballo[i] & ~MFREEBCOUNTING); // subtract diff between current mfreeb[] and what it will be set to
  jt->memballo[i] =SBFREEB+ (jt->memballo[i] &MFREEBCOUNTING); // set so we trigger rescan when we have allocated another SBFREEB bytes
+
+ // transfer bytes freed in other threads back to the totals for this thread
+ I xfct=jt->malloctotalremote; jt->malloctotal+=xfct; __atomic_fetch_sub(&jt->malloctotalremote,xfct,__ATOMIC_ACQ_REL); // remote mods must be atomic
+ xfct=jt->mfreegenalloremote; jt->mfreegenallo+=xfct; __atomic_fetch_sub(&jt->mfreegenalloremote,xfct,__ATOMIC_ACQ_REL); // remote mods must be atomic
  }
 }
 jt->uflags.spfreeneeded=0; // indicate no check needed yet
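
The two added lines implement a per-thread accounting handshake: only the owning thread touches malloctotal/mfreegenallo, other threads that free this thread's blocks post their (negative) adjustments atomically into the *remote counters, and the owner periodically folds the remote totals back in. A self-contained sketch of the pattern with hypothetical names (not the engine's fields):

    #include <stdint.h>
    typedef int64_t I;

    typedef struct {
     I local;   // modified only by the owning thread
     I remote;  // modified atomically by other threads; frees only, so always <=0
    } acct;

    // called by a thread that frees bytes charged to another thread's accounting
    static void account_remote_free(acct *owner, I bytes){
     __atomic_fetch_sub(&owner->remote, bytes, __ATOMIC_ACQ_REL);
    }

    // called occasionally by the owning thread (e.g. during its garbage-collection scan)
    static void fold_remote(acct *a){
     I xfct = a->remote;                                      // snapshot what other threads have posted
     a->local += xfct;                                        // fold it into the owner-only total
     __atomic_fetch_sub(&a->remote, xfct, __ATOMIC_ACQ_REL);  // remove exactly what we took; later posts survive
    }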
@@ -471,17 +475,16 @@ F1(jtmmaxs){I j,m=MLEN,n;
 } /* 9!:21 space limit set */


-// Get total # bytes in use. That's total allocated so far, minus the bytes in the free lists and the blocks to be repatriated.
+// Get total # bytes in use in the current thread. That's total allocated so far, minus the bytes in the free lists and the blocks to be repatriated.
 // mfreeb[] is a negative count of blocks in the free list, and biased so the value goes negative
-// when garbage-collection is required. All non-pool allocations are accounted for in
-// mfreegenallo
+// when garbage-collection is required. All non-pool allocations are accounted for in mfreegenallo
 // At init, each mfreeb indicates SBFREEB bytes. mfreegenallo is negative to match that total,
 // indicating nothing has really been allocated; that's (PLIML-PMINL+1)*SBFREEB to begin with. When a block
 // is allocated, mfreeb[] increases; when a big block is allocated, mfreegenallo increases by the
 // amount of the allocation, and mfree[-PMINL+n] decreases by the amount in all the blocks that are now
 // on the free list.
 // At coalescing, mfreeb is set back to indicate SBFREEB bytes, and mfreegenallo is decreased by the amount of the setback.
-I jtspbytesinuse(J jt){I i,totalallo=jt->mfreegenallo&~MFREEBCOUNTING; // start with bias value
+I jtspbytesinuse(J jt){I i,totalallo=(jt->mfreegenallo&~MFREEBCOUNTING)+jt->mfreegenalloremote; // start with bias value
  if(jt->repatq)totalallo-=AC(jt->repatq); // bytes awaiting gc should not be considered inuse
  for(i=PMINL;i<=PLIML;++i){totalallo+=jt->memballo[-PMINL+i]&~MFREEBCOUNTING;} // add all the allocations
  R totalallo;
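
Putting the comment block above into one computation: per-thread bytes in use are the biased non-pool total (now including bytes freed remotely), minus anything queued for repatriation, plus the per-size-class pool counters. A rough, self-contained sketch with simplified stand-in fields (NPOOLS and all names here are assumptions, not the engine's layout):

    #include <stdint.h>
    typedef int64_t I;
    enum { NPOOLS = 8 };        // assumed number of pool size classes (PLIML-PMINL+1 in the engine)

    typedef struct {
     I gen_allocated;           // like mfreegenallo: non-pool bytes, biased at init
     I gen_freed_remotely;      // like mfreegenalloremote: always <=0
     I pool_counter[NPOOLS];    // like memballo[]: per-size-class biased counters
     I awaiting_gc;             // like AC(jt->repatq): bytes queued for repatriation
    } thread_acct;

    static I bytes_in_use(const thread_acct *t){
     I total = t->gen_allocated + t->gen_freed_remotely;   // non-pool allocations net of remote frees
     total -= t->awaiting_gc;                              // queued-for-gc bytes are not "in use"
     for(int i=0; i<NPOOLS; ++i) total += t->pool_counter[i];
     return total;
    }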
@@ -910,16 +913,19 @@ A jtgc(J jt,A w,A* old){
 // calls where w is the oldest thing on the tpush stack are not uncommon. In that case we don't need to do ra/tpop/fa/repair-inplacing. We can also save the repair if we KNOW w will be freed during the tpop
  A *pushp=jt->tnextpushp; // top of tstack
  if(old==pushp){if(AC(w)>=0){ra(w); tpush(w);} // if nothing to pop: (a) if inplaceable, make no change (value must be protected up the tstack); (b) otherwise protect the value on the tstack
- }else if(*old==w){ // does the start of tstack point to w?
-  // w is the first element on the tstack. If it is the ONLY element, we can stand pat; no need to make w recursive
-  if(old!=pushp-1){
-   // there are other elements on tstack, we have to make w recursive because freeing one might otherwise delete contents of w. We can leave inplace status unchanged for w
-   radescend(w); A *old1=old+1; if(likely(((UI)old1&(NTSTACKBLOCK-1))!=0))tpop(old1); else{*old=0; tpop(old); tpush(w);} // make w recursive; if we can back up to all but the first stack element, do that, leaving w on stack as before; otherwise reinstall
-  } // raise descendants. Descendants were raised only when w turned from nonrecursive to recursive. Sparse w also descends, but always recurs in tpush
- }else if(((UI)REPSGN(AC(w))&(UI)AZAPLOC(w))>=(UI)old&&likely((((UI)old^(UI)pushp)&-NTSTACKBLOCK)==0)){ // inplaceable zaploc>=old - but that is valid only when we know pushp and old are in the same stack block
-  // We can see that w is abandoned and is about to be freed. Swap it with *old and proceed
-  radescend(w); *AZAPLOC(w)=*old; *old=w; AZAPLOC(w)=old; tpop(old+1); // update ZAPLOC to point to new position in stack
+ }else if(likely(ISDENSE(AT(w)))){ // sparse blocks cannot simply be left in *old because the contents are farther down the stack and would have to be protected too
+  if(*old==w){ // does the start of tstack point to w?
+   // w is the first element on the tstack. If it is the ONLY element, we can stand pat; no need to make w recursive
+   if(old!=pushp-1){
+    // there are other elements on tstack, we have to make w recursive because freeing one might otherwise delete contents of w. We can leave inplace status unchanged for w
+    radescend(w); A *old1=old+1; if(likely(((UI)old1&(NTSTACKBLOCK-1))!=0))tpop(old1); else{*old=0; tpop(old); tpush(w);} // make w recursive; if we can back up to all but the first stack element, do that, leaving w on stack as before; otherwise reinstall
+   } // raise descendants. Descendants were raised only when w turned from nonrecursive to recursive. Sparse w also descends, but always recurs in tpush
+  }else if(((UI)REPSGN(AC(w))&(UI)AZAPLOC(w))>=(UI)old&&likely((((UI)old^(UI)pushp)&-NTSTACKBLOCK)==0)){ // inplaceable zaploc>=old - but that is valid only when we know pushp and old are in the same stack block
+   // We can see that w is abandoned and is about to be freed. Swap it with *old and proceed
+   radescend(w); *AZAPLOC(w)=*old; *old=w; AZAPLOC(w)=old; tpop(old+1); // update ZAPLOC to point to new position in stack
+  }else goto general; // no applicable special case, do the ra/tpop sequence
  }else{
+general:;
  // general case, w not freed or not abandoned
  ra(w); // protect w and its descendants from tpop; also converts w to recursive usecount (unless sparse).
 // if we are turning w to recursive, this is the last pass through all of w incrementing usecounts. All currently-on-stack pointers to blocks are compatible with the increment
@@ -1130,9 +1136,9 @@ A* jttg(J jt, A *pushp){ // Filling last slot; must allocate next page.

 // back the tpush stack up to the previous allocation. We have just popped off the last element of the current allocation
 // (that is, we have moved tnextpushp to the chain field at the start of the allocation)
-// we keep one allocation in hand in tstacknext to avoid hysteresis. If there is one already there
+// we keep one allocation in hand in tstacknext to avoid hysteresis. If there is one already there we free it
 void freetstackallo(J jt){
- if(jt->tstacknext){FREECHK(jt->tstacknext); __atomic_fetch_sub(&jt->malloctotal,NTSTACK+NTSTACKBLOCK,__ATOMIC_ACQ_REL);} // account for malloc'd memory
+ if(jt->tstacknext){FREECHK(jt->tstacknext); jt->malloctotal-=NTSTACK+NTSTACKBLOCK;} // account for malloc'd memory
  // We will set the block we are vacating as the next-to-use. We keep only 1 such; if there is one already, free it
  jt->tstacknext=jt->tstackcurr; // save the next-to-use, after removing bias
  jt->tstackcurr=(A*)jt->tstackcurr[0]; // back up to the previous block
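
The hysteresis comment describes a keep-one-in-hand policy: when a tstack segment is vacated it is cached for immediate reuse, but never more than one is kept, so repeated push/pop across a segment boundary doesn't malloc/free on every crossing. A minimal sketch of that policy alone (hypothetical types, plain malloc/free rather than the engine's allocator):

    #include <stdlib.h>

    typedef struct seg { struct seg *prev; /* ...stack payload... */ } seg;

    static seg *cached = NULL;           // at most one spare segment kept in hand

    static void release_segment(seg **top){
     if(cached) free(cached);            // already holding a spare: free the older one
     cached = *top;                      // keep the segment we are vacating for quick reuse
     *top = (*top)->prev;                // back up to the previous segment
    }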
@@ -1239,7 +1245,8 @@ __attribute__((noinline)) A jtgafallopool(J jt){
  ASSERT(av=MALLOC(PSIZE+TAILPAD),EVWSFULL);
 #endif
  I blockx=(I)jt&63; jt=(J)((I)jt&-64);
- I nt=jt->malloctotal+=PSIZE+TAILPAD+ALIGNPOOLTOCACHE*CACHELINESIZE; // add to total JE mem allocated
+ jt->malloctotal+=PSIZE+TAILPAD+ALIGNPOOLTOCACHE*CACHELINESIZE; // add to total JE mem allocated
+ I nt=jt->malloctotalremote+jt->malloctotal; // get net total allocated from this thread & not freed
  jt->mfreegenallo+=PSIZE+TAILPAD+ALIGNPOOLTOCACHE*CACHELINESIZE; // add to total from OS

  A *tp=jt->tnextpushp; AZAPLOC(z)=tp; *tp++=z; jt->tnextpushp=tp; if(unlikely(((I)tp&(NTSTACKBLOCK-1))==0))RZ(z=jttgz(jt,tp,z)); // do the tpop/zaploc chaining
  MOREINIT(z); // init allocating thread# and clear the lock
@@ -1553,11 +1561,22 @@ printf("%p-\n",w);
 #endif
  allocsize+=TAILPAD+ALIGNTOCACHE*CACHELINESIZE; // the actual allocation had a tail pad and boundary
 #if PYXES
- jt=JTFORTHREAD1(jt,w->origin); // for space accounting, switch to the thread the block came from *** this modifies jt ***
-#endif
+ J jtremote=JTFORTHREAD1(jt,w->origin);
+ if(likely(jtremote==jt)){ // normal case of freeing in the allocating thread: avoid atomics
+  // obsolete jt=JTFORTHREAD1(jt,w->origin); // for space accounting, switch to the thread the block came from *** this modifies jt ***
+  jt->malloctotal-=allocsize;
+  jt->mfreegenallo-=allocsize; // account for all the bytes returned to the OS
+ }else{ // the block was allocated in another thread. Account for its free there
+ if(unlikely(jtremote->mfreegenallo&MFREEBCOUNTING))__atomic_fetch_sub(&jtremote->bytes,allocsize,__ATOMIC_ACQ_REL); // keep track of total allocation, needed only if enabled
+#else
 jt->malloctotal-=allocsize;
 jt->mfreegenallo-=allocsize; // account for all the bytes returned to the OS
-if(unlikely(jt->mfreegenallo&MFREEBCOUNTING))jt->bytes-=allocsize; // keep track of total allocation, needed only if enabled
+if(unlikely(jt->mfreegenallo&MFREEBCOUNTING))__atomic_fetch_sub(&jt->bytes,allocsize,__ATOMIC_ACQ_REL); // keep track of total allocation, needed only if enabled
+#endif
+
 #if ALIGNTOCACHE
  FREECHK(((I**)w)[-1]); // point to initial allocation and free it
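
The restructured free path avoids atomics in the common case: when the block is freed by the thread that allocated it, the owner-only counters are adjusted with plain arithmetic; only a cross-thread free touches the owner's *remote counters, and then only atomically. A self-contained sketch of that split with hypothetical names (the engine locates the owner via w->origin and JTFORTHREAD1):

    #include <stdint.h>
    typedef int64_t I;

    typedef struct { I local; I remote; } acct;   // local: owner-only; remote: atomic, frees only

    static void account_free(acct *self, acct *block_owner, I bytes){
     if(block_owner == self){
      self->local -= bytes;            // common case: no other thread can be touching this counter
     }else{
      __atomic_fetch_sub(&block_owner->remote, bytes, __ATOMIC_ACQ_REL);  // cross-thread free
     }
    }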

jsrc/sc.c  (+1 -1)

@@ -184,7 +184,7 @@ DF2(jtunquote){A z;
  jt->parserstackframe.sf=fs; // as part of starting the name we set the new recursion point
  // Execute the name. First check 4 flags at once to see if anything special is afoot: debug, pm, bstk, garbage collection
  if(likely(!(jt->uflags.ui4))) {
-  // No special processing. Just run the entity
+  // No special processing. Just run the entity as (a,w,self) or (w,self,self)
   // We preserve the XDEFMODIFIER flag in jtinplace, because the type of the exec must not have been changed by name lookup. Pass the other inplacing flags through if the call supports inplacing
   z=(*actionfn)((J)((I)jt+((FAV(fs)->flag&(flgd0cpC&FLGMONAD+FLGDYAD)?JTFLAGMSK:JTXDEFMODIFIER)&flgd0cpC)),a,w,fs); // keep MODIFIER flag always, and others too if verb supports it
   if(unlikely(z==0)){jteformat(jt,jt->parserstackframe.sf,a,w,0);} // make this a format point
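
For context on the "(a,w,self) or (w,self,self)" comment: a bivalent action function takes three array arguments, and the caller fills them according to valence, so the last argument is always the executing verb itself. A tiny sketch of the convention with stand-in types (not the engine's declarations):

    typedef struct array_rec *A;   // stand-in for a J array block
    typedef struct jt_rec    *J;   // stand-in for the per-thread context
    typedef A (*actionf)(J jt, A arg1, A arg2, A arg3);

    static A call_bivalent(J jt, actionf f, A a, A w, A self, int is_dyad){
     return is_dyad ? f(jt, a, w, self)      // dyadic  x f y
                    : f(jt, w, self, self);  // monadic   f y
    }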