Skip to content

Commit e879851

Browse files
committed
Short-caching of verbs broken
1 parent 38c328e commit e879851

File tree

12 files changed

+131
-102
lines changed

12 files changed

+131
-102
lines changed

jsrc/cc.c

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,7 @@ DF2(jtspecialatoprestart){F12IP;
141141

142142
// x <;.0 y and x (<;.0~ -~/"2)~ y where _1 { $x is 1 (i. e. 1 dimension of selection) localuse distinguishes the two cases (relative vs absolute length)
143143
// We go for minimum overhead in the box allocation and copy
144-
DF2(jtboxcut0){F12IP;A z;
144+
DF2(jtboxcut0){F12IP;A z;I i;
145145
ARGCHK2(a,w);
146146
// NOTE: this routine is called from jtwords. In that case, self comes from jtwords and is set up with the parm for x (<;.0~ -~/"2)~ y but with no failover routine.
147147
// Thus, the preliminary tests must not cause a failover. They don't, because the inputs from jtwords are known to be well-formed
@@ -159,21 +159,21 @@ DF2(jtboxcut0){F12IP;A z;
159159
I resatoms; PROD(resatoms,f,AS(a)); I cellsize; PROD(cellsize,wr-1,AS(w)+1);
160160
I k=bplg(t); C *wv=CAV(w); // k is length of an atom of w
161161
// allocate the result area
162-
GATV(z,BOX,resatoms,f,AS(a)); if(resatoms==0){RETF(z);} // could avoid filling with 0 if we modified AN after error, or cleared after *tnextpushp
162+
GATV(z,BOX,resatoms,f,AS(a)); if(resatoms==0){RETF(z);} // scaf could avoid filling with 0 if we modified AN after error, or cleared after *tnextpushp
163163
// We have allocated the result; now we allocate a block for each cell of w and copy
164164
// the w values to the new block.
165165
A *pushxsave; // place to restore tstack to
166166
AFLAGINIT(z,BOX) // Make result inplaceable; recursive too, since otherwise the boxes won't get freed
167167
// divert the allocation system to use the result area as a tstack
168168
pushxsave = jt->tnextpushp; jt->tnextpushp=AAV(z); // save tstack info before allocation
169-
// MUST NOT FAIL UNTIL tstack restored
169+
// MUST NOT FAIL UNTIL tstack restored ****************************************************************************************************
170170
A y; // y is the newly-allocated block
171171
// Step through each block: fetch start/end; verify both positive and in range; calc size of block; alloc and move; make block recursive
172172
I (*av)[2]=(I (*)[2])voidAV(a); // pointer to first start/length pair
173173
I abslength=(I)FAV(self)->localuse.boxcut0.parm; // 0 for start/length, ~0 for start/end+1
174174
wr=wr==0?1:wr; // We use this rank to allocate the boxes - we always create arrays
175-
A wback=ABACK(w); wback=AFLAG(w)&AFVIRTUAL?wback:w; // w is the backer for new blocks unless it is itself sirtual
176-
I i; for(i=0;i<resatoms;++i){
175+
A wback=ABACK(w); wback=AFLAG(w)&AFVIRTUAL?wback:w; // w is the backer for new blocks unless it is itself virtual
176+
for(i=0;i<resatoms;++i){
177177
I start=av[i][0]; I endorlen=av[i][1];
178178
if(!(BETWEENO(start,0,wi))){jt->tnextpushp=pushxsave; R (FAV(self)->localuse.boxcut0.func)(jtfg,a,w,self);} // verify start in range - failover if not
179179
endorlen+=start&abslength; // convert len to end+1 form
@@ -200,7 +200,7 @@ DF2(jtboxcut0){F12IP;A z;
200200
// raise the backer for all the virtual blocks taken from it. The first one requires ra() to force the backer recursive; after that we can just add to the usecount. And make w noninplaceable, since it now has an alias at large
201201
if(unlikely((I)jtfg&JTWILLBEOPENED)){I nboxes=jt->tnextpushp-AAV(z); if(likely(nboxes!=0)){ACIPNO(w); ra(wback); ACADD(wback,nboxes-1);}} // get # boxes allocated without error
202202
jt->tnextpushp=pushxsave; // restore tstack pointer
203-
// OK to fail now - memory is restored
203+
// OK to fail now - memory is restored ******************************************************************************************************
204204
ASSERT(y!=0,EVWSFULL); // if we broke out on allocation failure, fail. Since the block is recursive, when it is tpop()d it will recur to delete contents
205205
// The result can be called pristine if the contents are DIRECT and the result is recursive, because it contains all copied data
206206
AFLAGORLOCAL(z,(-(t&DIRECT))&(~(I)jtfg<<(AFPRISTINEX-JTWILLBEOPENEDX))&AFPRISTINE)

jsrc/j.h

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1344,8 +1344,22 @@ if(likely(!((I)jtfg&JTWILLBEOPENED)))z=EPILOGNORET(z); RETF(z); \
13441344
// gives errors in some versions #define ALLOBLOCK(n) MAX(PMINL-1,(31-__builtin_clzl((UI4)(n)))) // lg2(#bytes to allocate)-1. n is #bytes-1
13451345
// value to put into name->bucketx for locale names: number if numeric, hash otherwise
13461346
#define BUCKETXLOC(len,s) ((*(s)<='9')?strtoI10s((len),(s)):(I)nmhash((len),(s)))
1347-
#define ACVCACHECLEAR __atomic_fetch_add(&JT(jt,fnasgnct),1,__ATOMIC_ACQ_REL) // incr the cache counter for ACVs
1348-
#define ACVCACHEREAD __atomic_load_n(&JT(jt,fnasgnct),__ATOMIC_ACQUIRE) // read the current cache counter
1347+
// the cache counter/lock is 48 bits of counter (the upper bits) and 16 bits of lock (the low bits, 8 bits each read/write). Write unlock increments the counter
1348+
#if PYXES
1349+
#define ACVCACHEREAD (__atomic_load_n(&JT(jt,fnasgnct),__ATOMIC_ACQUIRE)>>16) // read the current cache counter
1350+
#define ACVCACHEREADLOCK ({UI prev; if(unlikely(((prev=__atomic_fetch_add(&JT(jt,fnasgnct),1,__ATOMIC_ACQ_REL))&(US)-WLOCKBIT)!=0)){readlock((S*)&JT(jt,fnasgnct),(S)prev); prev=__atomic_load_n(&JT(jt,fnasgnct),__ATOMIC_ACQUIRE);} prev>>16;})
1351+
#define ACVCACHEREADUNLOCK __atomic_fetch_sub(&JT(jt,fnasgnct),1,__ATOMIC_ACQ_REL); // decrement the read bits
1352+
#define ACVCACHEWRITELOCK WRITELOCK(*(S*)&JT(jt,fnasgnct)) // take the lock, ignoring data value
1353+
#define ACVCACHECLEAR __atomic_fetch_add(&JT(jt,fnasgnct),0x10000, __ATOMIC_ACQ_REL); // increment cache count - used when we aren't freeing the only reference to the acv
1354+
#define ACVCACHEWRITEUNLOCK ACVCACHECLEAR __atomic_fetch_and(&JT(jt,fnasgnct),(UI)~(0xffff&-WLOCKBIT), __ATOMIC_ACQ_REL); // decrement write lock and increment the upper bits
1355+
#else
1356+
#define ACVCACHEREAD JT(jt,fnasgnct) // read the current cache counter
1357+
#define ACVCACHEREADLOCK ACVCACHEREAD
1358+
#define ACVCACHEREADUNLOCK
1359+
#define ACVCACHEWRITELOCK
1360+
#define ACVCACHEWRITEUNLOCK ++JT(jt,fnasgnct); // increment cache count
1361+
#define ACVCACHECLEAR ++JT(jt,fnasgnct); // increment cache count - used when we aren't freeing the only reference to the acv
1362+
#endif
13491363
// Support for int-to-float, in parallel. Input is u, 64-bit int with a type of float; result is 64-bit floats. Define DECLS first.
13501364
// we use initecho() to initialize zero and one because the compiler moves the initialization to inside the loop
13511365
#define CVTEPI64DECLS __m256i magic_i_lo = _mm256_castpd_si256(_mm256_broadcast_sd(&two_52)); /* 2^52 */ \

jsrc/ja.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -347,7 +347,7 @@
347347
#define extnvr(x) jtextnvr(jt,(x))
348348
#define scaft2(x)
349349
#if ((MEMAUDIT&5)==5) && SY_64
350-
#define scaft(x) testbuf(x); if(AN(x)<0)SEGFAULT;
350+
#define scaft(x) testbuf(x); if(AFLAG(x)<0)SEGFAULT;
351351
#else
352352
#define scaft(x)
353353
#endif

jsrc/jt.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -396,14 +396,14 @@ typedef struct JSTstruct {
396396
C _cl6[0];
397397
A dbstops; /* stops set by the user */
398398
A dbtrap; // trap sentence, execute when going into suspension
399-
UI4 fnasgnct; // number of assignments to ACV, change to locale path, etc. Lookups of ACVs are cached and
400-
// reused as long as one of these cache-invalidating actions has happened.
399+
UI fnasgnct; // number of assignments to ACV, change to locale path, etc. Lookups of ACVs are cached and
400+
// reused as long as none of these cache-invalidating actions has happened. Low 16 bits are a lock
401401
S dblock; // lock on dbstops/dbtrap
402402
// rest of cacheline is essentially read-only
403-
// 2 bytes free
403+
// 6 bytes free
404404
A evm; // message text for the EVxxx codes
405405
I (*emptylocale)[MAXTHREADS][16]; // locale with no symbols, used when not running explicits, or to avoid searching the local syms. Aligned on odd word boundary, must never be freed. One per task, because they are modified
406-
I filler6[3];
406+
I filler6[2];
407407
// end of cacheline 6
408408

409409
// Cacheline 7: startup (scripts and deprecmsgs), essentially read-only

jsrc/jtype.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1373,7 +1373,7 @@ typedef struct __attribute__((aligned(CACHELINESIZE))) {I memhdr[AKXR(0)/SZI]; u
13731373
#define FUNCTYPE0 ((A)(validitymask+12)) // 0 0 0 0, which has a 0 in the AT field
13741374
#define FUNCID0 ((A)(validitymask-4*(!SY_64))) // 0 in index [15] ([19] for 32-bit), which has a 0 in the id field of V
13751375
#define SYMVAL0 ((L*)(validitymask+12)) // 0 0, which has a 0 in the val field of L
1376-
#define AFLAG0 ((A)(validitymask+12)) // 0 0, which has a 0 in the flag field of A
1376+
#define AFLAG0 ((A)(validitymask+12)) // 0 0 0 0, which has a 0 in the flag field and type field of A
13771377
#define ANLEN0 ((A)(validitymask+12-4)) // x x x x 0 0, which has a 0 in the AN field
13781378
#define ZAPLOC0 ((A*)(validitymask+12)) // 0 used as a pointer to a null tpop-stack value
13791379
#define PSTK2NOTFINALASGN ((PSTK*)(validitymask+12)-2) // 0 in position [2], signifying NOT final assignment (used for errors)

0 commit comments

Comments
 (0)