You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
Copy file name to clipboardExpand all lines: jsrc/p.c
+25-24Lines changed: 25 additions & 24 deletions
Original file line number
Diff line number
Diff line change
@@ -468,7 +468,7 @@ A jtparsea(J jt, A *queue, I nwds){F1PREFIP;PSTK *stack;A z,*v;
468
468
jt->parserstackframe.parserstkbgn=currstk+PSTACKRSV; // advance over the original-sentence info, creating an upward-growing stack at the bottom of the area. jt->parserstackframe.parserstkbgn[-1] has the error info
469
469
470
470
// mash into 1 register: bit 32-63 stack0pt, bit 29-31 (from CONJX) es delayline pull 3/2/1 after current word,
471
-
// (exec) 23-24,26 VJTFLGOK1+VJTFLGOK2+VASGSAFE from verb flags 27 PTNOTLPARX set if stack[0] is not (
471
+
// (exec) 23,26 VJTFLGOK1/2+VASGSAFE from verb flags 27 PTNOTLPARX set if stack[0] is not (
472
472
// 25 set if first stack word AFTER the executing fragment is NOT MARK/RPAR (i.e. there are executions remaining on the stack)
473
473
// (name resolution) 23-26 free
474
474
// (exec) 20-22 savearea for pmask for lines 0-2 (stack) 17,20 flags from at NAMEBYVALUE/NAMEABANDON
@@ -504,14 +504,13 @@ A jtparsea(J jt, A *queue, I nwds){F1PREFIP;PSTK *stack;A z,*v;
504
504
505
505
// Set starting word#, and number of extra words to pull from the queue. We always need 2 words after the first before a match is possible, or maybe 3 as calculated above
506
506
pt0ecam+=nwds+((0b11LL<<CONJX)<<pull4);
507
-
// debug if(jt->parsercalls==0x9d0)
508
-
// debug jt->parsercalls=0x9d0;
509
-
510
507
511
508
Ay; // y will be the word+flags for the next queue value to process. We reload it so that it never has to be saved over a call
512
509
y=*(volatileA*)queue; // unroll y once
513
510
514
511
++jt->parsercalls; // now we are committed to full parse. Push stacks.
512
+
// debug if(jt->parsercalls == 0x4)
513
+
// debug jt->parsercalls = 0x4;
515
514
PSTK*stackend1=stack=jt->parserstackframe.parserstkend1-BACKMARKS; // start at the end, pointing to first of 3 marks
516
515
517
516
// We don't actually put a mark in the queue at the beginning. When m goes down to 0, we infer a mark.
@@ -670,17 +669,17 @@ endname: ;
670
669
// register pressure is severe where we do subroutine calls below
671
670
Ipmask=(I)((C*)&stack[1].pt)[1] & (I)((C*)&stack[2].pt)[2]; // stkpos 2 is enough to detect bit 0 if result is 0-4
672
671
PSTK*fsa=(PSTK*)((I)stack+((2*sizeof(PSTK))>>((I)((C*)&stack[2].pt)[2]&1))); // pointer to stack slot for the CAV to be executed, for lines 0-4 1 2 2 (2 2)
673
-
Afs=QCWORD(__atomic_load_n(&fsa->a,__ATOMIC_ACQUIRE)); // the action to be executed if lines 0-4. Must read early: dependency is pmask[0]->fsa->fs->fsflag to settle before we check assignments
672
+
Afs=QCWORD(__atomic_load_n(&fsa->a,__ATOMIC_ACQUIRE)); // the action to be executed if lines 0-4. Must read early: dependency is pmask[0]->fsa->fs->fsflag to settle before we check assignments. Could be garbage
674
673
pmask&=GETSTACK0PT; // finish 1st 3 columns of parse
675
674
// obsolete PSTK *fsa=&stack[2-(pmask&1)]; // pointer to stack slot for the CAV to be executed, for lines 0-4 1 2 2 (2 2)
676
675
// obsolete not in J32 PSTK *fsa=(PSTK *)((I)&stack[2]+((pmask<<(BW-1))>>(BW-4))); // pointer to stack slot for the CAV to be executed, for lines 0-4 1 2 2 (2 2)
677
676
// clang creates a branch! PSTK *fsa=&stack[2], *fsa1=&stack[1]; fsa=pmask&1?fsa1:fsa; // pointer to stack slot for the CAV to be executed, for lines 0-2 1 2 2
678
-
pmask&=(I)((C*)&stack[3].pt)[3]; // finish 3d column of parse
679
-
Afs1=__atomic_load_n(&stack[1].a,__ATOMIC_ACQUIRE); // in case of line 1 V0 V1 N2, we will need the flags from V1. path is fs1,fs->fs1flag to settle before the second assignment check
677
+
pmask&=(I)((C*)&stack[3].pt)[3]; // finish 4th column of parse
678
+
Afs1=__atomic_load_n(&stack[1].a,__ATOMIC_ACQUIRE); // in case of line 1 V0 V1 N2, we will need the flags from V1. Could be garbage
680
679
pt0ecam&=~(VJTFLGOK1+VJTFLGOK2+VASGSAFE+PTNOTLPAR+NOTFINALEXEC+(7LL<<PMASKSAVEX)); // clear all the flags we will use
681
680
682
-
if(pmask!=0){ // If all 0, nothing is dispatchable, go push next word after checking for (
683
-
fs1=QCWORD(fs1); // clear flags from address
681
+
if(likely(pmask!=0)){ // If all 0, nothing is dispatchable, go push next word after checking for ( . likely is an overstatement but it gives better register usage
682
+
fs1=QCWORD(fs1); // clear flags from address
684
683
// We are going to execute an action routine. This will be an indirect branch, and it will mispredict. To reduce the cost of the misprediction,
685
684
// we want to pile up as many instructions as we can before the branch, preferably getting out of the way as many loads as possible so that they can finish
686
685
// during the pipeline restart. The perfect scenario would be that the branch restarts while the loads for the stack arguments are still loading.
@@ -700,12 +699,12 @@ endname: ;
700
699
if(unlikely((!PTISMARKBACKORRPAR(fsa[2]))))pt0ecam|=NOTFINALEXEC; // remember if this exec is final in its branch. Wait till we know not fail, so we don't have to wait for (. Used after execution
701
700
pt0ecam|=pmask<<PMASKSAVEX; // save pmask over the subroutine calls - also used after the verb execution
702
701
// Get the branch-to address. It comes from the appropriate valence of the appropriate stack element. Stack element is 2 except for line 0; valence is monadic for lines 0 1 4
703
-
jt->fillv=(C*)__atomic_load_n(&FAV(fs)->valencefns[pmask>>=2],__ATOMIC_RELAXED); // the routine we will execute. If we do this after the assignment block, jt gets spilled and we have 3 back-to-back dependent loads,
704
-
// now pmask is only 1 bit, indicating 'dyad'
705
-
// which is more than the runout can cover. Symbol lookups don't use fillv. We put the atomic_load here to encourage early load of notfinalexec
706
-
jt->parserstackframe.sf=fs; // set new recursion point for $:; this frees fs
702
+
pmask>>=2; // now pmask is only 1 bit, indicating 'dyad'
703
+
// obsolete jt->fillv=(C*)__atomic_load_n(&FAV(fs)->valencefns[pmask],__ATOMIC_RELAXED); // the routine we will execute. If we do this after the assignment block, jt gets spilled and we have 3 back-to-back dependent loads,
704
+
jt->parserstackframe.sf=fs; // set new recursion point for $:
705
+
AFactionfn=__atomic_load_n(&FAV(fs)->valencefns[pmask],__ATOMIC_RELAXED); // frees fs. This is the routine we will execute. We put the atomic_load here to encourage early load of notfinalexec. clang17 keeps this in a reg till the call
707
706
fs1flag&=fsflag; // include ASGSAFE from V0 (if applicable, otherwise just a duplicate of fsflag)
708
-
fsflag>>=pmask; fsflag&=VJTFLGOK1; // select the monad/dyad bit indicating inplaceability, stored in the monad bit
707
+
fsflag>>=pmask; fsflag&=VJTFLGOK1; // select the monad/dyad bit indicating inplaceability, store it in the monad bit of flags
709
708
pt0ecam|=fsflag; // insert flag into portmanteau reg. Used in the execution
710
709
// If it is an inplaceable assignment to a known name that has a value, remember the value (the name will of necessity be the one thing pointing to the value)
711
710
// We handle =: N V N, =: V N, =: V V N. In the last case both Vs must be ASGSAFE. When we set jt->zombieval we are warranting
// The values on the left are good: function that understands inplacing.
721
-
// The values on the right are bad, and all bits > the good bits. They are: not assignment to name; something in the stack to the right of what we are about to execute;
720
+
// The values on the right are bad, and all bits > the good bits. They are: not assignment to name;
722
721
// ill-behaved function (may change locales). The > means 'good and no bads', that is, inplaceable assignment
723
722
// Here we have an assignment to check. We will call subroutines, thus losing all volatile registers
724
723
if(likely(GETSTACK0PT&PTASGNLOCAL)){L*s;
@@ -733,7 +732,7 @@ endname: ;
733
732
zval=zval?zval:AFLAG0; zval=AC(zval)==(REPSGN((AFLAG(zval)&(AFRO|AFVIRTUAL))-1)&(((AFLAG(zval)>>AFNJAX)&(AFNJA>>AFNJAX))+ACUC2))?zval:0; jt->zombieval=zval; // compiler should generate BT+ADC
734
733
pmask=(pt0ecam>>(PMASKSAVEX+2))&1; // restore dyad bit after calls
735
734
}
736
-
AFactionfn=(AF)__atomic_load_n(&jt->fillv,__ATOMIC_RELAXED); // refetch the routine address early. This may chain 2 fetches, which finishes about when the indirect branch is executed
735
+
// obsolete AF actionfn=(AF)__atomic_load_n(&jt->fillv,__ATOMIC_RELAXED); // refetch the routine address early. This may chain 2 fetches, which finishes about when the indirect branch is executed
// Most of the executed fragments are executed right here. In two cases we can be sure that the stack does not need to be rescanned:
850
852
// 1. pline=2, token 0 is AVN: we have just put a noun in the first position, and if that produced an executable it would have been executed earlier.
@@ -1004,13 +1006,12 @@ RECURSIVERESULTSCHECK
1004
1006
rejectfrag:;
1005
1007
// LPAR misses the main parse table, which is just as well because it would miss later branches anyway. We pick it up here so as not to add
1006
1008
// a couple of cycles to the main parse test. Whether we stack or execute, y is still set with the next word+type
1007
-
ASSERT(((I)fs|(I)fs1)!=0,0)// without this the compiler defers evaluating fs
1009
+
// obsolete jt->shapesink[0]=(C*)((I)fs|(I)fs1); // without this the compiler defers evaluating fs
1008
1010
if(!(GETSTACK0PT&PTNOTLPAR)){ // ( with no other line. Better be ( CAVN )
1009
1011
if(likely(PTISCAVN(~stack[1].pt)==PTISRPAR0(stack[2].pt))){ // must be [1]=CAVN and [2]=RPAR. To be equal, !CAVN and RPAR-if-0 must both be 0
1010
1012
SETSTACK0PT(stack[1].pt); stack[2]=stack[1]; stack[2].t=stack[0].t; // Install result over ). Use value/type from expr, token # from ( Bottom of stack was modified, so refresh the type for it
1011
1013
stack+=2; // advance stack pointer to result
1012
1014
}else{jt->parserstackframe.parserstkend1=stack; jsignal(EVSYNTAX); FPS} // error if contents of ( not valid. Set stackpointer so we see the failing exec
1013
-
// the fs/fs1!=0 above is to trick the compiler. We really want to start loading fs as early as possible, even though it is used only in lines 0-4. By mentioning fs here, we get the load started early
1014
1015
// we fall through to rescan after ( )
1015
1016
}else{pt0ecam&=~CONJ; break;} // parse failed, return to stack next word. Must clear 'stack 2' flag
0 commit comments