You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
Copy file name to clipboardExpand all lines: jsrc/p.c
+19-16Lines changed: 19 additions & 16 deletions
Original file line number
Diff line number
Diff line change
@@ -469,7 +469,7 @@ A jtparsea(J jt, A *queue, I nwds){F1PREFIP;PSTK *stack;A z,*v;
469
469
470
470
// mash into 1 register: bit 32-63 stack0pt, bit 29-31 (from CONJX) es delayline pull 3/2/1 after current word,
471
471
// (exec) 23-24,26 VJTFLGOK1+VJTFLGOK2+VASGSAFE from verb flags 27 PTNOTLPARX set if stack[0] is not (
472
-
// 25 set if first stack word AFTER the executing fragment is NOT MARK (i. e. there are executions remaining on the stack)
472
+
// 25 set if first stack word AFTER the executing fragment is NOT MARK/RPAR (i. e. there are executions remaining on the stack)
473
473
// (name resolution) 23-26 free
474
474
// (exec) 20-22 savearea for pmask for lines 0-2 (stack) 17,20 flags from at NAMEBYVALUE/NAMEABANDON
475
475
// 19 free
@@ -676,11 +676,11 @@ endname: ;
676
676
// obsolete not in J32 PSTK *fsa=(PSTK *)((I)&stack[2]+((pmask<<(BW-1))>>(BW-4))); // pointer to stack slot for the CAV to be executed, for lines 0-4 1 2 2 (2 2)
677
677
// clang creates a branch! PSTK *fsa=&stack[2], *fsa1=&stack[1]; fsa=pmask&1?fsa1:fsa; // pointer to stack slot for the CAV to be executed, for lines 0-2 1 2 2
678
678
pmask&=(I)((C*)&stack[3].pt)[3]; // finish 3d column of parse
679
-
Afs1=QCWORD(__atomic_load_n(&stack[1].a,__ATOMIC_ACQUIRE)); // in case of line 1 V0 V1 N2, we will need the flags from V1. path is fs1,fs->fs1flag to settle before the second assignment check
679
+
Afs1=__atomic_load_n(&stack[1].a,__ATOMIC_ACQUIRE); // in case of line 1 V0 V1 N2, we will need the flags from V1. path is fs1,fs->fs1flag to settle before the second assignment check
680
680
pt0ecam&=~(VJTFLGOK1+VJTFLGOK2+VASGSAFE+PTNOTLPAR+NOTFINALEXEC+(7LL<<PMASKSAVEX)); // clear all the flags we will use
681
681
682
-
if(likely(pmask!=0)){ // If all 0, nothing is dispatchable, go push next word after checking for (
683
-
// likely is an overstatement, but without it the calculation of fsa is deferred
682
+
if(pmask!=0){ // If all 0, nothing is dispatchable, go push next word after checking for (
683
+
fs1=QCWORD(fs1);// clear flags from address
684
684
// We are going to execute an action routine. This will be an indirect branch, and it will mispredict. To reduce the cost of the misprediction,
685
685
// we want to pile up as many instructions as we can before the branch, preferably getting out of the way as many loads as possible so that they can finish
686
686
// during the pipeline restart. The perfect scenario would be that the branch restarts while the loads for the stack arguments are still loading.
@@ -698,13 +698,14 @@ endname: ;
698
698
// obsolete I notfinalexec=!PTISMARKBACKORRPAR(fsa[2]); // set if there is something on the stack after the result of this exec that could modify the value before assignment. Used after verb exec.
699
699
// obsolete pt0ecam|=notfinalexec<<NOTFINALEXECX; // remember if this exec is final. Wait till we know not fail, so we don't have to wait for (. Used after execution
700
700
if(unlikely((!PTISMARKBACKORRPAR(fsa[2]))))pt0ecam|=NOTFINALEXEC; // remember if this exec is final in its branch. Wait till we know not fail, so we don't have to wait for (. Used after execution
701
-
jt->parserstackframe.sf=fs; // set new recursion point for $:; this frees fs
701
+
pt0ecam|=pmask<<PMASKSAVEX; // save pmask over the subroutine calls - also used after the verb execution
702
702
// Get the branch-to address. It comes from the appropriate valence of the appropriate stack element. Stack element is 2 except for line 0; valence is monadic for lines 0 1 4
703
-
jt->fillv=(C*)__atomic_load_n(&FAV(fs)->valencefns[pmask>>2],__ATOMIC_RELAXED); // the routine we will execute. If we do this after the assignment block, jt gets spilled and we have 3 back-to-back dependent loads,
703
+
jt->fillv=(C*)__atomic_load_n(&FAV(fs)->valencefns[pmask>>=2],__ATOMIC_RELAXED); // the routine we will execute. If we do this after the assignment block, jt gets spilled and we have 3 back-to-back dependent loads,
704
+
// now pmask is only 1 bit, indicating 'dyad'
704
705
// which is more than the runout can cover. Symbol lookups don't use fillv. We put the atomic_load here to encourage early load of notfinalexec
705
-
pt0ecam|=pmask<<PMASKSAVEX; // save pmask over the subroutine calls - also used after the verb execution
706
+
jt->parserstackframe.sf=fs; // set new recursion point for $:; this frees fs
706
707
fs1flag&=fsflag; // include ASGSAFE from V0 (if applicable, otherwise just a duplicate of fsflag)
707
-
fsflag>>=(pmask>>2); fsflag&=VJTFLGOK1; // select the monad/dyad bit indicating inplaceability, stored in the monad bit
708
+
fsflag>>=pmask; fsflag&=VJTFLGOK1; // select the monad/dyad bit indicating inplaceability, stored in the monad bit
708
709
pt0ecam|=fsflag; // insert flag into portmanteau reg. Used in the execution
709
710
// If it is an inplaceable assignment to a known name that has a value, remember the value (the name will of necessity be the one thing pointing to the value)
710
711
// We handle =: N V N, =: V N, =: V V N. In the last case both Vs must be ASGSAFE. When we set jt->zombieval we are warranting
@@ -713,7 +714,7 @@ endname: ;
713
714
// Consider the case name =. name , 3 + +
714
715
// the name =. name , 3 will come to execution. Can it inplace? Yes, because the modified value of name will be on the stack after
715
716
// execution. That will then execute as (name' + +) creating a fork that will assign to name. So we can inplace any execution, because
716
-
// it always produces a noun and the only things exexcutable from the stack are tridents
717
+
// it always produces a noun and the only things executable from the stack are tridents
// The values on the left are good: function that understands inplacing.
@@ -730,14 +731,14 @@ endname: ;
730
731
// We require flags indicate not read-only, and usecount==2 (or 3 if NJA block) since we have raised the count of this block already if it is named and to be operated on inplace.
731
732
// The block can be virtual, if it is x/y to xdefn. We must never inplace to a virtual block
732
733
zval=zval?zval:AFLAG0; zval=AC(zval)==(REPSGN((AFLAG(zval)&(AFRO|AFVIRTUAL))-1)&(((AFLAG(zval)>>AFNJAX)&(AFNJA>>AFNJAX))+ACUC2))?zval:0; jt->zombieval=zval; // compiler should generate BT+ADC
733
-
pmask=(pt0ecam>>PMASKSAVEX)&7; // restore after calls
734
+
pmask=(pt0ecam>>(PMASKSAVEX+2))&1; // restore dyad bit after calls
734
735
}
735
736
AFactionfn=(AF)__atomic_load_n(&jt->fillv,__ATOMIC_RELAXED); // refetch the routine address early. This may chain 2 fetches, which finishes about when the indirect branch is executed
PSTK*arga=fsa; arga=pmask&4?stack:arga; Aarg1=arga[1].a;// 1st arg, monad or left dyad 2 3 1
738
+
PSTK*arga=fsa; arga=pmask?stack:arga; Aarg1=arga[1].a;// 1st arg, monad or left dyad 2 3 1
738
739
// this requires fsa to survive over the assignment, but it's faster than the alternative
739
740
// obsolete A arg2=stack[(pmask>>=1)+1].a; // 2nd arg, fs or right dyad 1 2 3 (2 3) pmask shifted right 1
740
-
arga=pmask&4?&stack[3]:arga; Aarg2=arga[0].a; // 2nd arg, fs or right dyad 1 2 3 (2 3)
741
+
arga=pmask?&stack[3]:arga; Aarg2=arga[0].a; // 2nd arg, fs or right dyad 1 2 3 (2 3)
741
742
// Create what we need to free arguments after the execution. We keep the information needed to two registers so they can persist over the call as they are needed right away on return
742
743
// (1) When the args return from the verb, we will check to see if any were inplaceable and unused. Those can be freed right away, returning them to the
743
744
// // pool and allowing their cache space to be reused. But there is a problem:
@@ -749,9 +750,8 @@ endname: ;
749
750
// The calculation of tpopa/w will run to completion while the expected indirect-branch misprediction is being processed
// LPAR misses the main parse table, which is just as well because it would miss later branches anyway. We pick it up here so as not to add
1004
1006
// a couple of cycles to the main parse test. Whether we stack or execute, y is still set with the next word+type
1007
+
ASSERT(((I)fs|(I)fs1)!=0,0) // without this the compiler defers evaluating fs
1005
1008
if(!(GETSTACK0PT&PTNOTLPAR)){ // ( with no other line. Better be ( CAVN )
1006
1009
if(likely(PTISCAVN(~stack[1].pt)==PTISRPAR0(stack[2].pt))){ // must be [1]=CAVN and [2]=RPAR. To be equal, !CAVN and RPAR-if-0 must both be 0
1007
1010
SETSTACK0PT(stack[1].pt); stack[2]=stack[1]; stack[2].t=stack[0].t; // Install result over ). Use value/type from expr, token # from ( Bottom of stack was modified, so refresh the type for it
1008
1011
stack+=2; // advance stack pointer to result
1009
-
}else{jt->parserstackframe.parserstkend1=stack; jsignal(((I)fs|(I)fs1)!=0?EVSYNTAX:0); FPS} // error if contents of ( not valid. Set stackpointer so we see the failing exec
1012
+
}else{jt->parserstackframe.parserstkend1=stack; jsignal(EVSYNTAX); FPS} // error if contents of ( not valid. Set stackpointer so we see the failing exec
1010
1013
// the fs/fs1!=0 above is to trick the compiler. We really want to start loading fs as early as possible, even though it is used only in lines 0-4. By mentioning fs here, we get the load started early
1011
1014
// we fall through to rescan after ( )
1012
1015
}else{pt0ecam&=~CONJ; break;} // parse failed, return to stack next word. Must clear 'stack 2' flag
0 commit comments