Skip to content

Commit 273a55e

Browse files
committed
Rewrite inner loop of |., fixing bug when a and w frames differ
1 parent 459dad1 commit 273a55e

File tree

4 files changed

+51
-20
lines changed

4 files changed

+51
-20
lines changed

jsrc/cr.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -647,6 +647,8 @@ static DF1(rank1in){F1PREFIP;ARGCHK1(w);DECLF; // this version when requested r
647647
jt->ranks=R2MAX; // reset rank to infinite
648648
RETF(z);
649649
}
650+
651+
// dyadic forms also check agreement wrt the given ranks
650652
static DF2(rank2i){F2PREFIP;ARGCHK1(w);DECLF; // this version when requested rank is positive
651653
I ar=sv->localuse.lu1.srank[1]; ar=ar>=AR(a)?RMAX:ar; I af=AR(a)-ar; // left rank
652654
I wr=sv->localuse.lu1.srank[2]; wr=wr>=AR(w)?RMAX:wr; I wf=AR(w)-wr; // right rank

jsrc/cv.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ static A jtfitct(J jt,A a,A w,I cno,A z){V*sv;
3535
// Handle i.!.1 specially; otherwise drop i. back to normal
3636
if(unlikely(cno==3))if(d==1.0){d=1.0-jt->cct; if(!SY_64)cno=0;}else cno=0; // i.!.1 is special on 64-bit systems; others just specify fit
3737
ASSERT(0<=d&&d<5.82076609134675e-11,EVDOMAIN); // can't be greater than 2^_34
38-
fdeffillall(z,0,CFIT,VERB,(AF)(jtfitct1),aff2[cno],a,w ,0L,sv->flag&(VIRS1|VIRS2|VJTFLGOK1|VJTFLGOK2|VISATOMIC1|VFCOMPCOMP),(I)(sv->mr),lrv(sv),rrv(sv),fffv->localuse.lu0.cachedloc=0,FAV(z)->localuse.lu1.cct = 1.0-d); // preserve INPLACE flags
38+
fdeffillall(z,0,CFIT,VERB,(AF)(jtfitct1),aff2[cno],a,w ,0L,sv->flag&(VIRS1|VIRS2|VJTFLGOK1|VJTFLGOK2|VISATOMIC1|VFCOMPCOMP|VASGSAFE),(I)(sv->mr),lrv(sv),rrv(sv),fffv->localuse.lu0.cachedloc=0,FAV(z)->localuse.lu1.cct = 1.0-d); // preserve INPLACE flags
3939
R z;
4040
}
4141

@@ -105,7 +105,7 @@ F2(jtfit){F2PREFIP;A f;C c;I k,l,m,r;V*sv;
105105
case CROT: case CDOLLAR: // these allow an empty array
106106
ASSERT(1>=AR(w),EVRANK);
107107
ASSERT(!AR(w)||!AN(w),EVLENGTH);
108-
fdeffill(z,0L,CFIT,VERB,jtfitfill1,jtfitfill2,a,w,0L,sv->flag&(VIRS1|VIRS2|VJTFLGOK1|VJTFLGOK2),m,l,r) RETF(z); // ^!.f
108+
fdeffill(z,0L,CFIT,VERB,jtfitfill1,jtfitfill2,a,w,0L,sv->flag&(VIRS1|VIRS2|VJTFLGOK1|VJTFLGOK2|VASGSAFE),m,l,r) RETF(z); // ^!.f
109109
case CTHORN:
110110
RE(w=sc(k=i0(w)));
111111
ASSERT(0<k,EVDOMAIN);

jsrc/vf.c

Lines changed: 38 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -84,23 +84,32 @@ static F2(jtrotsp){PROLOG(0071);A q,x,y,z;B bx,by;I acr,af,ar,*av,d,k,m,n,p,*qv,
8484
EPILOG(z);
8585
} /* a|."r w on sparse arrays */
8686

87-
#define ROF(r) if((I )(r<-n)|(I )(r>n))r=(r<0)?-n:n; x=dk*ABS(r); y=e-x; j=0>r?y:x; k=0>r?x:y;
88-
#define ROT(r) if((I )(r<-n)|(I )(r>n))r=r%n; x=dk*ABS(r); y=e-x; j=0>r?y:x; k=0>r?x:y;
89-
87+
// set k=length that wraps, ks=offset to it in source, kd=offset to it in dest, js=source offset to part that doesn't wrap, kd=offset to it in dest
88+
// obsolete #define ROF(r) if((I )(r<-n)|(I )(r>n))r=(r<0)?-n:n; x=dk*ABS(r); y=e-x; j=0>r?y:x; k=0>r?x:y;
89+
// obsolete #define ROT(r) if((I )(r<-n)|(I )(r>n))r=r%n; x=dk*ABS(r); y=e-x; j=0>r?y:x; k=0>r?x:y;
90+
#define ROTF I ar=ABS(r); if(unlikely((UI)ar>(UI)n)){if(jt->fill)ar=n; else{r=r%n; ar=ABS(r);}} k=dk*ar; kd=e-k; ks=r<0?kd:0; jd=r<0?k:0; kd-=ks; js=k-jd; // UI in case ABS(IMIN)
91+
// obsolete #define ROF(r) ar=ABS(r); ar=MIN(ar,n); ROTFCOMMON
92+
// obsolete #define ROT(r) ar=ABS(r); if(unlikely(ar>n)){r=r%n; ar=ABS(r);} ROTFCOMMON
9093
// m=#cells d=#atoms per item n=#items per cell
91-
static void jtrot(J jt,I m,I d,I n,I atomsize,I p,I*av,C*u,C*v){I dk,e,k,j,r,x,y;
94+
static void jtrot(J jt,I m,I d,I n,I atomsize,I p,I*av,C*u,C*v){I dk,e,k,r,x,y,kd,ks,jd,js;
9295
e=n*d*atomsize; dk=d*atomsize; // e=#bytes per cell dk=bytes per item
93-
if(jt->fill){
94-
if(p<=1){r=p?*av:0; ROF(r); DQ(m, if(r<0){mvc(k,v,atomsize,jt->fillv); MC(k+v,u,j);}else{MC(v,j+u,k); mvc(j,k+v,atomsize,jt->fillv);} u+=e; v+=e;);}
95-
else{DO(m, r=av[i]; ROF(r); if(r<0){mvc(k,v,atomsize,jt->fillv); MC(k+v,u,j);}else{MC(v,j+u,k); mvc(j,k+v,atomsize,jt->fillv);} u+=e; v+=e;);}
96-
}else{
97-
if(p<=1){r=p?*av:0; ROT(r); DQ(m, MC(v,j+u,k); MC(k+v,u,j); u+=e; v+=e;);}
98-
else{DO(m, r=av[i]; ROT(r); MC(v,j+u,k); MC(k+v,u,j); u+=e; v+=e;);}
99-
}
96+
// obsolete if(jt->fill){
97+
// obsolete if(p<=1){r=p?*av:0; ROF(r); DQ(m, if(r<0){mvc(k,v,atomsize,jt->fillv); MC(k+v,u,j);}else{MC(v,j+u,k); mvc(j,k+v,atomsize,jt->fillv);} u+=e; v+=e;);}
98+
// obsolete else{DO(m, r=av[i]; ROF(r); if(r<0){mvc(k,v,atomsize,jt->fillv); MC(k+v,u,j);}else{MC(v,j+u,k); mvc(j,k+v,atomsize,jt->fillv);} u+=e; v+=e;);}
99+
// obsolete if(p<=1){r=p?*av:0; ROF(r); DQ(m, if(r<0){mvc(k,v,atomsize,jt->fillv); MC(k+v,u,j);}else{MC(v,j+u,k); mvc(j,k+v,atomsize,jt->fillv);} u+=e; v+=e;);}
100+
// obsolete else{DO(m, r=av[i]; ROF(r); if(r<0){mvc(k,v,atomsize,jt->fillv); MC(k+v,u,j);}else{MC(v,j+u,k); mvc(j,k+v,atomsize,jt->fillv);} u+=e; v+=e;);}
101+
// obsolete }else{
102+
// obsolete if(p<=1){r=p?*av:0; ROT(r); DQ(m, MC(v,j+u,k); MC(k+v,u,j); u+=e; v+=e;);}
103+
// obsolete else{DO(m, r=av[i]; ROT(r); MC(v,j+u,k); MC(k+v,u,j); u+=e; v+=e;);}
104+
if(p<=1){r=p?*av:0; ROTF(r); DQ(m, MC(v+jd,u+js,e-k); if(!jt->fill)MC(v+kd,u+ks,k); else mvc(k,v+kd,atomsize,jt->fillv); u+=e; v+=e;);} // move fill last in case inplace
105+
else{DO(m, r=av[i]; ROTF(r); MC(v+jd,u+js,e-k); if(!jt->fill)MC(v+kd,u+ks,k); else mvc(k,v+kd,atomsize,jt->fillv); u+=e; v+=e;);}
106+
// obsolete }
100107
}
101108

109+
110+
102111
/* m # cells
103-
c # atoms in each cell
112+
d # atoms in each cell
104113
n # items in each cell
105114
k # bytes in each atom
106115
p length of av
@@ -116,17 +125,28 @@ F2(jtrotate){A origw=w,y,z;B b;C*u,*v;I acr,af,ar,*av,d,k,m,n,p,*s,wcr,wf,wn,wr;
116125
RZ(a=vi(a));
117126
// special case: if a is atomic 0, and cells of w are not atomic
118127
if((wcr!=0)&(((ar|IAV(a)[0])==0)))R RETARG(w); // 0 |. y, return y
119-
if(((1-acr)|((-af)&(-acr|(wf-1))))<0)R rank2ex(a,w,DUMMYSELF,MIN(acr,1),wcr,acr,wcr,jtrotate); // if multiple a-lists per cell, or a has frame and (a cell is not an atom or w has no frame) handle rank by using " for it
128+
// We support IRS in a limited way. We revert to the rank loop if:
129+
// 1 cell-rank of a>1 (we have to replicate w)
130+
// 2 a has frame, if: cell-rank of a > 0 (we have to match cells of a)
131+
// OR frame of w does not equal frame of a (agreement has already been checked in the caller, if IRS)
132+
// (in the case where a and w frames are equal, we apply each atom of a to one cell of w since a cell-rank is 0)
133+
if(((1-acr)|((-af)&(-acr|-(af^wf))))<0)R rank2ex(a,w,DUMMYSELF,MIN(acr,1),wcr,acr,wcr,jtrotate); // revert if we can't match a and w easily
120134
if(((wcr-1)&(1-p))<0){RZ(w=reshape(apip(shape(w),apv(p,1L,0L)),w)); wr=wcr=p;} // if cell is an atom, extend it up to #axes being rotated !wcr && p>1
121135
ASSERT(((-wcr)&(wcr-p))>=0,EVLENGTH); // !wcr||p<=wcr !(wcr&&p>wcr)
122-
av=AV(a);
123-
RZ(w=setfv(w,w)); u=CAV(w); wn=AN(w); s=AS(w); k=bpnoun(AT(w)); // set fill value if given
124-
GA(z,AT(w),wn,wr,s); v=CAV(z);
136+
av=AV(a); z=0; // init no result allocated
137+
if(jt->fill){
138+
// if there is fill, we can do the rotate inplace
139+
RZ(w=setfv(w,w)); // set fill value if given
140+
z=ASGNINPLACESGN(SGNIF(jtinplace,JTINPLACEWX),w)?w:z; // inplace allowed, just one cell, result rank (an) <= current rank (so rank fits), usecount is right
141+
}
142+
u=CAV(w); wn=AN(w); s=AS(w); k=bpnoun(AT(w));
143+
if(z==0)GA(z,AT(w),wn,wr,s); v=CAV(z); // allocate result area, unless we are inplacing into w
125144
if(!wn)R z;
126145
PROD(m,wf,s); PROD(d,wr-wf-1,s+wf+1); SETICFR(w,wf,wcr,n); // m=#cells of w, n=#items per cell d=#atoms per item of cell
127-
rot(m,d,n,k,1>=p?AN(a):1L,av,u,v);
146+
rot(m,d,n,k,1>=p?AN(a):1L,av,u,v); // rotate first axis
128147
if(1<p){
129-
GA(y,AT(w),wn,wr,s); u=CAV(y);
148+
// more than 1 axis: we ping-pong between buffers as we go down the axes
149+
GA(y,AT(w),wn,wr,s); u=CAV(y); // scaf not needed if there is fill
130150
b=0; s+=wf;
131151
DO(p-1, m*=n; n=*++s; PROD(d,wr-wf-i-2,s+1); rot(m,d,n,k,1L,av+i+1,b?u:v,b?v:u); b^=1;); // s has moved past the frame
132152
z=b?y:z;

test/g231.ijs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -398,6 +398,12 @@ _2 3 (|."1 2 -: rot"1 2) j./?2 2 3 7$1000
398398
(2 3,:4 5) (|. -: rot) ?7 9$1000
399399
(?2 3 2$10) (|."2 -: rot"2) ?2 3 4$1000
400400

401+
0 1 2 3 (|."0 1 -: {{ x |. y }}"0 1) i. 4 5 6
402+
(2 2 1$0 1 2 3) (|."0 1 -: {{ x |. y }}"0 1) i. 2 6
403+
404+
NB. Inplace if fill
405+
a =: i. 1e6
406+
5000 > 7!:2 'a =: 5 |.!.0 a'
401407

402408
NB. x|.!.f"r y ----------------------------------------------------------
403409

@@ -435,6 +441,9 @@ _2 3 (|.!.f"1 2 -: rot"1 2) j./?2 2 3 7$1000 [ f=:0
435441

436442
1 2 3 (|.!.f"0 _ -: rot"0 _) ?20$10000 [ f=:9
437443

444+
0 1 2 3 (|.!.99"0 1 -: 99 {{ x |.!.m y }}"0 1) i. 4 5 6
445+
(2 2 1$0 1 2 3) (|.!.99"0 1 -: 99 {{ x |.!.m y }}"0 1) i. 2 6
446+
438447
4!:55 ;:'a adot1 adot2 sdot0 f f1 rank rev rot x'
439448
randfini''
440449

0 commit comments

Comments
 (0)