Skip to content

Commit cb73378

Browse files
committed
128!:9 quad-precision
1 parent 7c80ff3 commit cb73378

File tree

2 files changed

+70
-42
lines changed

2 files changed

+70
-42
lines changed

jsrc/vfrom.c

Lines changed: 46 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -1124,26 +1124,27 @@ F1(jtmvmsparse){PROLOG(832);
11241124
ASSERT(AR(w)==1,EVRANK);
11251125
ASSERT(AN(w)>=5,EVLENGTH); // audit overall w
11261126
ASSERT(AT(w)&BOX,EVDOMAIN);
1127+
A box0=C(AAV(w)[0]), box1=C(AAV(w)[1]), box2=C(AAV(w)[2]), box3=C(AAV(w)[3]), box4=C(AAV(w)[4]), box5=C(AAV(w)[5]);
11271128
// check ranks
1128-
ASSERT(AR(C(AAV(w)[0]))<=1,EVRANK); // ndx
1129-
ASSERT(AR(C(AAV(w)[1]))==3&&AS(C(AAV(w)[1]))[1]==2&&AS(C(AAV(w)[1]))[2]==1,EVRANK); // Ax, shape cols,2 1
1130-
ASSERT(AR(C(AAV(w)[2]))==1,EVRANK); // Am
1131-
ASSERT(AR(C(AAV(w)[3]))==1,EVRANK); // Av
1132-
ASSERT(AR(C(AAV(w)[4]))==2||(AR(C(AAV(w)[4]))==3&&AR(C(AAV(w)[0]))==0),EVRANK); // M
1129+
ASSERT(AR(box0)<=1,EVRANK); // ndx
1130+
ASSERT(AR(box1)==3&&AS(box1)[1]==2&&AS(box1)[2]==1,EVRANK); // Ax, shape cols,2 1
1131+
ASSERT(AR(box2)==1,EVRANK); // Am
1132+
ASSERT(AR(box3)==1,EVRANK); // Av
1133+
ASSERT(BETWEENC(AR(box4),2,3),EVRANK); // M can be double or quad. If quad, low part is ignored for DIP/Dpiv
11331134
// abort if no columns
1134-
if(AN(C(AAV(w)[0]))==0)R num(6); // if no cols (which happens at startup, return error indic)
1135+
if(AN(box0)==0)R num(6); // if no cols (which happens at startup, return error indic)
11351136
// check types. Don't convert - force the user to get it right
1136-
ASSERT(AT(C(AAV(w)[1]))&INT,EVDOMAIN); // Ax, shape cols,2 1
1137-
ASSERT(AT(C(AAV(w)[2]))&INT,EVDOMAIN); // Am
1138-
ASSERT(AT(C(AAV(w)[3]))&FL,EVDOMAIN); // Av
1139-
ASSERT(AT(C(AAV(w)[4]))&FL,EVDOMAIN); // M
1137+
ASSERT(AT(box1)&INT,EVDOMAIN); // Ax, shape cols,2 1
1138+
ASSERT(AT(box2)&INT,EVDOMAIN); // Am
1139+
ASSERT(AT(box3)&FL,EVDOMAIN); // Av
1140+
ASSERT(AT(box4)&FL,EVDOMAIN); // M
11401141
// check agreement
1141-
ASSERT(AS(C(AAV(w)[2]))[0]==AS(C(AAV(w)[3]))[0],EVLENGTH); // Am and Av
1142-
ASSERT(AS(C(AAV(w)[4]))[AR(C(AAV(w)[4]))-2]==AS(C(AAV(w)[4]))[AR(C(AAV(w)[4]))-1],EVLENGTH); // M is square
1142+
ASSERT(AS(box2)[0]==AS(box3)[0],EVLENGTH); // Am and Av
1143+
ASSERT(AS(box4)[AR(box4)-2]==AS(box4)[AR(box4)-1],EVLENGTH); // M is square
11431144

11441145
// indexes must be an atom, a single list of integers, or a list of boxes containing integers
11451146
// we don't allow conversion so as to force the user to get it right, for speed
1146-
A ndxa=C(AAV(w)[0]); ASSERT(AT(ndxa)&BOX+INT,EVDOMAIN);
1147+
A ndxa=box0; ASSERT(AT(ndxa)&BOX+INT,EVDOMAIN);
11471148
if(likely(AT(ndxa)&BOX)){ // if list of boxes, ensure each holds a list of integers, possibly empty
11481149
I ncols=0;
11491150
DO(AN(ndxa), ASSERT(AN(AAV(ndxa)[i])==0||(AT(AAV(ndxa)[i])&INT),EVDOMAIN) ASSERT(AR(AAV(ndxa)[i])<=1,EVDOMAIN) ncols+=AN(AAV(ndxa)[i]); )
@@ -1155,7 +1156,7 @@ F1(jtmvmsparse){PROLOG(832);
11551156
// extract pointers to tables
11561157
D minimp=0.0; // (always neg) min improvement we will accept, best improvement in any column so far. Init to 0 so we take first column with a pivot
11571158

1158-
I n=AS(C(AAV(w)[4]))[1]; // n=#rows/cols in M
1159+
I n=AS(box4)[1]; // n=#rows/cols in M
11591160
// convert types as needed; set ?v=pointer to data area for ?
11601161
D *bv; // pointer to b values if there are any
11611162
__m256d thresh; // ColThr Inf bkmin MinPivot validity thresholds, small positive values - for one-column mode, all lanes have the threshold for zero-clamp
@@ -1168,57 +1169,60 @@ F1(jtmvmsparse){PROLOG(832);
11681169
I *exlist=0, nexlist, *yk; // exclusion list: list of excluded col|row pairs, length
11691170
D bkmin; // the largest value for which bk is considered close enough to 0
11701171

1171-
if(AR(C(AAV(w)[0]))==0){
1172+
if(AR(box0)==0){
11721173
// single index value. set bv=0, zv non0 as a flag that we are storing the column
11731174
bv=0; ASSERT(AN(w)==6,EVLENGTH); // if goodvec is an atom, set bv=0 to indicate that bv is not used and verify no more input
1174-
if(unlikely(n==0)){R reshape(drop(num(-1),shape(C(AAV(w)[4]))),zeroionei(0));} // empty M, each product is 0
1175-
ASSERT(AR(C(AAV(w)[5]))==0,EVRANK); ASSERT(AT(C(AAV(w)[5]))&FL,EVDOMAIN); // thresh must be a float atom
1176-
I epcol=AR(C(AAV(w)[4]))==3; // flag if we are doing an extended-precision column fetch
1177-
GATV(z,FL,n<<epcol,1+epcol,AS(C(AAV(w)[4]))); zv=DAV(z); // allocate the result area for column extraction. Set zv nonzero so we use bkgrd of i. #M
1175+
if(unlikely(n==0)){R reshape(drop(num(-1),shape(box4)),zeroionei(0));} // empty M, each product is 0
1176+
ASSERT(AR(box5)==0,EVRANK); ASSERT(AT(box5)&FL,EVDOMAIN); // thresh must be a float atom
1177+
I epcol=AR(box4)==3; // flag if we are doing an extended-precision column fetch
1178+
GATV(z,FL,n<<epcol,1+epcol,AS(box4)); zv=DAV(z); // allocate the result area for column extraction. Set zv nonzero so we use bkgrd of i. #M
11781179
bvgrd0=0; bvgrde=bvgrd0+n; // length of column is #M
1179-
thresh=_mm256_set1_pd(DAV(C(AAV(w)[5]))[0]); // load threshold in all lanes
1180+
thresh=_mm256_set1_pd(DAV(box5)[0]); // load threshold in all lanes
11801181
}else{
11811182
// A list of index values. We are doing the DIP calculation or Dpiv
1182-
ASSERT(AR(C(AAV(w)[5]))==1,EVRANK); ASSERT(AN(C(AAV(w)[5]))==0||AT(C(AAV(w)[5]))&INT,EVDOMAIN); bvgrd0=IAV(C(AAV(w)[5])); bvgrde=bvgrd0+AN(C(AAV(w)[5])); // bkgrd: the order of processing the rows, and end+1 ptr normally /: bk
1183-
if(AN(C(AAV(w)[5]))==0){RETF(num(6))} // empty bk - give error/empty result 6
1184-
ASSERT(BETWEENC(AN(w),8,11),EVLENGTH);
1185-
ASSERT(AR(C(AAV(w)[8]))<=1,EVRANK); // Frow, one per row of M and column of A
1186-
if(AN(C(AAV(w)[8]))==0)Frow=0;else{ASSERT(AT(C(AAV(w)[8]))&FL,EVDOMAIN); ASSERT(AN(C(AAV(w)[8]))==AS(C(AAV(w)[4]))[0]+AS(C(AAV(w)[1]))[0],EVLENGTH); Frow=DAV(C(AAV(w)[8]));} // if Frow omitted we are looking to make bks nonzero
1187-
ASSERT(AR(C(AAV(w)[6]))<=1,EVRANK); ASSERT(AT(C(AAV(w)[6]))&FL,EVDOMAIN); ASSERT(AN(C(AAV(w)[6]))==7,EVLENGTH); // 7 float constants
1183+
ASSERT(AR(box5)==1,EVRANK); ASSERT(AN(box5)==0||AT(box5)&INT,EVDOMAIN); bvgrd0=IAV(box5); bvgrde=bvgrd0+AN(box5); // bkgrd: the order of processing the rows, and end+1 ptr normally /: bk
1184+
if(AN(box5)==0){RETF(num(6))} // empty bk - give error/empty result 6
1185+
ASSERT(BETWEENC(AN(w),9,11),EVLENGTH);
1186+
A box6=C(AAV(w)[6]), box7=C(AAV(w)[7]), box8=C(AAV(w)[8]);
1187+
ASSERT(AR(box8)<=1,EVRANK); // Frow, one per row of M and column of A
1188+
if(AN(box8)==0)Frow=0;else{ASSERT(AT(box8)&FL,EVDOMAIN); ASSERT(AN(box8)==n+AS(box1)[0],EVLENGTH); Frow=DAV(box8);} // if Frow omitted we are looking to make bks nonzero
1189+
ASSERT(AR(box6)<=1,EVRANK); ASSERT(AT(box6)&FL,EVDOMAIN); ASSERT(AN(box6)==7,EVLENGTH); // 7 float constants
11881190
if(unlikely(n==0)){RETF(num(6))} // empty M - should not occur, give error result 6
1189-
DO(AN(C(AAV(w)[5])), ASSERT((UI)bvgrd0[i]<(UI)n,EVINDEX); ) // verify bv indexes in bounds if M not empty
1190-
bkmin=DAV(C(AAV(w)[6]))[2];
1191-
thresh=_mm256_set_pd(DAV(C(AAV(w)[6]))[1],bkmin,inf,DAV(C(AAV(w)[6]))[0]); nfreecolsd=(DAV(C(AAV(w)[6]))[3]); ncolsd=(DAV(C(AAV(w)[6]))[4]); impfac=DAV(C(AAV(w)[6]))[5]; prirow=(I)DAV(C(AAV(w)[6]))[6];
1192-
ASSERT(AR(C(AAV(w)[7]))<=1,EVRANK);
1193-
if(AN(C(AAV(w)[7]))!=0){
1191+
DO(AN(box5), ASSERT((UI)bvgrd0[i]<(UI)n,EVINDEX); ) // verify bv indexes in bounds if M not empty
1192+
bkmin=DAV(box6)[2];
1193+
thresh=_mm256_set_pd(DAV(box6)[1],bkmin,inf,DAV(box6)[0]); nfreecolsd=(DAV(box6)[3]); ncolsd=(DAV(box6)[4]); impfac=DAV(box6)[5]; prirow=(I)DAV(box6)[6];
1194+
ASSERT(BETWEENC(AR(box7),1,2),EVRANK);
1195+
if(AN(box7)!=0){
11941196
// Normal DIP calculation
1195-
ASSERT(AT(C(AAV(w)[7]))&FL,EVDOMAIN); ASSERT(AN(C(AAV(w)[7]))==AS(C(AAV(w)[4]))[0],EVLENGTH); bv=DAV(C(AAV(w)[7])); // bk, one per row of M
1196-
zv=AN(C(AAV(w)[5]))==AN(C(AAV(w)[7]))?Frow:0; // set zv nonzero as a flag to process leading columns in order, until we have an improvement to shoot at. Do this only if ALL values in bk are to be processed
1197+
ASSERT(AT(box7)&FL,EVDOMAIN); ASSERT(AS(box7)[AR(box7)-1]==n,EVLENGTH); bv=DAV(box7); // bk, one per row of M
1198+
zv=AN(box5)==AS(box7)[AR(box7)-1]?Frow:0; // set zv nonzero as a flag to process leading columns in order, until we have an improvement to shoot at. Do this only if ALL values in bk are to be processed
11971199
if(AN(w)>9){
11981200
// nonimproving pivots with exlist
1201+
A box9=C(AAV(w)[9]), box10=C(AAV(w)[10]);
11991202
ASSERT(AN(w)==11,EVLENGTH);
12001203
// An exclusion list is given (and thus also yk). Remember their addresses. Its presence puts us through the 'nonimproving path' case
1201-
exlist=IAV(C(AAV(w)[9])); // remember address of exclusions
1202-
nexlist=AN(C(AAV(w)[9])); // and length of list
1203-
ASSERT(AR(C(AAV(w)[9]))<=1,EVRANK); ASSERT(nexlist==0||ISDENSETYPE(AT(C(AAV(w)[9])),INT),EVDOMAIN); // must be integer list
1204-
ASSERT(AR(C(AAV(w)[10]))<=1,EVRANK); ASSERT(ISDENSETYPE(AT(C(AAV(w)[10])),INT),EVDOMAIN); ASSERT(AN(C(AAV(w)[10]))==AS(C(AAV(w)[4]))[0],EVLENGTH); // yk, one per row of M
1205-
yk=IAV(C(AAV(w)[10])); // remember address of translation table of row# to basis column#
1204+
exlist=IAV(box9); // remember address of exclusions
1205+
nexlist=AN(box9); // and length of list
1206+
ASSERT(AR(box9)<=1,EVRANK); ASSERT(nexlist==0||ISDENSETYPE(AT(box9),INT),EVDOMAIN); // must be integer list
1207+
ASSERT(AR(box10)<=1,EVRANK); ASSERT(ISDENSETYPE(AT(box10),INT),EVDOMAIN); ASSERT(AN(box10)==n,EVLENGTH); // yk, one per row of M
1208+
yk=IAV(box10); // remember address of translation table of row# to basis column#
12061209
}
12071210
}else{
12081211
// Dpiv counting, with Dpiv. exlist is the Dpiv area
1212+
A box9=C(AAV(w)[9]);
12091213
ASSERT(AN(w)==10,EVLENGTH);
12101214
bv=zv=0;
1211-
exlist=IAV(C(AAV(w)[9])); // remember address of exclusions
1212-
ASSERT(AR(C(AAV(w)[9]))==1,EVRANK); ASSERT(ISDENSETYPE(AT(C(AAV(w)[9])),INT),EVDOMAIN); // must be integer list
1213-
ASSERT(AN(C(AAV(w)[9]))==AS(C(AAV(w)[1]))[0]+AS(C(AAV(w)[4]))[0],EVLENGTH); // length of Dpiv is #cols of A + #rows of A (=M)
1215+
exlist=IAV(box9); // remember address of exclusions
1216+
ASSERT(AR(box9)==1,EVRANK); ASSERT(ISDENSETYPE(AT(box9),INT),EVDOMAIN); // must be integer list
1217+
ASSERT(AN(box9)==AS(box1)[0]+n,EVLENGTH); // length of Dpiv is #cols of A + #rows of A (=M)
12141218
z=mtv; // no error is possible; use harmless return value
12151219
}
12161220
}
12171221

12181222

12191223
#define YC(n) .n=n,
12201224
struct mvmctx opctx={.ctxlock=0,.abortcolandrow=-1,.bestcolandrow={-1,-1},YC(ndxa)YC(n)YC(minimp)YC(bv)YC(thresh)YC(bestcol)YC(bestcolrow)YC(zv)YC(Frow)YC(nfreecolsd)
1221-
YC(ncolsd)YC(impfac)YC(prirow)YC(bvgrd0)YC(bvgrde)YC(exlist)YC(nexlist)YC(yk)YC(bkmin).axv=((I(*)[2])IAV(C(AAV(w)[1])))-n,.amv0=IAV(C(AAV(w)[2])),.avv0=DAV(C(AAV(w)[3])),.qk=C(AAV(w)[4]),
1225+
YC(ncolsd)YC(impfac)YC(prirow)YC(bvgrd0)YC(bvgrde)YC(exlist)YC(nexlist)YC(yk)YC(bkmin).axv=((I(*)[2])IAV(box1))-n,.amv0=IAV(box2),.avv0=DAV(box3),.qk=box4,
12221226
.ndotprods=0,.ncolsproc=0,.taskmask=0};
12231227
#undef YC
12241228

test/g520.ijs

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -246,6 +246,30 @@ assert. '' prtpms 128!:9 (1 2 3);(,."1 (_2) ]\ 00 0);(0$00);(0$0.0);M;1 2 3;2.5
246246
assert Dpiv prtpms 1 4 6 2 _1
247247
assert. '' prtpms 128!:9 (0 1 2 3);(,."1 (_2) ]\ 00 0);(0$00);(0$0.0);M;0 3;2.5 0 0 0 0 0 _1;'';'';Dpiv NB. sub
248248

249+
NB. Quad-precision M
250+
bk =. _2 1 3 1e_8
251+
M =. 2 {. ,: |: _4 ]\ 0. 1 3 0 0 0.5 3 0 1 0 0 0 0 1e_9 0 0 NB. input by columns
252+
cons=.1e_11 1e_6 0.0 0 1. 1.0 _1 NB. ColThr MinPivot #freepasses #improvements #amounttoimproveby prirow
253+
Frow=. _4 _3 _2 _1. 0.
254+
bkg=.i.4
255+
NB. Test DIP mode - on identity cols
256+
assert. 0 0 1 4 10 _4 prtpms (128!:9) 0 1 2 3;(,."1 (_2) ]\ 00 0);(0$00);(0$0.0);M;bkg;cons;bk;Frow
257+
assert. 0 0 1 4 13 _4 prtpms (128!:9) 1 0 2 3;(,."1 (_2) ]\ 00 0);(0$00);(0$0.0);M;bkg;cons;bk;Frow
258+
assert. 0 0 1 4 13 _4 prtpms (128!:9) 3 1 0 2;(,."1 (_2) ]\ 00 0);(0$00);(0$0.0);M;bkg;cons;bk;Frow
259+
assert. 1 3 1 1 4 0 prtpms (128!:9) (,3);(,."1 (_2) ]\ 00 0);(0$00);(0$0.0);M;bkg;cons;bk;Frow NB. dangerous pivot
260+
bk =. 2 {. ,: bk NB. Repeat with quad-prec bk
261+
assert. 0 0 1 4 10 _4 prtpms (128!:9) 0 1 2 3;(,."1 (_2) ]\ 00 0);(0$00);(0$0.0);M;bkg;cons;bk;Frow
262+
assert. 0 0 1 4 13 _4 prtpms (128!:9) 1 0 2 3;(,."1 (_2) ]\ 00 0);(0$00);(0$0.0);M;bkg;cons;bk;Frow
263+
assert. 0 0 1 4 13 _4 prtpms (128!:9) 3 1 0 2;(,."1 (_2) ]\ 00 0);(0$00);(0$0.0);M;bkg;cons;bk;Frow
264+
assert. 1 3 1 1 4 0 prtpms (128!:9) (,3);(,."1 (_2) ]\ 00 0);(0$00);(0$0.0);M;bkg;cons;bk;Frow NB. dangerous pivot
265+
M=. 2 {. ,: |: _4 ]\ _1. 0 2 3 0 2 3 4 2 3 4 5 3 4 5 6 NB. quad-prec
266+
Dpiv=. (1+4) $ _1
267+
assert. '' prtpms 128!:9 (0 1 2);(,."1 (_2) ]\ 00 0);(0$00);(0$0.0);M;0 1 2;1.0 0 0 0 0 0 0;'';'';Dpiv NB. init
268+
assert Dpiv prtpms 1 2 3 _1 _1
269+
assert. '' prtpms 128!:9 (1 2 3);(,."1 (_2) ]\ 00 0);(0$00);(0$0.0);M;1 2 3;2.5 0 0 0 0 0 1;'';'';Dpiv NB. add
270+
assert Dpiv prtpms 1 4 6 2 _1
271+
assert. '' prtpms 128!:9 (0 1 2 3);(,."1 (_2) ]\ 00 0);(0$00);(0$0.0);M;0 3;2.5 0 0 0 0 0 _1;'';'';Dpiv NB. sub
272+
249273
NB. Repeat multithreaded
250274
bxr =. </.~ 0 1 $~ $ NB. 2 threads
251275
mt3 =. -:&(3&{.)

0 commit comments

Comments
 (0)