Skip to content

Commit 1034d90

Browse files
committed
Instrument parRCB
1 parent 2beb609 commit 1034d90

File tree

7 files changed

+57
-48
lines changed

7 files changed

+57
-48
lines changed

src/genmap-impl.h

+7
Original file line number | Diff line number | Diff line change
@@ -42,6 +42,13 @@ GenmapElements GenmapGetElements_default(GenmapHandle h);
4242
typedef enum{
4343
RSB,
4444
RCB,
45+
RCBN,
46+
AXISLEN,
47+
PARSORT,
48+
SETPROC,
49+
LOCALSORT,
50+
LOADBALANCE,
51+
RCBTRANSFER,
4552
GSSETUP,
4653
PAIRWISE,
4754
CRYSTAL,

src/genmap-rcb.c

+12-8
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
#include <sort.h>
22
#include <float.h>
33
#include <parRSB.h>
4+
#include <genmap-impl.h>
45

56
void get_axis_len(double *length,struct array *a,struct comm *c,int ndim)
67
{
@@ -35,23 +36,23 @@ int parRCB(struct comm *ci,struct array *a,int ndim){
3536
sint rank=c.id;
3637
sint size=c.np;
3738

38-
if(rank == 0){
39-
printf("running RCB ...");
40-
fflush(stdout);
41-
}
42-
4339
double length[MAXDIM];
4440

4541
while(size>1){
42+
metric_acc(RCBN,a->n);
43+
44+
metric_tic(&c,AXISLEN);
4645
get_axis_len(length,a,&c,ndim);
46+
metric_toc(&c,AXISLEN);
4747

4848
int axis1=0,d;
4949
for(d=1;d<ndim;d++)
5050
if(length[d]>length[axis1]) axis1=d;
51-
int axis2=(axis1+1)%2;
52-
for(d=0;d<ndim;d++)
53-
if(length[d]>length[axis2] && d!=axis1) axis2=d;
51+
//int axis2=(axis1+1)%2;
52+
//for(d=0;d<ndim;d++)
53+
// if(length[d]>length[axis2] && d!=axis1) axis2=d;
5454

55+
metric_tic(&c,PARSORT);
5556
switch(axis1){
5657
case 0:
5758
parallel_sort(elm_rcb,a,coord[0],gs_double,0,1,&c);
@@ -65,6 +66,9 @@ int parRCB(struct comm *ci,struct array *a,int ndim){
6566
default:
6667
break;
6768
}
69+
metric_toc(&c,PARSORT);
70+
71+
metric_push_level();
6872

6973
int p=(size+1)/2;
7074
int bin=(rank>=p);

src/genmap-rsb.c

+1-1
Original file line number | Diff line number | Diff line change
@@ -77,7 +77,7 @@ void GenmapRSB(GenmapHandle h,int verbose){
7777
metric_push_level();
7878
}
7979

80-
metric_print(&global_c->gsc);
80+
//metric_print(&global_c->gsc);
8181
metric_finalize();
8282

8383
crystal_free(&(h->cr));

src/genmap-statistics.c

+8-30
Original file line number | Diff line number | Diff line change
@@ -33,8 +33,8 @@ void metric_tic(struct comm *c,metric m){
3333
}
3434

3535
void metric_toc(struct comm *c,metric m){
36-
comm_barrier(c);
3736
metrics[m]+=comm_time();
37+
comm_barrier(c);
3838
}
3939

4040
void metric_push_level(){
@@ -72,35 +72,13 @@ void metric_print(struct comm *c){
7272
for(i=0; i<stack_size; i++){
7373
if(c->id==0){
7474
printf("level=%02d\n",i);
75-
#if defined(GENMAP_RCB)
76-
printf(" RCB : %g/%g/%g\n",summary(i,RCB));
77-
#endif
78-
printf(" pairwise : %g/%g/%g\n",summary(i,PAIRWISE ));
79-
printf(" crystal_router : %g/%g/%g\n",summary(i,CRYSTAL ));
80-
printf(" allreduce : %g/%g/%g\n",summary(i,ALLREDUCE ));
81-
printf(" nneighbors : %g/%g/%g\n",summary(i,NNEIGHBORS ));
82-
printf(" gs_setup : %g/%g/%g\n",summary(i,GSSETUP ));
83-
printf(" laplacian_setup : %g/%g/%g\n",summary(i,LAPLACIANSETUP));
84-
printf(" nconn : %g/%g/%g\n",summary(i,NCONN ));
85-
printf(" gs : %g/%g/%g\n",summary(i,GSOP ));
86-
printf(" laplacian : %g/%g/%g\n",summary(i,LAPLACIAN ));
87-
#if defined(GENMAP_RQI) || defined(GENMAP_FMG)
88-
printf(" precon_setup : %g/%g/%g\n",summary(i,PRECONSETUP ));
89-
printf(" precon_ax : %g/%g/%g\n",summary(i,PRECONAX ));
90-
printf(" precon_vcycle : %g/%g/%g\n",summary(i,PRECONVCYCLE ));
91-
printf(" projectpf : %g/%g/%g\n",summary(i,PROJECTPF ));
92-
printf(" nprojectpf : %g/%g/%g\n",summary(i,NPROJECTPF ));
93-
#if defined(GENMAP_RQI)
94-
printf(" RQI : %g/%g/%g\n",summary(i,RQI ));
95-
#elif defined(GENMAP_FMG)
96-
printf(" FMG : %g/%g/%g\n",summary(i,FMG ));
97-
#endif
98-
for(int j=0; j<100; j++)
99-
printf(" PPF i=%02d : %g/%g/%g\n",j,summary(i,END+j ));
100-
#endif
101-
printf(" fiedler_time : %g/%g/%g\n",summary(i,FIEDLER ));
102-
printf(" fiedler_iter : %g/%g/%g\n",summary(i,NFIEDLER ));
103-
printf(" RSB : %g/%g/%g\n",summary(i,RSB ));
75+
printf(" RCBN : %g/%g/%g\n",summary(i,RCBN ));
76+
printf(" AXISLEN : %g/%g/%g\n",summary(i,AXISLEN ));
77+
printf(" PARSORT : %g/%g/%g\n",summary(i,PARSORT ));
78+
printf(" LOCALSORT : %g/%g/%g\n",summary(i,LOCALSORT ));
79+
printf(" SETPROC : %g/%g/%g\n",summary(i,SETPROC ));
80+
printf(" RCBTRANSFER : %g/%g/%g\n",summary(i,RCBTRANSFER ));
81+
printf(" LOADBALANCE : %g/%g/%g\n",summary(i,LOADBALANCE ));
10482
}
10583
}
10684
}

src/parRCB.c

+15-9
Original file line number | Diff line number | Diff line change
@@ -50,6 +50,9 @@ int parRCB_partMesh(int *part,int *seq,double *vtx,int nel,int nv,
5050

5151
//TODO: load balance
5252

53+
buffer bfr; buffer_init(&bfr,1024);
54+
metric_init();
55+
5356
struct comm rcb;
5457
comm_ext old=c.c;
5558
#ifdef MPI
@@ -63,13 +66,15 @@ int parRCB_partMesh(int *part,int *seq,double *vtx,int nel,int nv,
6366
comm_barrier(&rcb);
6467
double time=comm_time();
6568

69+
if(c.id==0)
70+
printf("running RCB ...");
71+
fflush(stdout);
72+
6673
parRCB(&rcb,&a,ndim);
6774

6875
// do local rcb
69-
buffer bfr; buffer_init(&bfr,1024);
7076
uint s1=0,e1=a.n;
7177
rcb_local(&a,s1,e1,ndim,&bfr);
72-
buffer_free(&bfr);
7378

7479
elm_rcb *ptr=a.ptr;
7580
int i;
@@ -83,26 +88,27 @@ int parRCB_partMesh(int *part,int *seq,double *vtx,int nel,int nv,
8388
printf(" finished in %g s\n",time);
8489
fflush(stdout);
8590
}
86-
comm_free(&rcb);
8791

8892
/* restore original input */
8993
struct crystal cr; crystal_init(&cr,&c);
9094
sarray_transfer(elm_rcb,&a,orig,1,&cr);
9195
crystal_free(&cr);
92-
93-
comm_free(&c);
94-
9596
assert(a.n==nel);
97+
sarray_sort(elm_rcb,a.ptr,a.n,id,1,&bfr);
9698

97-
buffer b; buffer_init(&b,1024);
98-
sarray_sort(elm_rcb,a.ptr,a.n,id,1,&b);
99-
buffer_free(&b);
10099

101100
data=a.ptr;
102101
for(e=0;e<nel;e++){
103102
part[e]=data[e].orig;
104103
seq [e]=data[e].seq ;
105104
}
106105

106+
metric_print(&c);
107+
108+
comm_free(&rcb);
109+
metric_finalize();
110+
buffer_free(&bfr);
111+
comm_free(&c);
112+
107113
return 0;
108114
}

src/sort/binsort.c

+9
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
11
#include <sort-impl.h>
2+
#include <genmap-impl.h> //FIXME - include genmap-statistics
23

34
/* assumes array is locally sorted */
45
int set_bin(uint **proc_,struct sort *s,uint field,struct comm *c)
@@ -32,19 +33,27 @@ int set_bin(uint **proc_,struct sort *s,uint field,struct comm *c)
3233
int parallel_bin_sort(struct sort *s,struct comm *c)
3334
{
3435
// Local sort
36+
metric_tic(c,LOCALSORT);
3537
sort_local(s);
38+
metric_toc(c,LOCALSORT);
3639

3740
// Set destination bin
41+
metric_tic(c,SETPROC);
3842
uint *proc;
3943
set_bin(&proc,s,0,c);
44+
metric_toc(c,SETPROC);
4045

46+
metric_tic(c,RCBTRANSFER);
4147
// Transfer to destination processor
4248
struct crystal cr; crystal_init(&cr,c);
4349
sarray_transfer_ext_(s->a,s->unit_size,proc,sizeof(uint),&cr);
4450
crystal_free(&cr);
51+
metric_toc(c,RCBTRANSFER);
4552

4653
GenmapFree(proc);
4754

4855
// Locally sort again
56+
metric_tic(c,LOCALSORT);
4957
sort_local(s);
58+
metric_toc(c,LOCALSORT);
5059
}

src/sort/sort.c

+5
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
#include <sort-impl.h>
22
#include <float.h>
3+
#include <genmap-impl.h> //FIXME - include genmap-statistics
34

45
int set_dest(uint *proc,uint np,ulong start,uint size,ulong nelem)
56
{
@@ -122,8 +123,12 @@ int parallel_sort_private(struct sort *data,struct comm *c){
122123

123124
if(balance){
124125
struct crystal cr; crystal_init(&cr,c);
126+
metric_tic(c,LOADBALANCE);
125127
load_balance(a,usize,c,&cr);
128+
metric_toc(c,LOADBALANCE);
129+
metric_tic(c,LOCALSORT);
126130
sort_local(data);
131+
metric_toc(c,LOCALSORT);
127132
crystal_free(&cr);
128133
}
129134

0 commit comments

Comments
 (0)