Skip to content

Commit f7a8349

Browse files
CHOLMOD: fixing OpenMP private(...), and GPU allocation (start/end)
1 parent 9439bda commit f7a8349

11 files changed

+328
-218
lines changed

CHOLMOD/Config/cholmod.h.in

+3
Original file line number | Diff line number | Diff line change
@@ -4063,6 +4063,9 @@ int cholmod_l_gpu_probe ( cholmod_common *Common ) ;
40634063
int cholmod_gpu_deallocate ( cholmod_common *Common ) ;
40644064
int cholmod_l_gpu_deallocate ( cholmod_common *Common ) ;
40654065

4066+
int cholmod_gpu_start ( cholmod_common *Common ) ;
4067+
int cholmod_l_gpu_start ( cholmod_common *Common ) ;
4068+
40664069
void cholmod_gpu_end ( cholmod_common *Common ) ;
40674070
void cholmod_l_gpu_end ( cholmod_common *Common ) ;
40684071

CHOLMOD/Demo/cholmod_di_demo.c

+14-8
Original file line number | Diff line number | Diff line change
@@ -123,7 +123,7 @@ int main (int argc, char **argv)
123123

124124
cm = &Common ;
125125
cholmod_start (cm) ;
126-
cm->print = 4 ;
126+
cm->print = 3 ;
127127

128128
cm->prefer_zomplex = prefer_zomplex ;
129129

@@ -279,6 +279,12 @@ int main (int argc, char **argv)
279279
// analyze and factorize
280280
//--------------------------------------------------------------------------
281281

282+
double maxresid = 0 ;
283+
284+
for (int overall_trials = 0 ; overall_trials <= 1 ; overall_trials++)
285+
{
286+
printf ("\n=== Overall Trial: %d\n", overall_trials) ;
287+
282288
t = CPUTIME ;
283289
L = cholmod_analyze (A, cm) ;
284290
ta = CPUTIME - t ;
@@ -704,21 +710,20 @@ int main (int argc, char **argv)
704710
{
705711
printf ("nnz(L) / nnz(A): %8.1f\n", cm->lnz / cm->anz) ;
706712
}
707-
printf ("analyze cputime: %12.4f\n", ta) ;
708-
printf ("factor cputime: %12.4f mflop: %8.1f\n", tf,
713+
printf ("analyze time: %12.4f\n", ta) ;
714+
printf ("factor time: %12.4f mflop: %8.1f\n", tf,
709715
(tf == 0) ? 0 : (1e-6*cm->fl / tf)) ;
710-
printf ("solve cputime: %12.4f mflop: %8.1f\n", ts [0],
716+
printf ("solve time: %12.4f mflop: %8.1f\n", ts [0],
711717
(ts [0] == 0) ? 0 : (1e-6*4*cm->lnz / ts [0])) ;
712-
printf ("overall cputime: %12.4f mflop: %8.1f\n",
718+
printf ("overall time: %12.4f mflop: %8.1f\n",
713719
tot, (tot == 0) ? 0 : (1e-6 * (cm->fl + 4 * cm->lnz) / tot)) ;
714-
printf ("solve cputime: %12.4f mflop: %8.1f (%d trials)\n", ts [1],
720+
printf ("solve time: %12.4f mflop: %8.1f (%d trials)\n", ts [1],
715721
(ts [1] == 0) ? 0 : (1e-6*4*cm->lnz / ts [1]), NTRIALS) ;
716-
printf ("solve2 cputime: %12.4f mflop: %8.1f (%d trials)\n", ts [2],
722+
printf ("solve2 time: %12.4f mflop: %8.1f (%d trials)\n", ts [2],
717723
(ts [2] == 0) ? 0 : (1e-6*4*cm->lnz / ts [2]), NTRIALS) ;
718724
printf ("peak memory usage: %12.0f (MB)\n",
719725
(double) (cm->memory_usage) / 1048576.) ;
720726
printf ("residual (|Ax-b|/(|A||x|+|b|)): ") ;
721-
double maxresid = 0 ;
722727
for (method = 0 ; method <= nmethods ; method++)
723728
{
724729
printf ("%8.2e ", resid [method]) ;
@@ -743,6 +748,7 @@ int main (int argc, char **argv)
743748

744749
cholmod_free_factor (&L, cm) ;
745750
cholmod_free_dense (&X, cm) ;
751+
}
746752

747753
//--------------------------------------------------------------------------
748754
// free matrices and finish CHOLMOD

CHOLMOD/Demo/cholmod_dl_demo.c

+14-8
Original file line number | Diff line number | Diff line change
@@ -123,7 +123,7 @@ int main (int argc, char **argv)
123123

124124
cm = &Common ;
125125
cholmod_l_start (cm) ;
126-
cm->print = 4 ;
126+
cm->print = 3 ;
127127

128128
cm->prefer_zomplex = prefer_zomplex ;
129129

@@ -279,6 +279,12 @@ int main (int argc, char **argv)
279279
// analyze and factorize
280280
//--------------------------------------------------------------------------
281281

282+
double maxresid = 0 ;
283+
284+
for (int overall_trials = 0 ; overall_trials <= 1 ; overall_trials++)
285+
{
286+
printf ("\n=== Overall Trial: %d\n", overall_trials) ;
287+
282288
t = CPUTIME ;
283289
L = cholmod_l_analyze (A, cm) ;
284290
ta = CPUTIME - t ;
@@ -704,21 +710,20 @@ int main (int argc, char **argv)
704710
{
705711
printf ("nnz(L) / nnz(A): %8.1f\n", cm->lnz / cm->anz) ;
706712
}
707-
printf ("analyze cputime: %12.4f\n", ta) ;
708-
printf ("factor cputime: %12.4f mflop: %8.1f\n", tf,
713+
printf ("analyze time: %12.4f\n", ta) ;
714+
printf ("factor time: %12.4f mflop: %8.1f\n", tf,
709715
(tf == 0) ? 0 : (1e-6*cm->fl / tf)) ;
710-
printf ("solve cputime: %12.4f mflop: %8.1f\n", ts [0],
716+
printf ("solve time: %12.4f mflop: %8.1f\n", ts [0],
711717
(ts [0] == 0) ? 0 : (1e-6*4*cm->lnz / ts [0])) ;
712-
printf ("overall cputime: %12.4f mflop: %8.1f\n",
718+
printf ("overall time: %12.4f mflop: %8.1f\n",
713719
tot, (tot == 0) ? 0 : (1e-6 * (cm->fl + 4 * cm->lnz) / tot)) ;
714-
printf ("solve cputime: %12.4f mflop: %8.1f (%d trials)\n", ts [1],
720+
printf ("solve time: %12.4f mflop: %8.1f (%d trials)\n", ts [1],
715721
(ts [1] == 0) ? 0 : (1e-6*4*cm->lnz / ts [1]), NTRIALS) ;
716-
printf ("solve2 cputime: %12.4f mflop: %8.1f (%d trials)\n", ts [2],
722+
printf ("solve2 time: %12.4f mflop: %8.1f (%d trials)\n", ts [2],
717723
(ts [2] == 0) ? 0 : (1e-6*4*cm->lnz / ts [2]), NTRIALS) ;
718724
printf ("peak memory usage: %12.0f (MB)\n",
719725
(double) (cm->memory_usage) / 1048576.) ;
720726
printf ("residual (|Ax-b|/(|A||x|+|b|)): ") ;
721-
double maxresid = 0 ;
722727
for (method = 0 ; method <= nmethods ; method++)
723728
{
724729
printf ("%8.2e ", resid [method]) ;
@@ -743,6 +748,7 @@ int main (int argc, char **argv)
743748

744749
cholmod_l_free_factor (&L, cm) ;
745750
cholmod_l_free_dense (&X, cm) ;
751+
}
746752

747753
//--------------------------------------------------------------------------
748754
// free matrices and finish CHOLMOD

CHOLMOD/Demo/cholmod_si_demo.c

+14-8
Original file line number | Diff line number | Diff line change
@@ -123,7 +123,7 @@ int main (int argc, char **argv)
123123

124124
cm = &Common ;
125125
cholmod_start (cm) ;
126-
cm->print = 4 ;
126+
cm->print = 3 ;
127127

128128
cm->prefer_zomplex = prefer_zomplex ;
129129

@@ -279,6 +279,12 @@ int main (int argc, char **argv)
279279
// analyze and factorize
280280
//--------------------------------------------------------------------------
281281

282+
double maxresid = 0 ;
283+
284+
for (int overall_trials = 0 ; overall_trials <= 1 ; overall_trials++)
285+
{
286+
printf ("\n=== Overall Trial: %d\n", overall_trials) ;
287+
282288
t = CPUTIME ;
283289
L = cholmod_analyze (A, cm) ;
284290
ta = CPUTIME - t ;
@@ -704,21 +710,20 @@ int main (int argc, char **argv)
704710
{
705711
printf ("nnz(L) / nnz(A): %8.1f\n", cm->lnz / cm->anz) ;
706712
}
707-
printf ("analyze cputime: %12.4f\n", ta) ;
708-
printf ("factor cputime: %12.4f mflop: %8.1f\n", tf,
713+
printf ("analyze time: %12.4f\n", ta) ;
714+
printf ("factor time: %12.4f mflop: %8.1f\n", tf,
709715
(tf == 0) ? 0 : (1e-6*cm->fl / tf)) ;
710-
printf ("solve cputime: %12.4f mflop: %8.1f\n", ts [0],
716+
printf ("solve time: %12.4f mflop: %8.1f\n", ts [0],
711717
(ts [0] == 0) ? 0 : (1e-6*4*cm->lnz / ts [0])) ;
712-
printf ("overall cputime: %12.4f mflop: %8.1f\n",
718+
printf ("overall time: %12.4f mflop: %8.1f\n",
713719
tot, (tot == 0) ? 0 : (1e-6 * (cm->fl + 4 * cm->lnz) / tot)) ;
714-
printf ("solve cputime: %12.4f mflop: %8.1f (%d trials)\n", ts [1],
720+
printf ("solve time: %12.4f mflop: %8.1f (%d trials)\n", ts [1],
715721
(ts [1] == 0) ? 0 : (1e-6*4*cm->lnz / ts [1]), NTRIALS) ;
716-
printf ("solve2 cputime: %12.4f mflop: %8.1f (%d trials)\n", ts [2],
722+
printf ("solve2 time: %12.4f mflop: %8.1f (%d trials)\n", ts [2],
717723
(ts [2] == 0) ? 0 : (1e-6*4*cm->lnz / ts [2]), NTRIALS) ;
718724
printf ("peak memory usage: %12.0f (MB)\n",
719725
(double) (cm->memory_usage) / 1048576.) ;
720726
printf ("residual (|Ax-b|/(|A||x|+|b|)): ") ;
721-
double maxresid = 0 ;
722727
for (method = 0 ; method <= nmethods ; method++)
723728
{
724729
printf ("%8.2e ", resid [method]) ;
@@ -743,6 +748,7 @@ int main (int argc, char **argv)
743748

744749
cholmod_free_factor (&L, cm) ;
745750
cholmod_free_dense (&X, cm) ;
751+
}
746752

747753
//--------------------------------------------------------------------------
748754
// free matrices and finish CHOLMOD

CHOLMOD/Demo/cholmod_sl_demo.c

+14-8
Original file line number | Diff line number | Diff line change
@@ -123,7 +123,7 @@ int main (int argc, char **argv)
123123

124124
cm = &Common ;
125125
cholmod_l_start (cm) ;
126-
cm->print = 4 ;
126+
cm->print = 3 ;
127127

128128
cm->prefer_zomplex = prefer_zomplex ;
129129

@@ -279,6 +279,12 @@ int main (int argc, char **argv)
279279
// analyze and factorize
280280
//--------------------------------------------------------------------------
281281

282+
double maxresid = 0 ;
283+
284+
for (int overall_trials = 0 ; overall_trials <= 1 ; overall_trials++)
285+
{
286+
printf ("\n=== Overall Trial: %d\n", overall_trials) ;
287+
282288
t = CPUTIME ;
283289
L = cholmod_l_analyze (A, cm) ;
284290
ta = CPUTIME - t ;
@@ -704,21 +710,20 @@ int main (int argc, char **argv)
704710
{
705711
printf ("nnz(L) / nnz(A): %8.1f\n", cm->lnz / cm->anz) ;
706712
}
707-
printf ("analyze cputime: %12.4f\n", ta) ;
708-
printf ("factor cputime: %12.4f mflop: %8.1f\n", tf,
713+
printf ("analyze time: %12.4f\n", ta) ;
714+
printf ("factor time: %12.4f mflop: %8.1f\n", tf,
709715
(tf == 0) ? 0 : (1e-6*cm->fl / tf)) ;
710-
printf ("solve cputime: %12.4f mflop: %8.1f\n", ts [0],
716+
printf ("solve time: %12.4f mflop: %8.1f\n", ts [0],
711717
(ts [0] == 0) ? 0 : (1e-6*4*cm->lnz / ts [0])) ;
712-
printf ("overall cputime: %12.4f mflop: %8.1f\n",
718+
printf ("overall time: %12.4f mflop: %8.1f\n",
713719
tot, (tot == 0) ? 0 : (1e-6 * (cm->fl + 4 * cm->lnz) / tot)) ;
714-
printf ("solve cputime: %12.4f mflop: %8.1f (%d trials)\n", ts [1],
720+
printf ("solve time: %12.4f mflop: %8.1f (%d trials)\n", ts [1],
715721
(ts [1] == 0) ? 0 : (1e-6*4*cm->lnz / ts [1]), NTRIALS) ;
716-
printf ("solve2 cputime: %12.4f mflop: %8.1f (%d trials)\n", ts [2],
722+
printf ("solve2 time: %12.4f mflop: %8.1f (%d trials)\n", ts [2],
717723
(ts [2] == 0) ? 0 : (1e-6*4*cm->lnz / ts [2]), NTRIALS) ;
718724
printf ("peak memory usage: %12.0f (MB)\n",
719725
(double) (cm->memory_usage) / 1048576.) ;
720726
printf ("residual (|Ax-b|/(|A||x|+|b|)): ") ;
721-
double maxresid = 0 ;
722727
for (method = 0 ; method <= nmethods ; method++)
723728
{
724729
printf ("%8.2e ", resid [method]) ;
@@ -743,6 +748,7 @@ int main (int argc, char **argv)
743748

744749
cholmod_l_free_factor (&L, cm) ;
745750
cholmod_l_free_dense (&X, cm) ;
751+
}
746752

747753
//--------------------------------------------------------------------------
748754
// free matrices and finish CHOLMOD

CHOLMOD/Demo/gpu.sh

+10
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,13 @@
11
#!/bin/bash
2+
echo '========================================================================='
3+
echo '======================== on the GPU:'
4+
echo 'First analysis/factorize takes longer, to "warmup" the GPU, which is the'
5+
echo 'time to allocate and pin GPU/CPU memory pools. That only needs to be'
6+
echo 'done once for the entire application, however.'
7+
echo '========================================================================='
28
CHOLMOD_USE_GPU=1 ../build/cholmod_dl_demo < ~/nd6k.mtx
9+
echo ''
10+
echo '========================================================================='
11+
echo '======================== on the CPU:'
12+
echo '========================================================================='
313
CHOLMOD_USE_GPU=0 ../build/cholmod_dl_demo < ~/nd6k.mtx

0 commit comments

Comments
 (0)