Skip to content

Commit 850f4d1

Browse files
author
Dave Love
committed
Kludge to fix cpuid dispatch on power
This needs fixing properly somehow, but using -O3 (at least with gcc 8.3), we get this:

Program received signal SIGILL, Illegal instruction.
0x000000001004c660 in bli_cntx_init_power9_ref (cntx=0x103e06b0) at ref_kernels/bli_cntx_ref.c:456
456 for ( i = 0; i < BLIS_NUM_LEVEL3_OPS; ++i ) vfuncs[ i ] = NULL;
(gdb) bt
#0 0x000000001004c660 in bli_cntx_init_power9_ref (cntx=0x103e06b0) at ref_kernels/bli_cntx_ref.c:456
#1 0x000000001004c0a8 in bli_cntx_init_power9 (cntx=<optimized out>) at config/power9/bli_cntx_init_power9.c:42
#2 0x000000001003c85c in bli_gks_register_cntx (id=BLIS_ARCH_POWER9, nat_fp=0x1004c090 <bli_cntx_init_power9>, ref_fp=0x1004c0d0 <bli_cntx_init_power9_ref>, ind_fp=<optimized out>) at frame/base/bli_gks.c:373
#3 0x000000001003c97c in bli_gks_init () at frame/base/bli_gks.c:155
#4 0x000000001003cfe8 in bli_init_apis () at frame/base/bli_init.c:78
#5 0x00007ffff7e045a8 in __pthread_once_slow () from /lib64/libpthread.so.0
#6 0x00000000100492e8 in bli_pthread_once (once=<optimized out>, init=<optimized out>) at frame/thread/bli_pthread.c:314
#7 0x000000001003d138 in bli_init_once () at frame/base/bli_init.c:104
#8 bli_init_auto () at frame/base/bli_init.c:54
#9 0x0000000010011300 in cdotc_ (n=<optimized out>, x=<optimized out>, incx=<optimized out>, y=<optimized out>, incy=<optimized out>) at frame/compat/bla_dot.c:89
#10 0x0000000010002a48 in check2_ (sfac=0x103d14dc <sfac>) at blastest/src/cblat1.c:529
#11 0x0000000010001ef4 in main () at blastest/src/cblat1.c:112
1 parent d596dd3 commit 850f4d1

File tree

2 files changed

+8
-4
lines changed

2 files changed

+8
-4
lines changed

config/power8/make_defs.mk

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,9 @@ endif
5757
ifeq ($(DEBUG_TYPE),noopt)
5858
COPTFLAGS := -O0
5959
else
60-
COPTFLAGS := -O3 -funroll-loops
60+
# Fixme: This should use -O3, but that breaks cpuid dispatch somehow,
61+
# and we end up executing power9 code on power8.
62+
COPTFLAGS := -O2 -funroll-loops
6163
endif
6264

6365
# Flags specific to optimized kernels.
@@ -71,7 +73,7 @@ endif
7173
# Flags specific to reference kernels.
7274
CROPTFLAGS := $(CKOPTFLAGS)
7375
ifeq ($(CC_VENDOR),gcc)
74-
CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast
76+
CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast -O3
7577
else
7678
ifeq ($(CC_VENDOR),clang)
7779
CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast

config/power9/make_defs.mk

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,9 @@ endif
5757
ifeq ($(DEBUG_TYPE),noopt)
5858
COPTFLAGS := -O0
5959
else
60-
COPTFLAGS := -O3 -funroll-loops
60+
# Fixme: This should use -O3, but that breaks cpuid dispatch somehow,
61+
# and we end up executing power9 code on power8.
62+
COPTFLAGS := -O2 -funroll-loops
6163
endif
6264

6365
# Flags specific to optimized kernels.
@@ -71,7 +73,7 @@ endif
7173
# Flags specific to reference kernels.
7274
CROPTFLAGS := $(CKOPTFLAGS)
7375
ifeq ($(CC_VENDOR),gcc)
74-
CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast
76+
CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast -O3
7577
else
7678
ifeq ($(CC_VENDOR),clang)
7779
CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast

0 commit comments

Comments
 (0)