Skip to content

Commit 685dcb5

Browse files
Implement A*D*B (gemdm), A*D*A^{T,H} (syrkd/herkd), and A*D*B^{T,H}+B*D*A^{T,H} (syr2kd/her2kd) operations (and gemdmt for good measure). Some rough edges remain:
- Complex herkd and her2kd will not work.
- No mixed-type/mixed-domain or 1m.
- Not integrated into the testsuite yet.
1 parent dc3e615 commit 685dcb5

19 files changed

+1950
-5
lines changed

frame/1m/bli_l1m_ker_ft.h

+1
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ typedef void (*PASTECH(opname,_ker_ft)) \
5151

5252
GENTDEF( packm )
5353
GENTDEF( packm_cxk )
54+
GENTDEF( packmd_cxk )
5455
GENTDEF( unpackm_cxk )
5556
GENTDEF( packm_cxc_diag )
5657

frame/1m/bli_l1m_ker_params.h

+15
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,21 @@
8181
void* p, inc_t ldp, \
8282
const void* params \
8383

84+
// packmd_cxk kernel

#define packmd_cxk_params \
\
       conj_t      conja, \
       pack_t      schema, \
       dim_t       cdim,  dim_t cdim_max, dim_t cdim_bcast, \
       dim_t       n,     dim_t n_max, \
       const void* kappa, \
       const void* a,     inc_t inca,     inc_t lda, \
       const void* d,     inc_t incd, \
             void* p,     inc_t ldp, \
       const void* params
98+
8499

85100
// unpackm_cxk kernel
86101

frame/1m/bli_l1m_ker_prot.h

+1
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ void PASTEMAC(chx,chy,funcname) \
6262
#define PACKM_DIAG_KER_PROT( ctype, ch, fn ) L1MTPROT( ctype, ch, fn, packm_cxc_diag );
6363

6464
#define PACKM_KER_PROT2( ctypex, ctypey, chx, chy, fn ) L1MTPROT2( ctypex, ctypey, chx, chy, fn, packm_cxk );
65+
#define PACKMD_KER_PROT2( ctypex, ctypey, chx, chy, fn ) L1MTPROT2( ctypex, ctypey, chx, chy, fn, packmd_cxk );
6566
#define UNPACKM_KER_PROT2( ctypex, ctypey, chx, chy, fn ) L1MTPROT2( ctypex, ctypey, chx, chy, fn, unpackm_cxk );
6667
#define PACKM_DIAG_KER_PROT2( ctypex, ctypey, chx, chy, fn ) L1MTPROT2( ctypex, ctypey, chx, chy, fn, packm_cxc_diag );
6768

frame/1m/packm/bli_packm.h

+1
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
#include "bli_packm_scalar.h"
4242

4343
#include "bli_packm_struc_cxk.h"
44+
#include "bli_packmd_struc_cxk.h"
4445

4546
#include "bli_packm_blk_var1.h"
4647

frame/1m/packm/bli_packmd_struc_cxk.c

+107
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
/*
2+
3+
BLIS
4+
An object-based framework for developing high-performance BLAS-like
5+
libraries.
6+
7+
Copyright (C) 2014, The University of Texas at Austin
8+
9+
Redistribution and use in source and binary forms, with or without
10+
modification, are permitted provided that the following conditions are
11+
met:
12+
- Redistributions of source code must retain the above copyright
13+
notice, this list of conditions and the following disclaimer.
14+
- Redistributions in binary form must reproduce the above copyright
15+
notice, this list of conditions and the following disclaimer in the
16+
documentation and/or other materials provided with the distribution.
17+
- Neither the name(s) of the copyright holder(s) nor the names of its
18+
contributors may be used to endorse or promote products derived
19+
from this software without specific prior written permission.
20+
21+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22+
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23+
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24+
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25+
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26+
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27+
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28+
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29+
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32+
33+
*/
34+
35+
#include "blis.h"
36+
37+
// GENTFUNC2RO: expand the function template GENTFUNC2RO_ twice per
// invocation -- first with every type/char slot filled from the "_r"
// arguments, then with the arguments forwarded exactly as given.
//
// The second parameter must be spelled `ctypec`, matching the token used in
// the second expansion below; with any other spelling the preprocessor does
// not substitute it and the literal identifier `ctypec` is forwarded instead
// of the caller's type.
#undef  GENTFUNC2RO
#define GENTFUNC2RO( ctypec_r, ctypec, ctypep_r, ctypep, chc_r, chc, chp_r, chp, varname ) \
GENTFUNC2RO_( ctypec_r, ctypec_r, ctypep_r, ctypep_r, chc_r, chc_r, chp_r, chp_r, varname ) \
GENTFUNC2RO_( ctypec_r, ctypec,   ctypep_r, ctypep,   chc_r, chc,   chp_r, chp,   varname )
41+
42+
// Function template for packmd_struc_cxk.
//
// Packs a panel of c into p by calling the packmd_cxk kernel that the context
// returns for the (c, p) datatype pair, and hands that kernel the d vector
// supplied through params_.
//
// - params_ is read as a const gemmd_params*. Its d pointer is advanced by
//   panel_len_off elements of stride incd before use; the byte size of one
//   element is taken from the datatype of c.
// - Only general matrices are handled. Every other strucc value ends in
//   bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ).
// - diagc, uploc, invdiag and panel_dim_off are accepted but never read.
#undef  GENTFUNC2RO_
#define GENTFUNC2RO_( ctypec_r, ctypec, ctypep_r, ctypep, chc_r, chc, chp_r, chp, varname ) \
\
void PASTEMAC(chc,chp,varname) \
     ( \
             struc_t strucc, \
             diag_t  diagc, \
             uplo_t  uploc, \
             conj_t  conjc, \
             pack_t  schema, \
             bool    invdiag, \
             dim_t   panel_dim, \
             dim_t   panel_len, \
             dim_t   panel_dim_max, \
             dim_t   panel_len_max, \
             dim_t   panel_dim_off, \
             dim_t   panel_len_off, \
             dim_t   panel_bcast, \
       const void*   kappa, \
       const void*   c, inc_t incc, inc_t ldc, \
             void*   p, inc_t ldp, \
       const void*   params_, \
       const cntx_t* cntx \
     ) \
{ \
	const num_t src_dt        = PASTEMAC(chc,type); \
	const num_t pack_dt       = PASTEMAC(chp,type); \
	const dim_t src_elem_size = bli_dt_size( src_dt ); \
\
	/* Fetch the packmd kernel registered for this datatype pair. */ \
	packmd_cxk_ker_ft pack_ker = bli_cntx_get_ukr2_dt( src_dt, pack_dt, BLIS_PACKMD_KER, cntx ); \
\
	/* Recover the d vector and step to the first element of this panel. */ \
	const gemmd_params* gd     = ( const gemmd_params* )params_; \
	inc_t               incd   = gd->incd; \
	const char*         d_here = ( const char* )gd->d + panel_len_off*incd*src_elem_size; \
\
	/* Anything other than a general matrix is not supported yet. */ \
	if ( !bli_is_general( strucc ) ) \
	{ \
		bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); \
		return; \
	} \
\
	pack_ker \
	( \
	  conjc, \
	  schema, \
	  panel_dim, \
	  panel_dim_max, \
	  panel_bcast, \
	  panel_len, \
	  panel_len_max, \
	  kappa, \
	  c, incc, ldc, \
	  d_here, incd, \
	  p, ldp, \
	  gd, \
	  cntx  \
	); \
}
104+
105+
INSERT_GENTFUNC2RO( packmd_struc_cxk )
106+
INSERT_GENTFUNC2RO_MIX_P( packmd_struc_cxk )
107+

frame/1m/packm/bli_packmd_struc_cxk.h

+70
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
/*
2+
3+
BLIS
4+
An object-based framework for developing high-performance BLAS-like
5+
libraries.
6+
7+
Copyright (C) 2014, The University of Texas at Austin
8+
9+
Redistribution and use in source and binary forms, with or without
10+
modification, are permitted provided that the following conditions are
11+
met:
12+
- Redistributions of source code must retain the above copyright
13+
notice, this list of conditions and the following disclaimer.
14+
- Redistributions in binary form must reproduce the above copyright
15+
notice, this list of conditions and the following disclaimer in the
16+
documentation and/or other materials provided with the distribution.
17+
- Neither the name(s) of the copyright holder(s) nor the names of its
18+
contributors may be used to endorse or promote products derived
19+
from this software without specific prior written permission.
20+
21+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22+
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23+
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24+
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25+
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26+
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27+
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28+
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29+
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32+
33+
*/
34+
35+
36+
typedef struct gemmd_params
37+
{
38+
const void* d;
39+
inc_t incd;
40+
} gemmd_params;
41+
42+
43+
// Prototype template for packmd_struc_cxk, expanded once per (chc,chp)
// datatype-character pair by the INSERT_GENTPROT2_* lines that follow.
#undef  GENTPROT2
#define GENTPROT2( ctypec, ctypep, chc, chp, varname ) \
\
BLIS_EXPORT_BLIS void PASTEMAC(chc,chp,varname) \
     ( \
             struc_t strucc, \
             diag_t  diagc, \
             uplo_t  uploc, \
             conj_t  conjc, \
             pack_t  schema, \
             bool    invdiag, \
             dim_t   panel_dim, \
             dim_t   panel_len, \
             dim_t   panel_dim_max, \
             dim_t   panel_len_max, \
             dim_t   panel_dim_off, \
             dim_t   panel_len_off, \
             dim_t   panel_bcast, \
       const void*   kappa, \
       const void*   c, inc_t incc, inc_t ldc, \
             void*   p, inc_t ldp, \
       const void*   params, \
       const cntx_t* cntx \
     );
67+
68+
INSERT_GENTPROT2_BASIC( packmd_struc_cxk )
69+
INSERT_GENTPROT2_MIX_P( packmd_struc_cxk )
70+

0 commit comments

Comments
 (0)