Skip to content

Commit 87313c4

Browse files
committed
Add precompile statements
new precompilation approach
1 parent 68e0eda commit 87313c4

File tree

2 files changed

+66
-0
lines changed

2 files changed

+66
-0
lines changed

src/TensorOperations.jl

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -77,4 +77,6 @@ function __init__()
7777
@require_extensions
7878
end
7979

80+
include("precompile.jl")
81+
8082
end # module

src/precompile.jl

Lines changed: 64 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,64 @@
1+
# Scalar types for which specializations are requested in the precompile loops below.
const PRECOMPILE_ELTYPES = (Float64, ComplexF64)

# tensoradd!
# ----------
# Largest array rank `N` covered by the `tensoradd!` / `tensoralloc_add` signatures.
const PRECOMPILE_ADD_NDIMS = 5

# `precompile(f, argtypes)` takes the *types* of the call arguments. For every scalar
# type `T` and every rank `N in 0:PRECOMPILE_ADD_NDIMS`, request `tensoradd!` with the
# trailing argument types `(One, Zero)`, `(T, Zero)` and `(T, T)`, followed by
# `tensoralloc_add` with both the `Val{true}` and `Val{false}` flag types.
for T in PRECOMPILE_ELTYPES
    for N in 0:PRECOMPILE_ADD_NDIMS
        C = Array{T,N}
        A = Array{T,N}
        pA = Index2Tuple{N,0}

        precompile(tensoradd!, (C, A, pA, Bool, One, Zero))
        precompile(tensoradd!, (C, A, pA, Bool, T, Zero))
        precompile(tensoradd!, (C, A, pA, Bool, T, T))

        # `tensoralloc_add` receives the scalar type itself as its first argument (per
        # the TensorOperations interface documentation), so that slot must be spelled
        # `Type{T}`. A bare `T` would describe a call passing a *value* of type `T`,
        # which never matches the real call and leaves it uncompiled.
        precompile(tensoralloc_add, (Type{T}, A, pA, Bool, Val{true}))
        precompile(tensoralloc_add, (Type{T}, A, pA, Bool, Val{false}))
    end
end
21+
22+
# tensortrace!
# ------------
# Upper bounds for the two rank parameters looped over below: the rank of the output
# array `C` (first entry) and the `N2` parameter of `Index2Tuple{N2,N2}` (second entry).
const PRECOMPILE_TRACE_NDIMS = (4, 2)

# Request `tensortrace!` specializations for every scalar type and every combination of
# the two rank parameters. The input array rank is the output rank plus twice the
# second parameter.
for T in PRECOMPILE_ELTYPES
    max_out, max_pair = PRECOMPILE_TRACE_NDIMS
    for n_out in 0:max_out, n_pair in 0:max_pair
        out_type = Array{T,n_out}
        in_type = Array{T,n_out + 2 * n_pair}
        p_type = Index2Tuple{n_out,0}
        q_type = Index2Tuple{n_pair,n_pair}

        # Same three trailing-type combinations, in the same order, as the other
        # precompile loops in this file.
        for (S1, S2) in ((One, Zero), (T, Zero), (T, T))
            precompile(tensortrace!, (out_type, in_type, p_type, q_type, Bool, S1, S2))
        end

        # No allocation signatures here: allocation re-uses tensoralloc_add.
    end
end
40+
41+
# tensorcontract!
# ---------------
# Upper bounds for the three rank parameters `(N1, N2, N3)` looped over below. The array
# ranks are derived from them as `NA = N1 + N2`, `NB = N2 + N3` and `NC = N1 + N3`.
const PRECOMPILE_CONTRACT_NDIMS = (3, 2, 3)

# `precompile(f, argtypes)` takes the *types* of the call arguments. For every scalar
# type and every `(N1, N2, N3)` combination, request `tensorcontract!` with the trailing
# argument types `(One, Zero)`, `(T, Zero)` and `(T, T)`, followed by
# `tensoralloc_contract` with both the `Val{true}` and `Val{false}` flag types.
for T in PRECOMPILE_ELTYPES
    for N1 in 0:PRECOMPILE_CONTRACT_NDIMS[1], N2 in 0:PRECOMPILE_CONTRACT_NDIMS[2],
        N3 in 0:PRECOMPILE_CONTRACT_NDIMS[3]

        NA = N1 + N2
        NB = N2 + N3
        NC = N1 + N3
        C, A, B = Array{T,NC}, Array{T,NA}, Array{T,NB}
        pA = Index2Tuple{N1,N2}
        pB = Index2Tuple{N2,N3}
        pAB = Index2Tuple{NC,0}

        precompile(tensorcontract!, (C, A, pA, Bool, B, pB, Bool, pAB, One, Zero))
        precompile(tensorcontract!, (C, A, pA, Bool, B, pB, Bool, pAB, T, Zero))
        precompile(tensorcontract!, (C, A, pA, Bool, B, pB, Bool, pAB, T, T))

        # `tensoralloc_contract` receives the scalar type itself as its first argument
        # (per the TensorOperations interface documentation), so that slot must be
        # spelled `Type{T}`. A bare `T` would describe a call passing a *value* of type
        # `T`, which never matches the real call and leaves it uncompiled.
        precompile(
            tensoralloc_contract, (Type{T}, A, pA, Bool, B, pB, Bool, pAB, Val{true})
        )
        precompile(
            tensoralloc_contract, (Type{T}, A, pA, Bool, B, pB, Bool, pAB, Val{false})
        )
    end
end

0 commit comments

Comments
 (0)