Skip to content

Commit 430701e

Browse files
authored
CPUSummary 0.1.26 divides L3 cache size by the number of cores, so we compensate here (#157)
1 parent 76de17c commit 430701e

File tree

7 files changed

+14
-20
lines changed

7 files changed

+14
-20
lines changed

.github/workflows/ci-julia-nightly.yml

-4
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@ jobs:
1919
matrix:
2020
arch:
2121
- x64
22-
- x86
2322
os:
2423
- ubuntu-latest
2524
- windows-latest
@@ -29,9 +28,6 @@ jobs:
2928
- '3' # GitHub runners have 2 cores, so `NUM_CORES+1` is 3
3029
version:
3130
- 'nightly'
32-
exclude:
33-
- os: macOS-latest
34-
arch: x86 # 32-bit Julia binaries are not available on macOS
3531
steps:
3632
- uses: actions/checkout@v2
3733
- uses: julia-actions/setup-julia@v1

.github/workflows/ci.yml

-4
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,6 @@ jobs:
6666
matrix:
6767
arch:
6868
- x64
69-
- x86
7069
os:
7170
- ubuntu-latest
7271
- windows-latest
@@ -76,9 +75,6 @@ jobs:
7675
- '3' # GitHub runners have 2 cores, so `NUM_CORES+1` is 3
7776
version:
7877
- '1' # automatically expands to the latest stable 1.x release of Julia
79-
exclude:
80-
- os: macOS-latest
81-
arch: x86 # 32-bit Julia binaries are not available on macOS
8278
steps:
8379
- uses: actions/checkout@v2
8480
- uses: julia-actions/setup-julia@v1

Project.toml

+4-4
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "Octavian"
22
uuid = "6fd5a793-0b7e-452c-907f-f8bfe9c57db4"
3-
authors = ["Mason Protter", "Chris Elrod", "Dilum Aluthge", "contributors"]
4-
version = "0.3.17"
3+
authors = ["Chris Elrod", "Dilum Aluthge", "Mason Protter", "contributors"]
4+
version = "0.3.18"
55

66
[deps]
77
ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9"
@@ -17,13 +17,13 @@ VectorizationBase = "3d5dd08c-fd9d-11e8-17fa-ed2836048c2f"
1717

1818
[compat]
1919
ArrayInterface = "3.1.14, 5.0.1, 6"
20-
CPUSummary = "0.1.1 - 0.1.8, 0.1.14 - 0.1.25"
20+
CPUSummary = "0.1.26"
2121
IfElse = "0.1"
2222
LoopVectorization = "0.12.86"
2323
ManualMemory = "0.1.1"
2424
PolyesterWeave = "0.1.1"
2525
Requires = "1"
26-
Static = "0.7.5"
26+
Static = "0.7.5, 0.8"
2727
ThreadingUtilities = "0.5"
2828
VectorizationBase = "0.21.15"
2929
julia = "1.6"

README.md

+2
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,8 @@ matrix multiplication on the CPU, built on top of
3434
Please see the
3535
[Octavian documentation](https://octavian.JuliaLinearAlgebra.org/stable).
3636

37+
Octavian has dropped 32-bit Julia support; see [PR #157](https://github.com/JuliaLinearAlgebra/Octavian.jl/pull/157). If you're interested in restoring it, please file a PR that fixes the failing tests.
38+
3739
## Benchmarks
3840

3941
You can run benchmarks using [BLASBenchmarksCPU.jl](https://github.com/JuliaLinearAlgebra/BLASBenchmarksCPU.jl):

benchmark/tilesearch.jl

+6-6
Original file line numberDiff line numberDiff line change
@@ -66,8 +66,8 @@ end
6666

6767

6868
T = Float64
69-
min_size = round(Int, sqrt(0.65 * Octavian.VectorizationBase.cache_size(Val(3)) / sizeof(T)))
70-
max_size = round(Int, sqrt( 32 * Octavian.VectorizationBase.cache_size(Val(3)) / sizeof(T)))
69+
min_size = round(Int, sqrt((0.65/4) * Octavian.num_cores() * Octavian.VectorizationBase.cache_size(Val(3)) / sizeof(T)))
70+
max_size = round(Int, sqrt( (32/4) * Octavian.num_cores() * Octavian.VectorizationBase.cache_size(Val(3)) / sizeof(T)))
7171

7272
SR = size_range(max_size, min_size, 400);
7373
const CsConst, AsConst, BsConst = matrix_range(SR, T);
@@ -111,7 +111,7 @@ const CsConst, AsConst, BsConst = matrix_range(SR, T);
111111

112112

113113
function matmul_objective(params)
114-
print("Params: ", params, "; ")
114+
print("Params= ", params, "; ")
115115
W₁, W₂, R₁, R₂ = params
116116
gflop = bench_size(CsConst, AsConst, BsConst, Val{W₁}(), Val{W₂}(), Val{R₁}(), Val{R₂}())
117117
println(gflop)
@@ -125,12 +125,12 @@ lower = 0.75 .* init;
125125
upper = [0.9, 1.25init[2], 0.999, 0.999];
126126
# init = [0.001, 0.9754033943603924, 0.5711159869399494, 0.7547361860432168];
127127

128-
#=
128+
129129
opt = Optim.optimize(
130130
matmul_objective, init, ParticleSwarm(lower = lower, upper = upper),
131-
Optim.Options(iterations = 10^6, time_limit = 8hours)
131+
Optim.Options(iterations = 10^6, time_limit = 14*hours)
132132
);
133-
=#
133+
134134

135135

136136

src/Octavian.jl

+1-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ using VectorizationBase, ArrayInterface, LoopVectorization
66

77
using VectorizationBase: align, AbstractStridedPointer, zstridedpointer, vsub_nsw, assume,
88
static_sizeof, StridedPointer, gesp, pause, pick_vector_width, has_feature
9-
using CPUSummary: cache_size, num_cores, cache_inclusive, cache_linesize
9+
using CPUSummary: cache_size, num_cores, num_threads, cache_inclusive, cache_linesize
1010
using LoopVectorization: preserve_buffer, CloseOpen, UpperBoundedInteger
1111
using ArrayInterface: size, strides, offsets, indices, axes, StrideIndex
1212
using IfElse: ifelse

src/global_constants.jl

+1-1
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ _second_cache_size(scs::StaticInt, ::False) = scs
6464
_second_cache_size(::StaticInt{0}, ::Nothing) = StaticInt(3145728)
6565
function second_cache_size()
6666
sc = second_cache()
67-
_second_cache_size(cache_size(sc), cache_inclusive(sc))
67+
_second_cache_size(cache_size(sc), cache_inclusive(sc)) * min(num_cores(), num_threads())
6868
end
6969

7070
first_cache_size(::Val{T}) where {T} = first_cache_size() ÷ static_sizeof(T)

0 commit comments

Comments
 (0)