Skip to content

Commit 59f01ab

Browse files
committed
I didn't think it was possible to get even faster but here we are
Special thanks to jesperen (for the process dictionary interface) and garazdawi for explaining his bitstring splitting, which avoids binary.split. Still not as fast on my Intel chip, but a helluva lot faster than my old code.
1 parent 2931163 commit 59f01ab

File tree

2 files changed

+50
-27
lines changed

2 files changed

+50
-27
lines changed

common.exs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,10 @@ defmodule ReadMeasurements do
1313
end
1414

1515
def worker_count do
16-
:erlang.system_info(:logical_processors) * 4
16+
# Once upon a time, I cranked this up 4x whatever the logical processors were
17+
# the bad news: this is a waste of resources as the processes will now fight over the CPUs
18+
# Instead it should probably be 1:1
19+
:erlang.system_info(:logical_processors)
1720
end
1821

1922
def chunk_size do

src/1brc.workers.blob.maps.chunk_to_worker.exs

Lines changed: 46 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,11 @@ defmodule ReadMeasurements.App do
2020

2121
workers = Enum.map(1..worker_count, fn _ ->
2222
spawn_link(fn ->
23-
worker_main(parent, "", %{})
23+
worker_main(parent, "")
2424
end)
2525
end)
2626

27-
{:ok, file} = :prim_file.open(filename, [:binary, :read])
27+
{:ok, file} = :prim_file.open(filename, [:raw, :binary, :read])
2828
result =
2929
try do
3030
read_file(file, workers)
@@ -40,32 +40,24 @@ defmodule ReadMeasurements.App do
4040
|> ReadMeasurements.output()
4141
end
4242

43-
def worker_main(parent, <<>>, result) do
43+
def worker_main(parent, <<>>) do
4444
send(parent, {:checkin, self()})
4545
receive do
4646
:eos ->
47-
send(parent, {:result, result})
47+
# we don't have to care about the type unlike the erlang version
48+
# so we can just take the process dictionary as is
49+
send(parent, {:result, :erlang.get()})
4850
:ok
4951

5052
{:chunk, bin} ->
51-
worker_main(parent, bin, result)
53+
worker_main(parent, bin)
5254
end
5355
end
5456

55-
def worker_main(parent, rest, result) do
56-
[ws, rest] = :binary.split(rest, ";")
57-
{temp, <<"\n",rest::binary>>} = binary_split_to_fixed_point(rest)
58-
57+
def worker_main(parent, rest) do
5958
worker_main(
6059
parent,
61-
rest,
62-
case Map.fetch(result, ws) do
63-
:error ->
64-
Map.put(result, ws, {1, temp, temp, temp})
65-
66-
{:ok, {count, total, mn, mx}} ->
67-
Map.put(result, ws, {count + 1, total + temp, min(mn, temp), max(mx, temp)})
68-
end
60+
process_line(rest)
6961
)
7062
end
7163

@@ -124,26 +116,54 @@ defmodule ReadMeasurements.App do
124116
end
125117
end
126118

119+
defp process_line(rest) do
120+
parse_weather_station(rest, rest, 0)
121+
end
122+
123+
defp parse_weather_station(bin, <<";",_rest::binary>>, count) do
124+
<<ws::binary-size(count), ";", rest::binary>> = bin
125+
parse_temp(rest, ws)
126+
end
127+
128+
defp parse_weather_station(bin, <<_c,rest::binary>>, count) do
129+
parse_weather_station(bin, rest, count + 1)
130+
end
131+
127132
defmacrop char_to_num(c) do
128133
quote do
129-
unquote(c) - ?0
134+
(unquote(c) - ?0)
130135
end
131136
end
132137

133-
defp binary_split_to_fixed_point(<<?-, d2, d1, ?., d01, rest::binary>>) do
134-
{-(char_to_num(d2) * 100 + char_to_num(d1) * 10 + char_to_num(d01)), rest}
138+
defp parse_temp(<<?-, d2, d1, ?., d01, "\n", rest::binary>>, ws) do
139+
commit_entry(ws, -(char_to_num(d2) * 100 + char_to_num(d1) * 10 + char_to_num(d01)))
140+
rest
141+
end
142+
143+
defp parse_temp(<<?-, d1, ?., d01, "\n", rest::binary>>, ws) do
144+
commit_entry(ws, -(char_to_num(d1) * 10 + char_to_num(d01)))
145+
rest
135146
end
136147

137-
defp binary_split_to_fixed_point(<<?-, d1, ?., d01, rest::binary>>) do
138-
{-(char_to_num(d1) * 10 + char_to_num(d01)), rest}
148+
defp parse_temp(<<d2, d1, ?., d01, "\n", rest::binary>>, ws) do
149+
commit_entry(ws, char_to_num(d2) * 100 + char_to_num(d1) * 10 + char_to_num(d01))
150+
rest
139151
end
140152

141-
defp binary_split_to_fixed_point(<<d2, d1, ?., d01, rest::binary>>) do
142-
{char_to_num(d2) * 100 + char_to_num(d1) * 10 + char_to_num(d01), rest}
153+
defp parse_temp(<<d1, ?., d01, "\n", rest::binary>>, ws) do
154+
commit_entry(ws, char_to_num(d1) * 10 + char_to_num(d01))
155+
rest
143156
end
144157

145-
defp binary_split_to_fixed_point(<<d1, ?., d01, rest::binary>>) do
146-
{char_to_num(d1) * 10 + char_to_num(d01), rest}
158+
defp commit_entry(ws, temp) do
159+
# write it to the process dictionary
160+
case :erlang.get(ws) do
161+
:undefined ->
162+
:erlang.put(ws, {1, temp, temp, temp})
163+
164+
{count, total, mn, mx} ->
165+
:erlang.put(ws, {count + 1, total + temp, min(mn, temp), max(mx, temp)})
166+
end
147167
end
148168
end
149169

0 commit comments

Comments
 (0)