Skip to content

Commit de4e5a9

Browse files
committed
Make BigMap a flat hash map
1 parent e9cb0a7 commit de4e5a9

File tree

2 files changed

+62
-26
lines changed

2 files changed

+62
-26
lines changed

simd/meson.build

+7-2
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,18 @@ project('ninja-simd', 'cpp', default_options: ['cpp_std=c++20'])
22

33
tbb = dependency('tbb')
44

5-
cflags = ['-fno-omit-frame-pointer']
5+
cflags = ['-fno-omit-frame-pointer', '-fno-strict-aliasing']
66
if target_machine.cpu_family() == 'aarch64'
77
cflags += ['-mno-outline-atomics']
88
endif
9+
if target_machine.cpu_family() == 'x86_64'
10+
cflags += ['-mcx16']
11+
endif
912

1013
executable('ninja-simd',
1114
'depfile_parser.cc',
1215
'ninja-simd.cpp',
1316
dependencies: [tbb],
14-
cpp_args: cflags)
17+
cpp_args: cflags,
18+
# TODO(pcc): get rid of this, it's silly that we have to call a function just to use cmpxchg16b
19+
link_args: ['-l:libatomic.a'])

simd/ninja-simd.cpp

+55-24
Original file line numberDiff line numberDiff line change
@@ -199,16 +199,16 @@ struct Edge {
199199
};
200200

201201
struct Node {
202+
std::string_view path __attribute__((aligned(16)));
202203
Node* next = nullptr;
203-
std::string_view path;
204204
std::string path_buf;
205205
std::vector<Edge*> out_edges;
206206
Edge* in_edge = nullptr;
207207
bool allow_missing = false;
208208
bool nonexistent = false;
209209
std::atomic<bool> statted = false;
210-
bool has_build_log_hash;
211-
uint32_t build_log_index = -1u;
210+
bool has_build_log_hash = false;
211+
uint32_t build_log_index = 0;
212212
struct timespec mtime;
213213
HashResult build_log_hash;
214214
std::vector<Node*> depfile_inputs;
@@ -219,22 +219,30 @@ struct Node {
219219
// only supports the operations that we need. In particular, because we do
220220
// not support resizing, the data structure can be made lock-free.
221221
//
222-
// Each bucket is an atomic pointer. The assumption is that the number of nodes
223-
// will be large, so the data structure consists of a fixed size array of 1M
224-
// bucket. (In the future we may consider dynamically sizing the array based on
225-
// a node count recorded in the build log.) Insertion operations add the new
226-
// node onto the head of the linked list stored in the bucket. Our first
227-
// compare-exchange assumes the bucket to be empty (given the array size, this
228-
// is likely to be true) and if that operation fails, we search the linked list
229-
// for an existing node and compare-exchange the old head with a new one if it
230-
// fails.
222+
// Each bucket is a char array of size sizeof(Node) that gets cast into
223+
// an object of type Node as necessary, so it's basically a flat hash map. This
224+
// avoids a pointer load in the common case where the slot is unoccupied. The
225+
// assumption is that the number of nodes will be large, so the data structure
226+
// consists of a fixed size array of 1M buckets. (In the future we may consider
227+
// dynamically sizing the array based on a node count recorded in the build
228+
// log.) Insertion operations either atomically compare-exchange the path field
229+
// with the new path, or if that fails add the new node onto the head of the
230+
// linked list stored in the bucket by compare-exchanging its next field. Our
231+
// first compare-exchange assumes the bucket to be empty (given the array size,
232+
// this is likely to be true) and if that operation fails, we search the linked
233+
// list for an existing node and compare-exchange the old head with a new one if
234+
// the search fails.
235+
//
236+
// For now, we make the following assumptions:
237+
// - Everything in Node is zero initializable, except for path_buf.
238+
// - sizeof(std::string_view) == 16.
231239
struct BigMap {
232240
static constexpr size_t array_size = 1 << 20;
233-
std::atomic<Node*> nodes[array_size] = {};
241+
char nodes[array_size][sizeof(Node)] = {};
234242

235-
Node* operator[](std::string_view path) const {
243+
Node* operator[](std::string_view path) {
236244
HashResult hash = hash_buf(path.begin(), path.size());
237-
Node* node = nodes[hash.lo & (BigMap::array_size - 1)];
245+
Node* node = reinterpret_cast<Node *>(nodes[hash.lo & (BigMap::array_size - 1)]);
238246
while (node) {
239247
if (node->path == path)
240248
return node;
@@ -245,13 +253,34 @@ struct BigMap {
245253

246254
// Finds an existing node with path == tmp_node->path and returns it,
247255
// otherwise inserts tmp_node into the map and returns it. If the insert
248-
// operation succeeds, tmp_node will be replaced with a newly allocated node.
256+
// operation results in the BigMap taking ownership of tmp_node, tmp_node
257+
// will be replaced with a newly allocated node.
249258
Node* get_or_insert(Node*& tmp_node) {
250259
HashResult hash = hash_buf(tmp_node->path.begin(), tmp_node->path.size());
251-
std::atomic<Node*>& slot = nodes[hash.lo & (BigMap::array_size - 1)];
260+
Node* slot =
261+
reinterpret_cast<Node*>(nodes[hash.lo & (BigMap::array_size - 1)]);
262+
static_assert(sizeof(std::string_view) == 16);
263+
auto* path_atomic = reinterpret_cast<std::atomic<__int128>*>(&slot->path);
264+
__int128 new_path_atomic = *reinterpret_cast<__int128*>(&tmp_node->path);
265+
__int128 existing_path = 0;
266+
if (path_atomic->compare_exchange_strong(existing_path, new_path_atomic,
267+
std::memory_order_acq_rel)) {
268+
new (&slot->path_buf) std::string;
269+
slot->path_buf = std::move(tmp_node->path_buf);
270+
if (!slot->path_buf.empty()) {
271+
slot->path = slot->path_buf;
272+
}
273+
return slot;
274+
}
275+
if (*reinterpret_cast<std::string_view*>(&existing_path) ==
276+
tmp_node->path) {
277+
return slot;
278+
}
252279
Node* value = nullptr;
280+
std::atomic<Node*>& next =
281+
*reinterpret_cast<std::atomic<Node*>*>(&slot->next);
253282
while (1) {
254-
if (slot.compare_exchange_strong(value, tmp_node,
283+
if (next.compare_exchange_strong(value, tmp_node,
255284
std::memory_order_acq_rel)) {
256285
Node* inserted_node = tmp_node;
257286
tmp_node = new Node;
@@ -272,8 +301,9 @@ struct BigMap {
272301

273302
size_t size() const {
274303
size_t size = 0;
275-
for (Node* node : nodes) {
276-
while (node) {
304+
for (const char* node_ptr : nodes) {
305+
auto *node = reinterpret_cast<const Node *>(node_ptr);
306+
while (node && !node->path.empty()) {
277307
++size;
278308
node = node->next;
279309
}
@@ -1372,7 +1402,7 @@ void read_build_log(Global& global, BuildState& state) {
13721402
} else {
13731403
tmp_node->path = std::string_view(pos, node_len);
13741404
Node* n = global.nodes.get_or_insert(tmp_node);
1375-
n->build_log_index = nodes.size();
1405+
n->build_log_index = nodes.size() + 1;
13761406
nodes.push_back(n);
13771407
pos += node_len + 1;
13781408
}
@@ -1441,13 +1471,14 @@ void read_depfile(Global& global, std::string depfile, Node* out) {
14411471

14421472
void write_build_log(BuildState& state, Edge* e) {
14431473
auto introduce_node = [&](Node* n) {
1444-
if (n->build_log_index != -1u)
1445-
return n->build_log_index;
1474+
if (n->build_log_index != 0)
1475+
return n->build_log_index - 1;
14461476
if (n->path.empty())
14471477
error("attempt to introduce an empty path");
14481478
write(state.log_fd, n->path.data(), n->path.size());
14491479
write(state.log_fd, "", 1);
1450-
return n->build_log_index = state.build_log_next_index++;
1480+
n->build_log_index = ++state.build_log_next_index;
1481+
return n->build_log_index - 1;
14511482
};
14521483
std::vector<uint32_t> depfile_idxs;
14531484
for (Node* n : e->outputs[0]->depfile_inputs)

0 commit comments

Comments
 (0)