@@ -29,16 +29,7 @@ limitations under the License.
29
29
#include " client/ds/blob.h"
30
30
#include " client/ds/i_object.h"
31
31
#include " common/util/arrow.h" // IWYU pragma: keep
32
-
33
- #ifdef __GNUC__
34
- #pragma GCC diagnostic push
35
- #pragma GCC diagnostic ignored "-Wunused-variable"
36
- #pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
37
- #endif
38
- #include " BBHash/BooPHF.h"
39
- #ifdef __GNUC__
40
- #pragma GCC diagnostic pop
41
- #endif
32
+ #include " grape/vertex_map/idxers/pthash_idxer.h"
42
33
43
34
namespace vineyard {
44
35
@@ -229,8 +220,6 @@ class PerfectHashmapBuilder : public PerfectHashmapBaseBuilder<K, V> {
229
220
public:
230
221
static_assert (std::is_pod<V>::value, " V in perfect hashmap must be POD type" );
231
222
232
- typedef boomphf::SingleHashFunctor<K> hasher_t ;
233
-
234
223
explicit PerfectHashmapBuilder (Client& client)
235
224
: PerfectHashmapBaseBuilder<K, V>(client) {}
236
225
@@ -248,12 +237,21 @@ class PerfectHashmapBuilder : public PerfectHashmapBaseBuilder<K, V> {
248
237
const V* values, const size_t n_elements) {
249
238
this ->set_num_elements_ (n_elements);
250
239
this ->set_ph_keys_ (keys);
251
- RETURN_ON_ERROR (detail::boomphf::build_keys (
252
- bphf_, reinterpret_cast <const K*>(keys->data ()), n_elements));
240
+ for (size_t i = 0 ; i < n_elements; ++i) {
241
+ this ->builder_ .add ((reinterpret_cast <const K*>(keys->data ()))[i]);
242
+ }
243
+
244
+ this ->builder_ .buildPhf ();
245
+ std::unique_ptr<BlobWriter> writer;
246
+ size_t serialize_size = this ->builder_ .getSerializeSize ();
247
+ RETURN_ON_ERROR (client.CreateBlob (serialize_size, writer));
248
+ this ->builder_ .finishInplace (writer->data (), serialize_size, this ->idxer_ );
249
+ writer->Seal (client, buf);
250
+
253
251
return this ->allocateValues (
254
252
client, n_elements, [&](V* shuffled_values) -> Status {
255
- return detail::boomphf ::build_values (
256
- bphf_ , reinterpret_cast <const K*>(keys->data ()), n_elements,
253
+ return detail::perfect_hash ::build_values (
254
+ idxer_ , reinterpret_cast <const K*>(keys->data ()), n_elements,
257
255
values, shuffled_values);
258
256
});
259
257
}
@@ -266,11 +264,27 @@ class PerfectHashmapBuilder : public PerfectHashmapBaseBuilder<K, V> {
266
264
const V* values, const size_t n_elements) {
267
265
this ->set_num_elements_ (n_elements);
268
266
this ->set_ph_keys_ (keys);
269
- RETURN_ON_ERROR (detail::boomphf::build_keys (bphf_, keys->GetArray ()));
267
+ for (auto iter =
268
+ detail::perfect_hash::arrow_array_iterator<K, ArrowArrayType<K>>(
269
+ keys->GetArray ()->begin ());
270
+ iter !=
271
+ detail::perfect_hash::arrow_array_iterator<K, ArrowArrayType<K>>(
272
+ keys->GetArray ()->end ());
273
+ iter++) {
274
+ this ->builder_ .add (*iter);
275
+ }
276
+
277
+ this ->builder_ .buildPhf ();
278
+ std::unique_ptr<BlobWriter> writer;
279
+ size_t serialize_size = this ->builder_ .getSerializeSize ();
280
+ RETURN_ON_ERROR (client.CreateBlob (serialize_size, writer));
281
+ this ->builder_ .finishInplace (writer->data (), serialize_size, this ->idxer_ );
282
+ writer->Seal (client, buf);
283
+
270
284
return this ->allocateValues (
271
285
client, n_elements, [&](V* shuffled_values) -> Status {
272
- return detail::boomphf ::build_values (bphf_ , keys->GetArray (), values ,
273
- shuffled_values);
286
+ return detail::perfect_hash ::build_values (idxer_ , keys->GetArray (),
287
+ values, shuffled_values);
274
288
});
275
289
return Status::OK ();
276
290
}
@@ -289,12 +303,21 @@ class PerfectHashmapBuilder : public PerfectHashmapBaseBuilder<K, V> {
289
303
const V begin_value, const size_t n_elements) {
290
304
this ->set_num_elements_ (n_elements);
291
305
this ->set_ph_keys_ (keys);
292
- RETURN_ON_ERROR (detail::boomphf::build_keys (
293
- bphf_, reinterpret_cast <const K*>(keys->data ()), n_elements));
306
+ for (size_t i = 0 ; i < n_elements; ++i) {
307
+ this ->builder_ .add ((reinterpret_cast <const K*>(keys->data ()))[i]);
308
+ }
309
+
310
+ this ->builder_ .buildPhf ();
311
+ std::unique_ptr<BlobWriter> writer;
312
+ size_t serialize_size = this ->builder_ .getSerializeSize ();
313
+ RETURN_ON_ERROR (client.CreateBlob (serialize_size, writer));
314
+ this ->builder_ .finishInplace (writer->data (), serialize_size, this ->idxer_ );
315
+ writer->Seal (client, buf);
316
+
294
317
return this ->allocateValues (
295
318
client, n_elements, [&](V* shuffled_values) -> Status {
296
- return detail::boomphf ::build_values (
297
- bphf_ , reinterpret_cast <const K*>(keys->data ()), n_elements,
319
+ return detail::perfect_hash ::build_values (
320
+ idxer_ , reinterpret_cast <const K*>(keys->data ()), n_elements,
298
321
begin_value, shuffled_values);
299
322
});
300
323
}
@@ -307,11 +330,27 @@ class PerfectHashmapBuilder : public PerfectHashmapBaseBuilder<K, V> {
307
330
const V begin_value, const size_t n_elements) {
308
331
this ->set_num_elements_ (n_elements);
309
332
this ->set_ph_keys_ (keys);
310
- RETURN_ON_ERROR (detail::boomphf::build_keys (bphf_, keys->GetArray ()));
333
+ for (auto iter =
334
+ detail::perfect_hash::arrow_array_iterator<K, ArrowArrayType<K>>(
335
+ keys->GetArray ()->begin ());
336
+ iter !=
337
+ detail::perfect_hash::arrow_array_iterator<K, ArrowArrayType<K>>(
338
+ keys->GetArray ()->end ());
339
+ iter++) {
340
+ this ->builder_ .add (*iter);
341
+ }
342
+
343
+ this ->builder_ .buildPhf ();
344
+ std::unique_ptr<BlobWriter> writer;
345
+ size_t serialize_size = this ->builder_ .getSerializeSize ();
346
+ RETURN_ON_ERROR (client.CreateBlob (serialize_size, writer));
347
+ this ->builder_ .finishInplace (writer->data (), serialize_size, this ->idxer_ );
348
+ writer->Seal (client, buf);
349
+
311
350
return this ->allocateValues (
312
351
client, n_elements, [&](V* shuffled_values) -> Status {
313
- return detail::boomphf ::build_values (bphf_, keys-> GetArray (),
314
- begin_value, shuffled_values);
352
+ return detail::perfect_hash ::build_values (
353
+ idxer_, keys-> GetArray (), begin_value, shuffled_values);
315
354
});
316
355
return Status::OK ();
317
356
}
@@ -323,15 +362,7 @@ class PerfectHashmapBuilder : public PerfectHashmapBaseBuilder<K, V> {
323
362
*
324
363
*/
325
364
Status Build (Client& client) override {
326
- size_t size = detail::boomphf::bphf_serde::compute_size (bphf_);
327
- std::unique_ptr<BlobWriter> blob_writer;
328
- RETURN_ON_ERROR (client.CreateBlob (size, blob_writer));
329
- char * dst = detail::boomphf::bphf_serde::ser (blob_writer->data (), bphf_);
330
- RETURN_ON_ASSERT (dst == blob_writer->data () + size,
331
- " boomphf serialization error: buffer size mismatched" );
332
- std::shared_ptr<Object> blob;
333
- RETURN_ON_ERROR (blob_writer->Seal (client, blob));
334
- this ->set_ph_ (std::dynamic_pointer_cast<Blob>(blob));
365
+ this ->set_ph_ (buf);
335
366
return Status::OK ();
336
367
}
337
368
@@ -359,10 +390,11 @@ class PerfectHashmapBuilder : public PerfectHashmapBaseBuilder<K, V> {
359
390
return Status::OK ();
360
391
}
361
392
362
- boomphf::mphf<K, hasher_t > bphf_;
393
+ grape::PTHashIdxerBuilder<K, uint64_t > builder_;
394
+ grape::PTHashIdxer<K, uint64_t > idxer_;
395
+ std::shared_ptr<Object> buf;
363
396
364
397
const int concurrency_ = std::thread::hardware_concurrency();
365
- const double gamma_ = 2 .5f ;
366
398
};
367
399
368
400
} // namespace vineyard
0 commit comments