@@ -52,7 +52,7 @@ ColumnInverter::ColumnInverter(PostingWriterProvider posting_writer_provider, Ve
52
52
53
53
void ColumnInverter::InitAnalyzer (const String &analyzer_name) {
54
54
auto [analyzer, status] = AnalyzerPool::instance ().GetAnalyzer (analyzer_name);
55
- if (!status.ok ()) {
55
+ if (!status.ok ()) {
56
56
Status status = Status::UnexpectedError (fmt::format (" Invalid analyzer: {}" , analyzer_name));
57
57
RecoverableError (status);
58
58
}
@@ -203,11 +203,13 @@ void ColumnInverter::Sort() {
203
203
16 );
204
204
}
205
205
206
- void ColumnInverter::GeneratePosting () {
206
+ MemUsageChange ColumnInverter::GeneratePosting () {
207
207
u32 last_term_num = std::numeric_limits<u32>::max ();
208
208
u32 last_doc_id = INVALID_DOCID;
209
209
StringRef last_term, term;
210
210
SharedPtr<PostingWriter> posting = nullptr ;
211
+ MemUsageChange ret{true , 0 };
212
+ Map<StringRef, PostingWriter *> modified_writers;
211
213
// printf("GeneratePosting() begin begin_doc_id_ %u, doc_count_ %u, merged_ %u", begin_doc_id_, doc_count_, merged_);
212
214
for (auto &i : positions_) {
213
215
if (last_term_num != i.term_num_ ) {
@@ -218,6 +220,9 @@ void ColumnInverter::GeneratePosting() {
218
220
}
219
221
term = GetTermFromNum (i.term_num_ );
220
222
posting = posting_writer_provider_ (String (term.data ()));
223
+ if (modified_writers.find (term) == modified_writers.end ()) {
224
+ modified_writers[term] = posting.get ();
225
+ }
221
226
// printf("\nswitched-term-%d-<%s>\n", i.term_num_, term.data());
222
227
if (last_term_num != (u32)(-1 )) {
223
228
assert (last_term_num < i.term_num_ );
@@ -242,6 +247,12 @@ void ColumnInverter::GeneratePosting() {
242
247
// printf(" EndDocument3-%u\n", last_doc_id);
243
248
}
244
249
// printf("GeneratePosting() end begin_doc_id_ %u, doc_count_ %u, merged_ %u", begin_doc_id_, doc_count_, merged_);
250
+ for (auto kv : modified_writers) {
251
+ PostingWriter *writer = kv.second ;
252
+ ret.Add (writer->GetSizeChange ());
253
+ }
254
+ LOG_TRACE (fmt::format (" MemUsageChange : {}, {}" , ret.is_add_ , ret.mem_ ));
255
+ return ret;
245
256
}
246
257
247
258
void ColumnInverter::SortForOfflineDump () {
@@ -258,7 +269,7 @@ void ColumnInverter::SortForOfflineDump() {
258
269
// ----------------------------------------------------------------------------------------------------------------------------+
259
270
// Data within each group
260
271
261
- void ColumnInverter::SpillSortResults (FILE *spill_file, u64 &tuple_count, UniquePtr<BufWriter>& buf_writer) {
272
+ void ColumnInverter::SpillSortResults (FILE *spill_file, u64 &tuple_count, UniquePtr<BufWriter> & buf_writer) {
262
273
// spill sort results for external merge sort
263
274
// if (positions_.empty()) {
264
275
// return;
@@ -267,19 +278,19 @@ void ColumnInverter::SpillSortResults(FILE *spill_file, u64 &tuple_count, Unique
267
278
// size of this Run in bytes
268
279
u32 data_size = 0 ;
269
280
u64 data_size_pos = spill_file_tell;
270
- buf_writer->Write ((const char *)&data_size, sizeof (u32));
281
+ buf_writer->Write ((const char *)&data_size, sizeof (u32));
271
282
spill_file_tell += sizeof (u32);
272
283
273
284
// number of tuples
274
285
u32 num_of_tuples = positions_.size ();
275
286
tuple_count += num_of_tuples;
276
- buf_writer->Write ((const char *)&num_of_tuples, sizeof (u32));
287
+ buf_writer->Write ((const char *)&num_of_tuples, sizeof (u32));
277
288
spill_file_tell += sizeof (u32);
278
289
279
290
// start offset for next spill
280
291
u64 next_start_offset = 0 ;
281
292
u64 next_start_offset_pos = spill_file_tell;
282
- buf_writer->Write ((const char *)&next_start_offset, sizeof (u64));
293
+ buf_writer->Write ((const char *)&next_start_offset, sizeof (u64));
283
294
spill_file_tell += sizeof (u64);
284
295
285
296
u64 data_start_offset = spill_file_tell;
@@ -295,11 +306,11 @@ void ColumnInverter::SpillSortResults(FILE *spill_file, u64 &tuple_count, Unique
295
306
}
296
307
record_length = term.size () + sizeof (docid_t ) + sizeof (u32) + 1 ;
297
308
298
- buf_writer->Write ((const char *)&record_length, sizeof (u32));
309
+ buf_writer->Write ((const char *)&record_length, sizeof (u32));
299
310
buf_writer->Write (term.data (), term.size ());
300
- buf_writer->Write ((const char *)&str_null, sizeof (char ));
301
- buf_writer->Write ((const char *)&(i.doc_id_ ), sizeof (docid_t ));
302
- buf_writer->Write ((const char *)&(i.term_pos_ ), sizeof (u32));
311
+ buf_writer->Write ((const char *)&str_null, sizeof (char ));
312
+ buf_writer->Write ((const char *)&(i.doc_id_ ), sizeof (docid_t ));
313
+ buf_writer->Write ((const char *)&(i.term_pos_ ), sizeof (u32));
303
314
}
304
315
buf_writer->Flush ();
305
316
// update data size
@@ -312,4 +323,4 @@ void ColumnInverter::SpillSortResults(FILE *spill_file, u64 &tuple_count, Unique
312
323
fseek (spill_file, next_start_offset, SEEK_SET);
313
324
}
314
325
315
- } // namespace infinity
326
+ } // namespace infinity
0 commit comments