Skip to content

Commit b1a4615

Browse files
authored
Fix deadlock when dumping samples with a filter (#2052)
1 parent 1e8173c commit b1a4615

File tree

1 file changed

+7
-3
lines changed

1 file changed

+7
-3
lines changed

onmt/inputters/corpus.py

+7-3
Original file line numberDiff line numberDiff line change
@@ -269,18 +269,20 @@ def write_files_from_queues(sample_path, queues):
269269
"""
270270
os.makedirs(sample_path, exist_ok=True)
271271
for c_name in queues.keys():
272-
dest_base = dest_base = os.path.join(
272+
dest_base = os.path.join(
273273
sample_path, "{}.{}".format(c_name, CorpusName.SAMPLE))
274274
with open(dest_base + ".src", 'w', encoding="utf-8") as f_src,\
275275
open(dest_base + ".tgt", 'w', encoding="utf-8") as f_tgt:
276276
while True:
277277
_next = False
278-
for i, q in enumerate(queues[c_name]):
278+
for q in queues[c_name]:
279279
item = q.get()
280+
if item == "blank":
281+
continue
280282
if item == "break":
281283
_next = True
282284
break
283-
j, src_line, tgt_line = item
285+
_, src_line, tgt_line = item
284286
f_src.write(src_line + '\n')
285287
f_tgt.write(tgt_line + '\n')
286288
if _next:
@@ -299,6 +301,8 @@ def build_sub_vocab(corpora, transforms, opts, n_sample, stride, offset):
299301
for i, item in enumerate(c_iter):
300302
maybe_example = DatasetAdapter._process(item, is_train=True)
301303
if maybe_example is None:
304+
if opts.dump_samples:
305+
build_sub_vocab.queues[c_name][offset].put("blank")
302306
continue
303307
src_line, tgt_line = maybe_example['src'], maybe_example['tgt']
304308
sub_counter_src.update(src_line.split(' '))

0 commit comments

Comments
 (0)