Skip to content

Commit 1b5ad68

Browse files
committed
Merge pull request #216 from basho/bugfix/file-delete-blocking
Prevent one frozen dir from blocking other deletes Reviewed-by: mrallen1
2 parents e2de420 + bdfdeac commit 1b5ad68

File tree

2 files changed

+93
-18
lines changed

2 files changed

+93
-18
lines changed

src/bitcask.erl

+67
Original file line numberDiff line numberDiff line change
@@ -2835,6 +2835,73 @@ delete_keydir_test2() ->
28352835
bitcask:close(KDB),
28362836
ok.
28372837

2838+
retry_until_true(_, _, N) when N =< 0 ->
2839+
false;
2840+
retry_until_true(F, Delay, Retries) ->
2841+
case F() of
2842+
true ->
2843+
true;
2844+
_ ->
2845+
timer:sleep(Delay),
2846+
retry_until_true(F, Delay, Retries - 1)
2847+
end.
2848+
2849+
no_pending_delete_bottleneck_test_() ->
2850+
{timeout, 30, fun no_pending_delete_bottleneck_test2/0}.
2851+
2852+
no_pending_delete_bottleneck_test2() ->
2853+
% Populate B1 and B2. Then put till frozen both after reopening.
2854+
% Delete all keys in both. Merge in both. Unfreeze B2. With the bug
2855+
% B2 files will not be deleted.
2856+
2857+
Dir1 = "/tmp/bc.test.del.block.1",
2858+
Dir2 = "/tmp/bc.test.del.block.2",
2859+
2860+
Data = default_dataset(),
2861+
bitcask:close(init_dataset(Dir1, [{max_file_size, 1}], Data)),
2862+
bitcask:close(init_dataset(Dir2, [{max_file_size, 1}], Data)),
2863+
2864+
B1 = bitcask:open(Dir1, [read_write]),
2865+
B2 = bitcask:open(Dir2, [read_write]),
2866+
2867+
Files2 = readable_files(Dir2),
2868+
2869+
FileDeleted = fun(Fname) ->
2870+
not filelib:is_regular(Fname)
2871+
end,
2872+
2873+
FilesDeleted = fun() ->
2874+
lists:all(FileDeleted, Files2)
2875+
end,
2876+
2877+
try
2878+
bitcask:iterator(B1, -1, -1),
2879+
put_till_frozen(B1),
2880+
[begin
2881+
?assertEqual(ok, bitcask:delete(B1, K))
2882+
end || {K, _} <- Data],
2883+
bitcask:merge(Dir1),
2884+
2885+
bitcask:iterator(B2, -1, -1),
2886+
put_till_frozen(B2),
2887+
[begin
2888+
?assertEqual(ok, bitcask:delete(B2, K))
2889+
end || {K, _} <- Data],
2890+
bitcask:merge(Dir2),
2891+
bitcask:iterator_release(B2),
2892+
2893+
%% Timing is everything. This test will timeout in 60 seconds.
2894+
%% The merge delete worker works on a 1 second tick. So it should
2895+
%% have at least a chance to delete the files in 10 seconds.
2896+
%% Famous last words.
2897+
?assert(retry_until_true(FilesDeleted, 1000, 10))
2898+
after
2899+
catch bitcask:close(B1),
2900+
catch bitcask:close(B2)
2901+
end.
2902+
2903+
2904+
28382905
frag_status_test_() ->
28392906
{timeout, 60, fun frag_status_test2/0}.
28402907

src/bitcask_merge_delete.erl

+26-18
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ handle_call({defer_delete, Dirname, IterGeneration, Files}, _From, State) ->
8282
handle_call({queue_length}, _From, State) ->
8383
{reply, queue:len(State#state.q), State, ?TIMEOUT};
8484
handle_call({testonly__delete_trigger}, _From, State) ->
85-
{reply, ok, check_status(State), ?TIMEOUT};
85+
{reply, ok, delete_ready_files(State), ?TIMEOUT};
8686
handle_call(_Request, _From, State) ->
8787
Reply = unknown_request,
8888
{reply, Reply, State, ?TIMEOUT}.
@@ -91,7 +91,7 @@ handle_cast(_Msg, State) ->
9191
{noreply, State, ?TIMEOUT}.
9292

9393
handle_info(timeout, State) ->
94-
{noreply, check_status(State), ?TIMEOUT};
94+
{noreply, delete_ready_files(State), ?TIMEOUT};
9595
handle_info(_Info, State) ->
9696
{noreply, State, ?TIMEOUT}.
9797

@@ -105,29 +105,37 @@ code_change(_OldVsn, State, _Extra) ->
105105
%%% Internal functions
106106
%%%===================================================================
107107

108-
check_status(S) ->
108+
delete_ready_files(S) ->
109+
delete_ready_files(S, queue:new()).
110+
111+
delete_ready_files(S, PendingQ) ->
109112
case queue:out(S#state.q) of
110113
{empty, _} ->
111-
S;
112-
{{value, {Dirname, IterGeneration, Files}}, NewQ} ->
114+
S#state{q = PendingQ};
115+
{{value, {Dirname, IterGeneration, Files} = Entry}, NewQ} ->
113116
{_, KeyDir} = bitcask_nifs:keydir_new(Dirname),
114117
try
115118

116119
{_,_,_,IterStatus,_} = bitcask_nifs:keydir_info(KeyDir),
117-
CleanAndGo = fun() ->
118-
delete_files(Files),
119-
bitcask_nifs:keydir_release(KeyDir),
120-
check_status(S#state{q = NewQ})
121-
end,
122-
case IterStatus of
123-
{_, _, false, _} ->
124-
CleanAndGo();
125-
{CurGen, _, true, _} when CurGen > IterGeneration ->
126-
CleanAndGo();
127-
_ ->
128-
%% Do nothing, ignore NewQ
129-
S
120+
ReadyToDelete =
121+
case IterStatus of
122+
{_, _, false, _} ->
123+
true;
124+
{CurGen, _, true, _} when CurGen > IterGeneration ->
125+
true;
126+
_ ->
127+
false
128+
end,
129+
S2 = S#state{q = NewQ},
130+
case ReadyToDelete of
131+
true ->
132+
delete_files(Files),
133+
bitcask_nifs:keydir_release(KeyDir),
134+
delete_ready_files(S2, PendingQ);
135+
false ->
136+
delete_ready_files(S2, queue:in(Entry, PendingQ))
130137
end
138+
131139
catch _X:_Y ->
132140
%% Not sure what problem was: keydir is no longer
133141
%% valid, or a problem deleting files, but in any

0 commit comments

Comments
 (0)