Skip to content

Commit 233c587

Browse files
Fix race condition between mem acquire and cancel (#521)
1 parent 52c90d3 commit 233c587

File tree

7 files changed

+105
-7
lines changed

7 files changed

+105
-7
lines changed

include/aws/s3/private/s3_request.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,9 @@ struct aws_s3_request {
139139
* retried.*/
140140
struct aws_byte_buf request_body;
141141

142+
/**
143+
* Ticket to acquire the buffer.
144+
*/
142145
struct aws_s3_buffer_ticket *ticket;
143146

144147
/* Beginning range of this part. */

include/aws/s3/s3_buffer_pool.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,9 @@
2424
* Note: in some cases pipeline can stall if new buffer cannot be allocated (ex. async writes flow).
2525
* In this case reserve request will indicate that not granting the ticket can block and buffer pool should try to
2626
* allocate ticket right away (or wait and call waker when mem is allocated for the case of async writes).
27+
* Note for custom pool implementations: Scheduler keeps track of all outstanding futures and will error them out when
28+
* request is paused or cancelled. Its still fine for memory pool implementation to deliver ticket (it will just be
29+
* released by future right away with no side effects) or just ignore the future if its already in error state.
2730
*/
2831

2932
AWS_PUSH_SANE_WARNING_LEVEL

source/s3_client.c

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1823,6 +1823,9 @@ static void s_s3_prepare_acquire_mem_callback_and_destroy(
18231823

18241824
/* BEGIN CRITICAL SECTION */
18251825
aws_s3_meta_request_lock_synced_data(meta_request);
1826+
aws_linked_list_remove(&payload->request->pending_buffer_future_list_node);
1827+
payload->request->synced_data.buffer_future =
1828+
aws_future_s3_buffer_ticket_release(payload->request->synced_data.buffer_future);
18261829
aws_s3_meta_request_set_fail_synced(meta_request, payload->request, error_code);
18271830
aws_s3_meta_request_unlock_synced_data(meta_request);
18281831
/* END CRITICAL SECTION */
@@ -1909,7 +1912,10 @@ void s_acquire_mem_and_prepare_request(
19091912
}
19101913
/* END CRITICAL SECTION */
19111914

1912-
aws_future_s3_buffer_ticket_register_callback(payload->buffer_future, s_on_pool_buffer_reserved, payload);
1915+
/* Note: run callback async on event loop. this is done to avoid any deadlocks between cancelling
1916+
which requires a lock on meta request and buffer acquire callback which also requires a lock. */
1917+
aws_future_s3_buffer_ticket_register_event_loop_callback(
1918+
payload->buffer_future, meta_request->io_event_loop, s_on_pool_buffer_reserved, payload);
19131919
return;
19141920
}
19151921

source/s3_default_buffer_pool.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -189,7 +189,8 @@ void s_default_pool_trim(struct aws_s3_buffer_pool *pool) {
189189

190190
static struct aws_s3_buffer_pool_vtable s_default_pool_vtable = {
191191
.reserve = s_default_pool_reserve,
192-
.trim = s_default_pool_trim};
192+
.trim = s_default_pool_trim,
193+
};
193194

194195
struct aws_s3_buffer_pool *aws_s3_default_buffer_pool_new(
195196
struct aws_allocator *allocator,

source/s3_meta_request.c

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1832,13 +1832,12 @@ void aws_s3_meta_request_cancel_pending_buffer_futures_synced(
18321832
struct aws_s3_meta_request *meta_request,
18331833
int error_code) {
18341834
ASSERT_SYNCED_DATA_LOCK_HELD(meta_request);
1835-
while (!aws_linked_list_empty(&meta_request->synced_data.pending_buffer_futures)) {
18361835

1837-
struct aws_linked_list_node *request_node =
1838-
aws_linked_list_pop_front(&meta_request->synced_data.pending_buffer_futures);
1836+
for (struct aws_linked_list_node *node = aws_linked_list_begin(&meta_request->synced_data.pending_buffer_futures);
1837+
node != aws_linked_list_end(&meta_request->synced_data.pending_buffer_futures);
1838+
node = aws_linked_list_next(node)) {
18391839

1840-
struct aws_s3_request *request =
1841-
AWS_CONTAINER_OF(request_node, struct aws_s3_request, pending_buffer_future_list_node);
1840+
struct aws_s3_request *request = AWS_CONTAINER_OF(node, struct aws_s3_request, pending_buffer_future_list_node);
18421841

18431842
aws_future_s3_buffer_ticket_set_error(request->synced_data.buffer_future, error_code);
18441843
}

tests/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -394,6 +394,7 @@ add_test_case(test_s3_buffer_pool_trim)
394394
add_test_case(test_s3_buffer_pool_reserve_over_limit)
395395
add_test_case(test_s3_buffer_pool_too_small)
396396
add_net_test_case(test_s3_put_object_buffer_pool_trim)
397+
add_net_test_case(test_s3_put_object_buffer_acquire_error)
397398
add_test_case(test_s3_buffer_pool_forced_buffer)
398399
add_test_case(test_s3_buffer_pool_forced_buffer_after_limit_hit)
399400
add_test_case(test_s3_buffer_pool_forced_buffer_wont_stop_reservations)

tests/s3_data_plane_tests.c

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3350,6 +3350,91 @@ static int s_test_s3_put_object_sse_c_aes256_multipart_with_checksum(struct aws_
33503350
return 0;
33513351
}
33523352

3353+
struct aws_future_s3_buffer_ticket *s_failing_pool_reserve(
3354+
struct aws_s3_buffer_pool *pool,
3355+
struct aws_s3_buffer_pool_reserve_meta meta) {
3356+
(void)meta;
3357+
3358+
struct aws_future_s3_buffer_ticket *future = aws_future_s3_buffer_ticket_new((struct aws_allocator *)pool->impl);
3359+
aws_future_s3_buffer_ticket_set_error(future, AWS_ERROR_S3_BUFFER_ALLOCATION_FAILED);
3360+
return future;
3361+
}
3362+
3363+
void s_failing_pool_trim(struct aws_s3_buffer_pool *pool) {
3364+
(void)pool;
3365+
}
3366+
3367+
static struct aws_s3_buffer_pool_vtable s_failing_pool_vtable = {
3368+
.reserve = s_failing_pool_reserve,
3369+
.trim = s_failing_pool_trim,
3370+
};
3371+
3372+
void s_failing_pool_destroy(struct aws_s3_buffer_pool *buffer_pool_wrapper) {
3373+
aws_mem_release((struct aws_allocator *)buffer_pool_wrapper->impl, buffer_pool_wrapper);
3374+
}
3375+
3376+
struct aws_s3_buffer_pool *s_always_error_buffer_pool_fn(
3377+
struct aws_allocator *allocator,
3378+
struct aws_s3_buffer_pool_config config) {
3379+
(void)config;
3380+
struct aws_s3_buffer_pool *pool = aws_mem_calloc(allocator, 1, sizeof(struct aws_s3_buffer_pool));
3381+
pool->impl = allocator;
3382+
pool->vtable = &s_failing_pool_vtable;
3383+
aws_ref_count_init(&pool->ref_count, pool, (aws_simple_completion_callback *)s_failing_pool_destroy);
3384+
3385+
return pool;
3386+
}
3387+
3388+
AWS_TEST_CASE(test_s3_put_object_buffer_acquire_error, s_test_s3_put_object_buffer_acquire_error)
3389+
static int s_test_s3_put_object_buffer_acquire_error(struct aws_allocator *allocator, void *ctx) {
3390+
(void)ctx;
3391+
3392+
struct aws_s3_tester tester;
3393+
ASSERT_SUCCESS(aws_s3_tester_init(allocator, &tester));
3394+
3395+
struct aws_s3_client_config client_config = {
3396+
.part_size = 8 * 1024 * 1024, .buffer_pool_factory_fn = s_always_error_buffer_pool_fn};
3397+
3398+
ASSERT_SUCCESS(aws_s3_tester_bind_client(
3399+
&tester, &client_config, AWS_S3_TESTER_BIND_CLIENT_REGION | AWS_S3_TESTER_BIND_CLIENT_SIGNING));
3400+
3401+
struct aws_s3_client *client = aws_s3_client_new(allocator, &client_config);
3402+
3403+
ASSERT_TRUE(client != NULL);
3404+
3405+
struct aws_byte_buf path_buf;
3406+
AWS_ZERO_STRUCT(path_buf);
3407+
3408+
ASSERT_SUCCESS(aws_s3_tester_upload_file_path_init(
3409+
tester.allocator, &path_buf, aws_byte_cursor_from_c_str("/prefix/round_trip/test_acquire_buffer_error.txt")));
3410+
3411+
struct aws_byte_cursor object_path = aws_byte_cursor_from_buf(&path_buf);
3412+
3413+
struct aws_s3_tester_meta_request_options put_options = {
3414+
.allocator = allocator,
3415+
.meta_request_type = AWS_S3_META_REQUEST_TYPE_PUT_OBJECT,
3416+
.client = client,
3417+
.validate_type = AWS_S3_TESTER_VALIDATE_TYPE_EXPECT_FAILURE,
3418+
.put_options =
3419+
{
3420+
.object_size_mb = 10,
3421+
.object_path_override = object_path,
3422+
},
3423+
};
3424+
3425+
struct aws_s3_meta_request_test_results meta_request_test_results;
3426+
aws_s3_meta_request_test_results_init(&meta_request_test_results, allocator);
3427+
ASSERT_SUCCESS(aws_s3_tester_send_meta_request_with_options(&tester, &put_options, &meta_request_test_results));
3428+
ASSERT_INT_EQUALS(AWS_ERROR_S3_BUFFER_ALLOCATION_FAILED, meta_request_test_results.finished_error_code);
3429+
client = aws_s3_client_release(client);
3430+
3431+
aws_s3_meta_request_test_results_clean_up(&meta_request_test_results);
3432+
aws_byte_buf_clean_up(&path_buf);
3433+
aws_s3_tester_clean_up(&tester);
3434+
3435+
return 0;
3436+
}
3437+
33533438
static int s_test_s3_put_object_content_md5_helper(
33543439
struct aws_allocator *allocator,
33553440
bool multipart_upload,

0 commit comments

Comments
 (0)