Skip to content

Optimize file uploads by reading from file in parallel #4331

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Aug 28, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,209 @@
/*
* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License").
* You may not use this file except in compliance with the License.
* A copy of the License is located at
*
* http://aws.amazon.com/apache2.0
*
* or in the "license" file accompanying this file. This file is distributed
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/

package software.amazon.awssdk.core;

import java.nio.file.Path;
import java.util.Objects;
import software.amazon.awssdk.annotations.SdkPublicApi;
import software.amazon.awssdk.core.async.AsyncRequestBody;
import software.amazon.awssdk.utils.Validate;
import software.amazon.awssdk.utils.builder.CopyableBuilder;
import software.amazon.awssdk.utils.builder.ToCopyableBuilder;

/**
* Configuration options for {@link AsyncRequestBody#fromFile(FileRequestBodyConfiguration)} to configure how the SDK
* should read the file.
*
* @see #builder()
*/
@SdkPublicApi
public final class FileRequestBodyConfiguration implements ToCopyableBuilder<FileRequestBodyConfiguration.Builder,
FileRequestBodyConfiguration> {
private final Integer chunkSizeInBytes;
private final Long position;
private final Long numBytesToRead;
private final Path path;

/**
 * Builds an immutable configuration from the values collected by the builder.
 *
 * <p>Each value is passed through the matching {@code Validate} check before being stored:
 * the path via {@code notNull}, the chunk size via {@code isPositiveOrNull}, and the position
 * and byte count via {@code isNotNegativeOrNull}.
 *
 * @param source the builder holding the values to copy
 */
private FileRequestBodyConfiguration(DefaultBuilder source) {
    path = Validate.notNull(source.path, "path");
    chunkSizeInBytes = Validate.isPositiveOrNull(source.chunkSizeInBytes, "chunkSizeInBytes");
    position = Validate.isNotNegativeOrNull(source.position, "position");
    numBytesToRead = Validate.isNotNegativeOrNull(source.numBytesToRead, "numBytesToRead");
}

/**
* Creates a new {@link Builder} with no values set, used to assemble a {@link FileRequestBodyConfiguration}.
*
* @return a fresh builder instance
*/
public static Builder builder() {
return new DefaultBuilder();
}

/**
* Returns the configured chunk size.
*
* @return the size, in bytes, of each chunk to read from the file, or {@code null} if no chunk size was set
*/
public Integer chunkSizeInBytes() {
return chunkSizeInBytes;
}

/**
* Returns the configured starting position.
*
* @return the file position at which the request body begins, or {@code null} if no position was set
*/
public Long position() {
return position;
}

/**
* Returns the configured number of bytes to read.
*
* @return the number of bytes to read from this file, or {@code null} if no value was set
*/
public Long numBytesToRead() {
return numBytesToRead;
}

/**
* Returns the path of the file to read.
*
* @return the file path; it is checked with {@code Validate.notNull} when this configuration is constructed
*/
public Path path() {
return path;
}

/**
 * Two configurations are equal when they are of the same class and their chunk size, position,
 * byte count and path are all equal (with {@code null} equal only to {@code null}).
 */
@Override
public boolean equals(Object o) {
    if (this == o) {
        return true;
    }
    if (o == null || getClass() != o.getClass()) {
        return false;
    }

    FileRequestBodyConfiguration other = (FileRequestBodyConfiguration) o;
    return Objects.equals(chunkSizeInBytes, other.chunkSizeInBytes)
           && Objects.equals(position, other.position)
           && Objects.equals(numBytesToRead, other.numBytesToRead)
           && Objects.equals(path, other.path);
}

/**
 * Combines the hash codes of chunk size, position, byte count and path (in that order) using
 * the usual multiply-by-31 scheme; an unset ({@code null}) value contributes 0.
 */
@Override
public int hashCode() {
    // Objects.hashCode(x) yields 0 for null, so the first term seeds the hash exactly as a null check would.
    int hash = Objects.hashCode(chunkSizeInBytes);
    hash = 31 * hash + Objects.hashCode(position);
    hash = 31 * hash + Objects.hashCode(numBytesToRead);
    hash = 31 * hash + Objects.hashCode(path);
    return hash;
}

/**
* Creates a builder pre-populated with this configuration's path, chunk size, position and number of bytes to read.
*
* @return a new {@link Builder} seeded from this instance
*/
@Override
public Builder toBuilder() {
return new DefaultBuilder(this);
}

public interface Builder extends CopyableBuilder<Builder, FileRequestBodyConfiguration> {

/**
* Sets the {@link Path} to the file containing the data to send to the service.
*
* <p>This value is required: the configuration's constructor checks it with {@code Validate.notNull}.
*
* @param path Path to the file to read.
* @return This builder for method chaining.
*/
Builder path(Path path);

/**
* Sets the size of chunks read from the file. Increasing this will cause more data to be buffered into memory but
* may yield better latencies. Decreasing this will reduce memory usage but may cause higher latency. Choosing a good
* value is very dependent on upload speed and requires some performance testing to tune.
*
* <p>The default chunk size is 16 KiB</p>
*
* <p>When set, the value is checked with {@code Validate.isPositiveOrNull} as the configuration is built.
*
* @param chunkSize New chunk size in bytes.
* @return This builder for method chaining.
*/
Builder chunkSizeInBytes(Integer chunkSize);
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I kept Integer here because we only support reading up to Integer.MAX_VALUE number of bytes per chunk for now (ByteBuffer only takes int) and I don't think we should support long for this use case. LMK if you think otherwise.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

2 GB per chunk is probably a reasonable maximum for now, if it's not easy to support a higher number.


/**
* Sets the file position at which the request body begins.
*
* <p>By default, it's 0, i.e., reading from the beginning.
*
* <p>When set, the value is checked with {@code Validate.isNotNegativeOrNull} as the configuration is built.
*
* @param position the position in the file at which to start reading
* @return The builder for method chaining.
*/
Builder position(Long position);

/**
* Sets the number of bytes to read from this file.
*
* <p>By default, it's the same as the file length.
*
* <p>When set, the value is checked with {@code Validate.isNotNegativeOrNull} as the configuration is built.
*
* @param numBytesToRead number of bytes to read
* @return The builder for method chaining.
*/
Builder numBytesToRead(Long numBytesToRead);
}

/**
 * Mutable {@link Builder} implementation. It only stores the supplied values; validation happens in the
 * {@link FileRequestBodyConfiguration} constructor when {@link #build()} is called.
 */
private static final class DefaultBuilder implements Builder {
    private Path path;
    private Integer chunkSizeInBytes;
    private Long position;
    private Long numBytesToRead;

    /** Creates a builder with every value unset. */
    private DefaultBuilder() {
    }

    /** Creates a builder that starts from the values of an existing configuration. */
    private DefaultBuilder(FileRequestBodyConfiguration source) {
        path = source.path;
        chunkSizeInBytes = source.chunkSizeInBytes;
        position = source.position;
        numBytesToRead = source.numBytesToRead;
    }

    @Override
    public Builder path(Path path) {
        this.path = path;
        return this;
    }

    @Override
    public Builder chunkSizeInBytes(Integer chunkSizeInBytes) {
        this.chunkSizeInBytes = chunkSizeInBytes;
        return this;
    }

    @Override
    public Builder position(Long position) {
        this.position = position;
        return this;
    }

    @Override
    public Builder numBytesToRead(Long numBytesToRead) {
        this.numBytesToRead = numBytesToRead;
        return this;
    }

    /** Hands the collected values to the configuration constructor, which validates them. */
    @Override
    public FileRequestBodyConfiguration build() {
        return new FileRequestBodyConfiguration(this);
    }
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import org.reactivestreams.Publisher;
import org.reactivestreams.Subscriber;
import software.amazon.awssdk.annotations.SdkPublicApi;
import software.amazon.awssdk.core.FileRequestBodyConfiguration;
import software.amazon.awssdk.core.internal.async.ByteBuffersAsyncRequestBody;
import software.amazon.awssdk.core.internal.async.FileAsyncRequestBody;
import software.amazon.awssdk.core.internal.async.InputStreamWithExecutorAsyncRequestBody;
Expand Down Expand Up @@ -112,16 +113,46 @@ static AsyncRequestBody fromFile(Path path) {

/**
 * Creates an {@link AsyncRequestBody} that produces data from the contents of a file. See
 * {@link #fromFile(FileRequestBodyConfiguration)} to create a customized body implementation.
 *
 * @param file The file to read from; must not be {@code null}.
 * @return Implementation of {@link AsyncRequestBody} that reads data from the specified file.
 * @see FileAsyncRequestBody
 */
static AsyncRequestBody fromFile(File file) {
    // Fail fast with a descriptive message instead of an anonymous failure inside file.toPath(),
    // mirroring the null checks performed by the configuration-based fromFile overloads.
    Validate.notNull(file, "file");
    return FileAsyncRequestBody.builder().path(file.toPath()).build();
}

/**
 * Creates an {@link AsyncRequestBody} that reads its data from a file, as described by the given configuration.
 *
 * <p>The path, position, chunk size and number of bytes to read are copied from the configuration onto a
 * {@link FileAsyncRequestBody} builder.
 *
 * @param configuration configuration for how the SDK should read the file
 * @return Implementation of {@link AsyncRequestBody} that reads data from the specified file.
 */
static AsyncRequestBody fromFile(FileRequestBodyConfiguration configuration) {
    FileRequestBodyConfiguration settings = Validate.notNull(configuration, "configuration");
    return FileAsyncRequestBody.builder()
                               .path(settings.path())
                               .position(settings.position())
                               .chunkSizeInBytes(settings.chunkSizeInBytes())
                               .numBytesToRead(settings.numBytesToRead())
                               .build();
}

/**
 * Creates an {@link AsyncRequestBody} that produces data from the contents of a file.
 *
 * <p>
 * Convenience overload: it creates a {@link FileRequestBodyConfiguration} builder, lets the supplied consumer
 * populate it, builds the configuration and delegates to {@link #fromFile(FileRequestBodyConfiguration)}, so the
 * caller does not have to call {@link FileRequestBodyConfiguration#builder()} directly.
 *
 * @param configuration consumer that sets the desired options on the configuration builder
 * @return Implementation of {@link AsyncRequestBody} that reads data from the specified file.
 */
static AsyncRequestBody fromFile(Consumer<FileRequestBodyConfiguration.Builder> configuration) {
    Validate.notNull(configuration, "configuration");
    FileRequestBodyConfiguration fileConfiguration = FileRequestBodyConfiguration.builder()
                                                                                 .applyMutation(configuration)
                                                                                 .build();
    return fromFile(fileConfiguration);
}

/**
* Creates an {@link AsyncRequestBody} that uses a single string as data.
*
Expand Down
Loading