Skip to content

Commit 2bd2db1

Browse files
authored
chore(storage): add benchmarking script (#5856)
1 parent ee5751f commit 2bd2db1

File tree

9 files changed

+2066
-0
lines changed

9 files changed

+2066
-0
lines changed

storage/internal/benchmarks/README.md

+45
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
# go-bench-gcs
2+
**This is not an officially supported Google product**
3+
4+
## Run example:
5+
This runs 1000 iterations on 512 KiB to 2 GiB files in the background, sending output to `out.log`:
6+
7+
`go run main -p {PROJECT_ID} -t 72h -max_samples 1000 -o {RESULTS_FILE_NAME}.csv &> out.log &`
8+
9+
10+
## CLI parameters
11+
12+
| Parameter | Description | Possible values | Default |
13+
| --------- | ----------- | --------------- |:-------:|
14+
| -p | projectID | a project ID | * |
15+
| -creds | path to credentials file | any path | from environment |
16+
| -o | file to output results to <br> if empty, will output to stdout | any file path | stdout |
17+
| -output_type | output results as csv records or cloud monitoring | `csv`, `cloud-monitoring` | `cloud-monitoring` |
18+
| -api | which API to use | `JSON`: use JSON to upload and XML to download <br> `XML`: use JSON to upload and XML to download <br> `GRPC`: use GRPC <br> `MIXED`: select an API at random for each upload/download <br> `DirectPath`: use GRPC with direct path | `MIXED` |
19+
| -r | bucket region for benchmarks | any GCS region | `US-WEST1` |
20+
| -workers | number of goroutines to run at once; set to 1 for no concurrency | any positive integer | `16` |
21+
| -t | timeout (maximum time running benchmarks) <br> the program may run for longer while it finishes running processes | any [time.Duration](https://pkg.go.dev/time#Duration) | `1h` |
22+
| -min_samples | minimum number of objects to upload | any positive integer | `10` |
23+
| -max_samples | maximum number of objects to upload | any positive integer | `10 000` |
24+
| -gc_f | whether to force garbage collection <br> before every write or read benchmark | `true` or `false` (present/not present) | `false` |
25+
| -min_size | minimum object size in KiB | any positive integer | `512` (512 KiB) |
26+
| -max_size | maximum object size in KiB | any positive integer | `2 097 152` (2 GiB) |
27+
| -defaults | use default settings for the client <br> (conn_pool, read, write and chunk size parameters will be ignored) | `true` or `false` | `false` |
28+
| -conn_pool | GRPC connection pool size | any positive integer | 4 |
29+
| -min_cs | minimum ChunkSize in KiB | any positive integer | `16 384` (16 MiB) |
30+
| -max_cs | maximum ChunkSize in KiB | any positive integer | `16 384` (16 MiB) |
31+
| -q_read | download quantum | any positive integer | 1 |
32+
| -q_write | upload quantum | any positive integer | 1 |
33+
| -min_r_size | minimum read size in bytes | any positive integer | `4000` |
34+
| -max_r_size | maximum read size in bytes | any positive integer | `4000` |
35+
| -min_w_size | minimum write size in bytes | any positive integer | `4000` |
36+
| -max_w_size | maximum write size in bytes | any positive integer | `4000` |
37+
| -labels | labels added to cloud monitoring output (ignored when outputting as csv) | any string; should be in the format: <br> `stringKey=\"value\",intKey=3,boolKey=true` | empty |
38+
39+
\* required values
40+
41+
Note: while the default read/write size for HTTP clients is 4 KB
42+
(the default for this benchmarking script), the default for gRPC is 32 KB.
43+
If you want to capture performance using the defaults for GRPC run the script
44+
separately, setting the read and write sizes to 32 KB, or run with the `defaults`
45+
parameter set.
+221
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,221 @@
1+
// Copyright 2022 Google LLC
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
package main
16+
17+
import (
18+
"context"
19+
"fmt"
20+
"log"
21+
"net"
22+
"net/http"
23+
"os"
24+
"sync"
25+
"time"
26+
27+
"cloud.google.com/go/storage"
28+
"golang.org/x/net/http2"
29+
"google.golang.org/api/option"
30+
htransport "google.golang.org/api/transport/http"
31+
"google.golang.org/grpc"
32+
)
33+
34+
// clientPool functions much like a sync Pool (https://pkg.go.dev/sync#Pool),
35+
// except it does not automatically remove items stored in the clientPool.
36+
// Re-using the clients rather than creating a new one each time reduces overhead
37+
// (such as re-creating the underlying HTTP client and opening credential files),
38+
// and is the intended way to use Storage clients.
39+
//
40+
// There is no limit to how many clients will be created, but it should be around
41+
// the order of 5 * min(workers, max_samples).
42+
type clientPool struct {
43+
New func() *storage.Client
44+
clients []*storage.Client
45+
}
46+
47+
func (p *clientPool) Get() *storage.Client {
48+
// Create the slice if not already created
49+
if p.clients == nil {
50+
p.clients = make([]*storage.Client, 0)
51+
}
52+
53+
// If there is an unused client, return it
54+
if len(p.clients) > 0 {
55+
c := p.clients[0]
56+
p.clients = p.clients[1:]
57+
return c
58+
}
59+
60+
// Otherwise, create a new client and return it
61+
return p.New()
62+
}
63+
64+
func (p *clientPool) Put(c *storage.Client) {
65+
p.clients = append(p.clients, c)
66+
}
67+
68+
// we can share clients as long as the app buffer sizes are constant
69+
var httpClients, gRPCClients *clientPool
70+
71+
var nonBenchmarkingClients = clientPool{
72+
New: func() *storage.Client {
73+
// For debuggability's sake, these are HTTP
74+
clientMu.Lock()
75+
client, err := storage.NewClient(context.Background())
76+
clientMu.Unlock()
77+
if err != nil {
78+
log.Fatalf("storage.NewClient: %v", err)
79+
}
80+
81+
return client
82+
},
83+
}
84+
85+
func initializeClientPools(opts *benchmarkOptions) func() {
86+
httpClients = &clientPool{
87+
New: func() *storage.Client {
88+
client, err := initializeHTTPClient(context.Background(), opts.minWriteSize, opts.maxReadSize, opts.useDefaults)
89+
if err != nil {
90+
log.Fatalf("initializeHTTPClient: %v", err)
91+
}
92+
93+
return client
94+
},
95+
}
96+
97+
gRPCClients = &clientPool{
98+
New: func() *storage.Client {
99+
client, err := initializeGRPCClient(context.Background(), opts.minWriteSize, opts.maxReadSize, opts.connPoolSize, opts.useDefaults)
100+
if err != nil {
101+
log.Fatalf("initializeGRPCClient: %v", err)
102+
}
103+
return client
104+
},
105+
}
106+
107+
return func() {
108+
for _, c := range httpClients.clients {
109+
c.Close()
110+
}
111+
for _, c := range gRPCClients.clients {
112+
c.Close()
113+
}
114+
}
115+
}
116+
117+
// We can't pool storage clients if we need to change parameters at the HTTP or GRPC client level,
118+
// since we can't access those after creation as it is set up now.
119+
// If we are using defaults (ie. not creating an underlying HTTP client ourselves), or if
120+
// we are only interested in one app buffer size at a time, we don't need to change anything on the underlying
121+
// client and can re-use it (and therefore the storage client) for other benchmark runs.
122+
func canUseClientPool(opts *benchmarkOptions) bool {
123+
return opts.useDefaults || (opts.maxReadSize == opts.minReadSize && opts.maxWriteSize == opts.minWriteSize)
124+
}
125+
126+
func getClient(ctx context.Context, opts *benchmarkOptions, br benchmarkResult) (*storage.Client, func() error, error) {
127+
noOp := func() error { return nil }
128+
grpc := br.params.api == grpcAPI || br.params.api == directPath
129+
if canUseClientPool(opts) {
130+
if grpc {
131+
c := gRPCClients.Get()
132+
return c, func() error { gRPCClients.Put(c); return nil }, nil
133+
}
134+
c := httpClients.Get()
135+
return c, func() error { httpClients.Put(c); return nil }, nil
136+
}
137+
138+
// if necessary, create a client
139+
if grpc {
140+
c, err := initializeGRPCClient(ctx, br.params.appBufferSize, br.params.appBufferSize, opts.connPoolSize, false)
141+
if err != nil {
142+
return nil, noOp, fmt.Errorf("initializeGRPCClient: %w", err)
143+
}
144+
return c, c.Close, nil
145+
}
146+
c, err := initializeHTTPClient(ctx, br.params.appBufferSize, br.params.appBufferSize, false)
147+
if err != nil {
148+
return nil, noOp, fmt.Errorf("initializeHTTPClient: %w", err)
149+
}
150+
return c, c.Close, nil
151+
}
152+
153+
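// clientMu serializes storage client creation. gRPC clients are created by
// temporarily setting the STORAGE_USE_GRPC environment variable, so no other
// client may be created while that variable is set.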
154+
var clientMu sync.Mutex
155+
156+
func initializeHTTPClient(ctx context.Context, writeBufferSize, readBufferSize int, useDefaults bool) (*storage.Client, error) {
157+
if useDefaults {
158+
clientMu.Lock()
159+
c, err := storage.NewClient(ctx, option.WithCredentialsFile(credentialsFile))
160+
clientMu.Unlock()
161+
return c, err
162+
}
163+
164+
dialer := &net.Dialer{
165+
Timeout: 30 * time.Second,
166+
KeepAlive: 30 * time.Second,
167+
}
168+
169+
// These are the default parameters with write and read buffer sizes modified
170+
base := &http.Transport{
171+
Proxy: http.ProxyFromEnvironment,
172+
DialContext: dialer.DialContext,
173+
ForceAttemptHTTP2: true,
174+
MaxIdleConns: 100,
175+
IdleConnTimeout: 90 * time.Second,
176+
TLSHandshakeTimeout: 10 * time.Second,
177+
ExpectContinueTimeout: 1 * time.Second,
178+
WriteBufferSize: writeBufferSize,
179+
ReadBufferSize: readBufferSize,
180+
}
181+
182+
http2Trans, err := http2.ConfigureTransports(base)
183+
if err == nil {
184+
http2Trans.ReadIdleTimeout = time.Second * 31
185+
}
186+
187+
trans, err := htransport.NewTransport(ctx, base,
188+
option.WithScopes("https://www.googleapis.com/auth/devstorage.full_control"),
189+
option.WithCredentialsFile(credentialsFile))
190+
if err != nil {
191+
return nil, err
192+
}
193+
194+
clientMu.Lock()
195+
client, err := storage.NewClient(ctx, option.WithHTTPClient(&http.Client{Transport: trans}))
196+
clientMu.Unlock()
197+
198+
return client, err
199+
}
200+
201+
func initializeGRPCClient(ctx context.Context, writeBufferSize, readBufferSize int, connPoolSize int, useDefaults bool) (*storage.Client, error) {
202+
if useDefaults {
203+
clientMu.Lock()
204+
os.Setenv("STORAGE_USE_GRPC", "true")
205+
c, err := storage.NewClient(ctx, option.WithCredentialsFile(credentialsFile))
206+
os.Unsetenv("STORAGE_USE_GRPC")
207+
clientMu.Unlock()
208+
return c, err
209+
}
210+
211+
clientMu.Lock()
212+
os.Setenv("STORAGE_USE_GRPC", "true")
213+
client, err := storage.NewClient(ctx, option.WithCredentialsFile(credentialsFile),
214+
option.WithGRPCDialOption(grpc.WithReadBufferSize(readBufferSize)),
215+
option.WithGRPCDialOption(grpc.WithWriteBufferSize(writeBufferSize)),
216+
option.WithGRPCConnectionPool(connPoolSize))
217+
os.Unsetenv("STORAGE_USE_GRPC")
218+
clientMu.Unlock()
219+
220+
return client, err
221+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
// Copyright 2022 Google LLC
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
package main
16+
17+
import (
18+
"context"
19+
"fmt"
20+
"io"
21+
"os"
22+
"time"
23+
24+
"cloud.google.com/go/storage"
25+
)
26+
27+
type downloadOpts struct {
28+
client *storage.Client
29+
objectSize int64
30+
bucket string
31+
object string
32+
}
33+
34+
func downloadBenchmark(ctx context.Context, dopts downloadOpts) (elapsedTime time.Duration, rerr error) {
35+
// Set timer
36+
start := time.Now()
37+
// Multiple defer statements execute in LIFO order, so this will be the last
38+
// thing executed. We use named return parameters so that we can set it directly
39+
// and defer the statement so that the time includes typical cleanup steps and
40+
// gets set regardless of errors.
41+
defer func() { elapsedTime = time.Since(start) }()
42+
43+
// Set additional timeout
44+
ctx, cancel := context.WithTimeout(ctx, time.Minute)
45+
defer cancel()
46+
47+
// Create file to download to
48+
f, err := os.CreateTemp("", objectPrefix)
49+
if err != nil {
50+
rerr = fmt.Errorf("os.Create: %w", err)
51+
return
52+
}
53+
defer func() {
54+
closeErr := f.Close()
55+
removeErr := os.Remove(f.Name())
56+
// if we don't have another error to return, return error for closing file
57+
// if that error is also nil, return removeErr
58+
if rerr == nil {
59+
rerr = removeErr
60+
if closeErr != nil {
61+
rerr = closeErr
62+
}
63+
}
64+
}()
65+
66+
// Get reader from object
67+
o := dopts.client.Bucket(dopts.bucket).Object(dopts.object)
68+
objectReader, err := o.NewReader(ctx)
69+
if err != nil {
70+
rerr = fmt.Errorf("Object(%q).NewReader: %w", o.ObjectName(), err)
71+
return
72+
}
73+
defer func() {
74+
err := objectReader.Close()
75+
if rerr == nil {
76+
rerr = err
77+
}
78+
}()
79+
80+
// Download
81+
written, err := io.Copy(f, objectReader)
82+
if err != nil {
83+
rerr = fmt.Errorf("io.Copy: %w", err)
84+
return
85+
}
86+
87+
if written != dopts.objectSize {
88+
rerr = fmt.Errorf("did not read all bytes; read: %d, expected to read: %d", written, dopts.objectSize)
89+
return
90+
}
91+
92+
return
93+
}

0 commit comments

Comments
 (0)