Skip to content
This repository was archived by the owner on Apr 18, 2024. It is now read-only.

Commit aaf9ff9

Browse files
committed
fix: DefaultSaturnCarRequestTimeout
19s is not enough for fetching a CAR stream of unknown length; every bigger request was failing. If we need to pick some ceiling, 30m sounds like a good starting point (this is when CAR streams timed out on the old ipfs.io).
1 parent b4cc708 commit aaf9ff9

File tree

3 files changed

+28
-6
lines changed

3 files changed

+28
-6
lines changed

caboose.go

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,11 +75,19 @@ type Config struct {
7575
MaxNCoolOff int
7676
}
7777

78+
const DefaultLoggingInterval = 5 * time.Second
79+
const DefaultSaturnLoggerRequestTimeout = 1 * time.Minute
80+
81+
const DefaultSaturnOrchestratorRequestTimeout = 19 * time.Second
82+
83+
const DefaultSaturnBlockRequestTimeout = 19 * time.Second
84+
const DefaultSaturnCarRequestTimeout = 30 * time.Minute
85+
7886
const DefaultMaxRetries = 3
7987
const DefaultPoolFailureDownvoteDebounce = 1 * time.Minute
8088
const DefaultPoolMembershipDebounce = 3 * DefaultPoolRefreshInterval
8189
const DefaultPoolLowWatermark = 5
82-
const DefaultSaturnRequestTimeout = 19 * time.Second
90+
8391
const maxBlockSize = 4194305 // 4 Mib + 1 byte
8492
const DefaultOrchestratorEndpoint = "https://orchestrator.strn.pl/nodes/nearby?count=1000"
8593
const DefaultPoolRefreshInterval = 5 * time.Minute
@@ -188,7 +196,7 @@ func NewCaboose(config *Config) (*Caboose, error) {
188196

189197
if c.config.SaturnClient == nil {
190198
c.config.SaturnClient = &http.Client{
191-
Timeout: DefaultSaturnRequestTimeout,
199+
Timeout: DefaultSaturnCarRequestTimeout,
192200
}
193201
}
194202
if c.config.OrchestratorEndpoint == nil {

cmd/caboose/main.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ func main1() int {
5454

5555
LoggingEndpoint: *le,
5656
LoggingClient: http.DefaultClient,
57-
LoggingInterval: 5 * time.Second,
57+
LoggingInterval: DefaultLoggingInterval,
5858

5959
DoValidation: true,
6060
PoolRefresh: 5 * time.Minute,

fetcher.go

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,11 @@ func (p *pool) fetchResource(ctx context.Context, from string, resource string,
8181
isCacheHit := false
8282
networkError := ""
8383

84+
isBlockRequest := false
85+
if mime == "application/vnd.ipld.raw" {
86+
isBlockRequest = true
87+
}
88+
8489
defer func() {
8590
var ttfbMs int64
8691
durationSecs := time.Since(start).Seconds()
@@ -92,15 +97,15 @@ func (p *pool) fetchResource(ctx context.Context, from string, resource string,
9297
ttfbMs = fb.Sub(start).Milliseconds()
9398
fetchTTFBPerBlockPerPeerSuccessMetric.Observe(float64(ttfbMs))
9499
// track individual block metrics separately
95-
if mime == "application/vnd.ipld.raw" {
100+
if isBlockRequest {
96101
fetchDurationPerBlockPerPeerSuccessMetric.Observe(float64(response_success_end.Sub(start).Milliseconds()))
97102
} else {
98103
fetchDurationPerCarPerPeerSuccessMetric.Observe(float64(response_success_end.Sub(start).Milliseconds()))
99104
}
100105
fetchSpeedPerBlockPerPeerMetric.Observe(float64(received) / float64(durationMs))
101106
} else {
102107
fetchTTFBPerBlockPerPeerFailureMetric.Observe(float64(ttfbMs))
103-
if mime == "application/vnd.ipld.raw" {
108+
if isBlockRequest {
104109
fetchDurationPerBlockPerPeerFailureMetric.Observe(float64(time.Since(start).Milliseconds()))
105110
} else {
106111
fetchDurationPerCarPerPeerFailureMetric.Observe(float64(time.Since(start).Milliseconds()))
@@ -145,7 +150,16 @@ func (p *pool) fetchResource(ctx context.Context, from string, resource string,
145150
}
146151
}()
147152

148-
reqCtx, cancel := context.WithTimeout(ctx, DefaultSaturnRequestTimeout)
153+
// TODO: Ideally, we would have additional "PerRequestInactivityTimeout"
154+
// which is the amount of time without any NEW data from the server, but
155+
// that can be added later. We need both because a slow trickle of data
156+
// could take a large amount of time.
157+
requestTimeout := DefaultSaturnCarRequestTimeout
158+
if isBlockRequest {
159+
requestTimeout = DefaultSaturnBlockRequestTimeout
160+
}
161+
162+
reqCtx, cancel := context.WithTimeout(ctx, requestTimeout)
149163
defer cancel()
150164
req, err := http.NewRequestWithContext(reqCtx, http.MethodGet, reqUrl, nil)
151165
if err != nil {

0 commit comments

Comments
 (0)