Skip to content
This repository was archived by the owner on Jul 11, 2023. It is now read-only.

Commit 3e727ed

Browse files
author
Sotiris Nanopoulos
authored
feat(injector): Set probe timeouts based on pod deployment spec (#4149)
Fixes #4137 Overall we aim to maintain the following invariance: For all health probes we want the following to be true: All Envoy (implicit or explicit) timeouts >= the timeout specified in the pod deployment spec. Changes: * Remove connect timeouts from all clusters * For health probe clusters we set the route timeout to be equal to the timeout set on the kubernetes pod. This is only applied on HTTP routes since plain tcp routes have infinite timeout. Signed-off-by: Sotiris Nanopoulos <[email protected]>
1 parent 02623dd commit 3e727ed

21 files changed

+107
-112
lines changed

pkg/envoy/cds/cluster.go

Lines changed: 10 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,6 @@ import (
2323
"github.com/openservicemesh/osm/pkg/trafficpolicy"
2424
)
2525

26-
const (
27-
// clusterConnectTimeout is the timeout duration used by Envoy to timeout connections to the cluster
28-
clusterConnectTimeout = 1 * time.Second
29-
)
30-
3126
// replacer used to configure an Envoy cluster's altStatName
3227
var replacer = strings.NewReplacer(".", "_", ":", "_")
3328

@@ -49,7 +44,6 @@ func getUpstreamServiceCluster(downstreamIdentity identity.ServiceIdentity, conf
4944

5045
remoteCluster := &xds_cluster.Cluster{
5146
Name: config.Name,
52-
ConnectTimeout: ptypes.DurationProto(clusterConnectTimeout),
5347
TypedExtensionProtocolOptions: HTTP2ProtocolOptions,
5448
TransportSocket: &xds_core.TransportSocket{
5549
Name: wellknown.TransportSocketTls,
@@ -78,8 +72,7 @@ func getMulticlusterGatewayUpstreamServiceCluster(catalog catalog.MeshCataloger,
7872
}
7973

8074
remoteCluster := &xds_cluster.Cluster{
81-
Name: upstreamSvc.ServerName(),
82-
ConnectTimeout: ptypes.DurationProto(clusterConnectTimeout),
75+
Name: upstreamSvc.ServerName(),
8376
ClusterDiscoveryType: &xds_cluster.Cluster_Type{
8477
Type: xds_cluster.Cluster_STRICT_DNS,
8578
},
@@ -142,11 +135,10 @@ func getLocalServiceCluster(config trafficpolicy.MeshClusterConfig) *xds_cluster
142135

143136
return &xds_cluster.Cluster{
144137
// The name must match the domain being cURLed in the demo
145-
Name: config.Name,
146-
AltStatName: config.Name,
147-
ConnectTimeout: ptypes.DurationProto(clusterConnectTimeout),
148-
LbPolicy: xds_cluster.Cluster_ROUND_ROBIN,
149-
RespectDnsTtl: true,
138+
Name: config.Name,
139+
AltStatName: config.Name,
140+
LbPolicy: xds_cluster.Cluster_ROUND_ROBIN,
141+
RespectDnsTtl: true,
150142
ClusterDiscoveryType: &xds_cluster.Cluster_Type{
151143
Type: xds_cluster.Cluster_STRICT_DNS,
152144
},
@@ -180,9 +172,8 @@ func getLocalServiceCluster(config trafficpolicy.MeshClusterConfig) *xds_cluster
180172
// getPrometheusCluster returns an Envoy Cluster responsible for scraping metrics by Prometheus
181173
func getPrometheusCluster() *xds_cluster.Cluster {
182174
return &xds_cluster.Cluster{
183-
Name: constants.EnvoyMetricsCluster,
184-
AltStatName: constants.EnvoyMetricsCluster,
185-
ConnectTimeout: ptypes.DurationProto(clusterConnectTimeout),
175+
Name: constants.EnvoyMetricsCluster,
176+
AltStatName: constants.EnvoyMetricsCluster,
186177
ClusterDiscoveryType: &xds_cluster.Cluster_Type{
187178
Type: xds_cluster.Cluster_STATIC,
188179
},
@@ -259,9 +250,8 @@ func getDNSResolvableEgressCluster(config *trafficpolicy.EgressClusterConfig) (*
259250
}
260251

261252
return &xds_cluster.Cluster{
262-
Name: config.Name,
263-
AltStatName: formatAltStatNameForPrometheus(config.Name),
264-
ConnectTimeout: ptypes.DurationProto(clusterConnectTimeout),
253+
Name: config.Name,
254+
AltStatName: formatAltStatNameForPrometheus(config.Name),
265255
ClusterDiscoveryType: &xds_cluster.Cluster_Type{
266256
Type: xds_cluster.Cluster_STRICT_DNS,
267257
},
@@ -295,8 +285,7 @@ func getOriginalDestinationEgressCluster(name string) (*xds_cluster.Cluster, err
295285
}
296286

297287
return &xds_cluster.Cluster{
298-
Name: name,
299-
ConnectTimeout: ptypes.DurationProto(clusterConnectTimeout),
288+
Name: name,
300289
ClusterDiscoveryType: &xds_cluster.Cluster_Type{
301290
Type: xds_cluster.Cluster_ORIGINAL_DST,
302291
},

pkg/envoy/cds/cluster_test.go

Lines changed: 4 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,11 @@ package cds
22

33
import (
44
"testing"
5-
"time"
65

76
xds_cluster "github.com/envoyproxy/go-control-plane/envoy/config/cluster/v3"
87
xds_core "github.com/envoyproxy/go-control-plane/envoy/config/core/v3"
98
xds_endpoint "github.com/envoyproxy/go-control-plane/envoy/config/endpoint/v3"
109
"github.com/golang/mock/gomock"
11-
"github.com/golang/protobuf/ptypes"
1210
"github.com/golang/protobuf/ptypes/wrappers"
1311
tassert "github.com/stretchr/testify/assert"
1412

@@ -188,7 +186,6 @@ func TestGetLocalServiceCluster(t *testing.T) {
188186
} else {
189187
assert.Equal(tc.clusterConfig.Name, cluster.Name)
190188
assert.Equal(tc.clusterConfig.Name, cluster.AltStatName)
191-
assert.Equal(ptypes.DurationProto(clusterConnectTimeout), cluster.ConnectTimeout)
192189
assert.Equal(xds_cluster.Cluster_ROUND_ROBIN, cluster.LbPolicy)
193190
assert.Equal(&xds_cluster.Cluster_Type{Type: xds_cluster.Cluster_STRICT_DNS}, cluster.ClusterDiscoveryType)
194191
assert.Equal(true, cluster.RespectDnsTtl)
@@ -209,7 +206,6 @@ func TestGetPrometheusCluster(t *testing.T) {
209206
AltStatName: constants.EnvoyMetricsCluster,
210207
ClusterDiscoveryType: &xds_cluster.Cluster_Type{Type: xds_cluster.Cluster_STATIC},
211208
EdsClusterConfig: nil,
212-
ConnectTimeout: ptypes.DurationProto(1 * time.Second),
213209
LoadAssignment: &xds_endpoint.ClusterLoadAssignment{
214210
ClusterName: constants.EnvoyMetricsCluster,
215211
Endpoints: []*xds_endpoint.LocalityLbEndpoints{
@@ -261,8 +257,7 @@ func TestGetOriginalDestinationEgressCluster(t *testing.T) {
261257
name: "foo cluster",
262258
clusterName: "foo",
263259
expected: &xds_cluster.Cluster{
264-
Name: "foo",
265-
ConnectTimeout: ptypes.DurationProto(1 * time.Second),
260+
Name: "foo",
266261
ClusterDiscoveryType: &xds_cluster.Cluster_Type{
267262
Type: xds_cluster.Cluster_ORIGINAL_DST,
268263
},
@@ -274,8 +269,7 @@ func TestGetOriginalDestinationEgressCluster(t *testing.T) {
274269
name: "bar cluster",
275270
clusterName: "bar",
276271
expected: &xds_cluster.Cluster{
277-
Name: "bar",
278-
ConnectTimeout: ptypes.DurationProto(1 * time.Second),
272+
Name: "bar",
279273
ClusterDiscoveryType: &xds_cluster.Cluster_Type{
280274
Type: xds_cluster.Cluster_ORIGINAL_DST,
281275
},
@@ -371,9 +365,8 @@ func TestGetDNSResolvableEgressCluster(t *testing.T) {
371365
Port: 80,
372366
},
373367
expectedCluster: &xds_cluster.Cluster{
374-
Name: "foo.com:80",
375-
AltStatName: "foo_com_80",
376-
ConnectTimeout: ptypes.DurationProto(clusterConnectTimeout),
368+
Name: "foo.com:80",
369+
AltStatName: "foo_com_80",
377370
ClusterDiscoveryType: &xds_cluster.Cluster_Type{
378371
Type: xds_cluster.Cluster_STRICT_DNS,
379372
},

pkg/envoy/cds/response_test.go

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@ import (
44
"context"
55
"fmt"
66
"testing"
7-
"time"
87

98
xds_cluster "github.com/envoyproxy/go-control-plane/envoy/config/cluster/v3"
109
xds_core "github.com/envoyproxy/go-control-plane/envoy/config/core/v3"
@@ -145,7 +144,6 @@ func TestNewResponse(t *testing.T) {
145144
AltStatName: "default/bookbuyer|8080|local",
146145
ClusterDiscoveryType: &xds_cluster.Cluster_Type{Type: xds_cluster.Cluster_STRICT_DNS},
147146
EdsClusterConfig: nil,
148-
ConnectTimeout: ptypes.DurationProto(1 * time.Second),
149147
RespectDnsTtl: true,
150148
DnsLookupFamily: xds_cluster.Cluster_V4_ONLY,
151149
TypedExtensionProtocolOptions: HTTP2ProtocolOptions,
@@ -199,7 +197,6 @@ func TestNewResponse(t *testing.T) {
199197
},
200198
ServiceName: "",
201199
},
202-
ConnectTimeout: ptypes.DurationProto(clusterConnectTimeout),
203200
TransportSocket: &xds_core.TransportSocket{
204201
Name: wellknown.TransportSocketTls,
205202
ConfigType: &xds_core.TransportSocket_TypedConfig{
@@ -228,7 +225,6 @@ func TestNewResponse(t *testing.T) {
228225
},
229226
ServiceName: "",
230227
},
231-
ConnectTimeout: ptypes.DurationProto(clusterConnectTimeout),
232228
TransportSocket: &xds_core.TransportSocket{
233229
Name: wellknown.TransportSocketTls,
234230
ConfigType: &xds_core.TransportSocket_TypedConfig{
@@ -308,7 +304,6 @@ func TestNewResponse(t *testing.T) {
308304
AltStatName: constants.EnvoyMetricsCluster,
309305
ClusterDiscoveryType: &xds_cluster.Cluster_Type{Type: xds_cluster.Cluster_STATIC},
310306
EdsClusterConfig: nil,
311-
ConnectTimeout: ptypes.DurationProto(1 * time.Second),
312307
LoadAssignment: &xds_endpoint.ClusterLoadAssignment{
313308
ClusterName: constants.EnvoyMetricsCluster,
314309
Endpoints: []*xds_endpoint.LocalityLbEndpoints{

pkg/envoy/cds/tracing.go

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@ package cds
33
import (
44
xds_cluster "github.com/envoyproxy/go-control-plane/envoy/config/cluster/v3"
55
xds_endpoint "github.com/envoyproxy/go-control-plane/envoy/config/endpoint/v3"
6-
"github.com/golang/protobuf/ptypes"
76

87
"github.com/openservicemesh/osm/pkg/configurator"
98
"github.com/openservicemesh/osm/pkg/constants"
@@ -12,9 +11,8 @@ import (
1211

1312
func getTracingCluster(cfg configurator.Configurator) *xds_cluster.Cluster {
1413
return &xds_cluster.Cluster{
15-
Name: constants.EnvoyTracingCluster,
16-
AltStatName: constants.EnvoyTracingCluster,
17-
ConnectTimeout: ptypes.DurationProto(clusterConnectTimeout),
14+
Name: constants.EnvoyTracingCluster,
15+
AltStatName: constants.EnvoyTracingCluster,
1816
ClusterDiscoveryType: &xds_cluster.Cluster_Type{
1917
Type: xds_cluster.Cluster_LOGICAL_DNS,
2018
},

pkg/injector/envoy_config_health_probes.go

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@ import (
55

66
"github.com/envoyproxy/go-control-plane/pkg/wellknown"
77
"github.com/golang/protobuf/ptypes"
8-
"google.golang.org/protobuf/types/known/durationpb"
98
"google.golang.org/protobuf/types/known/structpb"
109

1110
"github.com/openservicemesh/osm/pkg/constants"
@@ -56,8 +55,7 @@ func getStartupCluster(originalProbe *healthProbe) *xds_cluster.Cluster {
5655

5756
func getProbeCluster(clusterName string, port int32) *xds_cluster.Cluster {
5857
return &xds_cluster.Cluster{
59-
Name: clusterName,
60-
ConnectTimeout: durationpb.New(time.Second),
58+
Name: clusterName,
6159
ClusterDiscoveryType: &xds_cluster.Cluster_Type{
6260
Type: xds_cluster.Cluster_STATIC,
6361
},
@@ -128,7 +126,7 @@ func getProbeListener(listenerName, clusterName, newPath string, port int32, ori
128126
RouteConfig: &xds_route.RouteConfiguration{
129127
Name: "local_route",
130128
VirtualHosts: []*xds_route.VirtualHost{
131-
getVirtualHost(newPath, clusterName, originalProbe.path),
129+
getVirtualHost(newPath, clusterName, originalProbe.path, originalProbe.timeout),
132130
},
133131
},
134132
},
@@ -204,7 +202,13 @@ func getProbeListener(listenerName, clusterName, newPath string, port int32, ori
204202
}, nil
205203
}
206204

207-
func getVirtualHost(newPath, clusterName, originalProbePath string) *xds_route.VirtualHost {
205+
func getVirtualHost(newPath, clusterName, originalProbePath string, routeTimeout time.Duration) *xds_route.VirtualHost {
206+
if routeTimeout < 1*time.Second {
207+
// This should never happen in practice because the minimum value in Kubernetes
208+
// is set to 1. However it is easy to check and setting the timeout to 0 will lead
209+
// to leaks.
210+
routeTimeout = 1 * time.Second
211+
}
208212
return &xds_route.VirtualHost{
209213
Name: "local_service",
210214
Domains: []string{
@@ -223,6 +227,7 @@ func getVirtualHost(newPath, clusterName, originalProbePath string) *xds_route.V
223227
Cluster: clusterName,
224228
},
225229
PrefixRewrite: originalProbePath,
230+
Timeout: ptypes.DurationProto(routeTimeout),
226231
},
227232
},
228233
},

pkg/injector/envoy_config_health_probes_test.go

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package injector
22

33
import (
44
"testing"
5+
"time"
56

67
xds_cluster "github.com/envoyproxy/go-control-plane/envoy/config/cluster/v3"
78
xds_listener "github.com/envoyproxy/go-control-plane/envoy/config/listener/v3"
@@ -14,17 +15,21 @@ import (
1415

1516
var _ = ginkgo.Describe("Test functions creating Envoy config and rewriting the Pod's health probes to pass through Envoy", func() {
1617

17-
liveness := &healthProbe{path: "/liveness", port: 81, isHTTP: true}
18-
livenessNonHTTP := &healthProbe{port: 81, isHTTP: false}
19-
readiness := &healthProbe{path: "/readiness", port: 82, isHTTP: true}
20-
startup := &healthProbe{path: "/startup", port: 83, isHTTP: true}
18+
timeout := 42 * time.Second
19+
liveness := &healthProbe{path: "/liveness", port: 81, isHTTP: true, timeout: timeout}
20+
livenessNonHTTP := &healthProbe{port: 81, isHTTP: false, timeout: timeout}
21+
readiness := &healthProbe{path: "/readiness", port: 82, isHTTP: true, timeout: timeout}
22+
startup := &healthProbe{path: "/startup", port: 83, isHTTP: true, timeout: timeout}
2123

2224
// Listed below are the functions we are going to test.
2325
// The key in the map is the name of the function -- must match what's in the value of the map.
2426
// The key (function name) is used to locate and load the YAML file with the expected return for this function.
2527
clusterFunctionsToTest := map[string]func() protoreflect.ProtoMessage{
2628
"getVirtualHosts": func() protoreflect.ProtoMessage {
27-
return getVirtualHost("/some/path", "-cluster-name-", "/original/probe/path")
29+
return getVirtualHost("/some/path", "-cluster-name-", "/original/probe/path", timeout)
30+
},
31+
"getVirtualHostsDefault": func() protoreflect.ProtoMessage {
32+
return getVirtualHost("/some/path", "-cluster-name-", "/original/probe/path", 0*time.Second)
2833
},
2934
"getProbeCluster": func() protoreflect.ProtoMessage { return getProbeCluster("cluster-name", 12341234) },
3035
"getLivenessCluster": func() protoreflect.ProtoMessage { return getLivenessCluster(liveness) },

pkg/injector/health_probes.go

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package injector
22

33
import (
44
"errors"
5+
"time"
56

67
corev1 "k8s.io/api/core/v1"
78
"k8s.io/apimachinery/pkg/util/intstr"
@@ -20,8 +21,9 @@ const (
2021
var errNoMatchingPort = errors.New("no matching port")
2122

2223
type healthProbe struct {
23-
path string
24-
port int32
24+
path string
25+
port int32
26+
timeout time.Duration
2527

2628
// isHTTP corresponds to an httpGet probe with a scheme of HTTP or undefined.
2729
// This helps inform what kind of Envoy config to add to the pod.
@@ -89,6 +91,7 @@ func rewriteProbe(probe *corev1.Probe, probeType, path string, port int32, conta
8991
log.Error().Err(err).Msgf("Error finding a matching port for %+v on container %+v", *definedPort, containerPorts)
9092
}
9193
*definedPort = intstr.IntOrString{Type: intstr.Int, IntVal: port}
94+
originalProbe.timeout = time.Duration(probe.TimeoutSeconds) * time.Second
9295

9396
log.Debug().Msgf(
9497
"Rewriting %s probe (:%d%s) to :%d%s",

0 commit comments

Comments
 (0)