Skip to content
This repository was archived by the owner on Jul 11, 2023. It is now read-only.

Commit 5376e77

Browse files
author
Sotiris Nanopoulos
committed
feat(injector): Set probe timeouts based on pod deployment spec
Fixes #4137 * Remove connect timeouts from all clusters * For health probe clusters we set the idle_timeout to be equal to the timeout set on the kubernetes pod. Signed-off-by: Sotiris Nanopoulos <[email protected]>
1 parent fa976a6 commit 5376e77

21 files changed

+107
-112
lines changed

pkg/envoy/cds/cluster.go

Lines changed: 10 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,6 @@ import (
2323
"github.com/openservicemesh/osm/pkg/trafficpolicy"
2424
)
2525

26-
const (
27-
// clusterConnectTimeout is the timeout duration used by Envoy to timeout connections to the cluster
28-
clusterConnectTimeout = 1 * time.Second
29-
)
30-
3126
// replacer used to configure an Envoy cluster's altStatName
3227
var replacer = strings.NewReplacer(".", "_", ":", "_")
3328

@@ -49,7 +44,6 @@ func getUpstreamServiceCluster(downstreamIdentity identity.ServiceIdentity, conf
4944

5045
remoteCluster := &xds_cluster.Cluster{
5146
Name: config.Name,
52-
ConnectTimeout: ptypes.DurationProto(clusterConnectTimeout),
5347
TypedExtensionProtocolOptions: HTTP2ProtocolOptions,
5448
TransportSocket: &xds_core.TransportSocket{
5549
Name: wellknown.TransportSocketTls,
@@ -78,8 +72,7 @@ func getMulticlusterGatewayUpstreamServiceCluster(catalog catalog.MeshCataloger,
7872
}
7973

8074
remoteCluster := &xds_cluster.Cluster{
81-
Name: upstreamSvc.ServerName(),
82-
ConnectTimeout: ptypes.DurationProto(clusterConnectTimeout),
75+
Name: upstreamSvc.ServerName(),
8376
ClusterDiscoveryType: &xds_cluster.Cluster_Type{
8477
Type: xds_cluster.Cluster_STRICT_DNS,
8578
},
@@ -142,11 +135,10 @@ func getLocalServiceCluster(config trafficpolicy.MeshClusterConfig) *xds_cluster
142135

143136
return &xds_cluster.Cluster{
144137
// The name must match the domain being cURLed in the demo
145-
Name: config.Name,
146-
AltStatName: config.Name,
147-
ConnectTimeout: ptypes.DurationProto(clusterConnectTimeout),
148-
LbPolicy: xds_cluster.Cluster_ROUND_ROBIN,
149-
RespectDnsTtl: true,
138+
Name: config.Name,
139+
AltStatName: config.Name,
140+
LbPolicy: xds_cluster.Cluster_ROUND_ROBIN,
141+
RespectDnsTtl: true,
150142
ClusterDiscoveryType: &xds_cluster.Cluster_Type{
151143
Type: xds_cluster.Cluster_STRICT_DNS,
152144
},
@@ -180,9 +172,8 @@ func getLocalServiceCluster(config trafficpolicy.MeshClusterConfig) *xds_cluster
180172
// getPrometheusCluster returns an Envoy Cluster responsible for scraping metrics by Prometheus
181173
func getPrometheusCluster() *xds_cluster.Cluster {
182174
return &xds_cluster.Cluster{
183-
Name: constants.EnvoyMetricsCluster,
184-
AltStatName: constants.EnvoyMetricsCluster,
185-
ConnectTimeout: ptypes.DurationProto(clusterConnectTimeout),
175+
Name: constants.EnvoyMetricsCluster,
176+
AltStatName: constants.EnvoyMetricsCluster,
186177
ClusterDiscoveryType: &xds_cluster.Cluster_Type{
187178
Type: xds_cluster.Cluster_STATIC,
188179
},
@@ -259,9 +250,8 @@ func getDNSResolvableEgressCluster(config *trafficpolicy.EgressClusterConfig) (*
259250
}
260251

261252
return &xds_cluster.Cluster{
262-
Name: config.Name,
263-
AltStatName: formatAltStatNameForPrometheus(config.Name),
264-
ConnectTimeout: ptypes.DurationProto(clusterConnectTimeout),
253+
Name: config.Name,
254+
AltStatName: formatAltStatNameForPrometheus(config.Name),
265255
ClusterDiscoveryType: &xds_cluster.Cluster_Type{
266256
Type: xds_cluster.Cluster_STRICT_DNS,
267257
},
@@ -295,8 +285,7 @@ func getOriginalDestinationEgressCluster(name string) (*xds_cluster.Cluster, err
295285
}
296286

297287
return &xds_cluster.Cluster{
298-
Name: name,
299-
ConnectTimeout: ptypes.DurationProto(clusterConnectTimeout),
288+
Name: name,
300289
ClusterDiscoveryType: &xds_cluster.Cluster_Type{
301290
Type: xds_cluster.Cluster_ORIGINAL_DST,
302291
},

pkg/envoy/cds/cluster_test.go

Lines changed: 4 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,11 @@ package cds
22

33
import (
44
"testing"
5-
"time"
65

76
xds_cluster "github.com/envoyproxy/go-control-plane/envoy/config/cluster/v3"
87
xds_core "github.com/envoyproxy/go-control-plane/envoy/config/core/v3"
98
xds_endpoint "github.com/envoyproxy/go-control-plane/envoy/config/endpoint/v3"
109
"github.com/golang/mock/gomock"
11-
"github.com/golang/protobuf/ptypes"
1210
"github.com/golang/protobuf/ptypes/wrappers"
1311
tassert "github.com/stretchr/testify/assert"
1412

@@ -188,7 +186,6 @@ func TestGetLocalServiceCluster(t *testing.T) {
188186
} else {
189187
assert.Equal(tc.clusterConfig.Name, cluster.Name)
190188
assert.Equal(tc.clusterConfig.Name, cluster.AltStatName)
191-
assert.Equal(ptypes.DurationProto(clusterConnectTimeout), cluster.ConnectTimeout)
192189
assert.Equal(xds_cluster.Cluster_ROUND_ROBIN, cluster.LbPolicy)
193190
assert.Equal(&xds_cluster.Cluster_Type{Type: xds_cluster.Cluster_STRICT_DNS}, cluster.ClusterDiscoveryType)
194191
assert.Equal(true, cluster.RespectDnsTtl)
@@ -209,7 +206,6 @@ func TestGetPrometheusCluster(t *testing.T) {
209206
AltStatName: constants.EnvoyMetricsCluster,
210207
ClusterDiscoveryType: &xds_cluster.Cluster_Type{Type: xds_cluster.Cluster_STATIC},
211208
EdsClusterConfig: nil,
212-
ConnectTimeout: ptypes.DurationProto(1 * time.Second),
213209
LoadAssignment: &xds_endpoint.ClusterLoadAssignment{
214210
ClusterName: constants.EnvoyMetricsCluster,
215211
Endpoints: []*xds_endpoint.LocalityLbEndpoints{
@@ -261,8 +257,7 @@ func TestGetOriginalDestinationEgressCluster(t *testing.T) {
261257
name: "foo cluster",
262258
clusterName: "foo",
263259
expected: &xds_cluster.Cluster{
264-
Name: "foo",
265-
ConnectTimeout: ptypes.DurationProto(1 * time.Second),
260+
Name: "foo",
266261
ClusterDiscoveryType: &xds_cluster.Cluster_Type{
267262
Type: xds_cluster.Cluster_ORIGINAL_DST,
268263
},
@@ -274,8 +269,7 @@ func TestGetOriginalDestinationEgressCluster(t *testing.T) {
274269
name: "bar cluster",
275270
clusterName: "bar",
276271
expected: &xds_cluster.Cluster{
277-
Name: "bar",
278-
ConnectTimeout: ptypes.DurationProto(1 * time.Second),
272+
Name: "bar",
279273
ClusterDiscoveryType: &xds_cluster.Cluster_Type{
280274
Type: xds_cluster.Cluster_ORIGINAL_DST,
281275
},
@@ -371,9 +365,8 @@ func TestGetDNSResolvableEgressCluster(t *testing.T) {
371365
Port: 80,
372366
},
373367
expectedCluster: &xds_cluster.Cluster{
374-
Name: "foo.com:80",
375-
AltStatName: "foo_com_80",
376-
ConnectTimeout: ptypes.DurationProto(clusterConnectTimeout),
368+
Name: "foo.com:80",
369+
AltStatName: "foo_com_80",
377370
ClusterDiscoveryType: &xds_cluster.Cluster_Type{
378371
Type: xds_cluster.Cluster_STRICT_DNS,
379372
},

pkg/envoy/cds/response_test.go

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@ import (
44
"context"
55
"fmt"
66
"testing"
7-
"time"
87

98
xds_cluster "github.com/envoyproxy/go-control-plane/envoy/config/cluster/v3"
109
xds_core "github.com/envoyproxy/go-control-plane/envoy/config/core/v3"
@@ -145,7 +144,6 @@ func TestNewResponse(t *testing.T) {
145144
AltStatName: "default/bookbuyer|8080|local",
146145
ClusterDiscoveryType: &xds_cluster.Cluster_Type{Type: xds_cluster.Cluster_STRICT_DNS},
147146
EdsClusterConfig: nil,
148-
ConnectTimeout: ptypes.DurationProto(1 * time.Second),
149147
RespectDnsTtl: true,
150148
DnsLookupFamily: xds_cluster.Cluster_V4_ONLY,
151149
TypedExtensionProtocolOptions: HTTP2ProtocolOptions,
@@ -199,7 +197,6 @@ func TestNewResponse(t *testing.T) {
199197
},
200198
ServiceName: "",
201199
},
202-
ConnectTimeout: ptypes.DurationProto(clusterConnectTimeout),
203200
TransportSocket: &xds_core.TransportSocket{
204201
Name: wellknown.TransportSocketTls,
205202
ConfigType: &xds_core.TransportSocket_TypedConfig{
@@ -228,7 +225,6 @@ func TestNewResponse(t *testing.T) {
228225
},
229226
ServiceName: "",
230227
},
231-
ConnectTimeout: ptypes.DurationProto(clusterConnectTimeout),
232228
TransportSocket: &xds_core.TransportSocket{
233229
Name: wellknown.TransportSocketTls,
234230
ConfigType: &xds_core.TransportSocket_TypedConfig{
@@ -308,7 +304,6 @@ func TestNewResponse(t *testing.T) {
308304
AltStatName: constants.EnvoyMetricsCluster,
309305
ClusterDiscoveryType: &xds_cluster.Cluster_Type{Type: xds_cluster.Cluster_STATIC},
310306
EdsClusterConfig: nil,
311-
ConnectTimeout: ptypes.DurationProto(1 * time.Second),
312307
LoadAssignment: &xds_endpoint.ClusterLoadAssignment{
313308
ClusterName: constants.EnvoyMetricsCluster,
314309
Endpoints: []*xds_endpoint.LocalityLbEndpoints{

pkg/envoy/cds/tracing.go

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@ package cds
33
import (
44
xds_cluster "github.com/envoyproxy/go-control-plane/envoy/config/cluster/v3"
55
xds_endpoint "github.com/envoyproxy/go-control-plane/envoy/config/endpoint/v3"
6-
"github.com/golang/protobuf/ptypes"
76

87
"github.com/openservicemesh/osm/pkg/configurator"
98
"github.com/openservicemesh/osm/pkg/constants"
@@ -12,9 +11,8 @@ import (
1211

1312
func getTracingCluster(cfg configurator.Configurator) *xds_cluster.Cluster {
1413
return &xds_cluster.Cluster{
15-
Name: constants.EnvoyTracingCluster,
16-
AltStatName: constants.EnvoyTracingCluster,
17-
ConnectTimeout: ptypes.DurationProto(clusterConnectTimeout),
14+
Name: constants.EnvoyTracingCluster,
15+
AltStatName: constants.EnvoyTracingCluster,
1816
ClusterDiscoveryType: &xds_cluster.Cluster_Type{
1917
Type: xds_cluster.Cluster_LOGICAL_DNS,
2018
},

pkg/injector/envoy_config_health_probes.go

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@ import (
55

66
"github.com/envoyproxy/go-control-plane/pkg/wellknown"
77
"github.com/golang/protobuf/ptypes"
8-
"google.golang.org/protobuf/types/known/durationpb"
98
"google.golang.org/protobuf/types/known/structpb"
109

1110
"github.com/openservicemesh/osm/pkg/constants"
@@ -56,8 +55,7 @@ func getStartupCluster(originalProbe *healthProbe) *xds_cluster.Cluster {
5655

5756
func getProbeCluster(clusterName string, port int32) *xds_cluster.Cluster {
5857
return &xds_cluster.Cluster{
59-
Name: clusterName,
60-
ConnectTimeout: durationpb.New(time.Second),
58+
Name: clusterName,
6159
ClusterDiscoveryType: &xds_cluster.Cluster_Type{
6260
Type: xds_cluster.Cluster_STATIC,
6361
},
@@ -128,7 +126,7 @@ func getProbeListener(listenerName, clusterName, newPath string, port int32, ori
128126
RouteConfig: &xds_route.RouteConfiguration{
129127
Name: "local_route",
130128
VirtualHosts: []*xds_route.VirtualHost{
131-
getVirtualHost(newPath, clusterName, originalProbe.path),
129+
getVirtualHost(newPath, clusterName, originalProbe.path, originalProbe.timeout),
132130
},
133131
},
134132
},
@@ -204,7 +202,13 @@ func getProbeListener(listenerName, clusterName, newPath string, port int32, ori
204202
}, nil
205203
}
206204

207-
func getVirtualHost(newPath, clusterName, originalProbePath string) *xds_route.VirtualHost {
205+
func getVirtualHost(newPath, clusterName, originalProbePath string, routeTimeout time.Duration) *xds_route.VirtualHost {
206+
if routeTimeout < 1*time.Second {
207+
// This should never happen in practice because the minimum value in Kubernetes
208+
// is set to 1. However it is easy to check and setting the timeout to 0 will lead
209+
// to leaks.
210+
routeTimeout = 1 * time.Second
211+
}
208212
return &xds_route.VirtualHost{
209213
Name: "local_service",
210214
Domains: []string{
@@ -223,6 +227,7 @@ func getVirtualHost(newPath, clusterName, originalProbePath string) *xds_route.V
223227
Cluster: clusterName,
224228
},
225229
PrefixRewrite: originalProbePath,
230+
Timeout: ptypes.DurationProto(routeTimeout),
226231
},
227232
},
228233
},

pkg/injector/envoy_config_health_probes_test.go

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package injector
22

33
import (
44
"testing"
5+
"time"
56

67
xds_cluster "github.com/envoyproxy/go-control-plane/envoy/config/cluster/v3"
78
xds_listener "github.com/envoyproxy/go-control-plane/envoy/config/listener/v3"
@@ -14,17 +15,21 @@ import (
1415

1516
var _ = ginkgo.Describe("Test functions creating Envoy config and rewriting the Pod's health probes to pass through Envoy", func() {
1617

17-
liveness := &healthProbe{path: "/liveness", port: 81, isHTTP: true}
18-
livenessNonHTTP := &healthProbe{port: 81, isHTTP: false}
19-
readiness := &healthProbe{path: "/readiness", port: 82, isHTTP: true}
20-
startup := &healthProbe{path: "/startup", port: 83, isHTTP: true}
18+
timeout := 42 * time.Second
19+
liveness := &healthProbe{path: "/liveness", port: 81, isHTTP: true, timeout: timeout}
20+
livenessNonHTTP := &healthProbe{port: 81, isHTTP: false, timeout: timeout}
21+
readiness := &healthProbe{path: "/readiness", port: 82, isHTTP: true, timeout: timeout}
22+
startup := &healthProbe{path: "/startup", port: 83, isHTTP: true, timeout: timeout}
2123

2224
// Listed below are the functions we are going to test.
2325
// The key in the map is the name of the function -- must match what's in the value of the map.
2426
// The key (function name) is used to locate and load the YAML file with the expected return for this function.
2527
clusterFunctionsToTest := map[string]func() protoreflect.ProtoMessage{
2628
"getVirtualHosts": func() protoreflect.ProtoMessage {
27-
return getVirtualHost("/some/path", "-cluster-name-", "/original/probe/path")
29+
return getVirtualHost("/some/path", "-cluster-name-", "/original/probe/path", timeout)
30+
},
31+
"getVirtualHostsDefault": func() protoreflect.ProtoMessage {
32+
return getVirtualHost("/some/path", "-cluster-name-", "/original/probe/path", 0*time.Second)
2833
},
2934
"getProbeCluster": func() protoreflect.ProtoMessage { return getProbeCluster("cluster-name", 12341234) },
3035
"getLivenessCluster": func() protoreflect.ProtoMessage { return getLivenessCluster(liveness) },

pkg/injector/health_probes.go

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package injector
22

33
import (
44
"errors"
5+
"time"
56

67
corev1 "k8s.io/api/core/v1"
78
"k8s.io/apimachinery/pkg/util/intstr"
@@ -20,8 +21,9 @@ const (
2021
var errNoMatchingPort = errors.New("no matching port")
2122

2223
type healthProbe struct {
23-
path string
24-
port int32
24+
path string
25+
port int32
26+
timeout time.Duration
2527

2628
// isHTTP corresponds to an httpGet probe with a scheme of HTTP or undefined.
2729
// This helps inform what kind of Envoy config to add to the pod.
@@ -89,6 +91,7 @@ func rewriteProbe(probe *corev1.Probe, probeType, path string, port int32, conta
8991
log.Error().Err(err).Msgf("Error finding a matching port for %+v on container %+v", *definedPort, containerPorts)
9092
}
9193
*definedPort = intstr.IntOrString{Type: intstr.Int, IntVal: port}
94+
originalProbe.timeout = time.Duration(probe.TimeoutSeconds) * time.Second
9295

9396
log.Debug().Msgf(
9497
"Rewriting %s probe (:%d%s) to :%d%s",

0 commit comments

Comments
 (0)