Skip to content

Commit 67073a2

Browse files
committed
NSM datapath monitoring on TAPA NSC
Introduce optional NSM datapath monitoring/healing (i.e. a liveness check) between the TAPA and the proxy. It has to be enabled via the new environment variables introduced in the proxy.
1 parent 8f6cf60 commit 67073a2

File tree

5 files changed

+42
-18
lines changed

5 files changed

+42
-18
lines changed

cmd/tapa/config.go

+17-14
Original file line numberDiff line numberDiff line change
@@ -23,20 +23,23 @@ import (
2323

2424
// Config for the TAPA
2525
type Config struct {
26-
Name string `default:"nsc" desc:"Name of the target"`
27-
Node string `default:"" desc:"Node name the target is running on" split_words:"true"`
28-
Namespace string `default:"default" desc:"Namespace the trenches to connect to are running on" split_words:"true"`
29-
Socket string `default:"/ambassador.sock" desc:"Path of the socket file of the TAPA" split_words:"true"`
30-
NSMSocket url.URL `default:"unix:///var/lib/networkservicemesh/nsm.io.sock" desc:"Path of the socket file of NSM" envconfig:"nsm_socket"`
31-
NSPServiceName string `default:"nsp-service" desc:"Domain name of the NSP Service" envconfig:"nsp_service_name"`
32-
NSPServicePort int `default:"7778" desc:"port of the NSP Service" envconfig:"nsp_service_port"`
33-
Timeout time.Duration `default:"15s" desc:"timeout of NSM request/close, NSP register/unregister..." split_words:"true"`
34-
DialTimeout time.Duration `default:"5s" desc:"timeout to dial NSMgr" split_words:"true"`
35-
MaxTokenLifetime time.Duration `default:"24h" desc:"maximum lifetime of tokens" split_words:"true"`
36-
LogLevel string `default:"DEBUG" desc:"Log level" split_words:"true"`
37-
NSPEntryTimeout time.Duration `default:"30s" desc:"Timeout of the entries" envconfig:"nsp_entry_timeout"`
38-
GRPCMaxBackoff time.Duration `default:"5s" desc:"Upper bound on gRPC connection backoff delay" envconfig:"grpc_max_backoff"`
39-
GRPCProbeRPCTimeout time.Duration `default:"1s" desc:"RPC timeout of internal gRPC health probe" envconfig:"grpc_probe_rpc_timeout"`
26+
Name string `default:"nsc" desc:"Name of the target"`
27+
Node string `default:"" desc:"Node name the target is running on" split_words:"true"`
28+
Namespace string `default:"default" desc:"Namespace the trenches to connect to are running on" split_words:"true"`
29+
Socket string `default:"/ambassador.sock" desc:"Path of the socket file of the TAPA" split_words:"true"`
30+
NSMSocket url.URL `default:"unix:///var/lib/networkservicemesh/nsm.io.sock" desc:"Path of the socket file of NSM" envconfig:"nsm_socket"`
31+
NSPServiceName string `default:"nsp-service" desc:"Domain name of the NSP Service" envconfig:"nsp_service_name"`
32+
NSPServicePort int `default:"7778" desc:"port of the NSP Service" envconfig:"nsp_service_port"`
33+
Timeout time.Duration `default:"15s" desc:"timeout of NSM request/close, NSP register/unregister..." split_words:"true"`
34+
DialTimeout time.Duration `default:"5s" desc:"timeout to dial NSMgr" split_words:"true"`
35+
MaxTokenLifetime time.Duration `default:"24h" desc:"maximum lifetime of tokens" split_words:"true"`
36+
LogLevel string `default:"DEBUG" desc:"Log level" split_words:"true"`
37+
NSPEntryTimeout time.Duration `default:"30s" desc:"Timeout of the entries" envconfig:"nsp_entry_timeout"`
38+
GRPCMaxBackoff time.Duration `default:"5s" desc:"Upper bound on gRPC connection backoff delay" envconfig:"grpc_max_backoff"`
39+
GRPCProbeRPCTimeout time.Duration `default:"1s" desc:"RPC timeout of internal gRPC health probe" envconfig:"grpc_probe_rpc_timeout"`
40+
LivenessCheckInterval time.Duration `default:"2s" desc:"Dataplane liveness check interval" split_words:"true"`
41+
LivenessCheckTimeout time.Duration `default:"1s" desc:"Dataplane liveness check timeout" split_words:"true"`
42+
LivenessCheckEnabled bool `default:"false" desc:"Dataplane liveness check enabled/disabled" split_words:"true"`
4043
}
4144

4245
// IsValid checks if the configuration is valid

cmd/tapa/main.go

+11-1
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ import (
4949
linuxKernel "github.com/nordix/meridio/pkg/kernel"
5050
"github.com/nordix/meridio/pkg/log"
5151
"github.com/nordix/meridio/pkg/nsm"
52+
kernelheal "github.com/nordix/meridio/pkg/nsm/heal"
5253
"github.com/nordix/meridio/pkg/nsm/interfacename"
5354
"github.com/sirupsen/logrus"
5455
"google.golang.org/grpc"
@@ -155,10 +156,19 @@ func main() {
155156
sendfd.NewClient(),
156157
}
157158

159+
healOptions := []heal.Option{}
160+
if config.LivenessCheckEnabled {
161+
healOptions = []heal.Option{
162+
heal.WithLivenessCheckInterval(config.LivenessCheckInterval),
163+
heal.WithLivenessCheckTimeout(config.LivenessCheckTimeout),
164+
heal.WithLivenessCheck(kernelheal.KernelLivenessCheck),
165+
}
166+
}
167+
158168
networkServiceClient := client.NewClient(ctx,
159169
client.WithClientURL(&nsmAPIClient.Config.ConnectTo),
160170
client.WithName(config.Name),
161-
client.WithHealClient(heal.NewClient(ctx)),
171+
client.WithHealClient(heal.NewClient(ctx, healOptions...)),
162172
client.WithAdditionalFunctionality(additionalFunctionality...),
163173
client.WithDialTimeout(nsmAPIClient.Config.DialTimeout),
164174
client.WithDialOptions(nsmAPIClient.GRPCDialOption...),

docs/components/tapa.md

+3
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,9 @@ MERIDIO_LOG_LEVEL | string | Log level | DEBUG
5959
MERIDIO_NSP_ENTRY_TIMEOUT | time.Duration | Timeout of the entries registered in NSP | 30s
6060
MERIDIO_GRPC_MAX_BACKOFF | time.Duration | Upper bound on gRPC connection backoff delay | 5s
6161
MERIDIO_GRPC_PROBE_RPC_TIMEOUT | time.Duration | RPC timeout of internal gRPC health probes if any | 1s
62+
MERIDIO_LIVENESS_CHECK_INTERVAL | time.Duration | Dataplane liveness check interval | 2s
63+
MERIDIO_LIVENESS_CHECK_TIMEOUT | time.Duration | Dataplane liveness check timeout | 1s
64+
MERIDIO_LIVENESS_CHECK_ENABLED | bool | Dataplane liveness check enabled/disabled | false
6265

6366
## Command Line
6467

pkg/ambassador/tap/conduit/conduit.go

+2-1
Original file line numberDiff line numberDiff line change
@@ -384,7 +384,8 @@ func (c *Conduit) SetVIPs(ctx context.Context, vips []string) error {
384384
Labels: c.connection.GetLabels(),
385385
Payload: c.connection.GetPayload(),
386386
Context: &networkservice.ConnectionContext{
387-
IpContext: c.connection.GetContext().GetIpContext(),
387+
ExtraContext: c.connection.GetContext().GetExtraContext(),
388+
IpContext: c.connection.GetContext().GetIpContext(),
388389
},
389390
},
390391
}

pkg/proxy/proxy.go

+9-2
Original file line numberDiff line numberDiff line change
@@ -265,7 +265,10 @@ func (p *Proxy) SetIPContext(ctx context.Context, conn *networkservice.Connectio
265265
}
266266

267267
if interfaceType == networking.NSE {
268-
p.setNSEIpContext(id, conn.GetContext().GetIpContext(), srcIPAddrs, dstIpAddrs)
268+
if conn.GetContext().ExtraContext == nil {
269+
conn.GetContext().ExtraContext = map[string]string{}
270+
}
271+
p.setNSEIpContext(id, conn.GetContext().GetIpContext(), conn.GetContext().GetExtraContext(), srcIPAddrs, dstIpAddrs)
269272
} else if interfaceType == networking.NSC {
270273
ipContext := conn.GetContext().GetIpContext()
271274
oldSrcIpAddrs := ipContext.SrcIpAddrs
@@ -300,11 +303,15 @@ func (p *Proxy) SetIPContext(ctx context.Context, conn *networkservice.Connectio
300303
return nil
301304
}
302305

303-
func (p *Proxy) setNSEIpContext(id string, ipContext *networkservice.IPContext, srcIPAddrs []string, dstIpAddrs []string) {
306+
func (p *Proxy) setNSEIpContext(id string, ipContext *networkservice.IPContext, extraContext map[string]string, srcIPAddrs []string, dstIpAddrs []string) {
304307
if len(ipContext.SrcIpAddrs) == 0 && len(ipContext.DstIpAddrs) == 0 { // First request
305308
ipContext.SrcIpAddrs = srcIPAddrs
306309
ipContext.DstIpAddrs = dstIpAddrs
307310
ipContext.ExtraPrefixes = p.Bridge.GetLocalPrefixes()
311+
if p.Bridge != nil {
312+
extraContext[kernelheal.DatapathSourceIPsKey] = strings.Join(ipContext.SrcIpAddrs, kernelheal.DatapathIPsSeparator)
313+
extraContext[kernelheal.DatapathDestinationIPsKey] = strings.Join(p.Bridge.GetLocalPrefixes(), kernelheal.DatapathIPsSeparator)
314+
}
308315
p.logger.V(1).Info("Set IP Context of initial connection request",
309316
"id", id, "ipContext", ipContext, "interfaceType", "NSE")
310317
return

0 commit comments

Comments
 (0)