Skip to content

Commit 83852f0

Browse files
authored
Handle ALPN handler error for upgrade flow and add some reporting (#54164)
* Handle ALPN handler error for upgrade flow and add some reporting * fix test and use logutils.StringerAttr * make lint happy * rename metrics * expliclity pass connSource instead of using context * fix build...
1 parent 8873828 commit 83852f0

File tree

7 files changed

+273
-14
lines changed

7 files changed

+273
-14
lines changed

lib/service/service.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5569,7 +5569,7 @@ func (process *TeleportProcess) initProxyEndpoint(conn *Connector) error {
55695569
if err != nil {
55705570
return trace.Wrap(err)
55715571
}
5572-
alpnHandlerForWeb.Set(alpnServer.MakeConnectionHandler(alpnTLSConfigForWeb))
5572+
alpnHandlerForWeb.Set(alpnServer.MakeConnectionHandler(alpnTLSConfigForWeb, alpncommon.ConnHandlerSourceWeb))
55735573

55745574
process.RegisterCriticalFunc("proxy.tls.alpn.sni.proxy", func() error {
55755575
logger.InfoContext(process.ExitContext(), "Starting TLS ALPN SNI proxy server on.", "listen_address", logutils.StringerAttr(listeners.alpn.Addr()))

lib/srv/alpnproxy/common/reporting.go

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
/*
2+
* Teleport
3+
* Copyright (C) 2025 Gravitational, Inc.
4+
*
5+
* This program is free software: you can redistribute it and/or modify
6+
* it under the terms of the GNU Affero General Public License as published by
7+
* the Free Software Foundation, either version 3 of the License, or
8+
* (at your option) any later version.
9+
*
10+
* This program is distributed in the hope that it will be useful,
11+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
12+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13+
* GNU Affero General Public License for more details.
14+
*
15+
* You should have received a copy of the GNU Affero General Public License
16+
* along with this program. If not, see <http://www.gnu.org/licenses/>.
17+
*/
18+
19+
package common
20+
21+
// ConnHandlerSource specifies the source of the connection that is passed into
22+
// the TLS routing connection handler.
23+
type ConnHandlerSource string
24+
25+
const (
26+
// ConnHandlerSourceListener indicates the connection is from the TLS
27+
// routing port listener.
28+
ConnHandlerSourceListener ConnHandlerSource = "listener"
29+
// ConnHandlerSourceWeb indicates the connection is from web handler like
30+
// connection upgrades or certain Web UI flows like DB session via Web UI.
31+
ConnHandlerSourceWeb ConnHandlerSource = "web"
32+
)

lib/srv/alpnproxy/conn.go

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ package alpnproxy
2121
import (
2222
"io"
2323
"net"
24+
"sync"
2425
"time"
2526

2627
"github.com/gravitational/teleport/lib/utils"
@@ -99,3 +100,32 @@ func (conn readOnlyConn) RemoteAddr() net.Addr { return &utils.Net
99100
func (conn readOnlyConn) SetDeadline(t time.Time) error { return nil }
100101
func (conn readOnlyConn) SetReadDeadline(t time.Time) error { return nil }
101102
func (conn readOnlyConn) SetWriteDeadline(t time.Time) error { return nil }
103+
104+
type reportingConn struct {
105+
net.Conn
106+
closeOnce func() error
107+
}
108+
109+
// newReportingConn returns a net.Conn that wraps the input conn, increments the
110+
// active connections gauge on creation, and decreases the active connections
111+
// gauge on close.
112+
func newReportingConn(conn net.Conn, clientALPN string, connSource string) net.Conn {
113+
proxyActiveConnections.WithLabelValues(clientALPN, connSource).Inc()
114+
return &reportingConn{
115+
Conn: conn,
116+
closeOnce: sync.OnceValue(func() error {
117+
proxyActiveConnections.WithLabelValues(clientALPN, connSource).Dec()
118+
// Do not wrap.
119+
return conn.Close()
120+
}),
121+
}
122+
}
123+
124+
// NetConn returns the underlying net.Conn.
125+
func (c *reportingConn) NetConn() net.Conn {
126+
return c.Conn
127+
}
128+
129+
func (c *reportingConn) Close() error {
130+
return c.closeOnce()
131+
}

lib/srv/alpnproxy/proxy.go

Lines changed: 105 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -27,23 +27,27 @@ import (
2727
"io"
2828
"log/slog"
2929
"net"
30+
"slices"
3031
"strings"
3132
"sync"
3233
"time"
3334

3435
"github.com/gravitational/trace"
3536
"github.com/jonboulle/clockwork"
37+
"github.com/prometheus/client_golang/prometheus"
3638

3739
"github.com/gravitational/teleport"
3840
"github.com/gravitational/teleport/api/constants"
3941
"github.com/gravitational/teleport/api/utils/pingconn"
4042
"github.com/gravitational/teleport/lib/auth/authclient"
4143
"github.com/gravitational/teleport/lib/authz"
4244
"github.com/gravitational/teleport/lib/defaults"
45+
"github.com/gravitational/teleport/lib/observability/metrics"
4346
"github.com/gravitational/teleport/lib/srv/alpnproxy/common"
4447
"github.com/gravitational/teleport/lib/srv/db/dbutils"
4548
"github.com/gravitational/teleport/lib/tlsca"
4649
"github.com/gravitational/teleport/lib/utils"
50+
logutils "github.com/gravitational/teleport/lib/utils/log"
4751
)
4852

4953
// ProxyConfig is the configuration for an ALPN proxy server.
@@ -298,6 +302,14 @@ func New(cfg ProxyConfig) (*Proxy, error) {
298302
return nil, trace.Wrap(err)
299303
}
300304

305+
if err := metrics.RegisterPrometheusCollectors(
306+
proxyConnectionsTotal,
307+
proxyActiveConnections,
308+
proxyConnectionErrorsTotal,
309+
); err != nil {
310+
return nil, trace.Wrap(err)
311+
}
312+
301313
return &Proxy{
302314
cfg: cfg,
303315
log: cfg.Log,
@@ -332,7 +344,7 @@ func (p *Proxy) Serve(ctx context.Context) error {
332344
// For example in ReverseTunnel handles connection asynchronously and closing conn after
333345
// service handler returned will break service logic.
334346
// https://github.com/gravitational/teleport/blob/master/lib/sshutils/server.go#L397
335-
if err := p.handleConn(ctx, clientConn, nil); err != nil {
347+
if err := p.handleConn(ctx, clientConn, nil, common.ConnHandlerSourceListener); err != nil {
336348
if cerr := clientConn.Close(); cerr != nil && !utils.IsOKNetworkError(cerr) {
337349
p.log.WarnContext(ctx, "Failed to close client connection", "error", cerr)
338350
}
@@ -372,8 +384,24 @@ type HandlerFuncWithInfo func(ctx context.Context, conn net.Conn, info Connectio
372384
// 5. For backward compatibility check RouteToDatabase identity field
373385
// was set if yes forward to the generic TLS DB handler.
374386
// 6. Forward connection to the handler obtained in step 2.
375-
func (p *Proxy) handleConn(ctx context.Context, clientConn net.Conn, defaultOverride *tls.Config) error {
376-
hello, conn, err := p.readHelloMessageWithoutTLSTermination(ctx, clientConn)
387+
func (p *Proxy) handleConn(ctx context.Context, clientConn net.Conn, defaultOverride *tls.Config, connSource common.ConnHandlerSource) (err error) {
388+
var hello *tls.ClientHelloInfo
389+
var conn net.Conn
390+
defer func() {
391+
if err != nil {
392+
proxyConnectionErrorsTotal.WithLabelValues(
393+
getRequestedALPNFromHello(hello),
394+
string(connSource),
395+
).Inc()
396+
}
397+
}()
398+
399+
// Attempt to read TLS hello. Always increment total counters even on failures.
400+
hello, conn, err = p.readHelloMessageWithoutTLSTermination(ctx, clientConn, connSource)
401+
proxyConnectionsTotal.WithLabelValues(
402+
getRequestedALPNFromHello(hello),
403+
string(connSource),
404+
).Inc()
377405
if err != nil {
378406
return trace.Wrap(err)
379407
}
@@ -503,7 +531,7 @@ func (p *Proxy) getTLSConfig(desc *HandlerDecs, defaultOverride *tls.Config) *tl
503531
// readHelloMessageWithoutTLSTermination allows reading a ClientHelloInfo message without termination of
504532
// incoming TLS connection. After calling readHelloMessageWithoutTLSTermination function a returned
505533
// net.Conn should be used for further operation.
506-
func (p *Proxy) readHelloMessageWithoutTLSTermination(ctx context.Context, conn net.Conn) (*tls.ClientHelloInfo, net.Conn, error) {
534+
func (p *Proxy) readHelloMessageWithoutTLSTermination(ctx context.Context, conn net.Conn, connSource common.ConnHandlerSource) (*tls.ClientHelloInfo, net.Conn, error) {
507535
buff := new(bytes.Buffer)
508536
var hello *tls.ClientHelloInfo
509537
tlsConn := tls.Server(readOnlyConn{reader: io.TeeReader(conn, buff)}, &tls.Config{
@@ -526,7 +554,13 @@ func (p *Proxy) readHelloMessageWithoutTLSTermination(ctx context.Context, conn
526554
if err := conn.SetReadDeadline(time.Time{}); err != nil {
527555
return nil, nil, trace.Wrap(err)
528556
}
529-
return hello, newBufferedConn(conn, buff), nil
557+
return hello,
558+
newReportingConn(
559+
newBufferedConn(conn, buff),
560+
getRequestedALPNFromHello(hello),
561+
string(connSource),
562+
),
563+
nil
530564
}
531565

532566
func (p *Proxy) handleDatabaseConnection(ctx context.Context, conn net.Conn, connInfo ConnectionInfo) error {
@@ -630,9 +664,20 @@ func (p *Proxy) Close() error {
630664

631665
// MakeConnectionHandler creates a ConnectionHandler which provides a callback
632666
// to handle incoming connections by this ALPN proxy server.
633-
func (p *Proxy) MakeConnectionHandler(defaultOverride *tls.Config) ConnectionHandler {
667+
//
668+
// Note that some registered handlers are async. The input client connection
669+
// will be automatically closed upon handler errors.
670+
func (p *Proxy) MakeConnectionHandler(defaultOverride *tls.Config, connSource common.ConnHandlerSource) ConnectionHandler {
634671
return func(ctx context.Context, conn net.Conn) error {
635-
return p.handleConn(ctx, conn, defaultOverride)
672+
if err := p.handleConn(ctx, conn, defaultOverride, connSource); err != nil {
673+
// Make sure we close the connection on error.
674+
if cerr := conn.Close(); cerr != nil && !utils.IsOKNetworkError(cerr) {
675+
p.log.WarnContext(ctx, "Failed to close client connection", "error", cerr, "remote_addr", logutils.StringerAttr(conn.RemoteAddr()))
676+
}
677+
// Still return the error for caller to report/log.
678+
return trace.Wrap(err)
679+
}
680+
return nil
636681
}
637682
}
638683

@@ -664,3 +709,56 @@ func isConnRemoteError(err error) bool {
664709
var opErr *net.OpError
665710
return errors.As(err, &opErr) && opErr.Op == "remote error"
666711
}
712+
713+
// getRequestedALPNFromHello returns the primary requested ALPN by the client.
714+
// The server may not always terminate TLS so we won't know the negotiated
715+
// protocol. Here we just return the first supported ALPN from the client hello
716+
// and assumes that will likely be the one gets selected. If no supported ALPN
717+
// found, the function returns "unknown".
718+
func getRequestedALPNFromHello(hello *tls.ClientHelloInfo) string {
719+
if hello == nil {
720+
return "unknown"
721+
}
722+
723+
for i := range hello.SupportedProtos {
724+
protocol := common.Protocol(hello.SupportedProtos[i])
725+
if strings.HasPrefix(string(protocol), string(common.ProtocolAuth)) {
726+
protocol = common.ProtocolAuth
727+
}
728+
729+
if slices.Contains(common.SupportedProtocols, protocol) {
730+
return string(protocol)
731+
}
732+
}
733+
return "unknown"
734+
}
735+
736+
var (
737+
proxyConnectionsTotal = prometheus.NewGaugeVec(
738+
prometheus.GaugeOpts{
739+
Namespace: teleport.MetricNamespace,
740+
Subsystem: "alpn_proxy",
741+
Name: "connections_total",
742+
Help: "Number of total connections handled by TLS routing proxy server.",
743+
},
744+
[]string{"alpn", "source"},
745+
)
746+
proxyActiveConnections = prometheus.NewGaugeVec(
747+
prometheus.GaugeOpts{
748+
Namespace: teleport.MetricNamespace,
749+
Subsystem: "alpn_proxy",
750+
Name: "active_connections",
751+
Help: "Number of active connections handled by TLS routing proxy server.",
752+
},
753+
[]string{"alpn", "source"},
754+
)
755+
proxyConnectionErrorsTotal = prometheus.NewGaugeVec(
756+
prometheus.GaugeOpts{
757+
Namespace: teleport.MetricNamespace,
758+
Subsystem: "alpn_proxy",
759+
Name: "connection_errors_total",
760+
Help: "Number of connection handler errors encountered by TLS routing proxy server.",
761+
},
762+
[]string{"alpn", "source"},
763+
)
764+
)

0 commit comments

Comments
 (0)