Skip to content

Commit 5a728db

Browse files
Another batch of cleanups in otlp exporter (open-telemetry#1357)
* Move connection logic into grpcConnection object If we will need to maintain more than one connection in future, this splitting off will come in handy. Co-authored-by: Stefan Prisca <[email protected]> * Make another channel a signal channel There is another channel that serves as a one-time signal, where channel's data type does not matter. * Reorder and document connection members This is to make clear that the lock is guarding only the connection since it can be changed by multiple goroutines, and other members are either atomic or read-only. * Move stop signal into connection The stop channel was rather useless on the exporter side - the primary reason for existence of this channel is to stop a background reconnecting goroutine. Since the goroutine lives entirely within grpcConnection object, move the stop channel here. Also expose a function to unify the stop channel with the context cancellation, so exporter can use it without knowing anything about stop channels. Also make export functions a bit more consistent. * Do not run reconnection routine when being stopped too It's possible that both disconnected channel and stop channel will be triggered around the same time, so the goroutine is as likely to start reconnecting as to return from the goroutine. Make sure we return if the stop channel is closed. * Nil clients on connection error Set clients to nil on connection error, so we don't try to send the data over a bad connection, but return a "no client" error immediately. * Do not call new connection handler within critical section It's rather risky to call a callback coming from outside within a critical section. Move it out. * Add context parameter to connection routines Connecting to the collector may also take its time, so it can be useful in some cases to pass a context with a deadline. Currently we just pass a background context, so this commit does not really change any behavior. The follow-up commits will make a use of it, though. * Add context parameter to NewExporter and Start It makes it possible to limit the time spent on connecting to the collector. * Stop connecting on shutdown Dialling to grpc service ignored the closing of the stop channel, but this can be easily changed. * Close connection after background is shut down That way we can make sure that there won't be a window between closing a connection and waiting for the background goroutine to return, where the new connection could be established. * Remove unnecessary nil check This member is never nil, unless the Exporter is created like &Exporter{}, which is not a thing we support anyway. * Update changelog Co-authored-by: Stefan Prisca <[email protected]>
1 parent e081978 commit 5a728db

File tree

8 files changed

+320
-249
lines changed

8 files changed

+320
-249
lines changed

CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm
1111
### Changed
1212

1313
- Move the OpenCensus example into `example` directory. (#1359)
14+
- `NewExporter` and `Start` functions in `go.opentelemetry.io/otel/exporters/otlp` now receive `context.Context` as a first parameter. (#1357)
1415

1516
## [0.14.0] - 2020-11-19
1617

example/otel-collector/main.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ func initProvider() func() {
4949
// `localhost:30080` address. Otherwise, replace `localhost` with the
5050
// address of your cluster. If you run the app inside k8s, then you can
5151
// probably connect directly to the service through dns
52-
exp, err := otlp.NewExporter(
52+
exp, err := otlp.NewExporter(ctx,
5353
otlp.WithInsecure(),
5454
otlp.WithAddress("localhost:30080"),
5555
otlp.WithGRPCDialOption(grpc.WithBlock()), // useful for testing

exporters/otlp/alignment_test.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,8 @@ import (
2626
func TestMain(m *testing.M) {
2727
fields := []ottest.FieldOffset{
2828
{
29-
Name: "Exporter.lastConnectErrPtr",
30-
Offset: unsafe.Offsetof(Exporter{}.lastConnectErrPtr),
29+
Name: "grpcConnection.lastConnectErrPtr",
30+
Offset: unsafe.Offsetof(grpcConnection{}.lastConnectErrPtr),
3131
},
3232
}
3333
if !ottest.Aligned8Byte(fields, os.Stderr) {

exporters/otlp/connection.go

+186-23
Original file line numberDiff line numberDiff line change
@@ -15,52 +15,113 @@
1515
package otlp // import "go.opentelemetry.io/otel/exporters/otlp"
1616

1717
import (
18+
"context"
19+
"fmt"
1820
"math/rand"
21+
"sync"
1922
"sync/atomic"
2023
"time"
2124
"unsafe"
25+
26+
"google.golang.org/grpc"
27+
"google.golang.org/grpc/metadata"
2228
)
2329

24-
func (e *Exporter) lastConnectError() error {
25-
errPtr := (*error)(atomic.LoadPointer(&e.lastConnectErrPtr))
30+
type grpcConnection struct {
31+
// Ensure pointer is 64-bit aligned for atomic operations on both 32 and 64 bit machines.
32+
lastConnectErrPtr unsafe.Pointer
33+
34+
// mu protects the connection as it is accessed by the
35+
// exporter goroutines and background connection goroutine
36+
mu sync.Mutex
37+
cc *grpc.ClientConn
38+
39+
// these fields are read-only after constructor is finished
40+
c config
41+
metadata metadata.MD
42+
newConnectionHandler func(cc *grpc.ClientConn) error
43+
44+
// these channels are created once
45+
disconnectedCh chan bool
46+
backgroundConnectionDoneCh chan struct{}
47+
stopCh chan struct{}
48+
49+
// this is for tests, so they can replace the closing
50+
// routine without a worry of modifying some global variable
51+
// or changing it back to original after the test is done
52+
closeBackgroundConnectionDoneCh func(ch chan struct{})
53+
}
54+
55+
func newGRPCConnection(c config, handler func(cc *grpc.ClientConn) error) *grpcConnection {
56+
conn := new(grpcConnection)
57+
conn.newConnectionHandler = handler
58+
if c.collectorAddr == "" {
59+
c.collectorAddr = fmt.Sprintf("%s:%d", DefaultCollectorHost, DefaultCollectorPort)
60+
}
61+
conn.c = c
62+
if len(conn.c.headers) > 0 {
63+
conn.metadata = metadata.New(conn.c.headers)
64+
}
65+
conn.closeBackgroundConnectionDoneCh = func(ch chan struct{}) {
66+
close(ch)
67+
}
68+
return conn
69+
}
70+
71+
func (oc *grpcConnection) startConnection(ctx context.Context) {
72+
oc.stopCh = make(chan struct{})
73+
oc.disconnectedCh = make(chan bool)
74+
oc.backgroundConnectionDoneCh = make(chan struct{})
75+
76+
if err := oc.connect(ctx); err == nil {
77+
oc.setStateConnected()
78+
} else {
79+
oc.setStateDisconnected(err)
80+
}
81+
go oc.indefiniteBackgroundConnection()
82+
}
83+
84+
func (oc *grpcConnection) lastConnectError() error {
85+
errPtr := (*error)(atomic.LoadPointer(&oc.lastConnectErrPtr))
2686
if errPtr == nil {
2787
return nil
2888
}
2989
return *errPtr
3090
}
3191

32-
func (e *Exporter) saveLastConnectError(err error) {
92+
func (oc *grpcConnection) saveLastConnectError(err error) {
3393
var errPtr *error
3494
if err != nil {
3595
errPtr = &err
3696
}
37-
atomic.StorePointer(&e.lastConnectErrPtr, unsafe.Pointer(errPtr))
97+
atomic.StorePointer(&oc.lastConnectErrPtr, unsafe.Pointer(errPtr))
3898
}
3999

40-
func (e *Exporter) setStateDisconnected(err error) {
41-
e.saveLastConnectError(err)
100+
func (oc *grpcConnection) setStateDisconnected(err error) {
101+
oc.saveLastConnectError(err)
42102
select {
43-
case e.disconnectedCh <- true:
103+
case oc.disconnectedCh <- true:
44104
default:
45105
}
106+
_ = oc.newConnectionHandler(nil)
46107
}
47108

48-
func (e *Exporter) setStateConnected() {
49-
e.saveLastConnectError(nil)
109+
func (oc *grpcConnection) setStateConnected() {
110+
oc.saveLastConnectError(nil)
50111
}
51112

52-
func (e *Exporter) connected() bool {
53-
return e.lastConnectError() == nil
113+
func (oc *grpcConnection) connected() bool {
114+
return oc.lastConnectError() == nil
54115
}
55116

56117
const defaultConnReattemptPeriod = 10 * time.Second
57118

58-
func (e *Exporter) indefiniteBackgroundConnection() {
119+
func (oc *grpcConnection) indefiniteBackgroundConnection() {
59120
defer func() {
60-
e.backgroundConnectionDoneCh <- true
121+
oc.closeBackgroundConnectionDoneCh(oc.backgroundConnectionDoneCh)
61122
}()
62123

63-
connReattemptPeriod := e.c.reconnectionPeriod
124+
connReattemptPeriod := oc.c.reconnectionPeriod
64125
if connReattemptPeriod <= 0 {
65126
connReattemptPeriod = defaultConnReattemptPeriod
66127
}
@@ -79,35 +140,137 @@ func (e *Exporter) indefiniteBackgroundConnection() {
79140
// 2. Otherwise block until we are disconnected, and
80141
// then retry connecting
81142
select {
82-
case <-e.stopCh:
143+
case <-oc.stopCh:
83144
return
84145

85-
case <-e.disconnectedCh:
146+
case <-oc.disconnectedCh:
147+
// Quickly check if we haven't stopped at the
148+
// same time.
149+
select {
150+
case <-oc.stopCh:
151+
return
152+
153+
default:
154+
}
155+
86156
// Normal scenario that we'll wait for
87157
}
88158

89-
if err := e.connect(); err == nil {
90-
e.setStateConnected()
159+
if err := oc.connect(context.Background()); err == nil {
160+
oc.setStateConnected()
91161
} else {
92-
e.setStateDisconnected(err)
162+
oc.setStateDisconnected(err)
93163
}
94164

95165
// Apply some jitter to avoid lockstep retrials of other
96166
// collector-exporters. Lockstep retrials could result in an
97167
// innocent DDOS, by clogging the machine's resources and network.
98168
jitter := time.Duration(rng.Int63n(maxJitterNanos))
99169
select {
100-
case <-e.stopCh:
170+
case <-oc.stopCh:
101171
return
102172
case <-time.After(connReattemptPeriod + jitter):
103173
}
104174
}
105175
}
106176

107-
func (e *Exporter) connect() error {
108-
cc, err := e.dialToCollector()
177+
func (oc *grpcConnection) connect(ctx context.Context) error {
178+
cc, err := oc.dialToCollector(ctx)
109179
if err != nil {
110180
return err
111181
}
112-
return e.enableConnections(cc)
182+
oc.setConnection(cc)
183+
return oc.newConnectionHandler(cc)
184+
}
185+
186+
// setConnection sets cc as the client connection and returns true if
187+
// the connection state changed.
188+
func (oc *grpcConnection) setConnection(cc *grpc.ClientConn) bool {
189+
oc.mu.Lock()
190+
defer oc.mu.Unlock()
191+
192+
// If previous clientConn is same as the current then just return.
193+
// This doesn't happen right now as this func is only called with new ClientConn.
194+
// It is more about future-proofing.
195+
if oc.cc == cc {
196+
return false
197+
}
198+
199+
// If the previous clientConn was non-nil, close it
200+
if oc.cc != nil {
201+
_ = oc.cc.Close()
202+
}
203+
oc.cc = cc
204+
return true
205+
}
206+
207+
func (oc *grpcConnection) dialToCollector(ctx context.Context) (*grpc.ClientConn, error) {
208+
addr := oc.c.collectorAddr
209+
210+
dialOpts := []grpc.DialOption{}
211+
if oc.c.grpcServiceConfig != "" {
212+
dialOpts = append(dialOpts, grpc.WithDefaultServiceConfig(oc.c.grpcServiceConfig))
213+
}
214+
if oc.c.clientCredentials != nil {
215+
dialOpts = append(dialOpts, grpc.WithTransportCredentials(oc.c.clientCredentials))
216+
} else if oc.c.canDialInsecure {
217+
dialOpts = append(dialOpts, grpc.WithInsecure())
218+
}
219+
if oc.c.compressor != "" {
220+
dialOpts = append(dialOpts, grpc.WithDefaultCallOptions(grpc.UseCompressor(oc.c.compressor)))
221+
}
222+
if len(oc.c.grpcDialOptions) != 0 {
223+
dialOpts = append(dialOpts, oc.c.grpcDialOptions...)
224+
}
225+
226+
ctx, cancel := oc.contextWithStop(ctx)
227+
defer cancel()
228+
ctx = oc.contextWithMetadata(ctx)
229+
return grpc.DialContext(ctx, addr, dialOpts...)
230+
}
231+
232+
func (oc *grpcConnection) contextWithMetadata(ctx context.Context) context.Context {
233+
if oc.metadata.Len() > 0 {
234+
return metadata.NewOutgoingContext(ctx, oc.metadata)
235+
}
236+
return ctx
237+
}
238+
239+
func (oc *grpcConnection) shutdown(ctx context.Context) error {
240+
close(oc.stopCh)
241+
// Ensure that the backgroundConnector returns
242+
select {
243+
case <-oc.backgroundConnectionDoneCh:
244+
case <-ctx.Done():
245+
return ctx.Err()
246+
}
247+
248+
close(oc.disconnectedCh)
249+
250+
oc.mu.Lock()
251+
cc := oc.cc
252+
oc.cc = nil
253+
oc.mu.Unlock()
254+
255+
if cc != nil {
256+
return cc.Close()
257+
}
258+
259+
return nil
260+
}
261+
262+
func (oc *grpcConnection) contextWithStop(ctx context.Context) (context.Context, context.CancelFunc) {
263+
// Unify the parent context Done signal with the connection's
264+
// stop channel.
265+
ctx, cancel := context.WithCancel(ctx)
266+
go func(ctx context.Context, cancel context.CancelFunc) {
267+
select {
268+
case <-ctx.Done():
269+
// Nothing to do, either cancelled or deadline
270+
// happened.
271+
case <-oc.stopCh:
272+
cancel()
273+
}
274+
}(ctx, cancel)
275+
return ctx, cancel
113276
}

exporters/otlp/example_test.go

+8-6
Original file line numberDiff line numberDiff line change
@@ -28,12 +28,13 @@ import (
2828
)
2929

3030
func Example_insecure() {
31-
exp, err := otlp.NewExporter(otlp.WithInsecure())
31+
ctx := context.Background()
32+
exp, err := otlp.NewExporter(ctx, otlp.WithInsecure())
3233
if err != nil {
3334
log.Fatalf("Failed to create the collector exporter: %v", err)
3435
}
3536
defer func() {
36-
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
37+
ctx, cancel := context.WithTimeout(ctx, time.Second)
3738
defer cancel()
3839
if err := exp.Shutdown(ctx); err != nil {
3940
otel.Handle(err)
@@ -54,7 +55,7 @@ func Example_insecure() {
5455
tracer := otel.Tracer("test-tracer")
5556

5657
// Then use the OpenTelemetry tracing library, like we normally would.
57-
ctx, span := tracer.Start(context.Background(), "CollectorExporter-Example")
58+
ctx, span := tracer.Start(ctx, "CollectorExporter-Example")
5859
defer span.End()
5960

6061
for i := 0; i < 10; i++ {
@@ -72,12 +73,13 @@ func Example_withTLS() {
7273
log.Fatalf("failed to create gRPC client TLS credentials: %v", err)
7374
}
7475

75-
exp, err := otlp.NewExporter(otlp.WithTLSCredentials(creds))
76+
ctx := context.Background()
77+
exp, err := otlp.NewExporter(ctx, otlp.WithTLSCredentials(creds))
7678
if err != nil {
7779
log.Fatalf("failed to create the collector exporter: %v", err)
7880
}
7981
defer func() {
80-
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
82+
ctx, cancel := context.WithTimeout(ctx, time.Second)
8183
defer cancel()
8284
if err := exp.Shutdown(ctx); err != nil {
8385
otel.Handle(err)
@@ -98,7 +100,7 @@ func Example_withTLS() {
98100
tracer := otel.Tracer("test-tracer")
99101

100102
// Then use the OpenTelemetry tracing library, like we normally would.
101-
ctx, span := tracer.Start(context.Background(), "Securely-Talking-To-Collector-Span")
103+
ctx, span := tracer.Start(ctx, "Securely-Talking-To-Collector-Span")
102104
defer span.End()
103105

104106
for i := 0; i < 10; i++ {

0 commit comments

Comments
 (0)