@@ -20,11 +20,11 @@ import (
20
20
21
21
"google.golang.org/grpc"
22
22
23
- "github.com/armon/go-metrics"
23
+ metrics "github.com/armon/go-metrics"
24
24
"github.com/hashicorp/consul/acl"
25
25
"github.com/hashicorp/consul/agent/ae"
26
26
"github.com/hashicorp/consul/agent/cache"
27
- "github.com/hashicorp/consul/agent/cache-types"
27
+ cachetype "github.com/hashicorp/consul/agent/cache-types"
28
28
"github.com/hashicorp/consul/agent/checks"
29
29
"github.com/hashicorp/consul/agent/config"
30
30
"github.com/hashicorp/consul/agent/consul"
@@ -42,8 +42,8 @@ import (
42
42
"github.com/hashicorp/consul/logger"
43
43
"github.com/hashicorp/consul/types"
44
44
"github.com/hashicorp/consul/watch"
45
- "github.com/hashicorp/go-multierror"
46
- "github.com/hashicorp/go-uuid"
45
+ multierror "github.com/hashicorp/go-multierror"
46
+ uuid "github.com/hashicorp/go-uuid"
47
47
"github.com/hashicorp/memberlist"
48
48
"github.com/hashicorp/raft"
49
49
"github.com/hashicorp/serf/serf"
@@ -1911,15 +1911,7 @@ func (a *Agent) AddService(service *structs.NodeService, chkTypes []*structs.Che
1911
1911
snap := a .snapshotCheckState ()
1912
1912
defer a .restoreCheckState (snap )
1913
1913
1914
- // Add the service
1915
- a .State .AddService (service , token )
1916
-
1917
- // Persist the service to a file
1918
- if persist && a .config .DataDir != "" {
1919
- if err := a .persistService (service ); err != nil {
1920
- return err
1921
- }
1922
- }
1914
+ var checks []* structs.HealthCheck
1923
1915
1924
1916
// Create an associated health check
1925
1917
for i , chkType := range chkTypes {
@@ -1947,14 +1939,81 @@ func (a *Agent) AddService(service *structs.NodeService, chkTypes []*structs.Che
1947
1939
if chkType .Status != "" {
1948
1940
check .Status = chkType .Status
1949
1941
}
1950
- if err := a .AddCheck (check , chkType , persist , token , source ); err != nil {
1942
+
1943
+ checks = append (checks , check )
1944
+ }
1945
+
1946
+ // cleanup, store the ids of services and checks that weren't previously
1947
+ // registered so we clean them up if something fails halfway through the
1948
+ // process.
1949
+ var cleanupServices []string
1950
+ var cleanupChecks []types.CheckID
1951
+
1952
+ if s := a .State .Service (service .ID ); s == nil {
1953
+ cleanupServices = append (cleanupServices , service .ID )
1954
+ }
1955
+
1956
+ for _ , check := range checks {
1957
+ if c := a .State .Check (check .CheckID ); c == nil {
1958
+ cleanupChecks = append (cleanupChecks , check .CheckID )
1959
+ }
1960
+ }
1961
+
1962
+ err := a .State .AddServiceWithChecks (service , checks , token )
1963
+ if err != nil {
1964
+ a .cleanupRegistration (cleanupServices , cleanupChecks )
1965
+ return err
1966
+ }
1967
+
1968
+ for i := range checks {
1969
+ if err := a .addCheck (checks [i ], chkTypes [i ], service , persist , token , source ); err != nil {
1970
+ a .cleanupRegistration (cleanupServices , cleanupChecks )
1971
+ return err
1972
+ }
1973
+
1974
+ if persist && a .config .DataDir != "" {
1975
+ if err := a .persistCheck (checks [i ], chkTypes [i ]); err != nil {
1976
+ a .cleanupRegistration (cleanupServices , cleanupChecks )
1977
+ return err
1978
+
1979
+ }
1980
+ }
1981
+ }
1982
+
1983
+ // Persist the service to a file
1984
+ if persist && a .config .DataDir != "" {
1985
+ if err := a .persistService (service ); err != nil {
1986
+ a .cleanupRegistration (cleanupServices , cleanupChecks )
1951
1987
return err
1952
1988
}
1953
1989
}
1954
1990
1955
1991
return nil
1956
1992
}
1957
1993
1994
+ // cleanupRegistration is called on registration error to ensure there are no
1995
+ // leftovers after a partial failure: each listed service and check is removed from local state and purged. Failures are logged, not returned.
1996
+ func (a * Agent ) cleanupRegistration (serviceIDs []string , checksIDs []types.CheckID ) {
1997
+ for _ , s := range serviceIDs {
1998
+ if err := a .State .RemoveService (s ); err != nil {
1999
+ a .logger .Printf ("[ERR] consul: service registration: cleanup: failed to remove service %s: %s" , s , err )
2000
+ }
2001
+ if err := a .purgeService (s ); err != nil {
2002
+ a .logger .Printf ("[ERR] consul: service registration: cleanup: failed to purge service %s file: %s" , s , err )
2003
+ }
2004
+ }
2005
+
2006
+ for _ , c := range checksIDs {
2007
+ a .cancelCheckMonitors (c )
2008
+ if err := a .State .RemoveCheck (c ); err != nil {
2009
+ a .logger .Printf ("[ERR] consul: service registration: cleanup: failed to remove check %s: %s" , c , err )
2010
+ }
2011
+ if err := a .purgeCheck (c ); err != nil {
2012
+ a .logger .Printf ("[ERR] consul: service registration: cleanup: failed to purge check %s file: %s" , c , err )
2013
+ }
2014
+ }
2015
+ }
2016
+
1958
2017
// RemoveService is used to remove a service entry.
1959
2018
// The agent will make a best effort to ensure it is deregistered
1960
2019
func (a * Agent ) RemoveService (serviceID string , persist bool ) error {
@@ -2018,6 +2077,44 @@ func (a *Agent) RemoveService(serviceID string, persist bool) error {
2018
2077
// ensure it is registered. The Check may include a CheckType which
2019
2078
// is used to automatically update the check status
2020
2079
func (a * Agent ) AddCheck (check * structs.HealthCheck , chkType * structs.CheckType , persist bool , token string , source configSource ) error {
2080
+ var service * structs.NodeService
2081
+
2082
+ if check .ServiceID != "" {
2083
+ service = a .State .Service (check .ServiceID )
2084
+ if service == nil {
2085
+ return fmt .Errorf ("ServiceID %q does not exist" , check .ServiceID )
2086
+ }
2087
+ }
2088
+
2089
+ // Snapshot the existing check, if any, so the deferred call below can re-apply its previous status and output when AddCheck returns, to avoid potential flapping.
2090
+ existing := a .State .Check (check .CheckID )
2091
+ defer func () {
2092
+ if existing != nil {
2093
+ a .State .UpdateCheck (check .CheckID , existing .Status , existing .Output )
2094
+ }
2095
+ }()
2096
+
2097
+ err := a .addCheck (check , chkType , service , persist , token , source )
2098
+ if err != nil {
2099
+ a .State .RemoveCheck (check .CheckID )
2100
+ return err
2101
+ }
2102
+
2103
+ // Add to the local state for anti-entropy
2104
+ err = a .State .AddCheck (check , token )
2105
+ if err != nil {
2106
+ return err
2107
+ }
2108
+
2109
+ // Persist the check, but only when persistence was requested and a data directory is configured.
2110
+ if persist && a .config .DataDir != "" {
2111
+ return a .persistCheck (check , chkType )
2112
+ }
2113
+
2114
+ return nil
2115
+ }
2116
+
2117
+ func (a * Agent ) addCheck (check * structs.HealthCheck , chkType * structs.CheckType , service * structs.NodeService , persist bool , token string , source configSource ) error {
2021
2118
if check .CheckID == "" {
2022
2119
return fmt .Errorf ("CheckID missing" )
2023
2120
}
@@ -2039,12 +2136,8 @@ func (a *Agent) AddCheck(check *structs.HealthCheck, chkType *structs.CheckType,
2039
2136
}
2040
2137
2041
2138
if check .ServiceID != "" {
2042
- s := a .State .Service (check .ServiceID )
2043
- if s == nil {
2044
- return fmt .Errorf ("ServiceID %q does not exist" , check .ServiceID )
2045
- }
2046
- check .ServiceName = s .Service
2047
- check .ServiceTags = s .Tags
2139
+ check .ServiceName = service .Service
2140
+ check .ServiceTags = service .Tags
2048
2141
}
2049
2142
2050
2143
a .checkLock .Lock ()
@@ -2265,18 +2358,6 @@ func (a *Agent) AddCheck(check *structs.HealthCheck, chkType *structs.CheckType,
2265
2358
}
2266
2359
}
2267
2360
2268
- // Add to the local state for anti-entropy
2269
- err := a .State .AddCheck (check , token )
2270
- if err != nil {
2271
- a .cancelCheckMonitors (check .CheckID )
2272
- return err
2273
- }
2274
-
2275
- // Persist the check
2276
- if persist && a .config .DataDir != "" {
2277
- return a .persistCheck (check , chkType )
2278
- }
2279
-
2280
2361
return nil
2281
2362
}
2282
2363
0 commit comments