Skip to content

Commit ef3b81a

Browse files
pierresouchaymkeeler
authored andcommitted
Allow to rename nodes with IDs, will fix #3974 and #4413 (#4415)
* Allow to rename nodes with IDs, will fix #3974 and #4413 This change allow to rename any well behaving recent agent with an ID to be renamed safely, ie: without taking the name of another one with case insensitive comparison. Deprecated behaviour warning ---------------------------- Due to asceding compatibility, it is still possible however to "take" the name of another name by not providing any ID. Note that when not providing any ID, it is possible to have 2 nodes having similar names with case differences, ie: myNode and mynode which might lead to DB corruption on Consul server side and lead to server not properly restarting. See #3983 and #4399 for Context about this change. Disabling registration of nodes without IDs as specified in #4414 should probably be the way to go eventually. * Removed the case-insensitive search when adding a node within the else block since it breaks the test TestAgentAntiEntropy_Services While the else case is probably legit, it will be fixed with #4414 in a later release. * Added again the test in the else to avoid duplicated names, but enforce this test only for nodes having IDs. Thus most tests without any ID will work, and allows us fixing * Added more tests regarding request with/without IDs. `TestStateStore_EnsureNode` now test registration and renaming with IDs `TestStateStore_EnsureNodeDeprecated` tests registration without IDs and tests removing an ID from a node as well as updated a node without its ID (deprecated behaviour kept for backwards compatibility) * Do not allow renaming in case of conflict, including when other node has no ID * Fixed function GetNodeID that was not working due to wrong type when searching node from its ID Thus, all tests about renaming were not working properly. Added the full test cas that allowed me to detect it. * Better error messages, more tests when nodeID is not a valid UUID in GetNodeID() * Added separate TestStateStore_GetNodeID to test GetNodeID. More complete test coverage for GetNodeID * Added new unit test `TestStateStore_ensureNoNodeWithSimilarNameTxn` Also fixed comments to be clearer after remarks from @banks * Fixed error message in unit test to match test case * Use uuid.ParseUUID to parse Node.ID as requested by @mkeeler
1 parent 9ce1076 commit ef3b81a

File tree

2 files changed

+508
-19
lines changed

2 files changed

+508
-19
lines changed

agent/consul/state/catalog.go

+65-12
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import (
88
"github.com/hashicorp/consul/api"
99
"github.com/hashicorp/consul/types"
1010
"github.com/hashicorp/go-memdb"
11+
uuid "github.com/hashicorp/go-uuid"
1112
)
1213

1314
const (
@@ -350,6 +351,25 @@ func (s *Store) EnsureNode(idx uint64, node *structs.Node) error {
350351
return nil
351352
}
352353

354+
// ensureNoNodeWithSimilarNameTxn checks that no other node has conflict in its name
355+
// If allowClashWithoutID then, getting a conflict on another node without ID will be allowed
356+
func (s *Store) ensureNoNodeWithSimilarNameTxn(tx *memdb.Txn, node *structs.Node, allowClashWithoutID bool) error {
357+
// Retrieve all of the nodes
358+
enodes, err := tx.Get("nodes", "id")
359+
if err != nil {
360+
return fmt.Errorf("Cannot lookup all nodes: %s", err)
361+
}
362+
for nodeIt := enodes.Next(); nodeIt != nil; nodeIt = enodes.Next() {
363+
enode := nodeIt.(*structs.Node)
364+
if strings.EqualFold(node.Node, enode.Node) && node.ID != enode.ID {
365+
if !(enode.ID == "" && allowClashWithoutID) {
366+
return fmt.Errorf("Node name %s is reserved by node %s with name %s", node.Node, enode.ID, enode.Node)
367+
}
368+
}
369+
}
370+
return nil
371+
}
372+
353373
// ensureNodeTxn is the inner function called to actually create a node
354374
// registration or modify an existing one in the state store. It allows
355375
// passing in a memdb transaction so it may be part of a larger txn.
@@ -358,28 +378,50 @@ func (s *Store) ensureNodeTxn(tx *memdb.Txn, idx uint64, node *structs.Node) err
358378
// name is the same.
359379
var n *structs.Node
360380
if node.ID != "" {
361-
existing, err := tx.First("nodes", "uuid", string(node.ID))
381+
existing, err := getNodeIDTxn(tx, node.ID)
362382
if err != nil {
363383
return fmt.Errorf("node lookup failed: %s", err)
364384
}
365385
if existing != nil {
366-
n = existing.(*structs.Node)
386+
n = existing
367387
if n.Node != node.Node {
368-
return fmt.Errorf("node ID %q for node %q aliases existing node %q",
369-
node.ID, node.Node, n.Node)
388+
// Lets first get all nodes and check whether name do match, we do not allow clash on nodes without ID
389+
dupNameError := s.ensureNoNodeWithSimilarNameTxn(tx, node, false)
390+
if dupNameError != nil {
391+
return fmt.Errorf("Error while renaming Node ID: %q: %s", node.ID, dupNameError)
392+
}
393+
// We are actually renaming a node, remove its reference first
394+
err := s.deleteNodeTxn(tx, idx, n.Node)
395+
if err != nil {
396+
return fmt.Errorf("Error while renaming Node ID: %q from %s to %s",
397+
node.ID, n.Node, node.Node)
398+
}
399+
}
400+
} else {
401+
// We allow to "steal" another node name that would have no ID
402+
// It basically means that we allow upgrading a node without ID and add the ID
403+
dupNameError := s.ensureNoNodeWithSimilarNameTxn(tx, node, true)
404+
if dupNameError != nil {
405+
return fmt.Errorf("Error while renaming Node ID: %q: %s", node.ID, dupNameError)
370406
}
371407
}
372408
}
409+
// TODO: else Node.ID == "" should be forbidden in future Consul releases
410+
// See https://github.com/hashicorp/consul/pull/3983 for context
373411

374412
// Check for an existing node by name to support nodes with no IDs.
375413
if n == nil {
376414
existing, err := tx.First("nodes", "id", node.Node)
377415
if err != nil {
378416
return fmt.Errorf("node name lookup failed: %s", err)
379417
}
418+
380419
if existing != nil {
381420
n = existing.(*structs.Node)
382421
}
422+
// WARNING, for compatibility reasons with tests, we do not check
423+
// for case insensitive matches, which may lead to DB corruption
424+
// See https://github.com/hashicorp/consul/pull/3983 for context
383425
}
384426

385427
// Get the indexes.
@@ -421,6 +463,23 @@ func (s *Store) GetNode(id string) (uint64, *structs.Node, error) {
421463
return idx, nil, nil
422464
}
423465

466+
func getNodeIDTxn(tx *memdb.Txn, id types.NodeID) (*structs.Node, error) {
467+
strnode := string(id)
468+
uuidValue, err := uuid.ParseUUID(strnode)
469+
if err != nil {
470+
return nil, fmt.Errorf("node lookup by ID failed, wrong UUID: %v for '%s'", err, strnode)
471+
}
472+
473+
node, err := tx.First("nodes", "uuid", uuidValue)
474+
if err != nil {
475+
return nil, fmt.Errorf("node lookup by ID failed: %s", err)
476+
}
477+
if node != nil {
478+
return node.(*structs.Node), nil
479+
}
480+
return nil, nil
481+
}
482+
424483
// GetNodeID is used to retrieve a node registration by node ID.
425484
func (s *Store) GetNodeID(id types.NodeID) (uint64, *structs.Node, error) {
426485
tx := s.db.Txn(false)
@@ -430,14 +489,8 @@ func (s *Store) GetNodeID(id types.NodeID) (uint64, *structs.Node, error) {
430489
idx := maxIndexTxn(tx, "nodes")
431490

432491
// Retrieve the node from the state store
433-
node, err := tx.First("nodes", "uuid", string(id))
434-
if err != nil {
435-
return 0, nil, fmt.Errorf("node lookup failed: %s", err)
436-
}
437-
if node != nil {
438-
return idx, node.(*structs.Node), nil
439-
}
440-
return idx, nil, nil
492+
node, err := getNodeIDTxn(tx, id)
493+
return idx, node, err
441494
}
442495

443496
// Nodes is used to return all of the known nodes.

0 commit comments

Comments
 (0)