Skip to content

Commit 2411a2e

Browse files
committed
fix(networking): cleanup service/endpoint if needed
We hit a corner case in which the service/endpoint is not cleaned up. This leaves the service with the ClusterIP `None`. With that configuration, the endpoint of the share manager is incorrect, so the CSI driver cannot set up the mount point properly. We add a checking mechanism to detect when the service/endpoint was not cleaned up, and in that case we clean up the service/endpoint to ensure the correct endpoint. The cleanup function is removed from the setting controller; the cleanup is now done in the share manager controller. Signed-off-by: Vicente Cheng <[email protected]>
1 parent 19ba220 commit 2411a2e

File tree

3 files changed

+76
-40
lines changed

3 files changed

+76
-40
lines changed

controller/setting_controller.go

-40
Original file line numberDiff line numberDiff line change
@@ -359,14 +359,6 @@ func (sc *SettingController) syncDangerZoneSettingsForManagedComponents(settingN
359359
return &types.ErrorInvalidState{Reason: fmt.Sprintf("failed to apply %v setting to Longhorn components when there are attached volumes. It will be eventually applied", types.SettingNameStorageNetworkForRWXVolumeEnabled)}
360360
}
361361

362-
// Perform cleanup of the share manager Service
363-
// This is to allow the creation of the correct Service
364-
// and Endpoint when switching between cluster network
365-
// and storage network.
366-
if err := sc.cleanupShareManagerServiceAndEndpoints(); err != nil {
367-
return err
368-
}
369-
370362
return nil
371363
}
372364

@@ -943,38 +935,6 @@ func (sc *SettingController) updateKubernetesClusterAutoscalerEnabled() error {
943935
return nil
944936
}
945937

946-
func (sc *SettingController) cleanupShareManagerServiceAndEndpoints() error {
947-
var err error
948-
defer func() {
949-
if err != nil {
950-
err = errors.Wrapf(err, "failed to cleanup share manager service and endpoints for %s setting update", types.SettingNameStorageNetworkForRWXVolumeEnabled)
951-
}
952-
}()
953-
954-
shareManagers, err := sc.ds.ListShareManagers()
955-
if err != nil {
956-
return err
957-
}
958-
959-
for _, shareManager := range shareManagers {
960-
log := sc.logger.WithField("shareManager", shareManager.Name)
961-
962-
log.WithField("service", shareManager.Name).Infof("Deleting Service for %v setting update", types.SettingNameStorageNetworkForRWXVolumeEnabled)
963-
err := sc.ds.DeleteService(shareManager.Namespace, shareManager.Name)
964-
if err != nil && !apierrors.IsNotFound(err) {
965-
return err
966-
}
967-
968-
log.WithField("endpoint", shareManager.Name).Infof("Deleting Endpoint for %v setting update", types.SettingNameStorageNetworkForRWXVolumeEnabled)
969-
err = sc.ds.DeleteKubernetesEndpoint(shareManager.Namespace, shareManager.Name)
970-
if err != nil && !apierrors.IsNotFound(err) {
971-
return err
972-
}
973-
}
974-
975-
return nil
976-
}
977-
978938
// updateCNI deletes all system-managed data plane components immediately with the updated CNI annotation.
979939
func (sc *SettingController) updateCNI(funcPreupdate func() error) error {
980940
storageNetwork, err := sc.ds.GetSettingWithAutoFillingRO(types.SettingNameStorageNetwork)

controller/share_manager_controller.go

+68
Original file line numberDiff line numberDiff line change
@@ -1050,6 +1050,69 @@ func (c *ShareManagerController) getShareManagerTolerationsFromStorageClass(sc *
10501050
return tolerations
10511051
}
10521052

1053+
func (c *ShareManagerController) checkStorageNetworkApplied() (bool, error) {
1054+
targetSettings := []types.SettingName{types.SettingNameStorageNetwork, types.SettingNameStorageNetworkForRWXVolumeEnabled}
1055+
for _, item := range targetSettings {
1056+
if applied, err := c.ds.GetSettingApplied(item); err != nil || !applied {
1057+
return applied, err
1058+
}
1059+
}
1060+
return true, nil
1061+
}
1062+
1063+
func (c *ShareManagerController) cleanupServiceAndEndpoint(shareManager *longhorn.ShareManager) error {
1064+
service, err := c.ds.GetService(c.namespace, shareManager.Name)
1065+
if err != nil {
1066+
// if NotFound, means the service/endpoint is already cleaned up
1067+
if apierrors.IsNotFound(err) {
1068+
return nil
1069+
}
1070+
return errors.Wrapf(err, "failed to get service for share manager %v", shareManager.Name)
1071+
}
1072+
1073+
applied, err := c.checkStorageNetworkApplied()
1074+
if err != nil {
1075+
return errors.Wrapf(err, "failed to check if the storage network setting is applied")
1076+
}
1077+
if !applied {
1078+
c.logger.Warnf("`StorageNetwork` related settings are not applied, do nothing.")
1079+
return nil
1080+
}
1081+
1082+
storageNetwork, err := c.ds.GetSettingWithAutoFillingRO(types.SettingNameStorageNetwork)
1083+
if err != nil {
1084+
return errors.Wrapf(err, "failed to get setting value %v", types.SettingNameStorageNetwork)
1085+
}
1086+
1087+
storageNetworkForRWXVolumeEnabled, err := c.ds.GetSettingAsBool(types.SettingNameStorageNetworkForRWXVolumeEnabled)
1088+
if err != nil {
1089+
return errors.Wrapf(err, "failed to get setting value %v", types.SettingNameStorageNetworkForRWXVolumeEnabled)
1090+
}
1091+
1092+
// no need to cleanup because looks the service file is correct
1093+
if types.IsStorageNetworkForRWXVolume(storageNetwork, storageNetworkForRWXVolumeEnabled) && service.Spec.ClusterIP == core.ClusterIPNone {
1094+
return nil
1095+
}
1096+
if !types.IsStorageNetworkForRWXVolume(storageNetwork, storageNetworkForRWXVolumeEnabled) && service.Spec.ClusterIP != core.ClusterIPNone {
1097+
return nil
1098+
}
1099+
1100+
// let's cleanup
1101+
c.logger.Infof("Deleting Service for sharemanager %v", shareManager.Name)
1102+
err = c.ds.DeleteService(c.namespace, shareManager.Name)
1103+
if err != nil && !apierrors.IsNotFound(err) {
1104+
return errors.Wrapf(err, "failed to delete service for share manager %v", shareManager.Name)
1105+
}
1106+
1107+
c.logger.Infof("Deleting Endpoint for sharemanager %v", shareManager.Name)
1108+
err = c.ds.DeleteKubernetesEndpoint(c.namespace, shareManager.Name)
1109+
if err != nil && !apierrors.IsNotFound(err) {
1110+
return errors.Wrapf(err, "failed to delete Endpoint for share manager %v", shareManager.Name)
1111+
}
1112+
1113+
return nil
1114+
}
1115+
10531116
func (c *ShareManagerController) createServiceAndEndpoint(shareManager *longhorn.ShareManager) error {
10541117
// check if we need to create the service
10551118
_, err := c.ds.GetService(c.namespace, shareManager.Name)
@@ -1122,6 +1185,11 @@ func (c *ShareManagerController) createShareManagerPod(sm *longhorn.ShareManager
11221185
}
11231186
priorityClass := setting.Value
11241187

1188+
err = c.cleanupServiceAndEndpoint(sm)
1189+
if err != nil {
1190+
return nil, errors.Wrapf(err, "failed to cleanup service and endpoint for share manager %v", sm.Name)
1191+
}
1192+
11251193
err = c.createServiceAndEndpoint(sm)
11261194
if err != nil {
11271195
return nil, errors.Wrapf(err, "failed to create service and endpoint for share manager %v", sm.Name)

datastore/longhorn.go

+8
Original file line numberDiff line numberDiff line change
@@ -619,6 +619,14 @@ func (s *DataStore) GetSettingExactRO(sName types.SettingName) (*longhorn.Settin
619619
return resultRO, nil
620620
}
621621

622+
func (s *DataStore) GetSettingApplied(sName types.SettingName) (bool, error) {
623+
resultRO, err := s.getSettingRO(string(sName))
624+
if err != nil {
625+
return false, err
626+
}
627+
return resultRO.Status.Applied, nil
628+
}
629+
622630
// GetSetting will automatically fill the non-existing setting if it's a valid
623631
// setting name.
624632
// The function will not return nil for *longhorn.Setting when error is nil

0 commit comments

Comments
 (0)