Skip to content

Commit 391903b

Browse files
committed
[PLAT-11187] Make TlsToggle task retryable
Summary: Record the actual intent (enable or disable) in the task param so that it remains unchanged. Other changes are minor tweaks. Test Plan: 1. UT added. 2. Due to PLAT-9434, rolling is not allowed. 3. Manually tested by aborting multiple times. Note: We will add itests for this. Reviewers: anijhawan, yshchetinin, anabaria, muthu, sanketh Reviewed By: anabaria Subscribers: yugaware Differential Revision: https://phorge.dev.yugabyte.com/D42936
1 parent f84e82d commit 391903b

File tree

7 files changed

+191
-117
lines changed

7 files changed

+191
-117
lines changed

managed/src/main/java/com/yugabyte/yw/commissioner/tasks/UniverseTaskBase.java

+1
Original file line numberDiff line numberDiff line change
@@ -365,6 +365,7 @@ public CustomBuilder taskTypes(Collection<? extends TaskType> taskTypes) {
365365
TaskType.VMImageUpgrade,
366366
TaskType.ThirdpartySoftwareUpgrade,
367367
TaskType.CertsRotate,
368+
TaskType.TlsToggle,
368369
TaskType.MasterFailover,
369370
TaskType.SyncMasterAddresses,
370371
TaskType.PauseUniverse,

managed/src/main/java/com/yugabyte/yw/commissioner/tasks/upgrade/TlsToggle.java

+30-21
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44

55
import com.google.common.collect.ImmutableMap;
66
import com.yugabyte.yw.commissioner.BaseTaskDependencies;
7+
import com.yugabyte.yw.commissioner.ITask.Abortable;
8+
import com.yugabyte.yw.commissioner.ITask.Retryable;
79
import com.yugabyte.yw.commissioner.TaskExecutor.SubTaskGroup;
810
import com.yugabyte.yw.commissioner.UpgradeTaskBase;
911
import com.yugabyte.yw.commissioner.UserTaskDetails.SubTaskGroupType;
@@ -14,6 +16,7 @@
1416
import com.yugabyte.yw.common.certmgmt.EncryptionInTransitUtil;
1517
import com.yugabyte.yw.common.config.UniverseConfKeys;
1618
import com.yugabyte.yw.forms.TlsToggleParams;
19+
import com.yugabyte.yw.forms.UniverseDefinitionTaskParams.UserIntent;
1720
import com.yugabyte.yw.forms.UpgradeTaskParams.UpgradeOption;
1821
import com.yugabyte.yw.forms.UpgradeTaskParams.UpgradeTaskSubType;
1922
import com.yugabyte.yw.forms.UpgradeTaskParams.UpgradeTaskType;
@@ -26,7 +29,11 @@
2629
import java.util.Map;
2730
import java.util.Set;
2831
import javax.inject.Inject;
32+
import lombok.extern.slf4j.Slf4j;
2933

34+
@Slf4j
35+
@Abortable
36+
@Retryable
3037
public class TlsToggle extends UpgradeTaskBase {
3138

3239
@Inject
@@ -71,7 +78,8 @@ protected void createPrecheckTasks(Universe universe) {
7178
if (!CertificateHelper.checkNode2NodeCertsExpiry(universe)) {
7279
addBasicPrecheckTasks();
7380
}
74-
if (taskParams().enableNodeToNodeEncrypt || taskParams().enableClientToNodeEncrypt) {
81+
if (isFirstTry()
82+
&& (taskParams().enableNodeToNodeEncrypt || taskParams().enableClientToNodeEncrypt)) {
7583
createCheckCertificateConfigTask();
7684
}
7785
}
@@ -96,16 +104,29 @@ public void run() {
96104
createUniverseSetTlsParamsTask();
97105
// Round 2 gflags upgrade
98106
createRound2GFlagUpdateTasks(nodes);
107+
},
108+
u -> {
109+
// 1: If task is to enable node-to-node encryption.
110+
// -1: If task is to disable node-to-node encryption.
111+
// 0: If there is no change in node-to-node encryption.
112+
UserIntent userIntent = u.getUniverseDetails().getPrimaryCluster().userIntent;
113+
taskParams().nodeToNodeChange =
114+
userIntent.enableNodeToNodeEncrypt != taskParams().enableNodeToNodeEncrypt
115+
? (taskParams().enableNodeToNodeEncrypt ? 1 : -1)
116+
: 0;
117+
// Persist this setting in the DB.
118+
updateTaskDetailsInDB(taskParams());
99119
});
100120
}
101121

102122
private void createRound1GFlagUpdateTasks(MastersAndTservers nodes) {
103-
if (getNodeToNodeChange() < 0) {
104-
// Skip running round1 if Node2Node certs have expired
123+
if (taskParams().nodeToNodeChange < 0) {
124+
// Skip running round1 if Node2Node certs have expired because the DB call will fail.
105125
if (CertificateHelper.checkNode2NodeCertsExpiry(getUniverse())) {
126+
log.debug("Skipping round 1 because the cert has expired");
106127
return;
107128
}
108-
// Setting allow_insecure to true can be done in non-restart way
129+
// Setting allow_insecure to true can be done in non-restart way.
109130
createNonRestartUpgradeTaskFlow(
110131
(List<NodeDetails> nodeList, Set<ServerType> processTypes) -> {
111132
createGFlagUpdateTasks(1, nodeList, getSingle(processTypes));
@@ -143,7 +164,7 @@ private void createRound1GFlagUpdateTasks(MastersAndTservers nodes) {
143164

144165
private void createRound2GFlagUpdateTasks(MastersAndTservers nodes) {
145166
// Second round upgrade not needed when there is no change in node-to-node
146-
if (getNodeToNodeChange() > 0) {
167+
if (taskParams().nodeToNodeChange > 0) {
147168
// Setting allow_insecure can be done in non-restart way
148169
createNonRestartUpgradeTaskFlow(
149170
(List<NodeDetails> nodeList, Set<ServerType> processTypes) -> {
@@ -162,7 +183,7 @@ private void createRound2GFlagUpdateTasks(MastersAndTservers nodes) {
162183
},
163184
nodes,
164185
DEFAULT_CONTEXT);
165-
} else if (getNodeToNodeChange() < 0) {
186+
} else if (taskParams().nodeToNodeChange < 0) {
166187
if (taskParams().upgradeOption == UpgradeOption.ROLLING_UPGRADE) {
167188
createRollingUpgradeTaskFlow(
168189
(nodeList, processTypes) ->
@@ -193,7 +214,7 @@ private void createRound2GFlagUpdateTasks(MastersAndTservers nodes) {
193214
}
194215

195216
protected void updateUniverseHttpsEnabledUI() {
196-
int nodeToNodeChange = getNodeToNodeChange();
217+
int nodeToNodeChange = taskParams().nodeToNodeChange;
197218
boolean isNodeUIHttpsEnabled =
198219
confGetter.getConfForScope(getUniverse(), UniverseConfKeys.nodeUIHttpsEnabled);
199220
// HTTPS_ENABLED_UI will piggyback node-to-node encryption.
@@ -290,7 +311,7 @@ private AnsibleConfigureServers getAnsibleConfigureServerTaskForToggleTls(
290311
params.rootCA = taskParams().rootCA;
291312
params.setClientRootCA(taskParams().getClientRootCA());
292313
params.rootAndClientRootCASame = taskParams().rootAndClientRootCASame;
293-
params.nodeToNodeChange = getNodeToNodeChange();
314+
params.nodeToNodeChange = taskParams().nodeToNodeChange;
294315
AnsibleConfigureServers task = createTask(AnsibleConfigureServers.class);
295316
task.initialize(params);
296317
task.setUserTaskUUID(getUserTaskUUID());
@@ -310,25 +331,13 @@ private AnsibleConfigureServers getAnsibleConfigureServerTaskForYbcToggleTls(Nod
310331
params.rootCA = taskParams().rootCA;
311332
params.setClientRootCA(taskParams().getClientRootCA());
312333
params.rootAndClientRootCASame = taskParams().rootAndClientRootCASame;
313-
params.nodeToNodeChange = getNodeToNodeChange();
334+
params.nodeToNodeChange = taskParams().nodeToNodeChange;
314335
AnsibleConfigureServers task = createTask(AnsibleConfigureServers.class);
315336
task.initialize(params);
316337
task.setUserTaskUUID(getUserTaskUUID());
317338
return task;
318339
}
319340

320-
/*
321-
* Returns:
322-
* 1: If task is to enable node-to-node encryption
323-
* -1: If task is to disable node-to-node encryption
324-
* 0: If there is no change in node-to-node encryption
325-
*/
326-
private int getNodeToNodeChange() {
327-
return getUserIntent().enableNodeToNodeEncrypt != taskParams().enableNodeToNodeEncrypt
328-
? (taskParams().enableNodeToNodeEncrypt ? 1 : -1)
329-
: 0;
330-
}
331-
332341
public void createCheckCertificateConfigTask() {
333342
MastersAndTservers nodes = getNodesToBeRestarted();
334343
Set<NodeDetails> allNodes = toOrderedSet(nodes.asPair());

managed/src/main/java/com/yugabyte/yw/common/CustomerTaskManager.java

+4
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646
import com.yugabyte.yw.forms.SoftwareUpgradeParams;
4747
import com.yugabyte.yw.forms.SystemdUpgradeParams;
4848
import com.yugabyte.yw.forms.ThirdpartySoftwareUpgradeParams;
49+
import com.yugabyte.yw.forms.TlsToggleParams;
4950
import com.yugabyte.yw.forms.UniverseDefinitionTaskParams;
5051
import com.yugabyte.yw.forms.UniverseDefinitionTaskParams.SoftwareUpgradeState;
5152
import com.yugabyte.yw.forms.UniverseTaskParams;
@@ -856,6 +857,9 @@ public CustomerTask retryCustomerTask(UUID customerUUID, UUID taskUUID) {
856857
case CertsRotate:
857858
taskParams = Json.fromJson(oldTaskParams, CertsRotateParams.class);
858859
break;
860+
case TlsToggle:
861+
taskParams = Json.fromJson(oldTaskParams, TlsToggleParams.class);
862+
break;
859863
case SystemdUpgrade:
860864
taskParams = Json.fromJson(oldTaskParams, SystemdUpgradeParams.class);
861865
break;

managed/src/main/java/com/yugabyte/yw/forms/TlsToggleParams.java

+96-95
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import com.yugabyte.yw.common.certmgmt.CertConfigType;
1313
import com.yugabyte.yw.models.CertificateInfo;
1414
import com.yugabyte.yw.models.Universe;
15+
import io.swagger.annotations.ApiModelProperty;
1516
import java.util.UUID;
1617
import play.mvc.Http;
1718
import play.mvc.Http.Status;
@@ -24,10 +25,9 @@ public class TlsToggleParams extends UpgradeTaskParams {
2425
public boolean enableClientToNodeEncrypt = false;
2526
public boolean allowInsecure = true;
2627

27-
// below fields are already inherited from UniverseDefinitionTaskParams
28-
// public UUID rootCA = null;
29-
// public UUID clientRootCA = null;
30-
// public Boolean rootAndClientRootCASame = null;
28+
// Do not include in swagger.
29+
@ApiModelProperty(hidden = true)
30+
public int nodeToNodeChange = 0;
3131

3232
public TlsToggleParams() {}
3333

@@ -43,101 +43,102 @@ public TlsToggleParams(
4343

4444
@Override
4545
public void verifyParams(Universe universe, boolean isFirstTry) {
46-
super.verifyParams(universe, isFirstTry);
47-
48-
UniverseDefinitionTaskParams universeDetails = universe.getUniverseDetails();
49-
UserIntent userIntent = universeDetails.getPrimaryCluster().userIntent;
50-
boolean existingEnableClientToNodeEncrypt = userIntent.enableClientToNodeEncrypt;
51-
boolean existingEnableNodeToNodeEncrypt = userIntent.enableNodeToNodeEncrypt;
52-
UUID existingRootCA = universeDetails.rootCA;
53-
UUID existingClientRootCA = universeDetails.getClientRootCA();
54-
55-
// Due to a bug, temporarily disable rolling upgrade for TLS toggle.
46+
// Due to a bug (PLAT-9434), temporarily disable rolling upgrade for TLS toggle.
5647
if (upgradeOption != UpgradeOption.NON_ROLLING_UPGRADE) {
5748
throw new PlatformServiceException(
5849
Status.BAD_REQUEST, "TLS toggle can only be performed in a non-rolling manner.");
5950
}
60-
61-
if (this.enableClientToNodeEncrypt == existingEnableClientToNodeEncrypt
62-
&& this.enableNodeToNodeEncrypt == existingEnableNodeToNodeEncrypt) {
63-
throw new PlatformServiceException(
64-
Status.BAD_REQUEST, "No changes in Tls parameters, cannot perform update operation.");
65-
}
66-
67-
if (existingRootCA != null && rootCA != null && !existingRootCA.equals(rootCA)) {
68-
throw new PlatformServiceException(
69-
Status.BAD_REQUEST, "Cannot update root certificate, if already created.");
70-
}
71-
72-
if (existingClientRootCA != null
73-
&& clientRootCA != null
74-
&& !existingClientRootCA.equals(clientRootCA)) {
75-
throw new PlatformServiceException(
76-
Status.BAD_REQUEST, "Cannot update client root certificate, if already created.");
77-
}
78-
79-
if (!CertificateInfo.isCertificateValid(rootCA)) {
80-
throw new PlatformServiceException(
81-
Status.BAD_REQUEST, "No valid root certificate found for UUID: " + rootCA);
82-
}
83-
84-
if (!CertificateInfo.isCertificateValid(clientRootCA)) {
85-
throw new PlatformServiceException(
86-
Status.BAD_REQUEST, "No valid client root certificate found for UUID: " + clientRootCA);
87-
}
88-
89-
if (rootCA != null
90-
&& CertificateInfo.get(rootCA).getCertType() == CertConfigType.CustomServerCert) {
91-
throw new PlatformServiceException(
92-
Http.Status.BAD_REQUEST,
93-
"CustomServerCert are only supported for Client to Server Communication.");
94-
}
95-
96-
if (rootCA != null
97-
&& CertificateInfo.get(rootCA).getCertType() == CertConfigType.CustomCertHostPath
98-
&& !userIntent.providerType.equals(CloudType.onprem)) {
99-
throw new PlatformServiceException(
100-
Status.BAD_REQUEST,
101-
"CustomCertHostPath certificates are only supported for on-prem providers.");
102-
}
103-
104-
if (clientRootCA != null
105-
&& CertificateInfo.get(clientRootCA).getCertType() == CertConfigType.CustomCertHostPath
106-
&& !userIntent.providerType.equals(Common.CloudType.onprem)) {
107-
throw new PlatformServiceException(
108-
Http.Status.BAD_REQUEST,
109-
"CustomCertHostPath certificates are only supported for on-prem providers.");
110-
}
111-
112-
// TODO: Add check that the userIntent is to use cert-manager
113-
if (rootCA != null
114-
&& CertificateInfo.get(rootCA).getCertType() == CertConfigType.K8SCertManager
115-
&& !userIntent.providerType.equals(CloudType.kubernetes)) {
116-
throw new PlatformServiceException(
117-
Status.BAD_REQUEST,
118-
"K8SCertManager certificates are only supported for k8s providers with cert-manager"
119-
+ " configured.");
120-
}
121-
122-
// TODO: Add check that the userIntent is to use cert-manager
123-
if (clientRootCA != null
124-
&& CertificateInfo.get(clientRootCA).getCertType() == CertConfigType.K8SCertManager
125-
&& !userIntent.providerType.equals(Common.CloudType.kubernetes)) {
126-
throw new PlatformServiceException(
127-
Http.Status.BAD_REQUEST,
128-
"K8SCertManager certificates are only supported for k8s providers with cert-manager"
129-
+ " configured.");
130-
}
131-
132-
if (rootAndClientRootCASame
133-
&& enableNodeToNodeEncrypt
134-
&& enableClientToNodeEncrypt
135-
&& rootCA != null
136-
&& clientRootCA != null
137-
&& !rootCA.equals(clientRootCA)) {
138-
throw new PlatformServiceException(
139-
Http.Status.BAD_REQUEST,
140-
"RootCA and ClientRootCA cannot be different when rootAndClientRootCASame is true.");
51+
super.verifyParams(universe, isFirstTry);
52+
if (isFirstTry) {
53+
// Validate against the current settings in the universe.
54+
UniverseDefinitionTaskParams universeDetails = universe.getUniverseDetails();
55+
UserIntent userIntent = universeDetails.getPrimaryCluster().userIntent;
56+
boolean existingEnableClientToNodeEncrypt = userIntent.enableClientToNodeEncrypt;
57+
boolean existingEnableNodeToNodeEncrypt = userIntent.enableNodeToNodeEncrypt;
58+
UUID existingRootCA = universeDetails.rootCA;
59+
UUID existingClientRootCA = universeDetails.getClientRootCA();
60+
61+
if (this.enableClientToNodeEncrypt == existingEnableClientToNodeEncrypt
62+
&& this.enableNodeToNodeEncrypt == existingEnableNodeToNodeEncrypt) {
63+
throw new PlatformServiceException(
64+
Status.BAD_REQUEST, "No changes in Tls parameters, cannot perform update operation.");
65+
}
66+
67+
if (existingRootCA != null && rootCA != null && !existingRootCA.equals(rootCA)) {
68+
throw new PlatformServiceException(
69+
Status.BAD_REQUEST, "Cannot update root certificate, if already created.");
70+
}
71+
72+
if (existingClientRootCA != null
73+
&& clientRootCA != null
74+
&& !existingClientRootCA.equals(clientRootCA)) {
75+
throw new PlatformServiceException(
76+
Status.BAD_REQUEST, "Cannot update client root certificate, if already created.");
77+
}
78+
79+
if (!CertificateInfo.isCertificateValid(rootCA)) {
80+
throw new PlatformServiceException(
81+
Status.BAD_REQUEST, "No valid root certificate found for UUID: " + rootCA);
82+
}
83+
84+
if (!CertificateInfo.isCertificateValid(clientRootCA)) {
85+
throw new PlatformServiceException(
86+
Status.BAD_REQUEST, "No valid client root certificate found for UUID: " + clientRootCA);
87+
}
88+
89+
if (rootCA != null
90+
&& CertificateInfo.get(rootCA).getCertType() == CertConfigType.CustomServerCert) {
91+
throw new PlatformServiceException(
92+
Http.Status.BAD_REQUEST,
93+
"CustomServerCert are only supported for Client to Server Communication.");
94+
}
95+
96+
if (rootCA != null
97+
&& CertificateInfo.get(rootCA).getCertType() == CertConfigType.CustomCertHostPath
98+
&& !userIntent.providerType.equals(CloudType.onprem)) {
99+
throw new PlatformServiceException(
100+
Status.BAD_REQUEST,
101+
"CustomCertHostPath certificates are only supported for on-prem providers.");
102+
}
103+
104+
if (clientRootCA != null
105+
&& CertificateInfo.get(clientRootCA).getCertType() == CertConfigType.CustomCertHostPath
106+
&& !userIntent.providerType.equals(Common.CloudType.onprem)) {
107+
throw new PlatformServiceException(
108+
Http.Status.BAD_REQUEST,
109+
"CustomCertHostPath certificates are only supported for on-prem providers.");
110+
}
111+
112+
// TODO: Add check that the userIntent is to use cert-manager
113+
if (rootCA != null
114+
&& CertificateInfo.get(rootCA).getCertType() == CertConfigType.K8SCertManager
115+
&& !userIntent.providerType.equals(CloudType.kubernetes)) {
116+
throw new PlatformServiceException(
117+
Status.BAD_REQUEST,
118+
"K8SCertManager certificates are only supported for k8s providers with cert-manager"
119+
+ " configured.");
120+
}
121+
122+
// TODO: Add check that the userIntent is to use cert-manager
123+
if (clientRootCA != null
124+
&& CertificateInfo.get(clientRootCA).getCertType() == CertConfigType.K8SCertManager
125+
&& !userIntent.providerType.equals(Common.CloudType.kubernetes)) {
126+
throw new PlatformServiceException(
127+
Http.Status.BAD_REQUEST,
128+
"K8SCertManager certificates are only supported for k8s providers with cert-manager"
129+
+ " configured.");
130+
}
131+
132+
if (rootAndClientRootCASame
133+
&& enableNodeToNodeEncrypt
134+
&& enableClientToNodeEncrypt
135+
&& rootCA != null
136+
&& clientRootCA != null
137+
&& !rootCA.equals(clientRootCA)) {
138+
throw new PlatformServiceException(
139+
Http.Status.BAD_REQUEST,
140+
"RootCA and ClientRootCA cannot be different when rootAndClientRootCASame is true.");
141+
}
141142
}
142143
}
143144

managed/src/test/java/com/yugabyte/yw/commissioner/TaskExecutorTest.java

+1
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,7 @@ public class TaskExecutorTest extends PlatformGuiceApplicationBaseTest {
128128
TaskType.RestartUniverseKubernetesUpgrade,
129129
TaskType.ThirdpartySoftwareUpgrade,
130130
TaskType.CertsRotate,
131+
TaskType.TlsToggle,
131132
TaskType.SystemdUpgrade,
132133
TaskType.ModifyAuditLoggingConfig,
133134
TaskType.StartMasterOnNode,

0 commit comments

Comments
 (0)