Skip to content

Commit 1047ea0

Browse files
committed
[PLAT-17182][PLAT-17154][PLAT-16913][PLAT-16785][PLAT-14383] Multiple metric/alert related fixes
Summary: Contains fixes for the following JIRAs: [PLAT-17182] Reactor Delay 'Metric Graph in Prometheus' Does Not Show Outbound Queue or Outbound Transfer [PLAT-17154] Raise the threshold for DB Queue Overflow Alert to 100 [PLAT-16913] Set defaultDurationSec: 300 for DDL atomicity alert [PLAT-16785] Prometheus Auth settings are present as runtime configs [PLAT-14383] Backup metrics treats universe_name & universe_uuid as same string Test Plan: Tested manually Reviewers: vbansal Reviewed By: vbansal Subscribers: yugaware Differential Revision: https://phorge.dev.yugabyte.com/D42950
1 parent 282778f commit 1047ea0

File tree

8 files changed

+66
-18
lines changed

8 files changed

+66
-18
lines changed

managed/RUNTIME-FLAGS.md

-3
Original file line numberDiff line numberDiff line change
@@ -127,9 +127,6 @@
127127
| "Devops command timeout" | "yb.devops.command_timeout" | "GLOBAL" | "Devops command timeout" | "Duration" |
128128
| "Node destroy command timeout" | "yb.node_ops.destroy_server_timeout" | "GLOBAL" | "Timeout for node destroy command before failing." | "Duration" |
129129
| "YBC Compatible DB Version" | "ybc.compatible_db_version" | "GLOBAL" | "Minimum YBDB version which supports YBC" | "String" |
130-
| "Prometheus auth enabled" | "yb.metrics.auth" | "GLOBAL" | "Enables basic authentication for Prometheus web UI/APIs access" | "Boolean" |
131-
| "Prometheus auth username" | "yb.metrics.auth_username" | "GLOBAL" | "Username, used for request authentication against embedded Prometheus" | "String" |
132-
| "Prometheus auth password" | "yb.metrics.auth_password" | "GLOBAL" | "Password, used for request authentication against embedded Prometheus" | "String" |
133130
| "Force YBC Shutdown during upgrade" | "ybc.upgrade.force_shutdown" | "GLOBAL" | "For YBC Shutdown during upgrade" | "Boolean" |
134131
| "Enable strict mode to ignore deprecated YBA APIs" | "yb.api.mode.strict" | "GLOBAL" | "Will ignore deprecated APIs" | "Boolean" |
135132
| "Enable safe mode to ignore preview YBA APIs" | "yb.api.mode.safe" | "GLOBAL" | "Will ignore preview APIs" | "Boolean" |

managed/src/main/java/com/yugabyte/yw/common/config/GlobalConfKeys.java

+3-3
Original file line numberDiff line numberDiff line change
@@ -1122,23 +1122,23 @@ public class GlobalConfKeys extends RuntimeConfigKeysModule {
11221122
"Prometheus auth enabled",
11231123
"Enables basic authentication for Prometheus web UI/APIs access",
11241124
ConfDataType.BooleanType,
1125-
ImmutableList.of(ConfKeyTags.PUBLIC));
1125+
ImmutableList.of(ConfKeyTags.INTERNAL));
11261126
public static final ConfKeyInfo<String> metricsAuthUsername =
11271127
new ConfKeyInfo<>(
11281128
"yb.metrics.auth_username",
11291129
ScopeType.GLOBAL,
11301130
"Prometheus auth username",
11311131
"Username, used for request authentication against embedded Prometheus",
11321132
ConfDataType.StringType,
1133-
ImmutableList.of(ConfKeyTags.PUBLIC));
1133+
ImmutableList.of(ConfKeyTags.INTERNAL));
11341134
public static final ConfKeyInfo<String> metricsAuthPassword =
11351135
new ConfKeyInfo<>(
11361136
"yb.metrics.auth_password",
11371137
ScopeType.GLOBAL,
11381138
"Prometheus auth password",
11391139
"Password, used for request authentication against embedded Prometheus",
11401140
ConfDataType.StringType,
1141-
ImmutableList.of(ConfKeyTags.PUBLIC));
1141+
ImmutableList.of(ConfKeyTags.INTERNAL));
11421142
public static final ConfKeyInfo<Boolean> forceYbcShutdownDuringUpgrade =
11431143
new ConfKeyInfo<>(
11441144
"ybc.upgrade.force_shutdown",

managed/src/main/java/com/yugabyte/yw/common/metrics/MetricLabelsBuilder.java

+2-2
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
import com.yugabyte.yw.models.Customer;
1616
import com.yugabyte.yw.models.Universe;
1717
import com.yugabyte.yw.models.helpers.KnownAlertLabels;
18-
import java.util.HashMap;
18+
import java.util.LinkedHashMap;
1919
import java.util.List;
2020
import java.util.Map;
2121
import java.util.stream.Collectors;
@@ -38,7 +38,7 @@ public class MetricLabelsBuilder {
3838
KnownAlertLabels.SOURCE_TYPE.labelName()
3939
};
4040

41-
private final Map<String, String> labels = new HashMap<>();
41+
private final Map<String, String> labels = new LinkedHashMap<>();
4242

4343
public static MetricLabelsBuilder create() {
4444
return new MetricLabelsBuilder();

managed/src/main/java/com/yugabyte/yw/controllers/AlertController.java

+2-1
Original file line numberDiff line numberDiff line change
@@ -1191,7 +1191,8 @@ private String getAlertExpressionUrl(
11911191
alert.getResolvedTime() != null
11921192
? alert.getResolvedTime().getTime()
11931193
: System.currentTimeMillis());
1194-
return metricUrlProvider.getExpressionUrl(expression, startUnixTime, endUnixTime);
1194+
return metricUrlProvider.getExpressionUrl(
1195+
Collections.singletonList(expression), startUnixTime, endUnixTime);
11951196
}
11961197

11971198
@VisibleForTesting

managed/src/main/java/com/yugabyte/yw/metrics/MetricQueryExecutor.java

+6-3
Original file line numberDiff line numberDiff line change
@@ -92,11 +92,12 @@ private JsonNode getMetrics(Map<String, String> queryParam) {
9292
return apiHelper.getRequest(queryUrl, headers, queryParam);
9393
}
9494

95-
private String getDirectURL(String queryExpr) {
95+
private String getDirectURL(List<String> queryExpr) {
9696
long endUnixTime = Long.parseLong(queryParam.getOrDefault("end", "0"));
9797
long startUnixTime = Long.parseLong(queryParam.getOrDefault("start", "0"));
98+
Long step = Long.parseLong(queryParam.get("step"));
9899

99-
return metricUrlProvider.getExpressionUrl(queryExpr, startUnixTime, endUnixTime);
100+
return metricUrlProvider.getExpressionUrl(queryExpr, startUnixTime, endUnixTime, step);
100101
}
101102

102103
@Override
@@ -147,6 +148,7 @@ public JsonNode call() {
147148
ArrayNode directURLs = responseJson.putArray("directURLs");
148149
responseJson.put(
149150
"metricsLinkUseBrowserFqdn", metricUrlProvider.getMetricsLinkUseBrowserFqdn());
151+
List<String> queryExpressions = new ArrayList<>();
150152
for (Map.Entry<String, String> e : queries.entrySet()) {
151153
String metric = e.getKey();
152154
String queryExpr = e.getValue();
@@ -160,7 +162,7 @@ public JsonNode call() {
160162
}
161163
queryParam.put("query", queryExpr);
162164
try {
163-
directURLs.add(getDirectURL(queryExpr));
165+
queryExpressions.add(queryExpr);
164166
} catch (Exception de) {
165167
log.trace("Error getting direct url", de);
166168
}
@@ -179,6 +181,7 @@ public JsonNode call() {
179181
output.addAll(queryResponse.getGraphData(metric, configDefinition, metricSettings));
180182
}
181183
}
184+
directURLs.add(getDirectURL(queryExpressions));
182185
if (isRecharts) {
183186
responseJson.set("data", Json.toJson(rechartsOutput));
184187
} else {

managed/src/main/java/com/yugabyte/yw/metrics/MetricUrlProvider.java

+31-5
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,17 @@
22

33
package com.yugabyte.yw.metrics;
44

5-
import com.cronutils.utils.StringUtils;
65
import com.yugabyte.yw.common.Util;
76
import com.yugabyte.yw.common.config.GlobalConfKeys;
87
import com.yugabyte.yw.common.config.RuntimeConfGetter;
98
import java.net.URLEncoder;
9+
import java.nio.charset.StandardCharsets;
10+
import java.util.List;
1011
import java.util.TimeZone;
1112
import javax.inject.Inject;
1213
import javax.inject.Singleton;
1314
import lombok.extern.slf4j.Slf4j;
15+
import org.apache.commons.lang3.StringUtils;
1416

1517
@Slf4j
1618
@Singleton
@@ -54,7 +56,12 @@ public String getMetricsExternalUrl() {
5456
return metricsExternalUrl;
5557
}
5658

57-
public String getExpressionUrl(String queryExpr, Long startUnixTime, Long endUnixTime) {
59+
public String getExpressionUrl(List<String> queryExpr, Long startUnixTime, Long endUnixTime) {
60+
return getExpressionUrl(queryExpr, startUnixTime, endUnixTime, null); // null step: no step_input is appended
61+
}
62+
63+
public String getExpressionUrl(
64+
List<String> queryExpr, Long startUnixTime, Long endUnixTime, Long step) {
5865
String durationSecs = "3600s";
5966
String endString = "";
6067

@@ -67,11 +74,30 @@ public String getExpressionUrl(String queryExpr, Long startUnixTime, Long endUni
6774
durationSecs = String.format("%ds", (endUnixTime - startUnixTime));
6875
}
6976

77+
StringBuilder result =
78+
new StringBuilder(String.format("%s/graph?", this.getMetricsExternalUrl()));
79+
for (int i = 0; i < queryExpr.size(); i++) {
80+
if (i > 0) {
81+
result.append("&");
82+
}
83+
String expression = queryExpr.get(i);
84+
result.append(
85+
String.format(
86+
"g%d.expr=%s&g%d.tab=0&g%d.range_input=%s&g%d.end_input=%s",
87+
i,
88+
URLEncoder.encode(expression, StandardCharsets.UTF_8),
89+
i,
90+
i,
91+
durationSecs,
92+
i,
93+
endString));
94+
if (step != null) {
95+
result.append(String.format("&g%d.step_input=%d", i, step));
96+
}
97+
}
7098
// Note: this is the URL as prometheus' web interface renders these metrics. It is
7199
// possible this breaks over time as we upgrade prometheus.
72-
return String.format(
73-
"%s/graph?g0.expr=%s&g0.tab=0&g0.range_input=%s&g0.end_input=%s",
74-
this.getMetricsExternalUrl(), URLEncoder.encode(queryExpr), durationSecs, endString);
100+
return result.toString();
75101
}
76102

77103
public String getMetricsInternalUrl() {

managed/src/main/resources/alert/alert_templates.yml

+2-1
Original file line numberDiff line numberDiff line change
@@ -460,7 +460,7 @@ templates:
460460
createForNewCustomer: true
461461
defaultThresholdMap:
462462
WARNING:
463-
threshold: 0.0
463+
threshold: 100.0
464464
targetType: UNIVERSE
465465
defaultThresholdCondition: GREATER_THAN
466466
defaultThresholdUnit: COUNT
@@ -660,6 +660,7 @@ templates:
660660
queryTemplate: yb_ddl_atomicity_check{universe_uuid="__universeUuid__"}
661661
{{ query_condition }} {{ query_threshold }}
662662
createForNewCustomer: true
663+
defaultDurationSec: 300
663664
defaultThresholdMap:
664665
SEVERE:
665666
threshold: 1.0
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
-- Copyright (c) YugaByte, Inc.
2+
3+
-- Update DB_QUEUES_OVERFLOW alert threshold
4+
UPDATE alert_configuration
5+
SET thresholds = jsonb_set(
6+
thresholds::jsonb,
7+
'{WARNING,threshold}',
8+
to_jsonb(greatest((thresholds::jsonb -> 'WARNING' ->> 'threshold')::double precision, 100))
9+
)
10+
WHERE template = 'DB_QUEUES_OVERFLOW';
11+
12+
-- Update DDL_ATOMICITY_CHECK alert duration
13+
UPDATE alert_configuration
14+
SET duration_sec = greatest(duration_sec, 300)
15+
WHERE template = 'DDL_ATOMICITY_CHECK';
16+
17+
UPDATE alert_definition
18+
SET config_written = false
19+
WHERE configuration_uuid IN
20+
(select uuid from alert_configuration where template in ('DB_QUEUES_OVERFLOW', 'DDL_ATOMICITY_CHECK'));

0 commit comments

Comments
 (0)