Hello,
I spent days tracking down a CPU issue in my Dynatrace Python extension (which is basically a Prometheus scraper for a Solace Prometheus exporter).
I finally noticed a big performance gap between versions 0.21.1 and 0.22.1: 0.22.1 consumes about 4 times more CPU than 0.21.1. After downgrading to 0.21.1, CPU usage went back to normal.
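For reference, here is roughly how the parser's CPU cost can be compared between the two versions (a minimal sketch; the payload and iteration count below are made-up placeholders, not my real Solace exporter output):

```python
# Minimal benchmark sketch: run once under 0.21.1 and once under 0.22.1.
# The payload is a hypothetical Solace-style sample, not real exporter output.
import timeit

from prometheus_client.parser import text_string_to_metric_families

PAYLOAD = (
    'solace_queue_spool_usage_bytes{queue_name="q1",vpn_name="default"} 42\n'
    'solace_queue_spool_quota_bytes{queue_name="q1",vpn_name="default"} 1024\n'
)

def parse_once():
    # Fully consume the generator so every sample is actually parsed
    for family in text_string_to_metric_families(PAYLOAD):
        for _sample in family.samples:
            pass

print(timeit.timeit(parse_once, number=10_000))
```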
Here is my usage:
```python
# Module-level imports used below
import hashlib
import math

from prometheus_client.parser import text_string_to_metric_families

# Prometheus parser: https://prometheus.github.io/client_python/parser/
for prometheus_line in utf8_lines:
    for family in text_string_to_metric_families(prometheus_line):
        for sample in family.samples:
            skip = False  # By default, no metric is skipped unless we filter it
            if sample.name in wanted_solace_metrics:
                found_metrics += 1
                # self.logger.info("line: " + prometheus_line)
                # print("Name: {0} Labels: {1} Value: {2}".format(*sample))
                # NaN detection with the math library
                if math.isnan(sample.value):
                    invalid_metrics += 1
                    self.logger.info("NaN value skipped " + sample.name)
                    # If the value is NaN, we ignore it
                    break
                else:
                    valid_metrics += 1
                    dims = {**sample.labels, "node": nodename, "clustername": clustername}
                    # Remove unwanted dimensions: define the keys to remove
                    keys = ['client_name', 'client_address', 'client_profile', 'flow_id', 'acl_profile']
                    for key in keys:
                        result_pop = dims.pop(key, None)
                        if sample.name not in censored_metrics_list and result_pop is not None:
                            # print("DETECTED")
                            censored_metrics_list.append(sample.name)
                    # Parse exporter errors
                    if "error" in dims:
                        solace_prometheus_exporter_error = str(dims["error"]).replace('"', "").strip()
                        dims.update({"error": solace_prometheus_exporter_error})
                    # Remove unwanted queue protocols or sanitize queue name patterns
                    if "queue_name" in dims:
                        # Avoid ingestion issues with bad queue names that carry a trailing \n
                        queue_name = str(dims["queue_name"]).strip()
                        # Update the queue_name in the dims payload
                        dims.update({"queue_name": queue_name})
                        queue_name_lower = queue_name.lower()
                        if queue_name_lower.startswith(('#mqtt', '#cfgsync', '#p2p', '#pq')) or queue_name_lower == "":
                            skip = True
                    # Manage non-skipped metrics
                    if skip is False:
                        # Keep queue spool usage/quota metrics in separate tables to calculate disk usage later
                        if sample.name == "solace_queue_spool_usage_bytes":
                            queue_usage.append({"metric_name": sample.name, "md5_dims": hashlib.md5(str(dims).encode("utf-8")).hexdigest(), **dims, "METRICvalueMETRIC": sample.value})
                        if sample.name == "solace_queue_spool_quota_bytes":
                            queue_quota.append({"metric_name": sample.name, "md5_dims": hashlib.md5(str(dims).encode("utf-8")).hexdigest(), **dims, "METRICvalueMETRIC": sample.value})
                        # Send valid points (with an md5 of the dimensions if necessary)
                        if sample.name not in censored_metrics_list:
                            # Append valid points
                            valid_points.append({"metric_name": sample.name, **dims, "METRICvalueMETRIC": sample.value})
                        else:
                            # If the metric is aggregated, add an md5sum of all the dimensions as an index
                            # to find duplicates and ease "groupby" without pandas
                            valid_points.append({"metric_name": sample.name, "md5_dims": hashlib.md5(str(dims).encode("utf-8")).hexdigest(), **dims, "METRICvalueMETRIC": sample.value})
```
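To help pinpoint where the extra CPU goes, the parsing loop can also be profiled in isolation; a minimal sketch (the synthetic input below is a placeholder for the real exporter output):

```python
# Hypothetical profiling harness for the loop above; the input is synthetic.
import cProfile
import pstats

from prometheus_client.parser import text_string_to_metric_families

# Placeholder for the lines fetched from the Solace exporter
utf8_lines = ['solace_up{node="n1"} 1'] * 10_000

def parse_all(lines):
    for prometheus_line in lines:
        for family in text_string_to_metric_families(prometheus_line):
            for sample in family.samples:
                pass  # the real extension filters and appends points here

profiler = cProfile.Profile()
profiler.enable()
parse_all(utf8_lines)
profiler.disable()
pstats.Stats(profiler).sort_stats("cumulative").print_stats(20)
```

Comparing the top entries of this profile between 0.21.1 and 0.22.1 should show which parser internals got slower.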
If this code is still correct for 0.22.1, then I think there is a performance regression somewhere in the newer 0.22.x releases.
Best regards,
Charles