|
1 | 1 | use std::{cmp, mem};
|
2 | 2 |
|
3 | 3 | use core_common::byte_size_of::ByteSizeOf;
|
| 4 | +use ordered_float::OrderedFloat; |
4 | 5 | use serde::{Deserialize, Serialize};
|
5 | 6 |
|
| 7 | +use crate::event::metric::Bucket; |
| 8 | + |
/// Default bin limit used for agent-style sketches.
///
/// NOTE(review): presumably the maximum number of bins a sketch may hold -- confirm against
/// the sketch configuration code.
const AGENT_DEFAULT_BIN_LIMIT: u16 = 4_096;

/// Default `eps` used for agent-style sketches (exactly 1/128).
///
/// The tests in this file pass this value as the expected relative accuracy.
const AGENT_DEFAULT_EPS: f64 = 1.0 / 128.0;

/// Default minimum value used for agent-style sketches.
///
/// NOTE(review): presumably the smallest magnitude the sketch distinguishes from zero --
/// confirm against the sketch configuration code.
const AGENT_DEFAULT_MIN_VALUE: f64 = 1e-9;
|
@@ -294,6 +297,7 @@ impl AgentDDSketch {
|
294 | 297 | self.is_empty().then(|| self.avg)
|
295 | 298 | }
|
296 | 299 |
|
| 300 | + /// Clears the sketch, removing all bins and resetting all statistics. |
297 | 301 | pub fn clear(&mut self) {
|
298 | 302 | self.count = 0;
|
299 | 303 | self.min = f64::MAX;
|
@@ -459,7 +463,7 @@ impl AgentDDSketch {
|
459 | 463 | self.insert_key_counts(vec![(key, n)]);
|
460 | 464 | }
|
461 | 465 |
|
462 |
| - pub fn insert_interpolate(&mut self, lower: f64, upper: f64, count: u32) { |
| 466 | + fn insert_interpolate_bucket(&mut self, lower: f64, upper: f64, count: u32) { |
463 | 467 | // Find the keys for the bins where the lower bound and upper bound would end up, and
|
464 | 468 | // collect all of the keys in between, inclusive.
|
465 | 469 | let lower_key = self.config.key(lower);
|
@@ -516,6 +520,29 @@ impl AgentDDSketch {
|
516 | 520 | self.insert_key_counts(key_counts);
|
517 | 521 | }
|
518 | 522 |
|
| 523 | + pub fn insert_interpolate_buckets(&mut self, mut buckets: Vec<Bucket>) { |
| 524 | + // Buckets need to be sorted from lowest to highest so that we can properly calculate the |
| 525 | + // rolling lower/upper bounds. |
| 526 | + buckets.sort_by(|a, b| { |
| 527 | + let oa = OrderedFloat(a.upper_limit); |
| 528 | + let ob = OrderedFloat(b.upper_limit); |
| 529 | + |
| 530 | + oa.cmp(&ob) |
| 531 | + }); |
| 532 | + |
| 533 | + let mut lower = 0.0; |
| 534 | + |
| 535 | + for bucket in buckets { |
| 536 | + let mut upper = bucket.upper_limit; |
| 537 | + if upper.is_sign_positive() && upper.is_infinite() { |
| 538 | + upper = lower; |
| 539 | + } |
| 540 | + |
| 541 | + self.insert_interpolate_bucket(lower, upper, bucket.count); |
| 542 | + lower = bucket.upper_limit; |
| 543 | + } |
| 544 | + } |
| 545 | + |
519 | 546 | pub fn quantile(&self, q: f64) -> Option<f64> {
|
520 | 547 | if self.count == 0 {
|
521 | 548 | return None;
|
@@ -860,10 +887,24 @@ fn round_to_even(v: f64) -> f64 {
|
860 | 887 |
|
861 | 888 | #[cfg(test)]
|
862 | 889 | mod tests {
|
| 890 | + use crate::{event::metric::Bucket, metrics::handle::Histogram}; |
| 891 | + |
863 | 892 | use super::{round_to_even, AgentDDSketch, Config, AGENT_DEFAULT_EPS};
|
864 | 893 |
|
865 | 894 | const FLOATING_POINT_ACCEPTABLE_ERROR: f64 = 1.0e-10;
|
866 | 895 |
|
/// Fixed sample of 100 raw values recorded into the histogram by
/// `test_histogram_interpolation`, which divides each value by 10,000 before recording it.
// Formatting is skipped so the table stays at ten values per row, which makes it easy to
// count and index by eye.
#[rustfmt::skip]
static HISTO_VALUES: &[u64] = &[
    104221, 10206, 32436, 121686, 92848, 83685, 23739, 15122, 50491, 88507,
    48318, 28004, 29576, 8735, 77693, 33965, 88047, 7592, 64138, 59966,
    117956, 112525, 41743, 82790, 27084, 26967, 75008, 10752, 96636, 97150,
    60768, 33411, 24746, 91872, 59057, 48329, 16756, 100459, 117640, 59244,
    107584, 124303, 32368, 109940, 106353, 90452, 84471, 39086, 91119, 89680,
    41339, 23329, 25629, 98156, 97002, 9538, 73671, 112586, 101616, 70719,
    117291, 90043, 10713, 49195, 60656, 60887, 47332, 113675, 8371, 42619,
    33489, 108629, 70501, 84355, 24576, 34468, 76756, 110706, 42854, 83841,
    120751, 66494, 65210, 70244, 118529, 28021, 51603, 96315, 92364, 59120,
    118968, 5484, 91790, 45171, 102756, 29673, 85303, 108322, 122793, 88373,
];
| 907 | + |
867 | 908 | #[cfg(ddsketch_extended)]
|
868 | 909 | fn generate_pareto_distribution() -> Vec<OrderedFloat<f64>> {
|
869 | 910 | use ordered_float::OrderedFloat;
|
@@ -1093,6 +1134,28 @@ mod tests {
|
1093 | 1134 | test_relative_accuracy(config, AGENT_DEFAULT_EPS, min_value, max_value)
|
1094 | 1135 | }
|
1095 | 1136 |
|
/// Smoke test for `insert_interpolate_buckets`: a sketch that starts out empty must be
/// non-empty after the buckets of a populated histogram are inserted into it.
#[test]
fn test_histogram_interpolation() {
    let mut sketch = AgentDDSketch::with_agent_defaults();
    assert!(sketch.is_empty());

    // Record every raw sample, scaled down by 10,000, into a fresh histogram.
    let histogram = Histogram::new();
    for raw in HISTO_VALUES.iter().copied() {
        histogram.record(raw as f64 / 10_000.0);
    }

    // Convert each (upper bound, count) pair reported by the histogram into a `Bucket`.
    let buckets: Vec<Bucket> = histogram
        .buckets()
        .map(|(upper_limit, count)| Bucket { upper_limit, count })
        .collect();
    sketch.insert_interpolate_buckets(buckets);

    assert!(!sketch.is_empty());
}
| 1158 | + |
1096 | 1159 | fn test_relative_accuracy(config: Config, rel_acc: f64, min_value: f32, max_value: f32) {
|
1097 | 1160 | let max_observed_rel_acc = check_max_relative_accuracy(config, min_value, max_value);
|
1098 | 1161 | assert!(
|
|
0 commit comments