@@ -121,6 +121,8 @@ public class BatchAppenderator implements Appenderator
121
121
private final long maxBytesTuningConfig ;
122
122
private final boolean skipBytesInMemoryOverheadCheck ;
123
123
private final boolean useMaxMemoryEstimates ;
124
+ private final int messageGapRowSampleSize ;
125
+ private final boolean aggregateMessageGapStats ;
124
126
125
127
private volatile ListeningExecutorService persistExecutor = null ;
126
128
private volatile ListeningExecutorService pushExecutor = null ;
@@ -190,6 +192,8 @@ public class BatchAppenderator implements Appenderator
190
192
skipBytesInMemoryOverheadCheck = tuningConfig .isSkipBytesInMemoryOverheadCheck ();
191
193
maxPendingPersists = tuningConfig .getMaxPendingPersists ();
192
194
this .useMaxMemoryEstimates = useMaxMemoryEstimates ;
195
+ this .aggregateMessageGapStats = tuningConfig .isAggregateMessageGapStatsEnabled ();
196
+ this .messageGapRowSampleSize = tuningConfig .getMessageGapRowSampleSize ();
193
197
this .centralizedDatasourceSchemaConfig = centralizedDatasourceSchemaConfig ;
194
198
this .fingerprintGenerator = new FingerprintGenerator (objectMapper );
195
199
}
@@ -287,6 +291,9 @@ public AppenderatorAddResult add(
287
291
}
288
292
289
293
final Sink sink = getOrCreateSink (identifier );
294
+ if (aggregateMessageGapStats && totalRows % messageGapRowSampleSize == 0 ) {
295
+ metrics .reportMessageGapAggregate (System .currentTimeMillis () - row .getTimestampFromEpoch ());
296
+ }
290
297
metrics .reportMessageMaxTimestamp (row .getTimestampFromEpoch ());
291
298
final int sinkRowsInMemoryBeforeAdd = sink .getNumRowsInMemory ();
292
299
final int sinkRowsInMemoryAfterAdd ;
@@ -513,8 +520,10 @@ public void clear()
513
520
private void clear (Map <SegmentIdWithShardSpec , Sink > sinksToClear , boolean removeOnDiskData )
514
521
{
515
522
// Drop commit metadata, then abandon all segments.
516
- log .info ("Clearing all[%d] sinks & their hydrants, removing data on disk: [%s]" ,
517
- sinksToClear .size (), removeOnDiskData );
523
+ log .info (
524
+ "Clearing all[%d] sinks & their hydrants, removing data on disk: [%s]" ,
525
+ sinksToClear .size (), removeOnDiskData
526
+ );
518
527
// Drop everything.
519
528
Iterator <Map .Entry <SegmentIdWithShardSpec , Sink >> sinksIterator = sinksToClear .entrySet ().iterator ();
520
529
sinksIterator .forEachRemaining (entry -> {
@@ -582,8 +591,9 @@ public ListenableFuture<Object> persistAll(@Nullable final Committer committer)
582
591
// the invariant of exactly one, always swappable, sink with exactly one unpersisted hydrant must hold
583
592
int totalHydrantsForSink = hydrants .size ();
584
593
if (totalHydrantsForSink != 1 ) {
585
- throw new ISE ("There should be only one hydrant for identifier[%s] but there are[%s]" ,
586
- identifier , totalHydrantsForSink
594
+ throw new ISE (
595
+ "There should be only one hydrant for identifier[%s] but there are[%s]" ,
596
+ identifier , totalHydrantsForSink
587
597
);
588
598
}
589
599
totalHydrantsCount ++;
@@ -635,8 +645,9 @@ public ListenableFuture<Object> persistAll(@Nullable final Committer committer)
635
645
metrics .incrementPersistTimeMillis (persistMillis );
636
646
persistStopwatch .stop ();
637
647
// make sure no push can start while persisting:
638
- log .info ("Persisted rows[%,d] and bytes[%,d] and removed all sinks & hydrants from memory in[%d] millis" ,
639
- numPersistedRows , bytesPersisted , persistMillis
648
+ log .info (
649
+ "Persisted rows[%,d] and bytes[%,d] and removed all sinks & hydrants from memory in[%d] millis" ,
650
+ numPersistedRows , bytesPersisted , persistMillis
640
651
);
641
652
log .info ("Persist is done." );
642
653
}
@@ -656,6 +667,7 @@ public ListenableFuture<Object> persistAll(@Nullable final Committer committer)
656
667
/**
657
668
* All sinks will be persisted so do a shallow copy of the Sinks map, reset
658
669
* the map and metadata (i.e. memory consumption counters) so that ingestion can go on
670
+ *
659
671
* @return The map of sinks to persist, this map will be garbage collected after
660
672
* persist is complete since we will not be keeping a reference to it...
661
673
*/
@@ -740,8 +752,9 @@ public ListenableFuture<SegmentsAndCommitMetadata> push(
740
752
log .warn ("mergeAndPush[%s] returned null, skipping." , identifier );
741
753
}
742
754
}
743
- log .info ("Push done: total sinks merged[%d], total hydrants merged[%d]" ,
744
- identifiers .size (), totalHydrantsMerged
755
+ log .info (
756
+ "Push done: total sinks merged[%d], total hydrants merged[%d]" ,
757
+ identifiers .size (), totalHydrantsMerged
745
758
);
746
759
return new SegmentsAndCommitMetadata (dataSegments , commitMetadata , segmentSchemaMapping );
747
760
},
@@ -753,8 +766,8 @@ public ListenableFuture<SegmentsAndCommitMetadata> push(
753
766
/**
754
767
* Merge segment, push to deep storage. Should only be used on segments that have been fully persisted.
755
768
*
756
- * @param identifier sink identifier
757
- * @param sink sink to push
769
+ * @param identifier sink identifier
770
+ * @param sink sink to push
758
771
* @return segment descriptor along with schema, or null if the sink is no longer valid
759
772
*/
760
773
private DataSegmentWithMetadata mergeAndPush (
@@ -784,8 +797,10 @@ private DataSegmentWithMetadata mergeAndPush(
784
797
if (sm == null ) {
785
798
log .warn ("Sink metadata not found just before merge for identifier [%s]" , identifier );
786
799
} else if (numHydrants != sm .getNumHydrants ()) {
787
- throw new ISE ("Number of restored hydrants[%d] for identifier[%s] does not match expected value[%d]" ,
788
- numHydrants , identifier , sm .getNumHydrants ());
800
+ throw new ISE (
801
+ "Number of restored hydrants[%d] for identifier[%s] does not match expected value[%d]" ,
802
+ numHydrants , identifier , sm .getNumHydrants ()
803
+ );
789
804
}
790
805
791
806
try {
@@ -1260,12 +1275,14 @@ private int calculateSinkMemoryInUsed()
1260
1275
*/
1261
1276
private static class SinkMetadata
1262
1277
{
1263
- /** This is used to maintain the rows in the sink accross persists of the sink
1278
+ /**
1279
+ * This is used to maintain the rows in the sink accross persists of the sink
1264
1280
* used for functionality (i.e. to detect whether an incremental push
1265
1281
* is needed {@link AppenderatorDriverAddResult#isPushRequired(Integer, Long)}
1266
1282
**/
1267
1283
private int numRowsInSegment ;
1268
- /** For sanity check as well as functionality: to make sure that all hydrants for a sink are restored from disk at
1284
+ /**
1285
+ * For sanity check as well as functionality: to make sure that all hydrants for a sink are restored from disk at
1269
1286
* push time and also to remember the fire hydrant "count" when persisting it.
1270
1287
*/
1271
1288
private int numHydrants ;
0 commit comments