@@ -52,6 +52,10 @@ use crate::task::AtomicU64Ref;
52
52
/// enum is not supported in const generic.
53
53
// TODO: Use enum to replace this once [feature(adt_const_params)](https://github.com/rust-lang/rust/issues/95174) get completed.
54
54
pub type JoinTypePrimitive = u8 ;
55
+
56
+ /// Evict the cache every n rows.
57
+ const EVICT_EVERY_N_ROWS : u32 = 1024 ;
58
+
55
59
#[ allow( non_snake_case, non_upper_case_globals) ]
56
60
pub mod JoinType {
57
61
use super :: JoinTypePrimitive ;
@@ -242,6 +246,8 @@ pub struct HashJoinExecutor<K: HashKey, S: StateStore, const T: JoinTypePrimitiv
242
246
metrics : Arc < StreamingMetrics > ,
243
247
/// The maximum size of the chunk produced by executor at a time
244
248
chunk_size : usize ,
249
+ /// Count the messages received, clear to 0 when counted to `EVICT_EVERY_N_MESSAGES`
250
+ cnt_rows_received : u32 ,
245
251
246
252
/// watermark column index -> `BufferedWatermarks`
247
253
watermark_buffers : BTreeMap < usize , BufferedWatermarks < SideTypePrimitive > > ,
@@ -603,6 +609,7 @@ impl<K: HashKey, S: StateStore, const T: JoinTypePrimitive> HashJoinExecutor<K,
603
609
append_only_optimize,
604
610
metrics,
605
611
chunk_size,
612
+ cnt_rows_received : 0 ,
606
613
watermark_buffers,
607
614
}
608
615
}
@@ -662,6 +669,7 @@ impl<K: HashKey, S: StateStore, const T: JoinTypePrimitive> HashJoinExecutor<K,
662
669
chunk,
663
670
self . append_only_optimize ,
664
671
self . chunk_size ,
672
+ & mut self . cnt_rows_received ,
665
673
) {
666
674
left_time += left_start_time. elapsed ( ) ;
667
675
yield Message :: Chunk ( chunk?) ;
@@ -687,6 +695,7 @@ impl<K: HashKey, S: StateStore, const T: JoinTypePrimitive> HashJoinExecutor<K,
687
695
chunk,
688
696
self . append_only_optimize ,
689
697
self . chunk_size ,
698
+ & mut self . cnt_rows_received ,
690
699
) {
691
700
right_time += right_start_time. elapsed ( ) ;
692
701
yield Message :: Chunk ( chunk?) ;
@@ -752,14 +761,23 @@ impl<K: HashKey, S: StateStore, const T: JoinTypePrimitive> HashJoinExecutor<K,
752
761
// `commit` them here.
753
762
self . side_l . ht . flush ( epoch) . await ?;
754
763
self . side_r . ht . flush ( epoch) . await ?;
755
-
756
- // We need to manually evict the cache to the target capacity.
757
- self . side_l . ht . evict ( ) ;
758
- self . side_r . ht . evict ( ) ;
759
-
760
764
Ok ( ( ) )
761
765
}
762
766
767
+ // We need to manually evict the cache.
768
+ fn evict_cache (
769
+ side_update : & mut JoinSide < K , S > ,
770
+ side_match : & mut JoinSide < K , S > ,
771
+ cnt_rows_received : & mut u32 ,
772
+ ) {
773
+ * cnt_rows_received += 1 ;
774
+ if * cnt_rows_received == EVICT_EVERY_N_ROWS {
775
+ side_update. ht . evict ( ) ;
776
+ side_match. ht . evict ( ) ;
777
+ * cnt_rows_received = 0 ;
778
+ }
779
+ }
780
+
763
781
fn handle_watermark (
764
782
& mut self ,
765
783
side : SideTypePrimitive ,
@@ -850,6 +868,7 @@ impl<K: HashKey, S: StateStore, const T: JoinTypePrimitive> HashJoinExecutor<K,
850
868
chunk : StreamChunk ,
851
869
append_only_optimize : bool ,
852
870
chunk_size : usize ,
871
+ cnt_rows_received : & ' a mut u32 ,
853
872
) {
854
873
let chunk = chunk. compact ( ) ;
855
874
@@ -870,6 +889,8 @@ impl<K: HashKey, S: StateStore, const T: JoinTypePrimitive> HashJoinExecutor<K,
870
889
871
890
let keys = K :: build ( & side_update. join_key_indices , chunk. data_chunk ( ) ) ?;
872
891
for ( ( op, row) , key) in chunk. rows ( ) . zip_eq_debug ( keys. iter ( ) ) {
892
+ Self :: evict_cache ( side_update, side_match, cnt_rows_received) ;
893
+
873
894
let matched_rows: Option < HashValueType > =
874
895
Self :: hash_eq_match ( key, & mut side_match. ht ) . await ?;
875
896
match op {
0 commit comments