@@ -516,7 +516,7 @@ impl BuildState {
516
516
let mut payload = select_payload ( morsel. df ( ) . clone ( ) , payload_selector) ;
517
517
payload. rechunk_mut ( ) ;
518
518
519
- hash_keys. gen_partition_idxs (
519
+ hash_keys. gen_idxs_per_partition (
520
520
& partitioner,
521
521
& mut local. morsel_idxs_values_per_p ,
522
522
& mut local. sketch_per_p ,
@@ -644,6 +644,8 @@ impl ProbeState {
644
644
) -> PolarsResult < MorselSeq > {
645
645
// TODO: shuffle after partitioning and keep probe tables thread-local.
646
646
let mut partition_idxs = vec ! [ Vec :: new( ) ; partitioner. num_partitions( ) ] ;
647
+ let mut probe_partitions = Vec :: new ( ) ;
648
+ let mut materialized_idxsize_range = Vec :: new ( ) ;
647
649
let mut table_match = Vec :: new ( ) ;
648
650
let mut probe_match = Vec :: new ( ) ;
649
651
let mut max_seq = MorselSeq :: default ( ) ;
@@ -690,79 +692,142 @@ impl ProbeState {
690
692
let max_match_per_key_est = selectivity_estimate as usize + 16 ;
691
693
let out_est_size = ( ( selectivity_estimate * 1.2 * df_height as f64 ) as usize ) . min ( probe_limit as usize ) ;
692
694
build_out. reserve ( out_est_size + max_match_per_key_est) ;
693
- probe_out. reserve ( out_est_size + max_match_per_key_est) ;
694
695
695
696
unsafe {
696
- // Partition and probe the tables.
697
- for p in partition_idxs. iter_mut ( ) {
698
- p. clear ( ) ;
699
- }
700
- hash_keys. gen_partition_idxs (
701
- & partitioner,
702
- & mut partition_idxs,
703
- & mut [ ] ,
704
- emit_unmatched,
705
- ) ;
697
+ let new_morsel = |build : & mut DataFrameBuilder , probe : & mut DataFrameBuilder | {
698
+ let mut build_df = build. freeze_reset ( ) ;
699
+ let mut probe_df = probe. freeze_reset ( ) ;
700
+ let out_df = if params. left_is_build . unwrap ( ) {
701
+ build_df. hstack_mut_unchecked ( probe_df. get_columns ( ) ) ;
702
+ build_df
703
+ } else {
704
+ probe_df. hstack_mut_unchecked ( build_df. get_columns ( ) ) ;
705
+ probe_df
706
+ } ;
707
+ let out_df = postprocess_join ( out_df, params) ;
708
+ Morsel :: new ( out_df, seq, src_token. clone ( ) )
709
+ } ;
710
+
706
711
if params. preserve_order_probe {
707
- todo ! ( )
708
- } else {
709
- let new_morsel = |mut build_df : DataFrame , mut probe_df : DataFrame | {
710
- let out_df = if params. left_is_build . unwrap ( ) {
711
- build_df. hstack_mut_unchecked ( probe_df. get_columns ( ) ) ;
712
- build_df
713
- } else {
714
- probe_df. hstack_mut_unchecked ( build_df. get_columns ( ) ) ;
715
- probe_df
712
+ // To preserve the order we can't do bulk probes per partition and must follow
713
+ // the order of the probe morsel. We can still group probes that are
714
+ // consecutively on the same partition.
715
+ hash_keys. gen_partitions ( & partitioner, & mut probe_partitions, emit_unmatched) ;
716
+ let mut probe_group_start = 0 ;
717
+ while probe_group_start < probe_partitions. len ( ) {
718
+ let p_idx = probe_partitions[ probe_group_start] ;
719
+ let mut probe_group_end = probe_group_start + 1 ;
720
+ while probe_partitions. get ( probe_group_end) == Some ( & p_idx) {
721
+ probe_group_end += 1 ;
722
+ }
723
+ let Some ( p) = partitions. get ( p_idx as usize ) else {
724
+ probe_group_start = probe_group_end;
725
+ continue ;
716
726
} ;
717
- let out_df = postprocess_join ( out_df, params) ;
718
- Morsel :: new ( out_df, seq, src_token. clone ( ) )
719
- } ;
727
+
728
+ materialized_idxsize_range. extend ( materialized_idxsize_range. len ( ) as IdxSize ..probe_group_end as IdxSize ) ;
729
+
730
+ while probe_group_start < probe_group_end {
731
+ let matches_before_limit = probe_limit - probe_match. len ( ) as IdxSize ;
732
+ table_match. clear ( ) ;
733
+ probe_group_start += p. hash_table . probe_subset (
734
+ & hash_keys,
735
+ & materialized_idxsize_range[ probe_group_start..probe_group_end] ,
736
+ & mut table_match,
737
+ & mut probe_match,
738
+ mark_matches,
739
+ emit_unmatched,
740
+ matches_before_limit,
741
+ ) as usize ;
742
+
743
+ if emit_unmatched {
744
+ build_out. opt_gather_extend ( & p. payload , & table_match, ShareStrategy :: Always ) ;
745
+ } else {
746
+ build_out. gather_extend ( & p. payload , & table_match, ShareStrategy :: Always ) ;
747
+ } ;
748
+
749
+ if probe_match. len ( ) >= probe_limit as usize || probe_group_start == probe_partitions. len ( ) {
750
+ if !payload_rechunked {
751
+ payload. rechunk_mut ( ) ;
752
+ payload_rechunked = true ;
753
+ }
754
+ probe_out. gather_extend ( & payload, & probe_match, ShareStrategy :: Always ) ;
755
+ probe_match. clear ( ) ;
756
+ let out_morsel = new_morsel ( & mut build_out, & mut probe_out) ;
757
+ if send. send ( out_morsel) . await . is_err ( ) {
758
+ return Ok ( max_seq) ;
759
+ }
760
+ if probe_group_end != probe_partitions. len ( ) {
761
+ // We had enough matches to need a mid-partition flush, let's assume there are a lot of
762
+ // matches and just do a large reserve.
763
+ build_out. reserve ( probe_limit as usize + max_match_per_key_est) ;
764
+ }
765
+ }
766
+ }
767
+ }
768
+ } else {
769
+ // Partition and probe the tables.
770
+ for p in partition_idxs. iter_mut ( ) {
771
+ p. clear ( ) ;
772
+ }
773
+ hash_keys. gen_idxs_per_partition (
774
+ & partitioner,
775
+ & mut partition_idxs,
776
+ & mut [ ] ,
777
+ emit_unmatched,
778
+ ) ;
720
779
721
780
for ( p, idxs_in_p) in partitions. iter ( ) . zip ( & partition_idxs) {
722
781
let mut offset = 0 ;
723
782
while offset < idxs_in_p. len ( ) {
783
+ let matches_before_limit = probe_limit - probe_match. len ( ) as IdxSize ;
784
+ table_match. clear ( ) ;
724
785
offset += p. hash_table . probe_subset (
725
786
& hash_keys,
726
787
& idxs_in_p[ offset..] ,
727
788
& mut table_match,
728
789
& mut probe_match,
729
790
mark_matches,
730
791
emit_unmatched,
731
- probe_limit - probe_out . len ( ) as IdxSize ,
792
+ matches_before_limit ,
732
793
) as usize ;
733
794
734
- if probe_match . is_empty ( ) {
795
+ if table_match . is_empty ( ) {
735
796
continue ;
736
797
}
737
- total_matches += probe_match . len ( ) ;
798
+ total_matches += table_match . len ( ) ;
738
799
739
- // Gather output and send.
740
800
if emit_unmatched {
741
801
build_out. opt_gather_extend ( & p. payload , & table_match, ShareStrategy :: Always ) ;
742
802
} else {
743
803
build_out. gather_extend ( & p. payload , & table_match, ShareStrategy :: Always ) ;
744
804
} ;
745
- if !payload_rechunked {
746
- payload. rechunk_mut ( ) ;
747
- payload_rechunked = true ;
748
- }
749
- probe_out. gather_extend ( & payload, & probe_match, ShareStrategy :: Always ) ;
750
805
751
- if probe_out. len ( ) >= probe_limit as usize {
752
- let out_morsel = new_morsel ( build_out. freeze_reset ( ) , probe_out. freeze_reset ( ) ) ;
806
+ if probe_match. len ( ) >= probe_limit as usize {
807
+ if !payload_rechunked {
808
+ payload. rechunk_mut ( ) ;
809
+ payload_rechunked = true ;
810
+ }
811
+ probe_out. gather_extend ( & payload, & probe_match, ShareStrategy :: Always ) ;
812
+ probe_match. clear ( ) ;
813
+ let out_morsel = new_morsel ( & mut build_out, & mut probe_out) ;
753
814
if send. send ( out_morsel) . await . is_err ( ) {
754
815
return Ok ( max_seq) ;
755
816
}
756
817
// We had enough matches to need a mid-partition flush, let's assume there are a lot of
757
818
// matches and just do a large reserve.
758
819
build_out. reserve ( probe_limit as usize + max_match_per_key_est) ;
759
- probe_out. reserve ( probe_limit as usize + max_match_per_key_est) ;
760
820
}
761
821
}
762
822
}
763
823
764
- if !probe_out. is_empty ( ) {
765
- let out_morsel = new_morsel ( build_out. freeze_reset ( ) , probe_out. freeze_reset ( ) ) ;
824
+ if !probe_match. is_empty ( ) {
825
+ if !payload_rechunked {
826
+ payload. rechunk_mut ( ) ;
827
+ }
828
+ probe_out. gather_extend ( & payload, & probe_match, ShareStrategy :: Always ) ;
829
+ probe_match. clear ( ) ;
830
+ let out_morsel = new_morsel ( & mut build_out, & mut probe_out) ;
766
831
if send. send ( out_morsel) . await . is_err ( ) {
767
832
return Ok ( max_seq) ;
768
833
}
0 commit comments