17
17
18
18
use arrow:: array:: { ArrayRef , BooleanArray } ;
19
19
use arrow:: datatypes:: { DataType , Field , Int32Type , Schema } ;
20
- use arrow:: util:: bench_util:: { create_boolean_array, create_primitive_array} ;
20
+ use arrow:: util:: bench_util:: {
21
+ create_boolean_array, create_dict_from_values, create_primitive_array,
22
+ create_string_array_with_len,
23
+ } ;
21
24
use criterion:: { black_box, criterion_group, criterion_main, Criterion } ;
22
- use datafusion_expr:: { function:: AccumulatorArgs , AggregateUDFImpl , GroupsAccumulator } ;
25
+ use datafusion_expr:: {
26
+ function:: AccumulatorArgs , Accumulator , AggregateUDFImpl , GroupsAccumulator ,
27
+ } ;
23
28
use datafusion_functions_aggregate:: count:: Count ;
24
29
use datafusion_physical_expr:: expressions:: col;
25
30
use datafusion_physical_expr_common:: sort_expr:: LexOrdering ;
26
31
use std:: sync:: Arc ;
27
32
28
- fn prepare_accumulator ( ) -> Box < dyn GroupsAccumulator > {
33
+ fn prepare_group_accumulator ( ) -> Box < dyn GroupsAccumulator > {
29
34
let schema = Arc :: new ( Schema :: new ( vec ! [ Field :: new( "f" , DataType :: Int32 , true ) ] ) ) ;
30
35
let accumulator_args = AccumulatorArgs {
31
36
return_field : Field :: new ( "f" , DataType :: Int64 , true ) . into ( ) ,
@@ -44,13 +49,34 @@ fn prepare_accumulator() -> Box<dyn GroupsAccumulator> {
44
49
. unwrap ( )
45
50
}
46
51
52
+ fn prepare_accumulator ( ) -> Box < dyn Accumulator > {
53
+ let schema = Arc :: new ( Schema :: new ( vec ! [ Field :: new(
54
+ "f" ,
55
+ DataType :: Dictionary ( Box :: new( DataType :: Int32 ) , Box :: new( DataType :: Utf8 ) ) ,
56
+ true ,
57
+ ) ] ) ) ;
58
+ let accumulator_args = AccumulatorArgs {
59
+ return_field : Arc :: new ( Field :: new_list_field ( DataType :: Int64 , true ) ) ,
60
+ schema : & schema,
61
+ ignore_nulls : false ,
62
+ ordering_req : & LexOrdering :: default ( ) ,
63
+ is_reversed : false ,
64
+ name : "COUNT(f)" ,
65
+ is_distinct : true ,
66
+ exprs : & [ col ( "f" , & schema) . unwrap ( ) ] ,
67
+ } ;
68
+ let count_fn = Count :: new ( ) ;
69
+
70
+ count_fn. accumulator ( accumulator_args) . unwrap ( )
71
+ }
72
+
47
73
fn convert_to_state_bench (
48
74
c : & mut Criterion ,
49
75
name : & str ,
50
76
values : ArrayRef ,
51
77
opt_filter : Option < & BooleanArray > ,
52
78
) {
53
- let accumulator = prepare_accumulator ( ) ;
79
+ let accumulator = prepare_group_accumulator ( ) ;
54
80
c. bench_function ( name, |b| {
55
81
b. iter ( || {
56
82
black_box (
@@ -89,6 +115,18 @@ fn count_benchmark(c: &mut Criterion) {
89
115
values,
90
116
Some ( & filter) ,
91
117
) ;
118
+
119
+ let arr = create_string_array_with_len :: < i32 > ( 20 , 0.0 , 50 ) ;
120
+ let values =
121
+ Arc :: new ( create_dict_from_values :: < Int32Type > ( 200_000 , 0.8 , & arr) ) as ArrayRef ;
122
+
123
+ let mut accumulator = prepare_accumulator ( ) ;
124
+ c. bench_function ( "count low cardinality dict 20% nulls, no filter" , |b| {
125
+ b. iter ( || {
126
+ #[ allow( clippy:: unit_arg) ]
127
+ black_box ( accumulator. update_batch ( & [ values. clone ( ) ] ) . unwrap ( ) )
128
+ } )
129
+ } ) ;
92
130
}
93
131
94
132
criterion_group ! ( benches, count_benchmark) ;
0 commit comments