@@ -690,6 +690,44 @@ def compile_variance(t, expr, scope, timecontext, context=None, **kwargs):
690
690
)
691
691
692
692
693
@compiles(ops.Covariance)
def compile_covariance(t, expr, scope, timecontext, context=None, **kwargs):
    """Compile an ``ops.Covariance`` expression to a PySpark covariance aggregate.

    Dispatches to ``covar_samp`` or ``covar_pop`` based on ``op.how`` and
    casts both operands to double, since PySpark's covariance functions
    expect floating-point inputs.
    """
    operation = expr.op()
    how = operation.how

    # Lookup raises KeyError for any ``how`` other than the two supported
    # variants, which is the intended failure mode for unknown values.
    spark_fn = {"sample": F.covar_samp, "pop": F.covar_pop}[how]

    # Rebuild the operation with both sides cast to double before compiling.
    double_type = ibis_dtype_to_spark_dtype(dtypes.double)
    casted_expr = operation.__class__(
        left=operation.left.cast(double_type),
        right=operation.right.cast(double_type),
        how=how,
        where=operation.where,
    ).to_expr()
    return compile_aggregator(
        t, casted_expr, scope, timecontext, fn=spark_fn, context=context
    )
710
+
711
+
712
@compiles(ops.Correlation)
def compile_correlation(t, expr, scope, timecontext, context=None, **kwargs):
    """Compile an ``ops.Correlation`` expression to PySpark's ``corr``.

    Only the sample correlation is available in PySpark, so a ``how`` of
    ``"pop"`` is rejected up front.  Both operands are cast to double
    before aggregation.
    """
    operation = expr.op()

    how = operation.how
    if how == "pop":
        raise ValueError("PySpark only implements sample correlation")

    # Rebuild the operation with both sides cast to double before compiling.
    double_type = ibis_dtype_to_spark_dtype(dtypes.double)
    casted_expr = operation.__class__(
        left=operation.left.cast(double_type),
        right=operation.right.cast(double_type),
        how=how,
        where=operation.where,
    ).to_expr()
    return compile_aggregator(
        t, casted_expr, scope, timecontext, fn=F.corr, context=context
    )
729
+
730
+
693
731
@compiles (ops .Arbitrary )
694
732
def compile_arbitrary (t , expr , scope , timecontext , context = None , ** kwargs ):
695
733
how = expr .op ().how
0 commit comments