@@ -105,6 +105,85 @@ def indicate_duplicates(
105
105
)
106
106
107
107
108
def interpolate_linear(block: blocks.Block) -> blocks.Block:
    """Linearly interpolate nulls in the block's Float64 and Int64 value columns.

    Row offsets obtained from ``block.promote_offsets()`` act as the x-axis.
    For every eligible column, the last non-null value over a backward window
    and the first non-null value over a forward window are collected together
    with the offsets of those values, and ``_interpolate`` turns them into a
    prediction. The prediction only fills rows where the column is null.
    Columns of any other dtype are passed through untouched.

    Args:
        block: Block whose value columns should be interpolated.

    Returns:
        A block holding one output column per original value column, in the
        original order and relabelled with the original column labels.
    """
    # NOTE(review): presumably following=0 covers all earlier rows through the
    # current row and preceding=0 covers the current row onward -- confirm
    # against windows.WindowSpec.
    look_back = windows.WindowSpec(following=0)
    look_ahead = windows.WindowSpec(preceding=0)
    interpolatable_dtypes = [pd.Float64Dtype(), pd.Int64Dtype()]

    source_ids = block.value_columns
    labels = block.column_labels
    block, row_offsets = block.promote_offsets()

    result_ids = []
    for col_id in source_ids:
        if block._column_type(col_id) not in interpolatable_dtypes:
            # Not a dtype handled here: emit the column unchanged.
            result_ids.append(col_id)
            continue

        # Offsets masked so they are null in the same places the column is
        # null; this lets the non-null window ops below find the offsets of
        # the neighbouring known values.
        block, has_value = block.apply_unary_op(col_id, ops.notnull_op)
        block, known_offsets = block.apply_binary_op(
            row_offsets, has_value, ops.partial_arg3(ops.where_op, None)
        )

        # Known values (y0, y1) around each row ...
        block, y_before = block.apply_window_op(
            col_id, agg_ops.LastNonNullOp(), look_back
        )
        block, y_after = block.apply_window_op(
            col_id, agg_ops.FirstNonNullOp(), look_ahead
        )
        # ... and the offsets (x0, x1) at which those values occur.
        block, x_before = block.apply_window_op(
            known_offsets, agg_ops.LastNonNullOp(), look_back
        )
        block, x_after = block.apply_window_op(
            known_offsets, agg_ops.FirstNonNullOp(), look_ahead
        )

        block, predicted = _interpolate(
            block,
            x_before,
            y_before,
            x_after,
            y_after,
            row_offsets,
        )

        # Keep original values; use the prediction only where they are null.
        block, filled = block.apply_binary_op(col_id, predicted, ops.fillna_op)
        # pandas linear interpolation also extrapolates forward like 'ffill',
        # so carry the last non-null result forward as well.
        block, filled = block.apply_window_op(
            filled, agg_ops.LastNonNullOp(), look_back
        )

        result_ids.append(filled)

    selected = block.select_columns(result_ids)
    return selected.with_column_labels(labels)
164
+
165
+
166
def _interpolate(
    block: blocks.Block,
    x0_id: str,
    y0_id: str,
    x1_id: str,
    y1_id: str,
    xpredict_id: str,
) -> typing.Tuple[blocks.Block, str]:
    """Apply the linear interpolation equation to predict y at ``xpredict``.

    Builds, column by column, the expression::

        y0 + (xpredict - x0) * ((y1 - y0) / (x1 - x0))

    Args:
        block: Block containing all of the referenced columns.
        x0_id: Column id of the x coordinate of the first known point.
        y0_id: Column id of the y coordinate of the first known point.
        x1_id: Column id of the x coordinate of the second known point.
        y1_id: Column id of the y coordinate of the second known point.
        xpredict_id: Column id of the x coordinate to predict for.

    Returns:
        A tuple of the updated block (with the intermediate columns dropped)
        and the id of the column holding the predicted y values.
    """
    # Differences feeding the slope and the distance from the first point.
    block, dx = block.apply_binary_op(x1_id, x0_id, ops.sub_op)
    block, dy = block.apply_binary_op(y1_id, y0_id, ops.sub_op)
    block, x_delta = block.apply_binary_op(xpredict_id, x0_id, ops.sub_op)

    # slope = dy / dx; rise = (xpredict - x0) * slope
    block, slope = block.apply_binary_op(dy, dx, ops.div_op)
    block, rise = block.apply_binary_op(x_delta, slope, ops.mul_op)

    block, prediction_id = block.apply_binary_op(y0_id, rise, ops.add_op)

    # Only the prediction column is kept; the temporaries are removed.
    temporaries = [dx, dy, x_delta, slope, rise]
    block = block.drop_columns(temporaries)
    return block, prediction_id
185
+
186
+
108
187
def drop_duplicates (
109
188
block : blocks .Block , columns : typing .Sequence [str ], keep : str = "first"
110
189
) -> blocks .Block :
0 commit comments