Skip to content

Commit 5c19093

Browse files
authored
Fix df from result table error message + old types (#543)
* Added dictionary options as parameters for type matching for dataframe_from_result_table func * reformatted the file * reformatted the file * reformatted the file * Fixes after PR * Fixes after PR * Fixes after PR * Fixes after PR * Fixes after PR * For python 3.7 3.8 * Nullable bools fix * Nullable bools fix * Nullable bools fix * Nullable bools fix2 * PR comment fixes * LRU cache default parameters for python 3.7 * LRU cache maxsize=1 * modified changes requested * black * fix for numpy 2.0 * fix numpy 2.0 nan * fix numpy 2.0 ninf, inf * fix numpy 2.0 -inf * fix tenacity>=8.3 * fix tenacity>=8.3 * black * fixed error message in case of non-existent type + added old type names in kusto * Delete azure-kusto-ingest/azure/kusto/ingest/V2/__init__.py * Delete azure-kusto-ingest/azure/kusto/ingest/V2/blob_source.py * Delete azure-kusto-ingest/azure/kusto/ingest/V2/compression_type.py * Delete azure-kusto-ingest/azure/kusto/ingest/V2/ingestion_source.py * Update setup.py * Delete azure-kusto-ingest/azure/kusto/ingest/V2/kusto_storage_uploader.py * Delete azure-kusto-ingest/azure/kusto/ingest/V2/local_source.py * fixed error message in case of non-existent type + added old type names in kusto * added test for missing and old types * black * using pytest.raises
1 parent 0865ed5 commit 5c19093

File tree

3 files changed

+52
-19
lines changed

3 files changed

+52
-19
lines changed

azure-kusto-data/azure/kusto/data/helpers.py

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,14 +18,22 @@ def default_dict() -> Converter:
1818
return {
1919
"string": lambda col, df: df[col].astype(pd.StringDtype()) if hasattr(pd, "StringDType") else df[col],
2020
"guid": lambda col, df: df[col],
21+
"uuid": lambda col, df: df[col],
22+
"uniqueid": lambda col, df: df[col],
2123
"dynamic": lambda col, df: df[col],
2224
"bool": lambda col, df: df[col].astype(bool),
25+
"boolean": lambda col, df: df[col].astype(bool),
2326
"int": lambda col, df: df[col].astype(pd.Int32Dtype()),
27+
"int32": lambda col, df: df[col].astype(pd.Int32Dtype()),
28+
"int64": lambda col, df: df[col].astype(pd.Int64Dtype()),
2429
"long": lambda col, df: df[col].astype(pd.Int64Dtype()),
2530
"real": lambda col, df: parse_float(df, col),
31+
"double": lambda col, df: parse_float(df, col),
2632
"decimal": lambda col, df: parse_float(df, col),
2733
"datetime": lambda col, df: parse_datetime(df, col),
34+
"date": lambda col, df: parse_datetime(df, col),
2835
"timespan": lambda col, df: df[col].apply(parse_timedelta),
36+
"time": lambda col, df: df[col].apply(parse_timedelta),
2937
}
3038

3139

@@ -67,13 +75,15 @@ def dataframe_from_result_table(
6775
column_name = col.column_name
6876
column_type = col.column_type
6977
if converters_by_column_name and column_name in converters_by_column_name:
70-
converter = converters_by_column_name[column_name]
78+
converter = converters_by_column_name.get(column_name)
7179
elif converters_by_type and column_type in converters_by_type:
72-
converter = converters_by_type[column_type]
80+
converter = converters_by_type.get(column_type)
7381
elif nullable_bools and column_type == "bool":
7482
converter = lambda col, df: df[col].astype(pd.BooleanDtype())
7583
else:
76-
converter = default[column_type]
84+
converter = default.get(column_type)
85+
if converter is None:
86+
raise Exception("Unexpected type " + column_type)
7787
if isinstance(converter, str):
7888
frame[column_name] = frame[column_name].astype(converter)
7989
else:

azure-kusto-data/tests/input/dataframe.json

Lines changed: 22 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -73,13 +73,21 @@
7373
"ColumnName": "RecordReal",
7474
"ColumnType": "real"
7575
},
76+
{
77+
"ColumnName": "RecordDouble",
78+
"ColumnType": "double"
79+
},
7680
{
7781
"ColumnName": "RecordDecimal",
7882
"ColumnType": "decimal"
7983
},
8084
{
8185
"ColumnName": "RecordDynamic",
8286
"ColumnType": "dynamic"
87+
},
88+
{
89+
"ColumnName": "MissingType",
90+
"ColumnType": "missing"
8391
}
8492
],
8593
"Rows": [
@@ -92,8 +100,8 @@
92100
222,
93101
92233720368,
94102
"6f3c1072-2739-461c-8aa7-3cfc8ff528a8",
95-
3.14159, 1.2,
96-
"{\"Visualization\":null,\"Title\":null,\"XColumn\":null,\"Series\":null,\"YColumns\":null,\"XTitle\":null,\"YTitle\":null,\"XAxis\":null,\"YAxis\":null,\"Legend\":null,\"YSplit\":null,\"Accumulate\":false,\"IsQuerySorted\":false,\"Kind\":null}"
103+
3.14159, 7.89, 1.2,
104+
"{\"Visualization\":null,\"Title\":null,\"XColumn\":null,\"Series\":null,\"YColumns\":null,\"XTitle\":null,\"YTitle\":null,\"XAxis\":null,\"YAxis\":null,\"Legend\":null,\"YSplit\":null,\"Accumulate\":false,\"IsQuerySorted\":false,\"Kind\":null}", "miss"
97105

98106
],
99107
[
@@ -105,8 +113,8 @@
105113
222,
106114
92233720368,
107115
"6f3c1072-2739-461c-8aa7-3cfc8ff528a8",
108-
"NaN", "NaN",
109-
"{\"Visualization\":null,\"Title\":null,\"XColumn\":null,\"Series\":null,\"YColumns\":null,\"XTitle\":null,\"YTitle\":null,\"XAxis\":null,\"YAxis\":null,\"Legend\":null,\"YSplit\":null,\"Accumulate\":false,\"IsQuerySorted\":false,\"Kind\":null}"
116+
"NaN", "NaN", "NaN",
117+
"{\"Visualization\":null,\"Title\":null,\"XColumn\":null,\"Series\":null,\"YColumns\":null,\"XTitle\":null,\"YTitle\":null,\"XAxis\":null,\"YAxis\":null,\"Legend\":null,\"YSplit\":null,\"Accumulate\":false,\"IsQuerySorted\":false,\"Kind\":null}", "miss"
110118

111119
],
112120
[
@@ -118,8 +126,8 @@
118126
222,
119127
92233720368,
120128
"6f3c1072-2739-461c-8aa7-3cfc8ff528a8",
121-
"Infinity", "Infinity",
122-
"{\"Visualization\":null,\"Title\":null,\"XColumn\":null,\"Series\":null,\"YColumns\":null,\"XTitle\":null,\"YTitle\":null,\"XAxis\":null,\"YAxis\":null,\"Legend\":null,\"YSplit\":null,\"Accumulate\":false,\"IsQuerySorted\":false,\"Kind\":null}"
129+
"Infinity", "Infinity", "Infinity",
130+
"{\"Visualization\":null,\"Title\":null,\"XColumn\":null,\"Series\":null,\"YColumns\":null,\"XTitle\":null,\"YTitle\":null,\"XAxis\":null,\"YAxis\":null,\"Legend\":null,\"YSplit\":null,\"Accumulate\":false,\"IsQuerySorted\":false,\"Kind\":null}", "miss"
123131

124132
],
125133
[
@@ -131,8 +139,8 @@
131139
222,
132140
92233720368,
133141
"6f3c1072-2739-461c-8aa7-3cfc8ff528a8",
134-
"-Infinity", "-Infinity",
135-
"{\"Visualization\":null,\"Title\":null,\"XColumn\":null,\"Series\":null,\"YColumns\":null,\"XTitle\":null,\"YTitle\":null,\"XAxis\":null,\"YAxis\":null,\"Legend\":null,\"YSplit\":null,\"Accumulate\":false,\"IsQuerySorted\":false,\"Kind\":null}"
142+
"-Infinity", "-Infinity", "-Infinity",
143+
"{\"Visualization\":null,\"Title\":null,\"XColumn\":null,\"Series\":null,\"YColumns\":null,\"XTitle\":null,\"YTitle\":null,\"XAxis\":null,\"YAxis\":null,\"Legend\":null,\"YSplit\":null,\"Accumulate\":false,\"IsQuerySorted\":false,\"Kind\":null}", "miss"
136144

137145
],
138146
[
@@ -144,8 +152,8 @@
144152
222,
145153
92233720368,
146154
"6f3c1072-2739-461c-8aa7-3cfc8ff528a8",
147-
3.14159, 1.2,
148-
"{\"Visualization\":null,\"Title\":null,\"XColumn\":null,\"Series\":null,\"YColumns\":null,\"XTitle\":null,\"YTitle\":null,\"XAxis\":null,\"YAxis\":null,\"Legend\":null,\"YSplit\":null,\"Accumulate\":false,\"IsQuerySorted\":false,\"Kind\":null}"
155+
3.14159, 7.89, 1.2,
156+
"{\"Visualization\":null,\"Title\":null,\"XColumn\":null,\"Series\":null,\"YColumns\":null,\"XTitle\":null,\"YTitle\":null,\"XAxis\":null,\"YAxis\":null,\"Legend\":null,\"YSplit\":null,\"Accumulate\":false,\"IsQuerySorted\":false,\"Kind\":null}", "miss"
149157

150158
],
151159
[
@@ -157,8 +165,8 @@
157165
222,
158166
92233720368,
159167
"6f3c1072-2739-461c-8aa7-3cfc8ff528a8",
160-
3.14159, 1.2,
161-
"{\"Visualization\":null,\"Title\":null,\"XColumn\":null,\"Series\":null,\"YColumns\":null,\"XTitle\":null,\"YTitle\":null,\"XAxis\":null,\"YAxis\":null,\"Legend\":null,\"YSplit\":null,\"Accumulate\":false,\"IsQuerySorted\":false,\"Kind\":null}"
168+
3.14159, 7.89, 1.2,
169+
"{\"Visualization\":null,\"Title\":null,\"XColumn\":null,\"Series\":null,\"YColumns\":null,\"XTitle\":null,\"YTitle\":null,\"XAxis\":null,\"YAxis\":null,\"Legend\":null,\"YSplit\":null,\"Accumulate\":false,\"IsQuerySorted\":false,\"Kind\":null}", "miss"
162170

163171
],
164172
[
@@ -170,8 +178,8 @@
170178
222,
171179
92233720368,
172180
"6f3c1072-2739-461c-8aa7-3cfc8ff528a8",
173-
3.14159, 1.2,
174-
"{\"Visualization\":null,\"Title\":null,\"XColumn\":null,\"Series\":null,\"YColumns\":null,\"XTitle\":null,\"YTitle\":null,\"XAxis\":null,\"YAxis\":null,\"Legend\":null,\"YSplit\":null,\"Accumulate\":false,\"IsQuerySorted\":false,\"Kind\":null}"
181+
3.14159, 7.89, 1.2,
182+
"{\"Visualization\":null,\"Title\":null,\"XColumn\":null,\"Series\":null,\"YColumns\":null,\"XTitle\":null,\"YTitle\":null,\"XAxis\":null,\"YAxis\":null,\"Legend\":null,\"YSplit\":null,\"Accumulate\":false,\"IsQuerySorted\":false,\"Kind\":null}", "miss"
175183

176184
],
177185
[

azure-kusto-data/tests/test_helpers.py

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
import json
55
import os
66

7+
import pytest
8+
79
from azure.kusto.data._models import KustoResultTable
810
from azure.kusto.data.helpers import dataframe_from_result_table
911
from azure.kusto.data.response import KustoResponseDataSetV2
@@ -20,7 +22,11 @@ def test_dataframe_from_result_table():
2022

2123
response = KustoResponseDataSetV2(json.loads(data))
2224
# Test when given both types of dictionary parameters that type conversion doesn't override column name conversion
23-
test_dict_by_name = {"RecordName": lambda col, frame: frame[col].astype("str"), "RecordInt64": lambda col, frame: frame[col].astype("int64")}
25+
test_dict_by_name = {
26+
"RecordName": lambda col, frame: frame[col].astype("str"),
27+
"RecordInt64": lambda col, frame: frame[col].astype("int64"),
28+
"MissingType": lambda col, frame: frame[col].astype("str"),
29+
}
2430
test_dict_by_type = {"int": lambda col, frame: frame[col].astype("int32")}
2531
df = dataframe_from_result_table(response.primary_results[0], converters_by_type=test_dict_by_type, converters_by_column_name=test_dict_by_name)
2632

@@ -52,6 +58,8 @@ def test_dataframe_from_result_table():
5258
assert df.iloc[0].RecordLong == 92233720368
5359
assert type(df.iloc[0].RecordReal) is numpy.float64
5460
assert df.iloc[0].RecordReal == 3.14159
61+
assert type(df.iloc[0].RecordDouble) is numpy.float64
62+
assert df.iloc[0].RecordDouble == 7.89
5563
assert type(df.iloc[0].RecordDecimal) is numpy.float64
5664
assert df.iloc[0].RecordDecimal == 1.2
5765

@@ -90,10 +98,17 @@ def test_dataframe_from_result_table():
9098

9199
# Testing int to float conversion
92100
test_int_to_float = {"int": "float64"}
93-
df_int_to_float = dataframe_from_result_table(response.primary_results[0], converters_by_type=test_int_to_float)
101+
ignore_missing_type = {
102+
"MissingType": lambda col, frame: frame[col].astype("str"),
103+
}
104+
df_int_to_float = dataframe_from_result_table(response.primary_results[0], converters_by_type=test_int_to_float, converters_by_column_name=ignore_missing_type)
94105
assert type(df_int_to_float.iloc[0].RecordInt) is numpy.float64
95106
assert df.iloc[0].RecordInt == 5678
96107

108+
# Testing missing type conversion
109+
with pytest.raises(Exception):
110+
df_missing_type = dataframe_from_result_table(response.primary_results[0])
111+
97112

98113
def test_pandas_mixed_date():
99114
df = dataframe_from_result_table(

0 commit comments

Comments
 (0)