Fix df from result table error message + old types (#543)

mayamarom10 · web-flow · commit 5c1909339d30 · 2024-06-18T17:22:22.000+03:00
* Added dictionary options as parameters for type matching for dataframe_from_result_table func

* reformatted the file

* reformatted the file

* reformatted the file

* Fixes after PR

* Fixes after PR

* Fixes after PR

* Fixes after PR

* Fixes after PR

* For python 3.7 3.8

* Nullable bools fix

* Nullable bools fix

* Nullable bools fix

* Nullable bools fix2

* PR comment fixes

* LRU cache default parameters for python 3.7

* LRU cache maxsize=1

* modified changes requested

* black

* fix for numpy 2.0

* fix numpy 2.0 nan

* fix numpy 2.0 ninf, inf

* fix numpy 2.0 -inf

* fix tenacity>=8.3

* fix tenacity>=8.3

* black

* fixed error message in case of non-existent type + added old type names in kusto

* Delete azure-kusto-ingest/azure/kusto/ingest/V2/__init__.py

* Delete azure-kusto-ingest/azure/kusto/ingest/V2/blob_source.py

* Delete azure-kusto-ingest/azure/kusto/ingest/V2/compression_type.py

* Delete azure-kusto-ingest/azure/kusto/ingest/V2/ingestion_source.py

* Update setup.py

* Delete azure-kusto-ingest/azure/kusto/ingest/V2/kusto_storage_uploader.py

* Delete azure-kusto-ingest/azure/kusto/ingest/V2/local_source.py

* fixed error message in case of non-existent type + added old type names in kusto

* added test for missing and old types

* black

* using pytest.raises
diff --git a/azure-kusto-data/azure/kusto/data/helpers.py b/azure-kusto-data/azure/kusto/data/helpers.py
@@ -18,14 +18,22 @@ def default_dict() -> Converter:
     return {
         "string": lambda col, df: df[col].astype(pd.StringDtype()) if hasattr(pd, "StringDType") else df[col],
         "guid": lambda col, df: df[col],
+        "uuid": lambda col, df: df[col],
+        "uniqueid": lambda col, df: df[col],
         "dynamic": lambda col, df: df[col],
         "bool": lambda col, df: df[col].astype(bool),
+        "boolean": lambda col, df: df[col].astype(bool),
         "int": lambda col, df: df[col].astype(pd.Int32Dtype()),
+        "int32": lambda col, df: df[col].astype(pd.Int32Dtype()),
+        "int64": lambda col, df: df[col].astype(pd.Int64Dtype()),
         "long": lambda col, df: df[col].astype(pd.Int64Dtype()),
         "real": lambda col, df: parse_float(df, col),
+        "double": lambda col, df: parse_float(df, col),
         "decimal": lambda col, df: parse_float(df, col),
         "datetime": lambda col, df: parse_datetime(df, col),
+        "date": lambda col, df: parse_datetime(df, col),
         "timespan": lambda col, df: df[col].apply(parse_timedelta),
+        "time": lambda col, df: df[col].apply(parse_timedelta),
     }
 
 
@@ -67,13 +75,15 @@ def dataframe_from_result_table(
         column_name = col.column_name
         column_type = col.column_type
         if converters_by_column_name and column_name in converters_by_column_name:
-            converter = converters_by_column_name[column_name]
+            converter = converters_by_column_name.get(column_name)
         elif converters_by_type and column_type in converters_by_type:
-            converter = converters_by_type[column_type]
+            converter = converters_by_type.get(column_type)
         elif nullable_bools and column_type == "bool":
             converter = lambda col, df: df[col].astype(pd.BooleanDtype())
         else:
-            converter = default[column_type]
+            converter = default.get(column_type)
+        if converter is None:
+            raise Exception("Unexpected type " + column_type)
         if isinstance(converter, str):
             frame[column_name] = frame[column_name].astype(converter)
         else:
diff --git a/azure-kusto-data/tests/input/dataframe.json b/azure-kusto-data/tests/input/dataframe.json
@@ -73,13 +73,21 @@
         "ColumnName": "RecordReal",
         "ColumnType": "real"
       },
+      {
+        "ColumnName": "RecordDouble",
+        "ColumnType": "double"
+      },
       {
         "ColumnName": "RecordDecimal",
         "ColumnType": "decimal"
       },
       {
         "ColumnName": "RecordDynamic",
         "ColumnType": "dynamic"
+      },
+      {
+        "ColumnName": "MissingType",
+        "ColumnType": "missing"
       }
     ],
     "Rows": [
@@ -92,8 +100,8 @@
         222,
         92233720368,
         "6f3c1072-2739-461c-8aa7-3cfc8ff528a8",
-        3.14159, 1.2,
-        "{\"Visualization\":null,\"Title\":null,\"XColumn\":null,\"Series\":null,\"YColumns\":null,\"XTitle\":null,\"YTitle\":null,\"XAxis\":null,\"YAxis\":null,\"Legend\":null,\"YSplit\":null,\"Accumulate\":false,\"IsQuerySorted\":false,\"Kind\":null}"
+        3.14159, 7.89, 1.2,
+        "{\"Visualization\":null,\"Title\":null,\"XColumn\":null,\"Series\":null,\"YColumns\":null,\"XTitle\":null,\"YTitle\":null,\"XAxis\":null,\"YAxis\":null,\"Legend\":null,\"YSplit\":null,\"Accumulate\":false,\"IsQuerySorted\":false,\"Kind\":null}", "miss"
 
       ],
       [
@@ -105,8 +113,8 @@
         222,
         92233720368,
         "6f3c1072-2739-461c-8aa7-3cfc8ff528a8",
-        "NaN", "NaN",
-        "{\"Visualization\":null,\"Title\":null,\"XColumn\":null,\"Series\":null,\"YColumns\":null,\"XTitle\":null,\"YTitle\":null,\"XAxis\":null,\"YAxis\":null,\"Legend\":null,\"YSplit\":null,\"Accumulate\":false,\"IsQuerySorted\":false,\"Kind\":null}"
+        "NaN", "NaN", "NaN",
+        "{\"Visualization\":null,\"Title\":null,\"XColumn\":null,\"Series\":null,\"YColumns\":null,\"XTitle\":null,\"YTitle\":null,\"XAxis\":null,\"YAxis\":null,\"Legend\":null,\"YSplit\":null,\"Accumulate\":false,\"IsQuerySorted\":false,\"Kind\":null}", "miss"
 
       ],
       [
@@ -118,8 +126,8 @@
         222,
         92233720368,
         "6f3c1072-2739-461c-8aa7-3cfc8ff528a8",
-        "Infinity", "Infinity",
-        "{\"Visualization\":null,\"Title\":null,\"XColumn\":null,\"Series\":null,\"YColumns\":null,\"XTitle\":null,\"YTitle\":null,\"XAxis\":null,\"YAxis\":null,\"Legend\":null,\"YSplit\":null,\"Accumulate\":false,\"IsQuerySorted\":false,\"Kind\":null}"
+        "Infinity", "Infinity", "Infinity",
+        "{\"Visualization\":null,\"Title\":null,\"XColumn\":null,\"Series\":null,\"YColumns\":null,\"XTitle\":null,\"YTitle\":null,\"XAxis\":null,\"YAxis\":null,\"Legend\":null,\"YSplit\":null,\"Accumulate\":false,\"IsQuerySorted\":false,\"Kind\":null}", "miss"
 
       ],
       [
@@ -131,8 +139,8 @@
         222,
         92233720368,
         "6f3c1072-2739-461c-8aa7-3cfc8ff528a8",
-        "-Infinity", "-Infinity",
-        "{\"Visualization\":null,\"Title\":null,\"XColumn\":null,\"Series\":null,\"YColumns\":null,\"XTitle\":null,\"YTitle\":null,\"XAxis\":null,\"YAxis\":null,\"Legend\":null,\"YSplit\":null,\"Accumulate\":false,\"IsQuerySorted\":false,\"Kind\":null}"
+        "-Infinity", "-Infinity", "-Infinity",
+        "{\"Visualization\":null,\"Title\":null,\"XColumn\":null,\"Series\":null,\"YColumns\":null,\"XTitle\":null,\"YTitle\":null,\"XAxis\":null,\"YAxis\":null,\"Legend\":null,\"YSplit\":null,\"Accumulate\":false,\"IsQuerySorted\":false,\"Kind\":null}", "miss"
 
       ],
       [
@@ -144,8 +152,8 @@
         222,
         92233720368,
         "6f3c1072-2739-461c-8aa7-3cfc8ff528a8",
-        3.14159, 1.2,
-        "{\"Visualization\":null,\"Title\":null,\"XColumn\":null,\"Series\":null,\"YColumns\":null,\"XTitle\":null,\"YTitle\":null,\"XAxis\":null,\"YAxis\":null,\"Legend\":null,\"YSplit\":null,\"Accumulate\":false,\"IsQuerySorted\":false,\"Kind\":null}"
+        3.14159, 7.89, 1.2,
+        "{\"Visualization\":null,\"Title\":null,\"XColumn\":null,\"Series\":null,\"YColumns\":null,\"XTitle\":null,\"YTitle\":null,\"XAxis\":null,\"YAxis\":null,\"Legend\":null,\"YSplit\":null,\"Accumulate\":false,\"IsQuerySorted\":false,\"Kind\":null}", "miss"
 
       ],
       [
@@ -157,8 +165,8 @@
         222,
         92233720368,
         "6f3c1072-2739-461c-8aa7-3cfc8ff528a8",
-        3.14159, 1.2,
-        "{\"Visualization\":null,\"Title\":null,\"XColumn\":null,\"Series\":null,\"YColumns\":null,\"XTitle\":null,\"YTitle\":null,\"XAxis\":null,\"YAxis\":null,\"Legend\":null,\"YSplit\":null,\"Accumulate\":false,\"IsQuerySorted\":false,\"Kind\":null}"
+        3.14159, 7.89, 1.2,
+        "{\"Visualization\":null,\"Title\":null,\"XColumn\":null,\"Series\":null,\"YColumns\":null,\"XTitle\":null,\"YTitle\":null,\"XAxis\":null,\"YAxis\":null,\"Legend\":null,\"YSplit\":null,\"Accumulate\":false,\"IsQuerySorted\":false,\"Kind\":null}", "miss"
 
       ],
       [
@@ -170,8 +178,8 @@
         222,
         92233720368,
         "6f3c1072-2739-461c-8aa7-3cfc8ff528a8",
-        3.14159, 1.2,
-        "{\"Visualization\":null,\"Title\":null,\"XColumn\":null,\"Series\":null,\"YColumns\":null,\"XTitle\":null,\"YTitle\":null,\"XAxis\":null,\"YAxis\":null,\"Legend\":null,\"YSplit\":null,\"Accumulate\":false,\"IsQuerySorted\":false,\"Kind\":null}"
+        3.14159, 7.89, 1.2,
+        "{\"Visualization\":null,\"Title\":null,\"XColumn\":null,\"Series\":null,\"YColumns\":null,\"XTitle\":null,\"YTitle\":null,\"XAxis\":null,\"YAxis\":null,\"Legend\":null,\"YSplit\":null,\"Accumulate\":false,\"IsQuerySorted\":false,\"Kind\":null}", "miss"
 
       ],
       [
diff --git a/azure-kusto-data/tests/test_helpers.py b/azure-kusto-data/tests/test_helpers.py
@@ -4,6 +4,8 @@
 import json
 import os
 
+import pytest
+
 from azure.kusto.data._models import KustoResultTable
 from azure.kusto.data.helpers import dataframe_from_result_table
 from azure.kusto.data.response import KustoResponseDataSetV2
@@ -20,7 +22,11 @@ def test_dataframe_from_result_table():
 
 response = KustoResponseDataSetV2(json.loads(data))
 # Test when given both types of dictionary parameters that type conversion doesn't override column name conversion
-test_dict_by_name = {"RecordName": lambda col, frame: frame[col].astype("str"), "RecordInt64": lambda col, frame: frame[col].astype("int64")}
+test_dict_by_name = {
+    "RecordName": lambda col, frame: frame[col].astype("str"),
+    "RecordInt64": lambda col, frame: frame[col].astype("int64"),
+    "MissingType": lambda col, frame: frame[col].astype("str"),
+}
 test_dict_by_type = {"int": lambda col, frame: frame[col].astype("int32")}
 df = dataframe_from_result_table(response.primary_results[0], converters_by_type=test_dict_by_type, converters_by_column_name=test_dict_by_name)
 
@@ -52,6 +58,8 @@ def test_dataframe_from_result_table():
 assert df.iloc[0].RecordLong == 92233720368
 assert type(df.iloc[0].RecordReal) is numpy.float64
 assert df.iloc[0].RecordReal == 3.14159
+assert type(df.iloc[0].RecordDouble) is numpy.float64
+assert df.iloc[0].RecordDouble == 7.89
 assert type(df.iloc[0].RecordDecimal) is numpy.float64
 assert df.iloc[0].RecordDecimal == 1.2
 
@@ -90,10 +98,17 @@ def test_dataframe_from_result_table():
 
 # Testing int to float conversion
 test_int_to_float = {"int": "float64"}
-df_int_to_float = dataframe_from_result_table(response.primary_results[0], converters_by_type=test_int_to_float)
+ignore_missing_type = {
+    "MissingType": lambda col, frame: frame[col].astype("str"),
+}
+df_int_to_float = dataframe_from_result_table(response.primary_results[0], converters_by_type=test_int_to_float, converters_by_column_name=ignore_missing_type)
 assert type(df_int_to_float.iloc[0].RecordInt) is numpy.float64
 assert df.iloc[0].RecordInt == 5678
 
+# Testing missing type conversion
+with pytest.raises(Exception):
+    df_missing_type = dataframe_from_result_table(response.primary_results[0])
+
 
 def test_pandas_mixed_date():
     df = dataframe_from_result_table(