@@ -851,8 +851,6 @@ def test_read_pandas_timedelta_dataframes(session, write_engine):
         .astype("timedelta64[ns]")
     )
 
-    if write_engine == "bigquery_streaming":
-        expected_df.index = pd.Index([pd.NA] * 3, dtype="Int64")
     pd.testing.assert_frame_equal(actual_result, expected_df, check_index_type=False)
 
 
@@ -869,16 +867,14 @@ def test_read_pandas_timedelta_series(session, write_engine):
         .astype("timedelta64[ns]")
     )
 
-    if write_engine == "bigquery_streaming":
-        expected_series.index = pd.Index([pd.NA] * 3, dtype="Int64")
     pd.testing.assert_series_equal(
         actual_result, expected_series, check_index_type=False
     )
 
 
 @pytest.mark.parametrize(
     "write_engine",
-    ["default", "bigquery_inline", "bigquery_load"],
+    ["default", "bigquery_inline", "bigquery_load", "bigquery_streaming"],
 )
 def test_read_pandas_timedelta_index(session, write_engine):
     expected_index = pd.to_timedelta(
@@ -918,14 +914,17 @@ def test_read_pandas_json_dataframes(session, write_engine):
         expected_df, write_engine=write_engine
     ).to_pandas()
 
-    if write_engine == "bigquery_streaming":
-        expected_df.index = pd.Index([pd.NA] * 4, dtype="Int64")
     pd.testing.assert_frame_equal(actual_result, expected_df, check_index_type=False)
 
 
 @pytest.mark.parametrize(
-    "write_engine",
-    ["default", "bigquery_load"],
+    ("write_engine"),
+    [
+        pytest.param("default"),
+        pytest.param("bigquery_load"),
+        pytest.param("bigquery_streaming"),
+        pytest.param("bigquery_inline", marks=pytest.mark.xfail(raises=ValueError)),
+    ],
 )
 def test_read_pandas_json_series(session, write_engine):
     json_data = [
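
Aside: the parametrizations above lean on `pytest.param` with `marks=pytest.mark.xfail(raises=ValueError)`. A minimal, self-contained sketch of how that mark behaves (not part of this commit; the test body below is hypothetical):

import pytest

@pytest.mark.parametrize(
    "write_engine",
    [
        pytest.param("bigquery_load"),
        # xfail(raises=ValueError): a raised ValueError is reported as XFAIL,
        # any other exception still fails the test, and an unexpected pass
        # shows up as XPASS (non-strict by default).
        pytest.param("bigquery_inline", marks=pytest.mark.xfail(raises=ValueError)),
    ],
)
def test_engine_sketch(write_engine):
    if write_engine == "bigquery_inline":
        raise ValueError("inline uploads do not support JSON columns")
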
@@ -949,6 +948,8 @@ def test_read_pandas_json_series(session, write_engine):
     [
         pytest.param("default"),
         pytest.param("bigquery_load"),
+        pytest.param("bigquery_streaming"),
+        pytest.param("bigquery_inline", marks=pytest.mark.xfail(raises=ValueError)),
     ],
 )
 def test_read_pandas_json_index(session, write_engine):
@@ -970,6 +971,8 @@ def test_read_pandas_json_index(session, write_engine):
     [
         pytest.param("default"),
         pytest.param("bigquery_load"),
+        pytest.param("bigquery_streaming"),
+        pytest.param("bigquery_inline", marks=pytest.mark.xfail(raises=ValueError)),
     ],
 )
 def test_read_pandas_w_nested_json(session, write_engine):
@@ -997,6 +1000,8 @@ def test_read_pandas_w_nested_json(session, write_engine):
     [
         pytest.param("default"),
         pytest.param("bigquery_load"),
+        pytest.param("bigquery_streaming"),
+        pytest.param("bigquery_inline", marks=pytest.mark.xfail(raises=ValueError)),
     ],
 )
 def test_read_pandas_w_nested_json_index(session, write_engine):
@@ -1031,52 +1036,43 @@ def test_read_pandas_w_nested_json_index(session, write_engine):
         ("bigquery_streaming",),
     ),
 )
-def test_read_csv_gcs_default_engine(session, scalars_dfs, gcs_folder, write_engine):
-    scalars_df, _ = scalars_dfs
+def test_read_csv_for_gcs_file_w_default_engine(
+    session, scalars_dfs, gcs_folder, write_engine
+):
+    scalars_df, scalars_pandas_df = scalars_dfs
     path = gcs_folder + "test_read_csv_gcs_default_engine_w_index*.csv"
     read_path = utils.get_first_file_from_wildcard(path)
-    scalars_df.to_csv(path, index=False)
+    scalars_df.to_csv(path, index=True)
     dtype = scalars_df.dtypes.to_dict()
     dtype.pop("geography_col")
-    df = session.read_csv(
+    result_df = session.read_csv(
         read_path,
         # Convert default pandas dtypes to match BigQuery DataFrames dtypes.
         dtype=dtype,
         write_engine=write_engine,
+        index_col="rowindex",
     )
 
-    # TODO(chelsealin): If we serialize the index, can more easily compare values.
-    pd.testing.assert_index_equal(df.columns, scalars_df.columns)
-
     # The auto detects of BigQuery load job have restrictions to detect the bytes,
-    # numeric and geometry types, so they're skipped here.
-    df = df.drop(columns=["bytes_col", "numeric_col", "geography_col"])
-    scalars_df = scalars_df.drop(columns=["bytes_col", "numeric_col", "geography_col"])
-    assert df.shape[0] == scalars_df.shape[0]
-    pd.testing.assert_series_equal(df.dtypes, scalars_df.dtypes)
+    # datetime, numeric and geometry types, so they're skipped here.
+    drop_columns = ["bytes_col", "numeric_col", "geography_col"]
+    result_df = result_df.drop(columns=drop_columns)
+    scalars_pandas_df = scalars_pandas_df.drop(columns=drop_columns)
+    pd.testing.assert_frame_equal(result_df.to_pandas(), scalars_pandas_df)
 
 
-def test_read_csv_gcs_bq_engine(session, scalars_dfs, gcs_folder):
-    scalars_df, _ = scalars_dfs
+def test_read_csv_for_gcs_file_w_bq_engine(session, scalars_dfs, gcs_folder):
+    scalars_df, scalars_pandas_df = scalars_dfs
     path = gcs_folder + "test_read_csv_gcs_bq_engine_w_index*.csv"
-    scalars_df.to_csv(path, index=False)
-    df = session.read_csv(
-        path,
-        engine="bigquery",
-        index_col=bigframes.enums.DefaultIndexKind.SEQUENTIAL_INT64,
-    )
-
-    # TODO(chelsealin): If we serialize the index, can more easily compare values.
-    pd.testing.assert_index_equal(df.columns, scalars_df.columns)
+    scalars_df.to_csv(path, index=True)
+    result_df = session.read_csv(path, engine="bigquery", index_col="rowindex")
 
     # The auto detects of BigQuery load job have restrictions to detect the bytes,
     # datetime, numeric and geometry types, so they're skipped here.
-    df = df.drop(columns=["bytes_col", "datetime_col", "numeric_col", "geography_col"])
-    scalars_df = scalars_df.drop(
-        columns=["bytes_col", "datetime_col", "numeric_col", "geography_col"]
-    )
-    assert df.shape[0] == scalars_df.shape[0]
-    pd.testing.assert_series_equal(df.dtypes, scalars_df.dtypes)
+    drop_columns = ["bytes_col", "datetime_col", "numeric_col", "geography_col"]
+    result_df = result_df.drop(columns=drop_columns)
+    scalars_pandas_df = scalars_pandas_df.drop(columns=drop_columns)
+    pd.testing.assert_frame_equal(result_df.to_pandas(), scalars_pandas_df)
 
 
 @pytest.mark.parametrize(
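
The pattern the rewritten tests adopt here is a plain index round-trip: write the index out with `to_csv(index=True)`, restore it on read with `index_col`, and then the whole frame (values and index, not just the column labels and dtypes) can be compared. A pandas-only sketch of the idea (the file path and frame are illustrative, not taken from the test suite):

import pandas as pd

df = pd.DataFrame({"a": [1, 2]}, index=pd.Index([10, 20], name="rowindex"))
df.to_csv("/tmp/roundtrip.csv", index=True)  # index serialized as a "rowindex" column
result = pd.read_csv("/tmp/roundtrip.csv", index_col="rowindex")
# Values and the restored index are both checked, unlike a columns-only assert.
pd.testing.assert_frame_equal(result, df)
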
@@ -1091,28 +1087,23 @@ def test_read_csv_local_default_engine(session, scalars_dfs, sep):
     scalars_df, scalars_pandas_df = scalars_dfs
     with tempfile.TemporaryDirectory() as dir:
         path = dir + "/test_read_csv_local_default_engine.csv"
-        # Using the pandas to_csv method because the BQ one does not support local write.
-        scalars_pandas_df.to_csv(path, index=False, sep=sep)
+        scalars_df.to_csv(path, index=True, sep=sep)
         dtype = scalars_df.dtypes.to_dict()
         dtype.pop("geography_col")
-        df = session.read_csv(
+        result_df = session.read_csv(
             path,
             sep=sep,
             # Convert default pandas dtypes to match BigQuery DataFrames dtypes.
             dtype=dtype,
+            index_col="rowindex",
         )
 
-        # TODO(chelsealin): If we serialize the index, can more easily compare values.
-        pd.testing.assert_index_equal(df.columns, scalars_df.columns)
-
         # The auto detects of BigQuery load job have restrictions to detect the bytes,
-        # numeric and geometry types, so they're skipped here.
-        df = df.drop(columns=["bytes_col", "numeric_col", "geography_col"])
-        scalars_df = scalars_df.drop(
-            columns=["bytes_col", "numeric_col", "geography_col"]
-        )
-        assert df.shape[0] == scalars_df.shape[0]
-        pd.testing.assert_series_equal(df.dtypes, scalars_df.dtypes)
+        # datetime, numeric and geometry types, so they're skipped here.
+        drop_columns = ["bytes_col", "numeric_col", "geography_col"]
+        result_df = result_df.drop(columns=drop_columns)
+        scalars_pandas_df = scalars_pandas_df.drop(columns=drop_columns)
+        pd.testing.assert_frame_equal(result_df.to_pandas(), scalars_pandas_df)
 
 
 @pytest.mark.parametrize(
@@ -1126,47 +1117,35 @@ def test_read_csv_local_bq_engine(session, scalars_dfs, sep):
     scalars_df, scalars_pandas_df = scalars_dfs
     with tempfile.TemporaryDirectory() as dir:
         path = dir + "/test_read_csv_local_bq_engine.csv"
-        # Using the pandas to_csv method because the BQ one does not support local write.
-        scalars_pandas_df.to_csv(path, index=False, sep=sep)
-        df = session.read_csv(path, engine="bigquery", sep=sep)
-
-        # TODO(chelsealin): If we serialize the index, can more easily compare values.
-        pd.testing.assert_index_equal(df.columns, scalars_df.columns)
+        scalars_df.to_csv(path, index=True, sep=sep)
+        result_df = session.read_csv(
+            path, engine="bigquery", sep=sep, index_col="rowindex"
+        )
 
         # The auto detects of BigQuery load job have restrictions to detect the bytes,
         # datetime, numeric and geometry types, so they're skipped here.
-        df = df.drop(
-            columns=["bytes_col", "datetime_col", "numeric_col", "geography_col"]
-        )
-        scalars_df = scalars_df.drop(
-            columns=["bytes_col", "datetime_col", "numeric_col", "geography_col"]
-        )
-        assert df.shape[0] == scalars_df.shape[0]
-        pd.testing.assert_series_equal(df.dtypes, scalars_df.dtypes)
+        drop_columns = ["bytes_col", "datetime_col", "numeric_col", "geography_col"]
+        result_df = result_df.drop(columns=drop_columns)
+        scalars_pandas_df = scalars_pandas_df.drop(columns=drop_columns)
+        pd.testing.assert_frame_equal(result_df.to_pandas(), scalars_pandas_df)
 
 
 def test_read_csv_localbuffer_bq_engine(session, scalars_dfs):
     scalars_df, scalars_pandas_df = scalars_dfs
     with tempfile.TemporaryDirectory() as dir:
         path = dir + "/test_read_csv_local_bq_engine.csv"
-        # Using the pandas to_csv method because the BQ one does not support local write.
-        scalars_pandas_df.to_csv(path, index=False)
+        scalars_df.to_csv(path, index=True)
         with open(path, "rb") as buffer:
-            df = session.read_csv(buffer, engine="bigquery")
-
-        # TODO(chelsealin): If we serialize the index, can more easily compare values.
-        pd.testing.assert_index_equal(df.columns, scalars_df.columns)
+            result_df = session.read_csv(
+                buffer, engine="bigquery", index_col="rowindex"
+            )
 
         # The auto detects of BigQuery load job have restrictions to detect the bytes,
         # datetime, numeric and geometry types, so they're skipped here.
-        df = df.drop(
-            columns=["bytes_col", "datetime_col", "numeric_col", "geography_col"]
-        )
-        scalars_df = scalars_df.drop(
-            columns=["bytes_col", "datetime_col", "numeric_col", "geography_col"]
-        )
-        assert df.shape[0] == scalars_df.shape[0]
-        pd.testing.assert_series_equal(df.dtypes, scalars_df.dtypes)
+        drop_columns = ["bytes_col", "datetime_col", "numeric_col", "geography_col"]
+        result_df = result_df.drop(columns=drop_columns)
+        scalars_pandas_df = scalars_pandas_df.drop(columns=drop_columns)
+        pd.testing.assert_frame_equal(result_df.to_pandas(), scalars_pandas_df)
 
 
 def test_read_csv_bq_engine_supports_index_col_false(
@@ -1420,19 +1399,16 @@ def test_read_csv_local_w_encoding(session, penguins_pandas_df_default_index, en
     with tempfile.TemporaryDirectory() as dir:
         path = dir + "/test_read_csv_local_w_encoding.csv"
         # Using the pandas to_csv method because the BQ one does not support local write.
-        penguins_pandas_df_default_index.to_csv(
-            path, index=False, encoding="ISO-8859-1"
-        )
+        penguins_pandas_df_default_index.index.name = "rowindex"
+        penguins_pandas_df_default_index.to_csv(path, index=True, encoding="ISO-8859-1")
 
         # File can only be read using the same character encoding as when written.
-        df = session.read_csv(path, engine=engine, encoding="ISO-8859-1")
-
-        # TODO(chelsealin): If we serialize the index, can more easily compare values.
-        pd.testing.assert_index_equal(
-            df.columns, penguins_pandas_df_default_index.columns
+        result_df = session.read_csv(
+            path, engine=engine, encoding="ISO-8859-1", index_col="rowindex"
+        )
+        pd.testing.assert_frame_equal(
+            result_df.to_pandas(), penguins_pandas_df_default_index
         )
-
-        assert df.shape[0] == penguins_pandas_df_default_index.shape[0]
 
 
 def test_read_pickle_local(session, penguins_pandas_df_default_index, tmp_path):
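
On the encoding hunk: the comment "File can only be read using the same character encoding as when written" is easy to verify in plain pandas. A small standalone sketch (hypothetical file path, not from the test suite) showing why the reader must match ISO-8859-1:

import pandas as pd

df = pd.DataFrame({"species": ["Adélie"]})
df.to_csv("/tmp/latin1.csv", index=False, encoding="ISO-8859-1")

# The same encoding round-trips cleanly.
ok = pd.read_csv("/tmp/latin1.csv", encoding="ISO-8859-1")
assert ok["species"].iloc[0] == "Adélie"

# The default UTF-8 decoder chokes on the single 0xE9 byte written for "é".
try:
    pd.read_csv("/tmp/latin1.csv", encoding="utf-8")
except UnicodeDecodeError:
    pass  # expected
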