20
20
"aws_secret_access_key" : "some_secret" ,
21
21
"endpoint" : "https://external-s3.com" ,
22
22
"path_prefix" : "a_folder/" ,
23
- "start_date" : "2022-01-01T01:02:03Z"
24
-
23
+ "start_date" : "2022-01-01T01:02:03Z" ,
25
24
},
26
25
"format" : {
27
26
"filetype" : "avro" ,
28
27
},
29
28
"path_pattern" : "**/*.avro" ,
30
- "schema" : '{"col1": "string", "col2": "integer"}'
29
+ "schema" : '{"col1": "string", "col2": "integer"}' ,
31
30
},
32
31
{
33
32
"bucket" : "test_bucket" ,
42
41
"globs" : ["a_folder/**/*.avro" ],
43
42
"validation_policy" : "Emit Record" ,
44
43
"input_schema" : '{"col1": "string", "col2": "integer"}' ,
45
- "format" : {
46
- "filetype" : "avro"
47
- }
44
+ "format" : {"filetype" : "avro" },
48
45
}
49
- ]
50
- }
51
- , id = "test_convert_legacy_config"
46
+ ],
47
+ },
48
+ id = "test_convert_legacy_config" ,
52
49
),
53
50
pytest .param (
54
51
{
70
67
"file_type" : "avro" ,
71
68
"globs" : ["**/*.avro" ],
72
69
"validation_policy" : "Emit Record" ,
73
- "format" : {
74
- "filetype" : "avro"
75
- }
70
+ "format" : {"filetype" : "avro" },
76
71
}
77
- ]
78
- }
79
- , id = "test_convert_no_optional_fields"
72
+ ],
73
+ },
74
+ id = "test_convert_no_optional_fields" ,
80
75
),
81
- ]
76
+ ],
82
77
)
83
78
def test_convert_legacy_config (legacy_config , expected_config ):
84
79
parsed_legacy_config = SourceS3Spec (** legacy_config )
@@ -101,8 +96,8 @@ def test_convert_legacy_config(legacy_config, expected_config):
101
96
"encoding" : "ansi" ,
102
97
"double_quote" : False ,
103
98
"newlines_in_values" : True ,
104
- "additional_reader_options" : "{ \ " strings_can_be_null\ " : true}" ,
105
- "advanced_options" : "{ \ " skip_rows\ " : 3, \ " skip_rows_after_names\ " : 5, \ " autogenerate_column_names\ " : true}" ,
99
+ "additional_reader_options" : '{ "strings_can_be_null": true}' ,
100
+ "advanced_options" : '{ "skip_rows": 3, "skip_rows_after_names": 5, "autogenerate_column_names": true}' ,
106
101
"blocksize" : 20000 ,
107
102
},
108
103
{
@@ -122,7 +117,8 @@ def test_convert_legacy_config(legacy_config, expected_config):
122
117
"autogenerate_column_names" : True ,
123
118
},
124
119
None ,
125
- id = "test_csv_all_legacy_options_set" ),
120
+ id = "test_csv_all_legacy_options_set" ,
121
+ ),
126
122
pytest .param (
127
123
"csv" ,
128
124
{
@@ -145,14 +141,15 @@ def test_convert_legacy_config(legacy_config, expected_config):
145
141
"strings_can_be_null" : False ,
146
142
},
147
143
None ,
148
- id = "test_csv_only_required_options" ),
144
+ id = "test_csv_only_required_options" ,
145
+ ),
149
146
pytest .param (
150
147
"csv" ,
151
148
{},
152
149
{
153
150
"filetype" : "csv" ,
154
151
"delimiter" : "," ,
155
- "quote_char" : " \" " ,
152
+ "quote_char" : '"' ,
156
153
"encoding" : "utf8" ,
157
154
"double_quote" : True ,
158
155
"null_values" : ["" , "null" , "NULL" , "N/A" , "NA" , "NaN" , "None" ],
@@ -162,23 +159,26 @@ def test_convert_legacy_config(legacy_config, expected_config):
162
159
"strings_can_be_null" : False ,
163
160
},
164
161
None ,
165
- id = "test_csv_empty_format" ),
162
+ id = "test_csv_empty_format" ,
163
+ ),
166
164
pytest .param (
167
165
"csv" ,
168
166
{
169
- "additional_reader_options" : "{ \ " not_valid\ " : \ " at all}" ,
167
+ "additional_reader_options" : '{ "not_valid": "at all}' ,
170
168
},
171
169
None ,
172
170
ValueError ,
173
- id = "test_malformed_additional_reader_options" ),
171
+ id = "test_malformed_additional_reader_options" ,
172
+ ),
174
173
pytest .param (
175
174
"csv" ,
176
175
{
177
- "advanced_options" : "{ \ " not_valid\ " : \ " at all}" ,
176
+ "advanced_options" : '{ "not_valid": "at all}' ,
178
177
},
179
178
None ,
180
179
ValueError ,
181
- id = "test_malformed_advanced_options" ),
180
+ id = "test_malformed_advanced_options" ,
181
+ ),
182
182
pytest .param (
183
183
"jsonl" ,
184
184
{
@@ -187,11 +187,10 @@ def test_convert_legacy_config(legacy_config, expected_config):
187
187
"unexpected_field_behavior" : "ignore" ,
188
188
"block_size" : 0 ,
189
189
},
190
- {
191
- "filetype" : "jsonl"
192
- },
190
+ {"filetype" : "jsonl" },
193
191
None ,
194
- id = "test_jsonl_format" ),
192
+ id = "test_jsonl_format" ,
193
+ ),
195
194
pytest .param (
196
195
"parquet" ,
197
196
{
@@ -200,22 +199,20 @@ def test_convert_legacy_config(legacy_config, expected_config):
200
199
"batch_size" : 65536 ,
201
200
"buffer_size" : 100 ,
202
201
},
203
- {
204
- "filetype" : "parquet"
205
- },
202
+ {"filetype" : "parquet" , "decimal_as_float" : True },
206
203
None ,
207
- id = "test_parquet_format" ),
204
+ id = "test_parquet_format" ,
205
+ ),
208
206
pytest .param (
209
207
"avro" ,
210
208
{
211
209
"filetype" : "avro" ,
212
210
},
213
- {
214
- "filetype" : "avro"
215
- },
211
+ {"filetype" : "avro" },
216
212
None ,
217
- id = "test_avro_format" ),
218
- ]
213
+ id = "test_avro_format" ,
214
+ ),
215
+ ],
219
216
)
220
217
def test_convert_file_format (file_type , legacy_format_config , expected_format_config , expected_error ):
221
218
legacy_config = {
@@ -225,7 +222,6 @@ def test_convert_file_format(file_type, legacy_format_config, expected_format_co
225
222
"bucket" : "test_bucket" ,
226
223
"aws_access_key_id" : "some_access_key" ,
227
224
"aws_secret_access_key" : "some_secret" ,
228
-
229
225
},
230
226
"format" : legacy_format_config ,
231
227
"path_pattern" : f"**/*.{ file_type } " ,
@@ -241,9 +237,9 @@ def test_convert_file_format(file_type, legacy_format_config, expected_format_co
241
237
"file_type" : file_type ,
242
238
"globs" : [f"**/*.{ file_type } " ],
243
239
"validation_policy" : "Emit Record" ,
244
- "format" : expected_format_config
240
+ "format" : expected_format_config ,
245
241
}
246
- ]
242
+ ],
247
243
}
248
244
249
245
parsed_legacy_config = SourceS3Spec (** legacy_config )
0 commit comments