7
7
8
8
import pytest
from airbyte_cdk.utils import AirbyteTracedException
from pandas import read_csv, read_excel, testing
from paramiko import SSHException
from source_file.client import Client, URLFile
from source_file.utils import backoff_handler
from urllib3.exceptions import ProtocolError
14
15
15
16
@@ -34,21 +35,22 @@ def csv_format_client():
34
35
35
36
36
37
@pytest.mark.parametrize(
    "storage, expected_scheme, url",
    [
        ("GCS", "gs://", "http://localhost"),
        ("S3", "s3://", "http://localhost"),
        ("AZBLOB", "azure://", "http://localhost"),
        ("HTTPS", "https://", "http://localhost"),
        ("SSH", "scp://", "http://localhost"),
        ("SCP", "scp://", "http://localhost"),
        ("SFTP", "sftp://", "http://localhost"),
        ("WEBHDFS", "webhdfs://", "http://localhost"),
        ("LOCAL", "file://", "http://localhost"),
        # Unknown storage: expected to resolve to an empty scheme rather than raise.
        ("WRONG", "", ""),
    ],
)
def test_storage_scheme(storage, expected_scheme, url):
    """Each provider storage name maps to its URL scheme; an unrecognized name yields ""."""
    urlfile = URLFile(provider={"storage": storage}, url=url)
    assert urlfile.storage_scheme == expected_scheme
53
55
54
56
@@ -80,8 +82,27 @@ def test_load_dataframes_xlsb(config, absolute_path, test_files):
80
82
assert read_file .equals (expected )
81
83
82
84
83
@pytest.mark.parametrize("file_name, should_raise_error", [("test.xlsx", False), ("test_one_line.xlsx", True)])
def test_load_dataframes_xlsx(config, absolute_path, test_files, file_name, should_raise_error):
    """load_dataframes on an .xlsx file matches pandas' openpyxl read; a one-line file raises AirbyteTracedException."""
    config["format"] = "excel"
    client = Client(**config)
    f = f"{absolute_path}/{test_files}/{file_name}"
    if should_raise_error:
        with pytest.raises(AirbyteTracedException):
            next(client.load_dataframes(fp=f))
    else:
        read_file = next(client.load_dataframes(fp=f))
        # Reference frame read directly with pandas for comparison.
        expected = read_excel(f, engine="openpyxl")
        assert read_file.equals(expected)
99
@pytest.mark.parametrize(
    "file_format, file_path",
    [
        ("json", "formats/json/demo.json"),
        ("jsonl", "formats/jsonl/jsonl_nested.jsonl"),
    ],
)
def test_load_nested_json(client, config, absolute_path, test_files, file_format, file_path):
    """load_nested_json returns a truthy result for both json and jsonl fixture files."""
    if file_format == "jsonl":
        # The default client fixture is not configured for jsonl; rebuild it with the right format.
        config["format"] = file_format
        client = Client(**config)
    f = f"{absolute_path}/{test_files}/{file_path}"
    with open(f, mode="rb") as file:
        assert client.load_nested_json(fp=file)
87
108
@@ -189,3 +210,11 @@ def patched_open(self):
189
210
assert call_count == 7
190
211
191
212
assert sleep_mock .call_count == 5
213
+
214
+
215
def test_backoff_handler(caplog):
    """backoff_handler emits one INFO record on the 'airbyte' logger built from the details dict."""
    details = {"tries": 1, "wait": 1}
    backoff_handler(details)
    # (logger name, level, message); 20 == logging.INFO
    expected = [("airbyte", 20, "Caught retryable error after 1 tries. Waiting 1 seconds then retrying...")]

    assert caplog.record_tuples == expected
0 commit comments