 import mock
 import pytest
 import six
+from six.moves import queue
 
 import google.api_core.exceptions
 
@@ -1816,9 +1817,12 @@ def test_to_dataframe_w_bqstorage_nonempty(self):
         bqstorage_client = mock.create_autospec(
             bigquery_storage_v1beta1.BigQueryStorageClient
         )
-        session = bigquery_storage_v1beta1.types.ReadSession(
-            streams=[{"name": "/projects/proj/dataset/dset/tables/tbl/streams/1234"}]
-        )
+        streams = [
+            # Use two streams so we can check frames are read from each stream.
+            {"name": "/projects/proj/dataset/dset/tables/tbl/streams/1234"},
+            {"name": "/projects/proj/dataset/dset/tables/tbl/streams/5678"},
+        ]
+        session = bigquery_storage_v1beta1.types.ReadSession(streams=streams)
         session.avro_schema.schema = json.dumps(
             {
                 "fields": [
@@ -1836,20 +1840,25 @@ def test_to_dataframe_w_bqstorage_nonempty(self):
 
         mock_rows = mock.create_autospec(reader.ReadRowsIterable)
         mock_rowstream.rows.return_value = mock_rows
+        page_items = [
+            {"colA": 1, "colB": "abc", "colC": 2.0},
+            {"colA": -1, "colB": "def", "colC": 4.0},
+        ]
 
         def blocking_to_dataframe(*args, **kwargs):
             # Sleep for longer than the waiting interval so that we know we're
             # only reading one page per loop at most.
             time.sleep(2 * mut._PROGRESS_INTERVAL)
-            return pandas.DataFrame(
-                {"colA": [1, -1], "colB": ["abc", "def"], "colC": [2.0, 4.0]},
-                columns=["colA", "colB", "colC"],
-            )
+            return pandas.DataFrame(page_items, columns=["colA", "colB", "colC"])
 
         mock_page = mock.create_autospec(reader.ReadRowsPage)
         mock_page.to_dataframe.side_effect = blocking_to_dataframe
-        mock_pages = mock.PropertyMock(return_value=(mock_page, mock_page, mock_page))
-        type(mock_rows).pages = mock_pages
+        mock_pages = (mock_page, mock_page, mock_page)
+        type(mock_rows).pages = mock.PropertyMock(return_value=mock_pages)
+
+        # Test that full queue errors are ignored.
+        mock_queue = mock.create_autospec(mut._NoopProgressBarQueue)
+        mock_queue().put_nowait.side_effect = queue.Full
 
         schema = [
             schema.SchemaField("colA", "IGNORED"),
@@ -1866,17 +1875,100 @@ def blocking_to_dataframe(*args, **kwargs):
             selected_fields=schema,
         )
 
-        with mock.patch(
+        with mock.patch.object(mut, "_NoopProgressBarQueue", mock_queue), mock.patch(
             "concurrent.futures.wait", wraps=concurrent.futures.wait
         ) as mock_wait:
             got = row_iterator.to_dataframe(bqstorage_client=bqstorage_client)
 
+        # Are the columns in the expected order?
         column_names = ["colA", "colC", "colB"]
         self.assertEqual(list(got), column_names)
-        self.assertEqual(len(got.index), 6)
+
+        # Do we have the expected number of rows?
+        total_pages = len(streams) * len(mock_pages)
+        total_rows = len(page_items) * total_pages
+        self.assertEqual(len(got.index), total_rows)
+
         # Make sure that this test looped through multiple progress intervals.
         self.assertGreaterEqual(mock_wait.call_count, 2)
 
+        # Make sure that this test pushed to the progress queue.
+        self.assertEqual(mock_queue().put_nowait.call_count, total_pages)
+
+    @unittest.skipIf(pandas is None, "Requires `pandas`")
+    @unittest.skipIf(
+        bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`"
+    )
+    @unittest.skipIf(tqdm is None, "Requires `tqdm`")
+    @mock.patch("tqdm.tqdm")
+    def test_to_dataframe_w_bqstorage_updates_progress_bar(self, tqdm_mock):
+        from google.cloud.bigquery import schema
+        from google.cloud.bigquery import table as mut
+        from google.cloud.bigquery_storage_v1beta1 import reader
+
+        # Speed up testing.
+        mut._PROGRESS_INTERVAL = 0.01
+
+        bqstorage_client = mock.create_autospec(
+            bigquery_storage_v1beta1.BigQueryStorageClient
+        )
+        streams = [
+            # Use two streams so we can check that progress bar updates are
+            # sent from each stream.
+            {"name": "/projects/proj/dataset/dset/tables/tbl/streams/1234"},
+            {"name": "/projects/proj/dataset/dset/tables/tbl/streams/5678"},
+        ]
+        session = bigquery_storage_v1beta1.types.ReadSession(streams=streams)
+        session.avro_schema.schema = json.dumps({"fields": [{"name": "testcol"}]})
+        bqstorage_client.create_read_session.return_value = session
+
+        mock_rowstream = mock.create_autospec(reader.ReadRowsStream)
+        bqstorage_client.read_rows.return_value = mock_rowstream
+
+        mock_rows = mock.create_autospec(reader.ReadRowsIterable)
+        mock_rowstream.rows.return_value = mock_rows
+        mock_page = mock.create_autospec(reader.ReadRowsPage)
+        page_items = [-1, 0, 1]
+        type(mock_page).num_items = mock.PropertyMock(return_value=len(page_items))
+
+        def blocking_to_dataframe(*args, **kwargs):
+            # Sleep for longer than the waiting interval. This ensures the
+            # progress_queue gets written to more than once because it gives
+            # the worker->progress updater time to sum intermediate updates.
+            time.sleep(2 * mut._PROGRESS_INTERVAL)
+            return pandas.DataFrame({"testcol": page_items})
+
+        mock_page.to_dataframe.side_effect = blocking_to_dataframe
+        mock_pages = (mock_page, mock_page, mock_page, mock_page, mock_page)
+        type(mock_rows).pages = mock.PropertyMock(return_value=mock_pages)
+
+        schema = [schema.SchemaField("testcol", "IGNORED")]
+
+        row_iterator = mut.RowIterator(
+            _mock_client(),
+            None,  # api_request: ignored
+            None,  # path: ignored
+            schema,
+            table=mut.TableReference.from_string("proj.dset.tbl"),
+            selected_fields=schema,
+        )
+
+        row_iterator.to_dataframe(
+            bqstorage_client=bqstorage_client, progress_bar_type="tqdm"
+        )
+
+        # Make sure that this test updated the progress bar once per page from
+        # each stream.
+        total_pages = len(streams) * len(mock_pages)
+        expected_total_rows = total_pages * len(page_items)
+        progress_updates = [
+            args[0] for args, kwargs in tqdm_mock().update.call_args_list
+        ]
+        # Should have sent >1 update due to delay in blocking_to_dataframe.
+        self.assertGreater(len(progress_updates), 1)
+        self.assertEqual(sum(progress_updates), expected_total_rows)
+        tqdm_mock().close.assert_called_once()
+
     @unittest.skipIf(pandas is None, "Requires `pandas`")
     @unittest.skipIf(
         bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`"
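For context, the tests above exercise a producer/consumer progress pattern: each download worker calls `put_nowait` on a shared progress queue once per page, and a separate updater thread periodically drains the queue and sums intermediate counts before advancing the tqdm bar, which is why a `queue.Full` error from `put_nowait` must be swallowed rather than kill the worker. The following is a minimal, self-contained sketch of that pattern under those assumptions; the names (`worker`, `progress_updater`, `_PROGRESS_INTERVAL`) are illustrative and this is not the actual `google.cloud.bigquery.table` implementation:

```python
import queue
import threading
import time

_PROGRESS_INTERVAL = 0.05  # assumption: polling cadence, as patched in the tests


def worker(progress_queue, pages):
    """Simulate a download worker: report one progress update per page."""
    for page in pages:
        try:
            progress_queue.put_nowait(len(page))
        except queue.Full:
            # Tolerate a full queue, as the tests assert: a dropped update
            # only makes the bar lag; it must not propagate to the worker.
            pass


def progress_updater(progress_queue, update, done):
    """Drain the queue on an interval, summing intermediate updates."""
    while not done.is_set():
        time.sleep(_PROGRESS_INTERVAL)
        total = 0
        while True:
            try:
                total += progress_queue.get_nowait()
            except queue.Empty:
                break
        if total:
            update(total)  # e.g. tqdm_instance.update(total)


if __name__ == "__main__":
    q = queue.Queue(maxsize=2)  # small queue so put_nowait can hit queue.Full
    done = threading.Event()
    updates = []
    updater = threading.Thread(
        target=progress_updater, args=(q, updates.append, done)
    )
    updater.start()

    pages = [[1, 2, 3]] * 6  # six pages of three rows each, per worker
    workers = [threading.Thread(target=worker, args=(q, pages)) for _ in range(2)]
    for w in workers:
        w.start()
    for w in workers:
        w.join()

    time.sleep(2 * _PROGRESS_INTERVAL)  # let the updater drain remaining items
    done.set()
    updater.join()
    # At most 2 workers * 6 pages * 3 rows; updates dropped on queue.Full
    # are lost, which mirrors why the tests only assert put_nowait counts.
    print(sum(updates))
```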