import unittest

import mock
+import pytest

from six.moves import http_client

try:
@@ -59,6 +60,47 @@ def _make_connection(*responses):
        return mock_conn


+def _make_job_resource(
+    creation_time_ms=1437767599006,
+    started_time_ms=1437767600007,
+    ended_time_ms=1437767601008,
+    started=False,
+    ended=False,
+    etag="abc-def-hjk",
+    endpoint="https://www.googleapis.com",
+    job_type="load",
+    job_id="a-random-id",
+    project_id="some-project",
+    user_email="[email protected]",
+):
+    resource = {
+        "configuration": {job_type: {}},
+        "statistics": {"creationTime": creation_time_ms, job_type: {}},
+        "etag": etag,
+        "id": "{}:{}".format(project_id, job_id),
+        "jobReference": {"projectId": project_id, "jobId": job_id},
+        "selfLink": "{}/bigquery/v2/projects/{}/jobs/{}".format(
+            endpoint, project_id, job_id
+        ),
+        "user_email": user_email,
+    }
+
+    if started or ended:
+        resource["statistics"]["startTime"] = started_time_ms
+
+    if ended:
+        resource["statistics"]["endTime"] = ended_time_ms
+
+    if job_type == "query":
+        resource["configuration"]["query"]["destinationTable"] = {
+            "projectId": project_id,
+            "datasetId": "_temp_dataset",
+            "tableId": "_temp_table",
+        }
+
+    return resource
+
+
class Test__error_result_to_exception(unittest.TestCase):
    def _call_fut(self, *args, **kwargs):
        from google.cloud.bigquery import job
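
For orientation, here is an illustrative call to the new `_make_job_resource` helper (not part of the patch; the expected values follow the defaults in its signature):

    # Illustrative only: exercise the helper added above with its defaults.
    resource = _make_job_resource(job_type="query", started=True, ended=True)
    assert resource["id"] == "some-project:a-random-id"
    assert resource["statistics"]["startTime"] == 1437767600007
    assert resource["statistics"]["endTime"] == 1437767601008
    # Query jobs also get a placeholder destination table.
    assert resource["configuration"]["query"]["destinationTable"] == {
        "projectId": "some-project",
        "datasetId": "_temp_dataset",
        "tableId": "_temp_table",
    }
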
@@ -974,6 +1016,7 @@ class _Base(object):
    from google.cloud.bigquery.dataset import DatasetReference
    from google.cloud.bigquery.table import TableReference

+    ENDPOINT = "https://www.googleapis.com"
    PROJECT = "project"
    SOURCE1 = "http://example.com/source1.csv"
    DS_ID = "dataset_id"
@@ -994,7 +1037,9 @@ def _setUpConstants(self):
        self.WHEN = datetime.datetime.utcfromtimestamp(self.WHEN_TS).replace(tzinfo=UTC)
        self.ETAG = "ETAG"
        self.FULL_JOB_ID = "%s:%s" % (self.PROJECT, self.JOB_ID)
-        self.RESOURCE_URL = "http://example.com/path/to/resource"
+        self.RESOURCE_URL = "{}/bigquery/v2/projects/{}/jobs/{}".format(
+            self.ENDPOINT, self.PROJECT, self.JOB_ID
+        )
        self.USER_EMAIL = "[email protected]"

    def _table_ref(self, table_id):
@@ -1004,30 +1049,19 @@ def _table_ref(self, table_id):

    def _make_resource(self, started=False, ended=False):
        self._setUpConstants()
-        resource = {
-            "configuration": {self.JOB_TYPE: {}},
-            "statistics": {"creationTime": self.WHEN_TS * 1000, self.JOB_TYPE: {}},
-            "etag": self.ETAG,
-            "id": self.FULL_JOB_ID,
-            "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID},
-            "selfLink": self.RESOURCE_URL,
-            "user_email": self.USER_EMAIL,
-        }
-
-        if started or ended:
-            resource["statistics"]["startTime"] = self.WHEN_TS * 1000
-
-        if ended:
-            resource["statistics"]["endTime"] = (self.WHEN_TS + 1000) * 1000
-
-        if self.JOB_TYPE == "query":
-            resource["configuration"]["query"]["destinationTable"] = {
-                "projectId": self.PROJECT,
-                "datasetId": "_temp_dataset",
-                "tableId": "_temp_table",
-            }
-
-        return resource
+        return _make_job_resource(
+            creation_time_ms=int(self.WHEN_TS * 1000),
+            started_time_ms=int(self.WHEN_TS * 1000),
+            ended_time_ms=int(self.WHEN_TS * 1000) + 1000000,
+            started=started,
+            ended=ended,
+            etag=self.ETAG,
+            endpoint=self.ENDPOINT,
+            job_type=self.JOB_TYPE,
+            job_id=self.JOB_ID,
+            project_id=self.PROJECT,
+            user_email=self.USER_EMAIL,
+        )

    def _verifyInitialReadonlyProperties(self, job):
        # root elements of resource
@@ -4684,7 +4718,11 @@ def test_to_dataframe_bqstorage(self):
        job.to_dataframe(bqstorage_client=bqstorage_client)

        bqstorage_client.create_read_session.assert_called_once_with(
-            mock.ANY, "projects/{}".format(self.PROJECT), read_options=mock.ANY
+            mock.ANY,
+            "projects/{}".format(self.PROJECT),
+            read_options=mock.ANY,
+            # Use default number of streams for best performance.
+            requested_streams=0,
        )

    @unittest.skipIf(pandas is None, "Requires `pandas`")
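
The `requested_streams=0` assertion above, together with the `requested_streams=1` assertion in the new test at the bottom of this diff, pins down how the stream count is chosen: 0 lets the BigQuery Storage API pick the number of streams for best read throughput, while 1 forces a single stream so rows arrive in order. A hedged sketch of the selection logic these tests imply (assumed shape, not the actual library source):

    # Sketch only: request a single stream when the query orders its results,
    # otherwise let the server choose the stream count (requested_streams=0).
    requested_streams = 1 if _contains_order_by(query) else 0
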
@@ -5039,3 +5077,93 @@ def test_from_api_repr_normal(self):
        self.assertEqual(entry.pending_units, self.PENDING_UNITS)
        self.assertEqual(entry.completed_units, self.COMPLETED_UNITS)
        self.assertEqual(entry.slot_millis, self.SLOT_MILLIS)
+
+
[email protected](
+    "query,expected",
+    (
+        (None, False),
+        ("", False),
+        ("select name, age from table", False),
+        ("select name, age from table LIMIT 10;", False),
+        ("select name, age from table order by other_column;", True),
+        ("Select name, age From table Order By other_column", True),
+        ("SELECT name, age FROM table ORDER BY other_column;", True),
+        ("select name, age from table order\nby other_column", True),
+        ("Select name, age From table Order\nBy other_column;", True),
+        ("SELECT name, age FROM table ORDER\nBY other_column", True),
+        ("SelecT name, age froM table OrdeR \n\t BY other_column;", True),
+    ),
+)
+def test__contains_order_by(query, expected):
+    from google.cloud.bigquery import job as mut
+
+    if expected:
+        assert mut._contains_order_by(query)
+    else:
+        assert not mut._contains_order_by(query)
+
+
[email protected](pandas is None, reason="Requires `pandas`")
[email protected](
+    bigquery_storage_v1beta1 is None, reason="Requires `google-cloud-bigquery-storage`"
+)
[email protected](
+    "query",
+    (
+        "select name, age from table order by other_column;",
+        "Select name, age From table Order By other_column;",
+        "SELECT name, age FROM table ORDER BY other_column;",
+        "select name, age from table order\nby other_column;",
+        "Select name, age From table Order\nBy other_column;",
+        "SELECT name, age FROM table ORDER\nBY other_column;",
+        "SelecT name, age froM table OrdeR \n\t BY other_column;",
+    ),
+)
+def test_to_dataframe_bqstorage_preserve_order(query):
+    from google.cloud.bigquery.job import QueryJob as target_class
+
+    job_resource = _make_job_resource(
+        project_id="test-project", job_type="query", ended=True
+    )
+    job_resource["configuration"]["query"]["query"] = query
+    job_resource["status"] = {"state": "DONE"}
+    get_query_results_resource = {
+        "jobComplete": True,
+        "jobReference": {"projectId": "test-project", "jobId": "test-job"},
+        "schema": {
+            "fields": [
+                {"name": "name", "type": "STRING", "mode": "NULLABLE"},
+                {"name": "age", "type": "INTEGER", "mode": "NULLABLE"},
+            ]
+        },
+        "totalRows": "4",
+    }
+    connection = _make_connection(get_query_results_resource, job_resource)
+    client = _make_client(connection=connection)
+    job = target_class.from_api_repr(job_resource, client)
+    bqstorage_client = mock.create_autospec(
+        bigquery_storage_v1beta1.BigQueryStorageClient
+    )
+    session = bigquery_storage_v1beta1.types.ReadSession()
+    session.avro_schema.schema = json.dumps(
+        {
+            "type": "record",
+            "name": "__root__",
+            "fields": [
+                {"name": "name", "type": ["null", "string"]},
+                {"name": "age", "type": ["null", "long"]},
+            ],
+        }
+    )
+    bqstorage_client.create_read_session.return_value = session
+
+    job.to_dataframe(bqstorage_client=bqstorage_client)
+
+    bqstorage_client.create_read_session.assert_called_once_with(
+        mock.ANY,
+        "projects/test-project",
+        read_options=mock.ANY,
+        # Use a single stream to preserve row order.
+        requested_streams=1,
+    )
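
For context, `_contains_order_by` can be implemented as a case-insensitive regular expression search; the following is a minimal sketch consistent with the parametrized cases above (an assumption, not necessarily the library's exact implementation):

    import re

    _ORDER_BY_PATTERN = re.compile(r"\bORDER\s+BY\b", re.IGNORECASE)

    def _contains_order_by(query):
        # Treat None/empty queries as unordered; otherwise look for an
        # ORDER BY clause with any whitespace (spaces, newlines, tabs)
        # between the two keywords.
        return bool(query and _ORDER_BY_PATTERN.search(query))
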