@@ -57,26 +57,25 @@ def read_bigquery(
         dataset: The name of the dataset hosted in BigQuery in the format of
             ``dataset_id.table_id``. Both the dataset_id and table_id must exist
             otherwise an exception will be raised.
-        query: The query to execute.
-            The dataset is created from the results of executing the query if provided.
-            Otherwise, the entire dataset is read. For query syntax guidelines, see
+        query: The query to execute. The dataset is created from the results of
+            executing the query if provided. Otherwise, the entire dataset is read.
+            For query syntax guidelines, see
             https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax
-        parallelism:
-            2.9.3: The requested parallelism of the read. If -1, it will be
-            automatically chosen based on the available cluster resources
-            and estimated in-memory data size.
-            2.33.0: This argument is deprecated. Use ``override_num_blocks`` argument.
+        parallelism: 2.33.0, 2.42.0: This argument is deprecated. Use
+            ``override_num_blocks`` argument. 2.9.3: The requested parallelism of
+            the read. If -1, it will be automatically chosen based on the available
+            cluster resources and estimated in-memory data size.
         ray_remote_args: kwargs passed to ray.remote in the read tasks.
-        concurrency: Not supported in 2.9.3.
-            2.33.0: The maximum number of Ray tasks to run concurrently. Set this
-            to control number of tasks to run concurrently. This doesn't change the
-            total number of tasks run or the total number of output blocks. By default,
-            concurrency is dynamically decided based on the available resources.
-        override_num_blocks: Not supported in 2.9.3.
-            2.33.0: Override the number of output blocks from all read tasks.
-            By default, the number of output blocks is dynamically decided based on
-            input data size and available resources. You shouldn't manually set this
-            value in most cases.
+        concurrency: Supported for 2.33.0 and 2.42.0 only: The maximum number of
+            Ray tasks to run concurrently. Set this to control number of tasks to
+            run concurrently. This doesn't change the total number of tasks run or
+            the total number of output blocks. By default, concurrency is
+            dynamically decided based on the available resources.
+        override_num_blocks: Supported for 2.33.0 and 2.42.0 only: Override the
+            number of output blocks from all read tasks. By default, the number of
+            output blocks is dynamically decided based on input data size and
+            available resources. You shouldn't manually set this value in most
+            cases.
 
     Returns:
         Dataset producing rows from the results of executing the query
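For reference, a minimal usage sketch of the keyword arguments documented in the hunk above, as they would be used on Ray 2.33.0 / 2.42.0. The import path and the dataset name are assumptions (this diff does not show the module's public import path); only keyword arguments that appear in the docstring above are used.

```python
# Hedged usage sketch only: the import path below is an assumption, not taken
# from this diff. Adjust it to wherever read_bigquery is exposed in your install.
from vertex_ray import data as bq_data  # assumed import path

# On Ray 2.33.0 / 2.42.0, parallelism is deprecated; prefer override_num_blocks.
ds = bq_data.read_bigquery(
    dataset="my_dataset.my_table",  # hypothetical dataset_id.table_id; both must exist
    override_num_blocks=8,          # replaces the deprecated parallelism argument
    concurrency=4,                  # cap on concurrently running read tasks
)
```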
@@ -96,7 +95,7 @@ def read_bigquery(
             parallelism=parallelism,
             ray_remote_args=ray_remote_args,
         )
-    elif ray.__version__ == "2.33.0":
+    elif ray.__version__ in ("2.33.0", "2.42.0"):
         return ray.data.read_datasource(
             datasource=datasource,
             parallelism=parallelism,
@@ -107,7 +106,7 @@ def read_bigquery(
     else:
         raise ImportError(
             f"[Ray on Vertex AI]: Unsupported version {ray.__version__}."
-            + "Only 2.33.0 and 2.9.3 are supported."
+            + "Only 2.42.0, 2.33.0, and 2.9.3 are supported."
         )
 
 
@@ -135,19 +134,19 @@ def write_bigquery(
             The default number of retries is 10.
         ray_remote_args: kwargs passed to ray.remote in the write tasks.
         overwrite_table: Not supported in 2.9.3.
-            2.33.0: Whether the write will overwrite the table if it already
+            2.33.0, 2.42.0: Whether the write will overwrite the table if it already
             exists. The default behavior is to overwrite the table.
             If false, will append to the table if it exists.
         concurrency: Not supported in 2.9.3.
-            2.33.0: The maximum number of Ray tasks to run concurrently. Set this
+            2.33.0, 2.42.0: The maximum number of Ray tasks to run concurrently. Set this
             to control number of tasks to run concurrently. This doesn't change the
             total number of tasks run or the total number of output blocks. By default,
             concurrency is dynamically decided based on the available resources.
     """
     if ray.__version__ == "2.4.0":
         raise RuntimeError(_V2_4_WARNING_MESSAGE)
 
-    elif ray.__version__ == "2.9.3" or ray.__version__ == "2.33.0":
+    elif ray.__version__ in ("2.9.3", "2.33.0", "2.42.0"):
         if ray.__version__ == "2.9.3":
             warnings.warn(_V2_9_WARNING_MESSAGE, DeprecationWarning, stacklevel=1)
         if ray_remote_args is None:
@@ -174,7 +173,7 @@ def write_bigquery(
                 datasink=datasink,
                 ray_remote_args=ray_remote_args,
             )
-        elif ray.__version__ == "2.33.0":
+        elif ray.__version__ in ("2.33.0", "2.42.0"):
             datasink = _BigQueryDatasink(
                 project_id=project_id,
                 dataset=dataset,
@@ -189,5 +188,5 @@ def write_bigquery(
     else:
         raise ImportError(
             f"[Ray on Vertex AI]: Unsupported version {ray.__version__}."
-            + "Only 2.33.0 and 2.9.3 are supported."
+            + "Only 2.42.0, 2.33.0 and 2.9.3 are supported."
         )
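The write path is gated by the same version check. Below is a hedged sketch of a call using the newly documented keywords; the import path, the positional dataset argument, and the project and table names are assumptions, and only the keyword names (project_id, dataset, overwrite_table, concurrency) come from the hunks above.

```python
# Hedged usage sketch: import path, call shape, and values are assumptions; only
# the keyword names shown here appear in the diff above.
from vertex_ray import data as bq_data  # assumed import path

ds = bq_data.read_bigquery(dataset="my_dataset.my_table")  # hypothetical source table

bq_data.write_bigquery(
    ds,                                 # assumed: the Ray Dataset to write
    project_id="my-project",            # hypothetical GCP project
    dataset="my_dataset.output_table",  # hypothetical destination dataset_id.table_id
    overwrite_table=False,              # 2.33.0 / 2.42.0: append instead of overwriting
    concurrency=4,                      # 2.33.0 / 2.42.0: cap on concurrent write tasks
)
```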