@@ -60,8 +60,8 @@ def notebook_launcher(
 
     <Tip warning={true}>
 
-    To use this function absolutely zero calls to a CUDA device must be made in the notebook session before calling. If
-    any have been made, you will need to restart the notebook and make sure no cells use any CUDA capability.
+    To use this function absolutely zero calls to a device must be made in the notebook session before calling. If any
+    have been made, you will need to restart the notebook and make sure no cells use any device capability.
 
     Setting `ACCELERATE_DEBUG_MODE="1"` in your environment will run a test before truly launching to ensure that none
     of those calls have been made.
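The requirement in this docstring hunk is easiest to see as a notebook cell. The snippet below is an illustrative sketch, not part of the diff: it only assumes it runs before anything in the session has touched a device, and it uses the `ACCELERATE_DEBUG_MODE` variable named above to opt into the pre-launch check.

```python
import os

# Opt in to the dry run described above. This must be set before notebook_launcher()
# is called; with it enabled, a dummy launch first verifies that nothing in the
# session has already initialized the device.
os.environ["ACCELERATE_DEBUG_MODE"] = "1"
```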
@@ -76,11 +76,11 @@ def notebook_launcher(
             Tuple of arguments to pass to the function (it will receive `*args`).
         num_processes (`int`, *optional*):
             The number of processes to use for training. Will default to 8 in Colab/Kaggle if a TPU is available, to
-            the number of GPUs available otherwise.
+            the number of devices available otherwise.
         mixed_precision (`str`, *optional*, defaults to `"no"`):
-            If `fp16` or `bf16`, will use mixed precision training on multi-GPU.
+            If `fp16` or `bf16`, will use mixed precision training on multi-device.
         use_port (`str`, *optional*, defaults to `"29500"`):
-            The port to use to communicate between processes when launching a multi-GPU training.
+            The port to use to communicate between processes when launching a multi-device training.
         master_addr (`str`, *optional*, defaults to `"127.0.0.1"`):
             The address to use for communication between processes.
         node_rank (`int`, *optional*, defaults to 0):
@@ -105,7 +105,7 @@ def notebook_launcher(
     Example:
 
     ```python
-    # Assume this is defined in a Jupyter Notebook on an instance with two GPUs
+    # Assume this is defined in a Jupyter Notebook on an instance with two devices
     from accelerate import notebook_launcher
 
 
@@ -158,27 +158,27 @@ def train(*args):
     else:
         if num_processes is None:
             raise ValueError(
-                "You have to specify the number of GPUs you would like to use, add `num_processes=...` to your call."
+                "You have to specify the number of devices you would like to use, add `num_processes=...` to your call."
             )
         if node_rank >= num_nodes:
             raise ValueError("The node_rank must be less than the number of nodes.")
         if num_processes > 1:
-            # Multi-GPU launch
+            # Multi-device launch
             from torch.distributed.launcher.api import LaunchConfig, elastic_launch
             from torch.multiprocessing import start_processes
             from torch.multiprocessing.spawn import ProcessRaisedException
 
             if len(AcceleratorState._shared_state) > 0:
                 raise ValueError(
-                    "To launch a multi-GPU training from your notebook, the `Accelerator` should only be initialized "
+                    "To launch a multi-device training from your notebook, the `Accelerator` should only be initialized "
                     "inside your training function. Restart your notebook and make sure no cells initialize an "
                     "`Accelerator`."
                 )
-            # Check for specific libraries known to initialize CUDA that users constantly use
+            # Check for specific libraries known to initialize the device that users constantly use
             problematic_imports = are_libraries_initialized("bitsandbytes")
             if len(problematic_imports) > 0:
                 err = (
-                    "Could not start distributed process. Libraries known to initialize CUDA upon import have been "
+                    "Could not start distributed process. Libraries known to initialize the device upon import have been "
                     "imported already. Please keep these imports inside your training function to try and help with this:"
                 )
                 for lib_name in problematic_imports:
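The `are_libraries_initialized("bitsandbytes")` check above is why device-initializing imports belong inside the training function rather than at the top of the notebook. The pattern it encourages looks roughly like the sketch below; this is illustrative only, and the `train` body is a placeholder.

```python
def train():
    # Imported here, inside the launched process, so the parent notebook process
    # never initializes the device through this import.
    import bitsandbytes as bnb  # noqa: F401
    from accelerate import Accelerator

    accelerator = Accelerator()  # also created inside the training function
    ...
```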
@@ -203,24 +203,26 @@ def train(*args):
             # process here (the other ones will be set by the launcher).
             with patch_environment(**patched_env):
                 # First dummy launch
+                device_type = torch.accelerator.current_accelerator().type if hasattr(torch, "accelerator") else "cuda"
+                distributed_type = "MULTI_XPU" if device_type == "xpu" else "MULTI_GPU"
                 if os.environ.get("ACCELERATE_DEBUG_MODE", "false").lower() == "true":
-                    launcher = PrepareForLaunch(test_launch, distributed_type="MULTI_GPU")
+                    launcher = PrepareForLaunch(test_launch, distributed_type=distributed_type)
                     try:
                         start_processes(launcher, args=(), nprocs=num_processes, start_method="fork")
                     except ProcessRaisedException as e:
                         err = "An issue was found when verifying a stable environment for the notebook launcher."
-                        if "Cannot re-initialize CUDA in forked subprocess" in e.args[0]:
+                        if f"Cannot re-initialize {device_type.upper()} in forked subprocess" in e.args[0]:
                             raise RuntimeError(
                                 f"{err} "
                                 "This likely stems from an outside import causing issues once the `notebook_launcher()` is called. "
                                 "Please review your imports and test them when running the `notebook_launcher()` to identify "
-                                "which one is problematic and causing CUDA to be initialized."
+                                f"which one is problematic and causing {device_type.upper()} to be initialized."
                             ) from e
                         else:
                             raise RuntimeError(f"{err} The following error was raised: {e}") from e
                 # Now the actual launch
-                launcher = PrepareForLaunch(function, distributed_type="MULTI_GPU")
-                print(f"Launching training on {num_processes} GPUs.")
+                launcher = PrepareForLaunch(function, distributed_type=distributed_type)
+                print(f"Launching training on {num_processes} {device_type.upper()}s.")
                 try:
                     if rdzv_conf is None:
                         rdzv_conf = {}
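The two `+` lines at the top of this hunk carry the core of the change: when the running PyTorch build ships the `torch.accelerator` API, the active backend is probed at runtime, and older builds fall back to `"cuda"`. The standalone sketch below mirrors that probe; it assumes an accelerator is actually present (otherwise `current_accelerator()` can return `None`), and the printed values are illustrative.

```python
import torch

# Prefer the generic accelerator API when it exists (newer PyTorch),
# otherwise assume CUDA as before.
if hasattr(torch, "accelerator"):
    device_type = torch.accelerator.current_accelerator().type  # e.g. "cuda" or "xpu"
else:
    device_type = "cuda"

distributed_type = "MULTI_XPU" if device_type == "xpu" else "MULTI_GPU"
print(device_type, distributed_type)
```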
@@ -244,23 +246,25 @@ def train(*args):
                         launch_config_kwargs["log_line_prefix_template"] = log_line_prefix_template
                     elastic_launch(config=LaunchConfig(**launch_config_kwargs), entrypoint=function)(*args)
                 except ProcessRaisedException as e:
-                    if "Cannot re-initialize CUDA in forked subprocess" in e.args[0]:
+                    if f"Cannot re-initialize {device_type.upper()} in forked subprocess" in e.args[0]:
                         raise RuntimeError(
-                            "CUDA has been initialized before the `notebook_launcher` could create a forked subprocess. "
+                            f"{device_type.upper()} has been initialized before the `notebook_launcher` could create a forked subprocess. "
                             "This likely stems from an outside import causing issues once the `notebook_launcher()` is called. "
                             "Please review your imports and test them when running the `notebook_launcher()` to identify "
-                            "which one is problematic and causing CUDA to be initialized."
+                            f"which one is problematic and causing {device_type.upper()} to be initialized."
                         ) from e
                     else:
                         raise RuntimeError(f"An issue was found when launching the training: {e}") from e
 
     else:
-        # No need for a distributed launch otherwise as it's either CPU, GPU or MPS.
+        # No need for a distributed launch otherwise as it's either CPU, GPU, XPU or MPS.
         if is_mps_available():
             os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"
             print("Launching training on MPS.")
         elif torch.cuda.is_available():
             print("Launching training on one GPU.")
+        elif torch.xpu.is_available():
+            print("Launching training on one XPU.")
         else:
             print("Launching training on CPU.")
         function(*args)
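Taken together, the launch paths above can be exercised from a notebook with a call along these lines. This is a usage sketch, not part of the diff: the `train` function, its arguments, and the process count are placeholders, and on a single-accelerator or CPU-only machine the call simply falls through to the plain `function(*args)` branch shown at the end of the hunk.

```python
from accelerate import notebook_launcher


def train(batch_size, lr):
    # The Accelerator is created inside the launched processes, never in the notebook itself.
    from accelerate import Accelerator

    accelerator = Accelerator()
    accelerator.print(f"process {accelerator.process_index}: batch_size={batch_size}, lr={lr}")
    # ... build the model and dataloaders and run the training loop here ...


# With num_processes > 1 this takes the multi-device path; with a single device
# or CPU it runs train() directly in the current process.
notebook_launcher(train, args=(64, 1e-3), num_processes=2)
```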