Skip to content

Commit 98da5f8

Browse files
authored
Update Optim to DecoupledSGD in Notebooks (#3554)
1 parent 9cb3364 commit 98da5f8

File tree

4 files changed

+14
-9
lines changed

4 files changed

+14
-9
lines changed

composer/trainer/trainer.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -2308,9 +2308,9 @@ def fit(
23082308
) == torch.optim.SGD and version.parse(torch.__version__) >= version.parse('2.4.0'):
23092309
raise ValueError(
23102310
'PyTorch 2.4 breaks (distributed) checkpointing with SGD. '
2311-
'Please use a different optimizer, e.g. composer.optim.DecoupledSGDW '
2312-
'instead. See https://github.com/pytorch/pytorch/issues/133415 '
2313-
'for further information.',
2311+
'Please use a different optimizer, e.g. composer.optim.DecoupledSGDW, '
2312+
'instead or downgrade to PyTorch <2.4. See ',
2313+
'https://github.com/pytorch/pytorch/issues/133415 for further information.',
23142314
)
23152315

23162316
if self.state.max_duration is None:

examples/TPU_Training_in_composer.ipynb

+5-3
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,8 @@
5858
"# %pip install 'mosaicml @ git+https://github.com/mosaicml/composer.git'\"\n",
5959
"\n",
6060
"from composer import Trainer\n",
61-
"from composer.models import ComposerClassifier"
61+
"from composer.models import ComposerClassifier\n",
62+
"from composer.optim import DecoupledSGDW"
6263
]
6364
},
6465
{
@@ -166,10 +167,11 @@
166167
"\n",
167168
"model = model.to(xm.xla_device())\n",
168169
"\n",
169-
"optimizer = torch.optim.SGD(\n",
170+
"optimizer = DecoupledSGDW(\n",
170171
" model.parameters(),\n",
171172
" lr=0.02,\n",
172-
" momentum=0.9)\n"
173+
" momentum=0.9,\n",
174+
")"
173175
]
174176
},
175177
{

examples/exporting_for_inference.ipynb

+4-2
Original file line numberDiff line numberDiff line change
@@ -304,8 +304,9 @@
304304
"import torch\n",
305305
"from composer import Trainer\n",
306306
"from composer.algorithms import SqueezeExcite\n",
307+
"from composer.optim import DecoupledSGDW\n",
307308
"\n",
308-
"optimizer = torch.optim.SGD(model.parameters(), lr=0.01)\n",
309+
"optimizer = DecoupledSGDW(model.parameters(), lr=0.01)\n",
309310
"scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=5)\n",
310311
"\n",
311312
"trainer = Trainer(\n",
@@ -318,7 +319,8 @@
318319
" callbacks=[export_callback],\n",
319320
" max_duration='2ep',\n",
320321
" save_interval='1ep',\n",
321-
" save_overwrite=True)\n",
322+
" save_overwrite=True,\n",
323+
")\n",
322324
"trainer.fit()"
323325
]
324326
},

examples/migrate_from_ptl.ipynb

+2-1
Original file line numberDiff line numberDiff line change
@@ -301,9 +301,10 @@
301301
"source": [
302302
"from composer import Trainer\n",
303303
"from composer.algorithms import BlurPool\n",
304+
"from composer.optim import DecoupledSGDW\n",
304305
"\n",
305306
"model = MosaicResnet()\n",
306-
"optimizer = torch.optim.SGD(\n",
307+
"optimizer = DecoupledSGDW(\n",
307308
" model.parameters(),\n",
308309
" lr=0.05,\n",
309310
" momentum=0.9,\n",

0 commit comments

Comments
 (0)