 from skorch.callbacks import Callback


-__all__ = ['LRScheduler', 'WarmRestartLR', 'CyclicLR']
+__all__ = ['LRScheduler', 'WarmRestartLR']


@@ -165,14 +165,11 @@ def _get_scheduler(self, net, policy, **scheduler_kwargs):
         """Return scheduler, based on indicated policy, with appropriate
         parameters.
         """
-        if policy not in [CyclicLR, ReduceLROnPlateau] and \
+        if policy not in [ReduceLROnPlateau] and \
            'last_epoch' not in scheduler_kwargs:
             last_epoch = len(net.history) - 1
             scheduler_kwargs['last_epoch'] = last_epoch

-        if policy is CyclicLR and \
-                'last_batch_idx' not in scheduler_kwargs:
-            scheduler_kwargs['last_batch_idx'] = self.batch_idx_ - 1
         return policy(net.optimizer_, **scheduler_kwargs)

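The branch removed above existed only to seed skorch's own ``CyclicLR`` with ``last_batch_idx``; as the deprecation warning in the class removed below states, the replacement is ``skorch.callbacks.LRScheduler`` combined with ``torch.optim.lr_scheduler.CyclicLR``. A minimal sketch of that replacement usage, assuming a toy module and illustrative hyperparameters that are not part of this commit:

```python
import torch
from torch import nn
from skorch import NeuralNetClassifier
from skorch.callbacks import LRScheduler

# Toy module, purely for illustration.
module = nn.Sequential(nn.Linear(20, 10), nn.ReLU(), nn.Linear(10, 2))

# LRScheduler forwards extra keyword arguments to the policy class, so the
# usual base_lr/max_lr/step_size_up arguments of CyclicLR pass straight through.
cyclic = LRScheduler(
    policy=torch.optim.lr_scheduler.CyclicLR,
    base_lr=1e-3,
    max_lr=6e-3,
    step_size_up=2000,
)

net = NeuralNetClassifier(
    module,
    optimizer=torch.optim.SGD,
    optimizer__momentum=0.9,  # torch's CyclicLR cycles momentum by default
    lr=1e-3,
    callbacks=[cyclic],
)
```

Note that, depending on the skorch version, ``LRScheduler`` may step its policy per epoch rather than per batch, so per-batch cycling behaviour is worth verifying before relying on it.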
@@ -246,223 +243,3 @@ def get_lr(self):
             epoch_idx
         )
         return current_lrs.tolist()
-
-
-class CyclicLR:
-    """Sets the learning rate of each parameter group according to
-    cyclical learning rate policy (CLR). The policy cycles the learning
-    rate between two boundaries with a constant frequency, as detailed in
-    the paper.
-    The distance between the two boundaries can be scaled on a per-iteration
-    or per-cycle basis.
-
-    Cyclical learning rate policy changes the learning rate after every batch.
-    ``batch_step`` should be called after a batch has been used for training.
-    To resume training, save `last_batch_idx` and use it to instantiate
-    ``CycleLR``.
-
-    This class has three built-in policies, as put forth in the paper:
-
-    "triangular":
-        A basic triangular cycle w/ no amplitude scaling.
-    "triangular2":
-        A basic triangular cycle that scales initial amplitude by half each
-        cycle.
-    "exp_range":
-        A cycle that scales initial amplitude by gamma**(cycle iterations)
-        at each cycle iteration.
-
-    This implementation was adapted from the github repo:
-    `bckenstler/CLR <https://github.com/bckenstler/CLR>`_
-
-    Parameters
-    ----------
-    optimizer : torch.optimizer.Optimizer instance.
-        Optimizer algorithm.
-
-    base_lr : float or list of float (default=1e-3)
-        Initial learning rate which is the lower boundary in the
-        cycle for each param groups (float) or each group (list).
-
-    max_lr : float or list of float (default=6e-3)
-        Upper boundaries in the cycle for each parameter group (float)
-        or each group (list). Functionally, it defines the cycle
-        amplitude (max_lr - base_lr). The lr at any cycle is the sum
-        of base_lr and some scaling of the amplitude; therefore max_lr
-        may not actually be reached depending on scaling function.
-
-    step_size_up : int (default=2000)
-        Number of training iterations in the increasing half of a cycle.
-
-    step_size_down : int (default=None)
-        Number of training iterations in the decreasing half of a cycle.
-        If step_size_down is None, it is set to step_size_up.
-
-    mode : str (default='triangular')
-        One of {triangular, triangular2, exp_range}. Values correspond
-        to policies detailed above. If scale_fn is not None, this
-        argument is ignored.
-
-    gamma : float (default=1.0)
-        Constant in 'exp_range' scaling function:
-        gamma**(cycle iterations)
-
-    scale_fn : function (default=None)
-        Custom scaling policy defined by a single argument lambda
-        function, where 0 <= scale_fn(x) <= 1 for all x >= 0.
-        mode paramater is ignored.
-
-    scale_mode : str (default='cycle')
-        One of {'cycle', 'iterations'}. Defines whether scale_fn
-        is evaluated on cycle number or cycle iterations (training
-        iterations since start of cycle).
-
-    last_batch_idx : int (default=-1)
-        The index of the last batch.
-
-    Examples
-    --------
-
-    >>> optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
-    >>> scheduler = torch.optim.CyclicLR(optimizer)
-    >>> data_loader = torch.utils.data.DataLoader(...)
-    >>> for epoch in range(10):
-    >>>     for batch in data_loader:
-    >>>         scheduler.batch_step()
-    >>>         train_batch(...)
-
-    References
-    ----------
-
-    .. [1] Leslie N. Smith, 2017, "Cyclical Learning Rates for
-        Training Neural Networks,". "ICLR"
-        `<https://arxiv.org/abs/1506.01186>`_
-
-    """
-
-    def __init__(self, optimizer, base_lr=1e-3, max_lr=6e-3,
-                 step_size_up=2000, step_size_down=None, mode='triangular',
-                 gamma=1., scale_fn=None, scale_mode='cycle',
-                 last_batch_idx=-1, step_size=None):
-
-        # TODO: Remove class in 0.7
-        warnings.warn(
-            "skorch.callbacks.CyclicLR is deprecated, please use "
-            "skorch.callbacks.LRScheduler together with "
-            "torch.optim.lr_scheduler.CyclicLR instead",
-            DeprecationWarning
-        )
-
-        if not isinstance(optimizer, Optimizer):
-            raise TypeError('{} is not an Optimizer'.format(
-                type(optimizer).__name__))
-        self.optimizer = optimizer
-
-        # copied from torch.optim._lr_scheduler._LRScheduler
-        base_lrs = _check_lr('base_lr', optimizer, base_lr)
-        if last_batch_idx == -1:
-            for lr, group in zip(base_lrs, optimizer.param_groups):
-                group['lr'] = lr
-            for group in optimizer.param_groups:
-                group.setdefault('initial_lr', group['lr'])
-            last_batch_idx = 0
-        else:
-            for i, group in enumerate(optimizer.param_groups):
-                if 'initial_lr' not in group:
-                    raise KeyError("param 'initial_lr' is not specified "
-                                   "in param_groups[{}] when resuming an optimizer"
-                                   .format(i))
-        self.base_lrs = list(map(
-            lambda group: group['initial_lr'], optimizer.param_groups))
-
-        self.max_lrs = _check_lr('max_lr', optimizer, max_lr)
-
-        # TODO: Remove warning in a future release
-        if step_size is not None:
-            warnings.warn(
-                "step_size is deprecated in CycleLR, please use step_size_up "
-                "and step_size_down instead",
-                DeprecationWarning)
-            step_size_up = step_size
-            step_size_down = step_size
-
-        step_size_down = step_size_down or step_size_up
-        self.total_size = float(step_size_up + step_size_down)
-        self.step_ratio = float(step_size_up) / self.total_size
-
-        if mode not in ['triangular', 'triangular2', 'exp_range'] \
-                and scale_fn is None:
-            raise ValueError('mode is invalid and scale_fn is None')
-
-        self.mode = mode
-        self.gamma = gamma
-
-        if scale_fn is None:
-            if self.mode == 'triangular':
-                self.scale_fn = self._triangular_scale_fn
-                self.scale_mode = 'cycle'
-            elif self.mode == 'triangular2':
-                self.scale_fn = self._triangular2_scale_fn
-                self.scale_mode = 'cycle'
-            elif self.mode == 'exp_range':
-                self.scale_fn = self._exp_range_scale_fn
-                self.scale_mode = 'iterations'
-        else:
-            self.scale_fn = scale_fn
-            self.scale_mode = scale_mode
-
-        self.batch_step(last_batch_idx)
-
-    def step(self, epoch=None):
-        """Not used by ``CyclicLR``, use batch_step instead."""
-
-    def batch_step(self, batch_idx=None):
-        """Updates the learning rate for the batch index: ``batch_idx``.
-        If ``batch_idx`` is None, ``CyclicLR`` will use an internal
-        batch index to keep track of the index.
-        """
-        if batch_idx is None:
-            batch_idx = self.last_batch_idx + 1
-        self.last_batch_idx = batch_idx
-        for param_group, lr in zip(self.optimizer.param_groups, self.get_lr()):
-            param_group['lr'] = lr
-
-    # pylint: disable=unused-argument
-    def _triangular_scale_fn(self, x):
-        """Cycle amplitude remains contant"""
-        return 1.
-
-    def _triangular2_scale_fn(self, x):
-        """
-        Decreases the cycle amplitude by half after each period,
-        while keeping the base lr constant.
-        """
-        return 1 / (2. ** (x - 1))
-
-    def _exp_range_scale_fn(self, x):
-        """
-        Scales the cycle amplitude by a factor ``gamma**x``,
-        while keeping the base lr constant.
-        """
-        return self.gamma**(x)
-
-    def get_lr(self):
-        """Calculates the learning rate at batch index:
-        ``self.last_batch_idx``.
-        """
-        cycle = np.floor(1 + self.last_batch_idx / self.total_size)
-        x = 1 + self.last_batch_idx / self.total_size - cycle
-        if x <= self.step_ratio:
-            scale_factor = x / self.step_ratio
-        else:
-            scale_factor = (x-1)/(self.step_ratio-1)
-
-        lrs = []
-        for base_lr, max_lr in zip(self.base_lrs, self.max_lrs):
-            base_height = (max_lr - base_lr) * scale_factor
-            if self.scale_mode == 'cycle':
-                lr = base_lr + base_height * self.scale_fn(cycle)
-            else:
-                lr = base_lr + base_height * self.scale_fn(self.last_batch_idx)
-            lrs.append(lr)
-        return lrs
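For reference, the ``get_lr`` logic deleted above implements the cyclical schedule from Smith (2017). Below is a small, self-contained sketch (the helper name and the printed indices are illustrative, not part of skorch) that reproduces the 'triangular' mode with the removed defaults ``base_lr=1e-3``, ``max_lr=6e-3``, ``step_size_up=2000``; it can be handy for sanity-checking the learning rate at a given batch index:

```python
import numpy as np

def triangular_lr(batch_idx, base_lr=1e-3, max_lr=6e-3,
                  step_size_up=2000, step_size_down=None):
    """Reproduce the removed CyclicLR.get_lr() for the 'triangular' mode."""
    step_size_down = step_size_down or step_size_up
    total_size = float(step_size_up + step_size_down)
    step_ratio = step_size_up / total_size

    cycle = np.floor(1 + batch_idx / total_size)   # which cycle we are in
    x = 1 + batch_idx / total_size - cycle         # position within the cycle
    if x <= step_ratio:
        scale_factor = x / step_ratio              # rising half of the cycle
    else:
        scale_factor = (x - 1) / (step_ratio - 1)  # falling half of the cycle

    # 'triangular' keeps the amplitude constant (scale_fn(x) == 1).
    return base_lr + (max_lr - base_lr) * scale_factor

print(triangular_lr(0))      # 0.001: lower boundary at the start
print(triangular_lr(2000))   # 0.006: peak after step_size_up batches
print(triangular_lr(4000))   # 0.001: back at base_lr after a full cycle
```

The rate starts at the lower boundary, reaches ``max_lr`` after ``step_size_up`` batches, and returns to ``base_lr`` after a full cycle.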