Source code for detectron2.solver.lr_scheduler

# Copyright (c) Facebook, Inc. and its affiliates.
import logging
import math
from bisect import bisect_right
from typing import List
import torch
from fvcore.common.param_scheduler import (
    CompositeParamScheduler,
    ConstantParamScheduler,
    LinearParamScheduler,
    ParamScheduler,
)

logger = logging.getLogger(__name__)


[docs]class WarmupParamScheduler(CompositeParamScheduler): """ Add an initial warmup stage to another scheduler. """
[docs] def __init__( self, scheduler: ParamScheduler, warmup_factor: float, warmup_length: float, warmup_method: str = "linear", rescale_interval: bool = False, ): """ Args: scheduler: warmup will be added at the beginning of this scheduler warmup_factor: the factor w.r.t the initial value of ``scheduler``, e.g. 0.001 warmup_length: the relative length (in [0, 1]) of warmup steps w.r.t the entire training, e.g. 0.01 warmup_method: one of "linear" or "constant" rescale_interval: whether we will rescale the interval of the scheduler after warmup """ end_value = scheduler(warmup_length) # the value to reach when warmup ends start_value = warmup_factor * scheduler(0.0) if warmup_method == "constant": warmup = ConstantParamScheduler(start_value) elif warmup_method == "linear": warmup = LinearParamScheduler(start_value, end_value) else: raise ValueError("Unknown warmup method: {}".format(warmup_method)) super().__init__( [warmup, scheduler], interval_scaling=["rescaled", "rescaled" if rescale_interval else "fixed"], lengths=[warmup_length, 1 - warmup_length], )
[docs]class LRMultiplier(torch.optim.lr_scheduler._LRScheduler): """ A LRScheduler which uses fvcore :class:`ParamScheduler` to multiply the learning rate of each param in the optimizer. Every step, the learning rate of each parameter becomes its initial value multiplied by the output of the given :class:`ParamScheduler`. The absolute learning rate value of each parameter can be different. This scheduler can be used as long as the relative scale among them do not change during training. Examples: :: LRMultiplier( opt, WarmupParamScheduler( MultiStepParamScheduler( [1, 0.1, 0.01], milestones=[60000, 80000], num_updates=90000, ), 0.001, 100 / 90000 ), max_iter=90000 ) """ # NOTES: in the most general case, every LR can use its own scheduler. # Supporting this requires interaction with the optimizer when its parameter # group is initialized. For example, classyvision implements its own optimizer # that allows different schedulers for every parameter group. # To avoid this complexity, we use this class to support the most common cases # where the relative scale among all LRs stay unchanged during training. In this # case we only need a total of one scheduler that defines the relative LR multiplier.
[docs] def __init__( self, optimizer: torch.optim.Optimizer, multiplier: ParamScheduler, max_iter: int, last_iter: int = -1, ): """ Args: optimizer, last_iter: See ``torch.optim.lr_scheduler._LRScheduler``. ``last_iter`` is the same as ``last_epoch``. multiplier: a fvcore ParamScheduler that defines the multiplier on every LR of the optimizer max_iter: the total number of training iterations """ if not isinstance(multiplier, ParamScheduler): raise ValueError( "_LRMultiplier(multiplier=) must be an instance of fvcore " f"ParamScheduler. Got {multiplier} instead." ) self._multiplier = multiplier self._max_iter = max_iter super().__init__(optimizer, last_epoch=last_iter)
[docs] def state_dict(self): # fvcore schedulers are stateless. Only keep pytorch scheduler states return {"base_lrs": self.base_lrs, "last_epoch": self.last_epoch}
[docs] def get_lr(self) -> List[float]: multiplier = self._multiplier(self.last_epoch / self._max_iter) return [base_lr * multiplier for base_lr in self.base_lrs]
""" Content below is no longer needed! """ # NOTE: PyTorch's LR scheduler interface uses names that assume the LR changes # only on epoch boundaries. We typically use iteration based schedules instead. # As a result, "epoch" (e.g., as in self.last_epoch) should be understood to mean # "iteration" instead. # FIXME: ideally this would be achieved with a CombinedLRScheduler, separating # MultiStepLR with WarmupLR but the current LRScheduler design doesn't allow it. class WarmupMultiStepLR(torch.optim.lr_scheduler._LRScheduler): def __init__( self, optimizer: torch.optim.Optimizer, milestones: List[int], gamma: float = 0.1, warmup_factor: float = 0.001, warmup_iters: int = 1000, warmup_method: str = "linear", last_epoch: int = -1, ): logger.warning( "WarmupMultiStepLR is deprecated! Use LRMultipilier with fvcore ParamScheduler instead!" ) if not list(milestones) == sorted(milestones): raise ValueError( "Milestones should be a list of" " increasing integers. Got {}", milestones ) self.milestones = milestones self.gamma = gamma self.warmup_factor = warmup_factor self.warmup_iters = warmup_iters self.warmup_method = warmup_method super().__init__(optimizer, last_epoch) def get_lr(self) -> List[float]: warmup_factor = _get_warmup_factor_at_iter( self.warmup_method, self.last_epoch, self.warmup_iters, self.warmup_factor ) return [ base_lr * warmup_factor * self.gamma ** bisect_right(self.milestones, self.last_epoch) for base_lr in self.base_lrs ] def _compute_values(self) -> List[float]: # The new interface return self.get_lr() class WarmupCosineLR(torch.optim.lr_scheduler._LRScheduler): def __init__( self, optimizer: torch.optim.Optimizer, max_iters: int, warmup_factor: float = 0.001, warmup_iters: int = 1000, warmup_method: str = "linear", last_epoch: int = -1, ): logger.warning( "WarmupCosineLR is deprecated! Use LRMultipilier with fvcore ParamScheduler instead!" ) self.max_iters = max_iters self.warmup_factor = warmup_factor self.warmup_iters = warmup_iters self.warmup_method = warmup_method super().__init__(optimizer, last_epoch) def get_lr(self) -> List[float]: warmup_factor = _get_warmup_factor_at_iter( self.warmup_method, self.last_epoch, self.warmup_iters, self.warmup_factor ) # Different definitions of half-cosine with warmup are possible. For # simplicity we multiply the standard half-cosine schedule by the warmup # factor. An alternative is to start the period of the cosine at warmup_iters # instead of at 0. In the case that warmup_iters << max_iters the two are # very close to each other. return [ base_lr * warmup_factor * 0.5 * (1.0 + math.cos(math.pi * self.last_epoch / self.max_iters)) for base_lr in self.base_lrs ] def _compute_values(self) -> List[float]: # The new interface return self.get_lr() def _get_warmup_factor_at_iter( method: str, iter: int, warmup_iters: int, warmup_factor: float ) -> float: """ Return the learning rate warmup factor at a specific iteration. See :paper:`ImageNet in 1h` for more details. Args: method (str): warmup method; either "constant" or "linear". iter (int): iteration at which to calculate the warmup factor. warmup_iters (int): the number of warmup iterations. warmup_factor (float): the base warmup factor (the meaning changes according to the method used). Returns: float: the effective warmup factor at the given iteration. """ if iter >= warmup_iters: return 1.0 if method == "constant": return warmup_factor elif method == "linear": alpha = iter / warmup_iters return warmup_factor * (1 - alpha) + alpha else: raise ValueError("Unknown warmup method: {}".format(method))