! [ -e /content ] && pip install -Uqq fastai # upgrade fastai on Colab
#|default_exp callback.training
from __future__ import annotations
from fastai.basics import *
from fastai.callback.progress import *
from fastai.callback.fp16 import *
from nbdev.showdoc import *
from fastai.test_utils import *
from fastai.vision.all import *

Various callbacks to customize training behavior.
ShortEpochCallback -
class ShortEpochCallback(Callback):
"Fit just `pct` of an epoch, then stop"
def __init__(self,pct=0.01,short_valid=True): self.pct,self.short_valid = pct,short_valid
def after_batch(self):
if self.iter/self.n_iter < self.pct: return
if self.training: raise CancelTrainException
if self.short_valid: raise CancelValidExceptionlearn = synth_learner()
learn.fit(1, cbs=ShortEpochCallback())

| epoch | train_loss | valid_loss | time |
|---|---|---|---|
| 0 |  |  | 00:00 |
learn = synth_learner()
learn.fit(1, cbs=ShortEpochCallback(short_valid=False))

| epoch | train_loss | valid_loss | time |
|---|---|---|---|
| 0 |  | 8.432135 | 00:00 |
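Because `pct` controls how much of each epoch actually runs, this callback works well as a quick smoke test of a training setup. A minimal sketch under that assumption (the 10% figure is just an illustrative choice, not from the original notebook):

```python
# Run roughly 10% of each training epoch, but keep the full validation pass.
learn = synth_learner()
learn.fit(1, cbs=ShortEpochCallback(pct=0.1, short_valid=False))
```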
GradientAccumulation -
class GradientAccumulation(Callback):
"Accumulate gradients before updating weights"
order,run_valid = MixedPrecision.order-4,False
def __init__(self, n_acc=32): store_attr()
def before_fit(self): self.count=0
def after_loss(self): self.learn.loss_grad /= self.n_acc/find_bs(self.learn.yb)
def before_step(self):
"Skip weight update if we have not seen enough items"
self.learn.loss_grad *= self.n_acc/find_bs(self.learn.yb) # 日志正确损失
self.count += find_bs(self.learn.yb)
if self.count<self.n_acc: raise CancelBatchException() # 跳过步骤/清零梯度
else: self.count=0class GetGrads(Callback):
    run_valid,order = False,GradientAccumulation.order+1
    def before_step(self): self.grads=to_detach(L([p.grad.clone() for p in self.model.parameters()]))

def _test_acc(bs,n,cbs=None,cuda=False):
    with no_random(99):
        db=synth_dbunch(bs=bs,n_train=n,n_valid=n,cuda=cuda)
        learn = synth_learner(data=db,cbs=[GetGrads]+L(cbs))
        learn.fit(1, lr=0.01)
        train,valid = learn.recorder.values[-1]
        return train,valid,learn.get_grads.grads
acc_cb = GradientAccumulation(n_acc=8)
train1,valid1,grads1 = _test_acc(8,1)
train2,valid2,grads2 = _test_acc(1,8,acc_cb)
# Gradients should be identical, valid loss the same, train loss different
test_close(grads2,grads1)
test_close(valid2, valid1)
test_ne(train2, train1)

| epoch | train_loss | valid_loss | time |
|---|---|---|---|
| 0 | 0.834062 | 0.295950 | 00:00 |

| epoch | train_loss | valid_loss | time |
|---|---|---|---|
| 0 | 0.824550 | 0.295950 | 00:00 |
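In practice the callback is used to simulate a larger effective batch size when memory is limited: the optimizer only steps once roughly `n_acc` items have been seen, with the loss rescaled accordingly. A hedged sketch reusing the synthetic data helpers from above (the batch size and `n_acc` values are purely illustrative):

```python
# bs=2 per batch with n_acc=16: weights update about every 8 batches,
# approximating training with an effective batch size of 16.
db = synth_dbunch(bs=2, n_train=16, n_valid=16)
learn = synth_learner(data=db)
learn.fit(1, lr=0.01, cbs=GradientAccumulation(n_acc=16))
```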
#|cuda
fp16_cb = MixedPrecision(init_scale=1024)
train1,valid1,grads1 = _test_acc(8,1, fp16_cb, cuda=True)
train2,valid2,grads2 = _test_acc(1,8, [acc_cb,fp16_cb], cuda=True)
test_close(grads2,grads1, eps=0.01)
test_close(valid2, valid1)
test_ne(train2, train1)

| epoch | train_loss | valid_loss | time |
|---|---|---|---|
| 0 | 0.834062 | 0.295950 | 00:00 |

| epoch | train_loss | valid_loss | time |
|---|---|---|---|
| 0 | 0.824550 | 0.295950 | 00:00 |
When the number of items to accumulate per step is higher than the total number of items in an epoch, the parameters (and therefore the validation loss) don't change at all:
learn = synth_learner()
learn.fit(1, lr=0.01, cbs=GradientAccumulation(n_acc=1000))
# ensure valid_loss didn't change
assert learn.recorder.values[-1][1] == learn.recorder.values[0][1]

| epoch | train_loss | valid_loss | time |
|---|---|---|---|
| 0 | 20.987558 | 26.849480 | 00:00 |
GradientClip -
class GradientClip(Callback):
"Clip norm of gradients"
order=MixedPrecision.order+1
def __init__(self,max_norm:float=1., norm_type:float=2.0): store_attr()
def before_step(self): nn.utils.clip_grad_norm_(self.parameters(), self.max_norm, self.norm_type)通常情况下,如果我们使用的学习率过高,训练将会发散。这甚至发生在我们使用混合精度训练时,尽管通过动态损失缩放来避免无穷大,但仍然会发散:
fp16 = MixedPrecision()
set_seed(99)
learn = synth_learner(lr=1.1, cuda=True)
learn.fit(3, cbs=fp16)

| epoch | train_loss | valid_loss | time |
|---|---|---|---|
| 0 | 38.214138 | 25.269005 | 00:00 |
| 1 | 377.145508 | 890.010376 | 00:00 |
| 2 | 839.392883 | 9965.747070 | 00:00 |
By adding the GradientClip callback, the gradient norm (computed with `norm_type`, default 2) is clipped to at most `max_norm` (default 1), which avoids the loss from diverging:
set_seed(99)
learn = synth_learner(lr=1.1, cuda=True)
learn.fit(3, cbs=[GradientClip,fp16])

| epoch | train_loss | valid_loss | time |
|---|---|---|---|
| 0 | 2.039428 | 2.372177 | 00:00 |
| 1 | 1.402425 | 0.300728 | 00:00 |
| 2 | 1.013548 | 0.332610 | 00:00 |
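The `max_norm` and `norm_type` arguments are forwarded to `nn.utils.clip_grad_norm_`, so the clipping can be tightened or relaxed. A minimal sketch assuming the same synthetic setup as above (the 0.5 threshold is an arbitrary illustrative value):

```python
set_seed(99)
learn = synth_learner(lr=1.1, cuda=True)
# Clip the L2 norm of the gradients to at most 0.5 before each optimizer step.
learn.fit(3, cbs=[GradientClip(max_norm=0.5), MixedPrecision()])
```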
BnFreeze
bn_types = (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d)
def set_bn_eval(m:nn.Module, use_eval=True)->None:
"Set bn layers in eval mode for all recursive children of `m`."
for l in m.children():
if isinstance(l, bn_types) and not next(l.parameters()).requires_grad:
if use_eval: l.eval()
else: l.train()
set_bn_eval(l)
class BnFreeze(Callback):
    run_after=TrainEvalCallback
    "Freeze moving average statistics in all non-trainable batchnorm layers."
    def before_train(self):
        set_bn_eval(self.model)

BnFreeze is useful when you'd like to train two separate models that have a common feature extractor / body. The only part of the models that differs is the head you attach for transfer learning.
Learner.freeze() doesn't suffice here, as the BatchNorm layers are trainable by default and are tracking the running mean and std of the batches. For the feature extractors to fully match, you need to set train_bn=False, and these stats need to be frozen as well, which is precisely the function of BnFreeze.
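To make the mechanics concrete, here is a small sketch (not from the original notebook) showing that `set_bn_eval` only switches BatchNorm layers whose parameters are frozen into eval mode, and leaves everything else in train mode:

```python
body = nn.Sequential(nn.Conv2d(3, 8, 3), nn.BatchNorm2d(8), nn.ReLU())
for p in body[1].parameters(): p.requires_grad_(False)  # freeze only the BN layer
set_bn_eval(body)
assert not body[1].training  # frozen BatchNorm is now in eval mode
assert body[0].training      # trainable layers keep their train mode
```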
::: {#cell-26 .cell}
path = untar_data(URLs.MNIST_TINY)
dls = ImageDataLoaders.from_folder(path, valid_pct=0.2)

:::
# Memory format tutorial

In this tutorial we explore memory formats in PyTorch and look at when each one is appropriate. We discuss the pros and cons of the `channels_last` and `channels_first` formats, and how to convert both the model and the input data between them.

## Why different memory formats?

In deep learning the input data is usually images, and each image has three dimensions: height, width, and channels. PyTorch offers a choice of two memory formats:

- `channels_first`: images are laid out as `(batch_size, channels, height, width)`
- `channels_last`: images are laid out as `(batch_size, height, width, channels)` (in PyTorch the logical shape stays `(N, C, H, W)`; only the strides change)

For some operations the `channels_last` format can beat `channels_first` in terms of compatibility and performance; in particular, when working with large image data, adjusting the memory format can noticeably improve performance.
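As a hedged illustration of the conversion itself (standard PyTorch API, not code from this notebook), both a 4D tensor and a convolutional model can be switched to `channels_last` via the `memory_format` argument of `.to()`:

```python
import torch
from torch import nn

x = torch.randn(8, 3, 224, 224)                      # logical shape stays (N, C, H, W)
x = x.to(memory_format=torch.channels_last)          # only the memory strides change

model = nn.Sequential(nn.Conv2d(3, 16, 3), nn.ReLU())
model = model.to(memory_format=torch.channels_last)  # convert the conv weights as well
```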
## Creating an example
We first demonstrate the mismatch of the running stats when using only `train_bn=False`, by creating a `Learner`...

::: {#cell-28 .cell}
learn1 = vision_learner(deepcopy(dls), resnet18, pretrained=True, train_bn=False)

:::
…and grab the first `BatchNorm` layer, and store its running mean:
::: {#cell-30 .cell}
m = learn1.model[0][1].running_mean.clone()

:::
You can see that now the running mean has changed:
::: {#cell-32 .cell}
learn1.fit(1, lr=0.02)
test_ne(to_detach(learn1.model[0][1].running_mean), m)

| epoch | train_loss | valid_loss | time |
|---|---|---|---|
| 0 | 1.148303 | 0.739404 | 00:12 |
:::
With the BnFreeze callback, the running statistics will not be changed during training. This is often important for getting good results from transfer learning.
::: {#cell-34 .cell}
learn1 = vision_learner(deepcopy(dls), resnet18, pretrained=True, train_bn=False, cbs=BnFreeze)
m = learn1.model[0][1].running_mean.detach().clone()
learn1.fit(1, lr=0.02)
test_eq(to_detach(learn1.model[0][1].running_mean), m)

| epoch | train_loss | valid_loss | time |
|---|---|---|---|
| 0 | 0.478594 | 0.270772 | 00:10 |
:::
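Putting it together, the callbacks in this notebook compose with each other and with MixedPrecision. A hedged sketch of a transfer-learning setup using several of them at once (all hyperparameter values here are illustrative, not a recommendation from the original docs):

```python
# Freeze BN statistics, accumulate gradients to an effective batch of 64,
# clip gradient norms, and train the head in mixed precision.
learn = vision_learner(dls, resnet18, pretrained=True, train_bn=False,
                       cbs=[BnFreeze, GradientAccumulation(n_acc=64), GradientClip(max_norm=1.)])
learn.fit_one_cycle(1, 3e-3, cbs=MixedPrecision())
```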
Export -
from nbdev import nbdev_export
nbdev_export()

Converted 00_torch_core.ipynb.
Converted 01_layers.ipynb.
Converted 01a_losses.ipynb.
Converted 02_data.load.ipynb.
Converted 03_data.core.ipynb.
Converted 04_data.external.ipynb.
Converted 05_data.transforms.ipynb.
Converted 06_data.block.ipynb.
Converted 07_vision.core.ipynb.
Converted 08_vision.data.ipynb.
Converted 09_vision.augment.ipynb.
Converted 09b_vision.utils.ipynb.
Converted 09c_vision.widgets.ipynb.
Converted 10_tutorial.pets.ipynb.
Converted 10b_tutorial.albumentations.ipynb.
Converted 11_vision.models.xresnet.ipynb.
Converted 12_optimizer.ipynb.
Converted 13_callback.core.ipynb.
Converted 13a_learner.ipynb.
Converted 13b_metrics.ipynb.
Converted 14_callback.schedule.ipynb.
Converted 14a_callback.data.ipynb.
Converted 15_callback.hook.ipynb.
Converted 15a_vision.models.unet.ipynb.
Converted 16_callback.progress.ipynb.
Converted 17_callback.tracker.ipynb.
Converted 18_callback.fp16.ipynb.
Converted 18a_callback.training.ipynb.
Converted 18b_callback.preds.ipynb.
Converted 19_callback.mixup.ipynb.
Converted 20_interpret.ipynb.
Converted 20a_distributed.ipynb.
Converted 21_vision.learner.ipynb.
Converted 22_tutorial.imagenette.ipynb.
Converted 23_tutorial.vision.ipynb.
Converted 24_tutorial.siamese.ipynb.
Converted 24_vision.gan.ipynb.
Converted 30_text.core.ipynb.
Converted 31_text.data.ipynb.
Converted 32_text.models.awdlstm.ipynb.
Converted 33_text.models.core.ipynb.
Converted 34_callback.rnn.ipynb.
Converted 35_tutorial.wikitext.ipynb.
Converted 36_text.models.qrnn.ipynb.
Converted 37_text.learner.ipynb.
Converted 38_tutorial.text.ipynb.
Converted 39_tutorial.transformers.ipynb.
Converted 40_tabular.core.ipynb.
Converted 41_tabular.data.ipynb.
Converted 42_tabular.model.ipynb.
Converted 43_tabular.learner.ipynb.
Converted 44_tutorial.tabular.ipynb.
Converted 45_collab.ipynb.
Converted 46_tutorial.collab.ipynb.
Converted 50_tutorial.datablock.ipynb.
Converted 60_medical.imaging.ipynb.
Converted 61_tutorial.medical_imaging.ipynb.
Converted 65_medical.text.ipynb.
Converted 70_callback.wandb.ipynb.
Converted 71_callback.tensorboard.ipynb.
Converted 72_callback.neptune.ipynb.
Converted 73_callback.captum.ipynb.
Converted 97_test_utils.ipynb.
Converted 99_pytorch_doc.ipynb.
Converted dev-setup.ipynb.
Converted index.ipynb.
Converted quick_start.ipynb.
Converted tutorial.ipynb.