! [ -e /content ] && pip install -Uqq fastai # 在Colab上升级fastai
Tensorboard
from __future__ import annotations
from fastai.basics import *
from nbdev import show_doc

与 tensorboard 的集成
首先,您需要使用以下命令安装 tensorboard:
pip install tensorboard
然后在您的终端中启动 tensorboard:
tensorboard --logdir=runs
您可以更改 logdir,只要它与您传递给 TensorBoardCallback 的 log_dir 匹配(默认是在工作目录中的 runs)。
Tensorboard 嵌入投影仪支持
Tensorboard 嵌入投影仪目前仅支持图像分类
在训练期间导出图像特征
Tensorboard 嵌入投影仪 在训练期间支持 TensorBoardCallback (设置参数 projector=True)。验证集的嵌入将在每个训练周期后写入。
cbs = [TensorBoardCallback(projector=True)]
learn = vision_learner(dls, resnet18, metrics=accuracy)
learn.fit_one_cycle(3, cbs=cbs)
在推理过程中导出图像特征
要为自定义数据集编写嵌入(例如,在加载学习者之后),使用 TensorBoardProjectorCallback。手动将回调添加到学习者中。
learn = load_learner('path/to/export.pkl')
learn.add_cb(TensorBoardProjectorCallback())
dl = learn.dls.test_dl(files, with_labels=True)
_ = learn.get_preds(dl=dl)
如果使用自定义模型(非 fastai-resnet),请将提取嵌入的层作为回调参数传递。
layer = learn.model[1][1]
cbs = [TensorBoardProjectorCallback(layer=layer)]
preds = learn.get_preds(dl=dl, cbs=cbs)
从语言模型导出词嵌入
从语言模型中导出词嵌入(测试过AWD_LSTM(fast.ai)和GPT2 / BERT(transformers)),但适用于所有包含嵌入层的模型。
对于 fast.ai 的 TextLearner 或 LMLearner,只需传递学习器 - 嵌入层和词汇将自动提取:
dls = TextDataLoaders.from_folder(untar_data(URLs.IMDB), valid='test')
learn = text_classifier_learner(dls, AWD_LSTM, drop_mult=0.5, metrics=accuracy)
projector_word_embeddings(learn=learn, limit=2000, start=2000)
对于其他语言模型——比如 transformers 库 中的模型——你需要传递层和词汇。以下是一个 BERT 模型的示例。
from transformers import AutoTokenizer, AutoModel
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
model = AutoModel.from_pretrained("bert-base-uncased")
# 获取词嵌入层
layer = model.embeddings.word_embeddings
# 获取并排序词汇
vocab_dict = tokenizer.get_vocab()
vocab = [k for k, v in sorted(vocab_dict.items(), key=lambda x: x[1])]
# 为 tb projector 写入嵌入
projector_word_embeddings(layer=layer, vocab=vocab, limit=2000, start=2000)
import tensorboard
from torch.utils.tensorboard import SummaryWriter
from fastai.callback.fp16 import ModelToHalf
from fastai.callback.hook import hook_output

class TensorBoardBaseCallback(Callback):
    "Base class for tensorboard callbacks"
    order = Recorder.order+1

    def __init__(self): self.run_projector = False

    def after_pred(self):
        # While the projector is active, accumulate the hooked features of
        # each validation batch into `self.feat`.
        if self.run_projector: self.feat = _add_projector_features(self.learn, self.h, self.feat)

    def after_validate(self):
        if not self.run_projector: return
        # Stop collecting, detach the forward hook, then flush the collected
        # embeddings to the tensorboard projector.
        self.run_projector = False
        self._remove()
        _write_projector_embedding(self.learn, self.writer, self.feat)

    def after_fit(self):
        # `self.run` is set by subclasses in `before_fit`; only an active run
        # (rank 0, not lr_find) owns a writer that needs closing.
        if self.run: self.writer.close()

    def _setup_projector(self):
        # Hook the layer whose output will be exported. `self.layer` and
        # `self.log_dir` are stored by subclasses via `store_attr()`; when no
        # layer is given, default to `model[1][1]` (fastai vision head).
        self.run_projector = True
        self.h = hook_output(self.learn.model[1][1] if not self.layer else self.layer)
        self.feat = {}

    def _setup_writer(self): self.writer = SummaryWriter(log_dir=self.log_dir)

    def __del__(self): self._remove()

    def _remove(self):
        # Idempotent: only removes the hook if one was attached.
        if getattr(self, 'h', None): self.h.remove()

show_doc(TensorBoardBaseCallback)
TensorBoardBaseCallback ()
Basic class handling tweaks of the training loop by changing a Learner in various events
class TensorBoardCallback(TensorBoardBaseCallback):
    "Saves model topology, losses & metrics for tensorboard and tensorboard projector during training"
    def __init__(self, log_dir=None, trace_model=True, log_preds=True, n_preds=9, projector=False, layer=None):
        super().__init__()
        store_attr()

    def before_fit(self):
        # Only log from the main process, and not during lr_find or get_preds.
        self.run = not hasattr(self.learn, 'lr_finder') and not hasattr(self, "gather_preds") and rank_distrib()==0
        if not self.run: return
        self._setup_writer()
        if self.trace_model:
            # Tracing a half-precision model fails, so bail out explicitly.
            if hasattr(self.learn, 'mixed_precision'):
                raise Exception("Can't trace model in mixed precision, pass `trace_model=False` or don't use FP16.")
            b = self.dls.one_batch()
            self.learn._split(b)
            self.writer.add_graph(self.model, *self.xb)

    def after_batch(self):
        # Log smoothed training loss plus every hyper-parameter of each
        # optimizer parameter group, keyed by group index.
        self.writer.add_scalar('train_loss', self.smooth_loss, self.train_iter)
        for i,h in enumerate(self.opt.hypers):
            for k,v in h.items(): self.writer.add_scalar(f'{k}_{i}', v, self.train_iter)

    def after_epoch(self):
        # Recorder's first two columns (epoch, train_loss) and the last (time)
        # are skipped; the rest are epoch-level metrics.
        for n,v in zip(self.recorder.metric_names[2:-1], self.recorder.log[2:-1]):
            self.writer.add_scalar(n, v, self.train_iter)
        if self.log_preds:
            # Render up to `n_preds` decoded validation predictions as a figure.
            b = self.dls.valid.one_batch()
            self.learn.one_batch(0, b)
            preds = getcallable(self.loss_func, 'activation')(self.pred)
            out = getcallable(self.loss_func, 'decodes')(preds)
            x,y,its,outs = self.dls.valid.show_results(b, out, show=False, max_n=self.n_preds)
            tensorboard_log(x, y, its, outs, self.writer, self.train_iter)

    def before_validate(self):
        if self.projector: self._setup_projector()

show_doc(TensorBoardCallback)
TensorBoardCallback (log_dir=None, trace_model=True, log_preds=True, n_preds=9, projector=False, layer=None)
Saves model topology, losses & metrics for tensorboard and tensorboard projector during training
class TensorBoardProjectorCallback(TensorBoardBaseCallback):
    "Extracts and exports image features for tensorboard projector during inference"
    def __init__(self, log_dir=None, layer=None):
        super().__init__()
        store_attr()

    def before_fit(self):
        # Inference-only: runs during `get_preds` (which sets `gather_preds`),
        # and only on the main process.
        self.run = not hasattr(self.learn, 'lr_finder') and hasattr(self, "gather_preds") and rank_distrib()==0
        if not self.run: return
        self._setup_writer()

    def before_validate(self):
        self._setup_projector()

show_doc(TensorBoardProjectorCallback)
TensorBoardProjectorCallback (log_dir=None, layer=None)
Extracts and exports image features for tensorboard projector during inference
def _write_projector_embedding(learn, writer, feat):
    "Write accumulated feature vectors, labels and thumbnails to the tensorboard projector."
    # Map integer targets back to class names when the dataloader has a vocab.
    lbls = [learn.dl.vocab[l] for l in feat['lbl']] if getattr(learn.dl, 'vocab', None) else None
    vecs = feat['vec'].squeeze()
    writer.add_embedding(vecs, metadata=lbls, label_img=feat['img'], global_step=learn.train_iter)

def _add_projector_features(learn, hook, feat):
    "Append the current batch's hooked features (plus images and labels) to `feat`."
    img = _normalize_for_projector(learn.x)
    # On the first batch of the validation pass, start fresh tensors;
    # afterwards concatenate along the batch dimension.
    first_batch = learn.iter == 0
    feat['vec'] = hook.stored if first_batch else torch.cat((feat['vec'], hook.stored),0)
    feat['img'] = img if first_batch else torch.cat((feat['img'], img),0)
    if getattr(learn.dl, 'vocab', None):
        feat['lbl'] = learn.y if first_batch else torch.cat((feat['lbl'], learn.y),0)
    return feat

def _get_embeddings(model, layer):
    "Return the weight matrix of `layer`, defaulting to the model's encoder embedding."
    layer = model[0].encoder if layer is None else layer
    return layer.weight

@typedispatch
def _normalize_for_projector(x:TensorImage):
    # Normalize each image independently into the 0-1 range so the projector
    # can display it as a thumbnail.
    img = x.clone()
    sz = img.shape
    img = img.view(x.size(0), -1)
    img -= img.min(1, keepdim=True)[0]
    img /= img.max(1, keepdim=True)[0]
    img = img.view(*sz)
    return img

from fastai.text.all import LMLearner, TextLearner

def projector_word_embeddings(learn=None, layer=None, vocab=None, limit=-1, start=0, log_dir=None):
    "Extracts and exports word embeddings from language models embedding layers"
    # When no layer is given, locate the embedding layer of a fastai text learner.
    if not layer:
        if isinstance(learn, LMLearner): layer = learn.model[0].encoder
        elif isinstance(learn, TextLearner): layer = learn.model[0].module.encoder
    emb = layer.weight
    # Uniform grey placeholder thumbnails: the projector expects a `label_img`.
    img = torch.full((len(emb),3,8,8), 0.7)
    vocab = learn.dls.vocab[0] if vocab is None else vocab
    # Suffix every token with '_' so it is rendered as a plain string label.
    vocab = list(map(lambda x: f'{x}_', vocab))
    writer = SummaryWriter(log_dir=log_dir)
    # `None` keeps every row from `start` onwards; the previous `-1` sentinel
    # silently dropped the last embedding row when `limit` was negative.
    end = start + limit if limit >= 0 else None
    writer.add_embedding(emb[start:end], metadata=vocab[start:end], label_img=img[start:end])
    writer.close()

show_doc(projector_word_embeddings)
projector_word_embeddings (learn=None, layer=None, vocab=None, limit=-1, start=0, log_dir=None)
Extracts and exports word embeddings from language models embedding layers
from fastai.vision.data import *

@typedispatch
def tensorboard_log(x:TensorImage, y: TensorCategory, samples, outs, writer, step):
    "Log classification sample results (image, target, color-coded prediction) as a figure."
    fig,axs = get_grid(len(samples), return_fig=True)
    # First pass draws the images, second pass overlays the targets.
    for i in range(2):
        axs = [b.show(ctx=c) for b,c in zip(samples.itemgot(i),axs)]
    # Predictions are drawn green when they match the target, red otherwise.
    axs = [r.show(ctx=c, color='green' if b==r else 'red')
           for b,r,c in zip(samples.itemgot(1),outs.itemgot(0),axs)]
    writer.add_figure('Sample results', fig, step)

from fastai.vision.core import TensorPoint,TensorBBox

@typedispatch
def tensorboard_log(x:TensorImage, y: TensorImageBase|TensorPoint|TensorBBox, samples, outs, writer, step):
    "Log image-target sample results (segmentation/points/bboxes) on interleaved axes."
    fig,axs = get_grid(len(samples), return_fig=True, double=True)
    # Even-indexed axes: input image, with the target overlaid on a second pass.
    for i in range(2):
        axs[::2] = [b.show(ctx=c) for b,c in zip(samples.itemgot(i),axs[::2])]
    # Odd-indexed axes: target then prediction. (Loop variable renamed so it
    # no longer shadows the dispatch argument `x`.)
    for res in [samples,outs]:
        axs[1::2] = [b.show(ctx=c) for b,c in zip(res.itemgot(0),axs[1::2])]
    writer.add_figure('Sample results', fig, step)

TensorBoard回调
from fastai.vision.all import Resize, RandomSubsetSplitter, aug_transforms, vision_learner, resnet18

path = untar_data(URLs.PETS)
db = DataBlock(blocks=(ImageBlock, CategoryBlock),
get_items=get_image_files,
item_tfms=Resize(128),
splitter=RandomSubsetSplitter(train_sz=0.1, valid_sz=0.01),
batch_tfms=aug_transforms(size=64),
get_y=using_attr(RegexLabeller(r'(.+)_\d+.*$'), 'name'))
dls = db.dataloaders(path/'images')

learn = vision_learner(dls, resnet18, metrics=accuracy)

learn.unfreeze()
learn.fit_one_cycle(3, cbs=TensorBoardCallback(Path.home()/'tmp'/'runs'/'tb', trace_model=True))

| epoch | train_loss | valid_loss | accuracy | time |
|---|---|---|---|---|
|---|---|---|---|---|
| 0 | 4.973294 | 5.009670 | 0.082192 | 00:03 |
| 1 | 4.382769 | 4.438282 | 0.095890 | 00:03 |
| 2 | 3.877172 | 3.665855 | 0.178082 | 00:04 |
投影仪
TensorBoardCallback中的投影仪
path = untar_data(URLs.PETS)

db = DataBlock(blocks=(ImageBlock, CategoryBlock),
get_items=get_image_files,
item_tfms=Resize(128),
splitter=RandomSubsetSplitter(train_sz=0.05, valid_sz=0.01),
batch_tfms=aug_transforms(size=64),
get_y=using_attr(RegexLabeller(r'(.+)_\d+.*$'), 'name'))
dls = db.dataloaders(path/'images')

cbs = [TensorBoardCallback(log_dir=Path.home()/'tmp'/'runs'/'vision1', projector=True)]
learn = vision_learner(dls, resnet18, metrics=accuracy)

learn.unfreeze()
learn.fit_one_cycle(3, cbs=cbs)

| epoch | train_loss | valid_loss | accuracy | time |
|---|---|---|---|---|
| 0 | 5.143322 | 6.736727 | 0.082192 | 00:03 |
| 1 | 4.508100 | 5.106580 | 0.109589 | 00:03 |
| 2 | 4.057889 | 4.194602 | 0.068493 | 00:03 |
TensorBoardProjectorCallback
path = untar_data(URLs.PETS)

db = DataBlock(blocks=(ImageBlock, CategoryBlock),
get_items=get_image_files,
item_tfms=Resize(128),
splitter=RandomSubsetSplitter(train_sz=0.1, valid_sz=0.01),
batch_tfms=aug_transforms(size=64),
get_y=using_attr(RegexLabeller(r'(.+)_\d+.*$'), 'name'))
dls = db.dataloaders(path/'images')

files = get_image_files(path/'images')
files = files[:256]

dl = learn.dls.test_dl(files, with_labels=True)

learn = vision_learner(dls, resnet18, metrics=accuracy)
layer = learn.model[1][0].ap
cbs = [TensorBoardProjectorCallback(layer=layer, log_dir=Path.home()/'tmp'/'runs'/'vision2')]

_ = learn.get_preds(dl=dl, cbs=cbs)

投影词嵌入
fastai 文本或语言模型学习者
from fastai.text.all import TextDataLoaders, text_classifier_learner, AWD_LSTM

dls = TextDataLoaders.from_folder(untar_data(URLs.IMDB), valid='test')
learn = text_classifier_learner(dls, AWD_LSTM, drop_mult=0.5, metrics=accuracy)

projector_word_embeddings(learn, limit=1000, log_dir=Path.home()/'tmp'/'runs'/'text')

变换器
GPT2
from transformers import GPT2LMHeadModel, GPT2TokenizerFast

tokenizer = GPT2TokenizerFast.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('gpt2')
layer = model.transformer.wte
vocab_dict = tokenizer.get_vocab()
vocab = [k for k, v in sorted(vocab_dict.items(), key=lambda x: x[1])]
projector_word_embeddings(layer=layer, vocab=vocab, limit=2000, log_dir=Path.home()/'tmp'/'runs'/'transformers')

BERT

from transformers import AutoTokenizer, AutoModel

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
model = AutoModel.from_pretrained("bert-base-uncased")
layer = model.embeddings.word_embeddings
vocab_dict = tokenizer.get_vocab()
vocab = [k for k, v in sorted(vocab_dict.items(), key=lambda x: x[1])]
projector_word_embeddings(layer=layer, vocab=vocab, limit=2000, start=2000, log_dir=Path.home()/'tmp'/'runs'/'transformers')

warning: Embedding dir exists, did you set global_step for add_embedding()?
在tensorboard中验证结果
在命令行中运行以下命令以检查投影仪嵌入是否已正确写入:
tensorboard --logdir=~/tmp/runs
在浏览器中打开 http://localhost:6006 (TensorBoard Projector 在 Safari 中无法正确工作!)
导出 -
from nbdev import *
nbdev_export()