Imports
import numpy as np
from collections import defaultdict
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import datasets, transforms
from ignite.engine import Events, create_supervised_trainer, create_supervised_evaluator
from ignite.utils import convert_tensor
import ignite.metrics
import ignite.contrib.handlers
Configuration
DATA_DIR='./data'
NUM_CLASSES = 10
NUM_WORKERS = 20
BATCH_SIZE = 32
NUM_BA_COPIES = 4
EPOCHS = 200
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print("device:", DEVICE)
device: cuda
Trivial Augment, arXiv:2103.10158 [cs.CV]
Random Erasing, arXiv:1708.04896 [cs.CV]
train_transform = transforms.Compose([
transforms.TrivialAugmentWide(interpolation=transforms.InterpolationMode.BILINEAR),
transforms.RandomHorizontalFlip(),
transforms.RandomCrop(32, padding=4),
transforms.PILToTensor(),
transforms.ConvertImageDtype(torch.float),
transforms.RandomErasing(p=0.1)
])
train_dset = datasets.CIFAR10(root=DATA_DIR, train=True, download=True)
test_dset = datasets.CIFAR10(root=DATA_DIR, train=False, download=True, transform=transforms.ToTensor())
Files already downloaded and verified
Files already downloaded and verified
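The training dataset is created without a transform, so augmentation happens later in the collate function; train_transform therefore expects raw PIL images. A minimal sanity-check sketch, using the objects defined above:
# Sketch: apply the augmentation pipeline to one raw PIL image from the training set.
# Exact pixel values differ on every call because the transforms are random.
img, label = train_dset[0]     # PIL image and integer label (train_dset has no transform attached)
x = train_transform(img)       # TrivialAugment -> flip -> crop -> tensor -> random erasing
print(x.shape, x.dtype)        # expected: torch.Size([3, 32, 32]) torch.float32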
Batch Augmentation, arXiv:1901.09335 [cs.LG]
class CollateFN:
def __init__(self, transform, num_copies=1):
self.transform = transform
self.num_copies = num_copies
def __call__(self, batch):
images, labels = zip(*batch)
transformed_images = [self.transform(img) for img in images for _ in range(self.num_copies)]
new_labels = [lbl for lbl in labels for _ in range(self.num_copies)]
X = torch.stack(transformed_images, dim=0)
Y = torch.tensor(new_labels)
return X, Y
train_loader = torch.utils.data.DataLoader(train_dset, batch_size=BATCH_SIZE, shuffle=True,
collate_fn=CollateFN(train_transform, NUM_BA_COPIES),
num_workers=NUM_WORKERS, pin_memory=True)
test_loader = torch.utils.data.DataLoader(test_dset, batch_size=BATCH_SIZE, shuffle=False,
num_workers=NUM_WORKERS, pin_memory=True)
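Each loader batch now holds NUM_BA_COPIES differently augmented copies of every sample, so the effective batch size is BATCH_SIZE * NUM_BA_COPIES = 128. A quick shape check (a sketch; it just pulls one batch from the loader defined above):
# Sketch: fetch a single batch and confirm the effective batch size of 32 * 4 = 128.
X, Y = next(iter(train_loader))
print(X.shape, Y.shape)   # expected: torch.Size([128, 3, 32, 32]) torch.Size([128])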
def dataset_show_image(dset, idx):
X, Y = dset[idx]
title = "Ground truth: {}".format(dset.classes[Y])
fig = plt.figure()
ax = fig.add_subplot(111)
ax.set_axis_off()
ax.imshow(np.moveaxis(X.numpy(), 0, -1))
ax.set_title(title)
plt.show()
dataset_show_image(test_dset, 1)
Improvements of ResNet from arXiv:1812.01187 [cs.CV]
SiLU activation $x\cdot\sigma(x)$, arXiv:1702.03118 [cs.LG]; more general Swish activation $x\cdot\sigma(\beta x)$, arXiv:1710.05941 [cs.NE]
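As a quick check of the activation used below, this sketch verifies that PyTorch's built-in SiLU matches the formula $x\cdot\sigma(x)$:
# Sketch: SiLU(x) = x * sigmoid(x); Swish generalizes it with a slope β inside the sigmoid.
x = torch.linspace(-4., 4., steps=9)
assert torch.allclose(F.silu(x), x * torch.sigmoid(x))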
class ConvBlock(nn.Sequential):
def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, act=True):
padding = (kernel_size - 1) // 2
layers = [
nn.Conv2d(in_channels, out_channels, kernel_size, stride=stride, padding=padding, bias=False),
nn.BatchNorm2d(out_channels)
]
if act: layers.append(nn.SiLU(inplace=True))
super().__init__(*layers)
class BasicResidual(nn.Sequential):
def __init__(self, in_channels, out_channels, p_drop=0.):
super().__init__(
ConvBlock(in_channels, out_channels),
ConvBlock(out_channels, out_channels, act=False),
nn.Dropout2d(p_drop)
)
class ResidualBlock(nn.Module):
def __init__(self, in_channels, out_channels, p_drop=0.):
super().__init__()
self.shortcut = self.get_shortcut(in_channels, out_channels)
self.residual = BasicResidual(in_channels, out_channels, p_drop)
self.act = nn.SiLU(inplace=True)
self.gamma = nn.Parameter(torch.zeros(1))
def forward(self, x):
out = self.shortcut(x) + self.gamma * self.residual(x)
return self.act(out)
def get_shortcut(self, in_channels, out_channels):
if in_channels != out_channels:
shortcut = ConvBlock(in_channels, out_channels, 1, act=False)
else:
shortcut = nn.Identity()
return shortcut
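Initializing γ to zero (the zero-γ trick from arXiv:1812.01187) makes each block start out as the activation of its shortcut, so the network behaves like a much shallower one early in training. A minimal sketch of this property for an identity shortcut:
# Sketch: with γ = 0 at initialization and an identity shortcut, the block reduces to SiLU(x).
block = ResidualBlock(16, 16)
x = torch.randn(2, 16, 8, 8)
assert torch.allclose(block(x), F.silu(x))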
class ResidualStack(nn.Sequential):
def __init__(self, in_channels, repetitions, strides, p_drop=0.):
layers = []
out_channels = in_channels
for rep, stride in zip(repetitions, strides):
if stride > 1: layers.append(nn.MaxPool2d(stride))
for _ in range(rep):
layers.append(ResidualBlock(in_channels, out_channels, p_drop))
in_channels = out_channels
out_channels *= 2
super().__init__(*layers)
class Stem(nn.Sequential):
def __init__(self, in_channels, channel_list, stride):
layers = []
for out_channels in channel_list:
layers.append(ConvBlock(in_channels, out_channels, 3, stride=stride))
in_channels = out_channels
stride = 1
super().__init__(*layers)
class Head(nn.Sequential):
def __init__(self, in_channels, classes, p_drop=0.):
super().__init__(
nn.AdaptiveAvgPool2d(1),
nn.Flatten(),
nn.Dropout(p_drop),
nn.Linear(in_channels, classes)
)
class ResNet(nn.Sequential):
def __init__(self, classes, repetitions, strides=None, in_channels=3, res_p_drop=0., head_p_drop=0.):
if strides is None: strides = [2] * (len(repetitions) + 1)
super().__init__(
Stem(in_channels, [32, 32, 64], strides[0]),
ResidualStack(64, repetitions, strides[1:], res_p_drop),
Head(64 * 2**(len(repetitions) - 1), classes, head_p_drop)
)
def init_linear(m):
if isinstance(m, (nn.Conv2d, nn.Linear)):
nn.init.kaiming_normal_(m.weight)
if m.bias is not None: nn.init.zeros_(m.bias)
model = ResNet(NUM_CLASSES, [2, 2, 2, 2], strides=[1, 1, 2, 2, 2], res_p_drop=0., head_p_drop=0.3)
model.apply(init_linear);
model.to(DEVICE);
print("Number of parameters: {:,}".format(sum(p.numel() for p in model.parameters())))
Number of parameters: 11,200,882
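A quick architecture sanity check (a sketch using the model instance above): a CIFAR-sized batch should come out as one logit per class.
# Sketch: forward a dummy batch through the freshly built model; expect one logit per class.
model.eval()   # avoid touching BatchNorm running statistics before training
with torch.no_grad():
    out = model(torch.randn(2, 3, 32, 32, device=DEVICE))
print(out.shape)   # expected: torch.Size([2, 10])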
def reduce_loss(loss, reduction='mean'):
return loss.mean() if reduction=='mean' else loss.sum() if reduction=='sum' else loss
Label smoothing introduced in arXiv:1512.00567 [cs.CV]
class LabelSmoothingCrossEntropy(nn.Module):
def __init__(self, ε=0.1, reduction='mean'):
super().__init__()
self.ε = ε
self.reduction = reduction
def forward(self, output, target):
c = output.size(-1)
log_preds = F.log_softmax(output, dim=-1)
loss1 = reduce_loss(-log_preds.sum(dim=-1) / c, self.reduction)
loss2 = F.nll_loss(log_preds, target, reduction=self.reduction)
loss = (1. - self.ε) * loss2 + self.ε * loss1
return loss
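This mixes the usual cross-entropy with a uniform-distribution term weighted by ε. On recent PyTorch (1.10+, an assumption of this sketch) it should match the built-in label_smoothing argument of F.cross_entropy:
# Sketch: compare the custom loss with F.cross_entropy(..., label_smoothing=...) on random data.
logits = torch.randn(4, NUM_CLASSES)
target = torch.randint(0, NUM_CLASSES, (4,))
assert torch.allclose(LabelSmoothingCrossEntropy(ε=0.1)(logits, target),
                      F.cross_entropy(logits, target, label_smoothing=0.1))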
CutMix, arXiv:1905.04899 [cs.CV]
class CutMix(nn.Module):
def __init__(self, loss, α=1.0):
super().__init__()
self.loss = loss
self.α = α
self.rng = np.random.default_rng()
def prepare_batch(self, batch, device, non_blocking):
x, y = batch
x = convert_tensor(x, device=device, non_blocking=non_blocking)
y = convert_tensor(y, device=device, non_blocking=non_blocking)
batch_size = x.size(0)
self.index = torch.randperm(batch_size).to(device)
self.λ = self.rng.beta(self.α, self.α)
y1, x1, y2, x2 = self.cut_bounding_box(x.shape[-2:], self.λ)
x[:, :, y1:y2, x1:x2] = x[self.index, :, y1:y2, x1:x2]
# adjust lambda to exactly match pixel ratio
area = x.size(2) * x.size(3)
self.λ = 1. - (x2 - x1) * (y2 - y1) / area
return x, y
def cut_bounding_box(self, shape, λ):
cut_size_2 = 0.5 * np.sqrt(1. - λ)
center_yx = self.rng.random(2)
y1x1 = (np.clip(center_yx - cut_size_2, 0., 1.) * shape).astype(int)
y2x2 = (np.clip(center_yx + cut_size_2, 0., 1.) * shape).astype(int)
return np.concatenate((y1x1, y2x2))
def forward(self, pred, target):
orig_reduction = self.loss.reduction
self.loss.reduction = 'none'
batch_loss = self.λ * self.loss(pred, target) + (1. - self.λ) * self.loss(pred, target[self.index])
self.loss.reduction = orig_reduction
return reduce_loss(batch_loss, orig_reduction)
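A minimal sketch of prepare_batch on a small CPU batch: after mixing, λ is the fraction of each image kept from its original sample, and forward computes the λ-weighted combination of the losses against the original and the permuted targets.
# Sketch: run CutMix on a dummy batch; λ is recomputed from the actual box area.
demo = CutMix(LabelSmoothingCrossEntropy(), α=1.0)
x = torch.randn(8, 3, 32, 32)
y = torch.randint(0, NUM_CLASSES, (8,))
x_mixed, y_kept = demo.prepare_batch((x, y), torch.device("cpu"), non_blocking=False)
print("kept fraction λ =", demo.λ)
logits = torch.randn(8, NUM_CLASSES)
print("mixed loss =", demo(logits, y_kept).item())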
Lookahead optimizer, arXiv:1907.08610 [cs.LG]
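Lookahead keeps a second, slowly moving copy $\phi$ of the weights: the inner optimizer updates the fast weights $\theta$ as usual, and every $k$ steps the slow weights are pulled toward them, $\phi \leftarrow \phi + \alpha\,(\theta - \phi)$, after which $\theta$ is reset to $\phi$.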
class Lookahead(optim.Optimizer):
def __init__(self, optimizer, k=5, alpha=0.5):
self.optimizer = optimizer
self.k = k
self.alpha = alpha
self.param_groups = self.optimizer.param_groups
self.state = defaultdict(dict)
self.fast_state = self.optimizer.state
for group in self.param_groups:
group["counter"] = 0
def update(self, group):
for fast in group["params"]:
param_state = self.state[fast]
if "slow_param" not in param_state:
param_state["slow_param"] = torch.zeros_like(fast.data)
param_state["slow_param"].copy_(fast.data)
slow = param_state["slow_param"]
slow += (fast.data - slow) * self.alpha
fast.data.copy_(slow)
def update_lookahead(self):
for group in self.param_groups:
self.update(group)
def step(self, closure=None):
loss = self.optimizer.step(closure)
for group in self.param_groups:
if group["counter"] == 0:
self.update(group)
group["counter"] += 1
if group["counter"] >= self.k:
group["counter"] = 0
return loss
def state_dict(self):
fast_state_dict = self.optimizer.state_dict()
slow_state = {
(id(k) if isinstance(k, torch.Tensor) else k): v
for k, v in self.state.items()
}
fast_state = fast_state_dict["state"]
param_groups = fast_state_dict["param_groups"]
return {
"fast_state": fast_state,
"slow_state": slow_state,
"param_groups": param_groups,
}
def load_state_dict(self, state_dict):
slow_state_dict = {
"state": state_dict["slow_state"],
"param_groups": state_dict["param_groups"],
}
fast_state_dict = {
"state": state_dict["fast_state"],
"param_groups": state_dict["param_groups"],
}
super().load_state_dict(slow_state_dict)
self.optimizer.load_state_dict(fast_state_dict)
self.fast_state = self.optimizer.state
def add_param_group(self, param_group):
param_group["counter"] = 0
self.optimizer.add_param_group(param_group)
class History:
def __init__(self):
self.values = defaultdict(list)
def append(self, key, value):
self.values[key].append(value)
def reset(self):
for k in self.values.keys():
self.values[k] = []
def _begin_plot(self):
self.fig = plt.figure()
self.ax = self.fig.add_subplot(111)
def _end_plot(self, ylabel):
self.ax.set_xlabel('epoch')
self.ax.set_ylabel(ylabel)
plt.show()
def _plot(self, key, line_type='-', label=None):
if label is None: label=key
xs = np.arange(1, len(self.values[key])+1)
self.ax.plot(xs, self.values[key], line_type, label=label)
def plot(self, key):
self._begin_plot()
self._plot(key, '-')
self._end_plot(key)
def plot_train_val(self, key):
self._begin_plot()
self._plot('train ' + key, '.-', 'train')
self._plot('val ' + key, '.-', 'val')
self.ax.legend()
self._end_plot(key)
loss = LabelSmoothingCrossEntropy()
cutmix = CutMix(loss, α=1.0)
AdamW optimizer, arXiv:1711.05101 [cs.LG]
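Unlike Adam with L2 regularization, AdamW decouples the weight decay from the gradient-based update: each step additionally shrinks the weights directly by $\eta\,\lambda_{wd}\,\theta$ instead of adding $\lambda_{wd}\,\theta$ to the gradient.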
base_optimizer = optim.AdamW(model.parameters(), lr=1e-6, weight_decay=1e-2)
optimizer = Lookahead(base_optimizer, k=5, alpha=0.5)
trainer = create_supervised_trainer(model, optimizer, cutmix, device=DEVICE, prepare_batch=cutmix.prepare_batch)
"1cycle" leraning rate policy, arXiv:1803.09820 [cs.LG]
lr_scheduler = optim.lr_scheduler.OneCycleLR(base_optimizer, max_lr=1e-2,
steps_per_epoch=len(train_loader), epochs=EPOCHS)
trainer.add_event_handler(Events.ITERATION_COMPLETED, lambda engine: lr_scheduler.step());
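To visualize the schedule (warm-up to max_lr followed by annealing), one can step a throwaway copy of the scheduler; this sketch uses a dummy optimizer so the real base_optimizer and lr_scheduler are left untouched.
# Sketch: trace the 1cycle schedule without disturbing the real optimizer state.
_opt = optim.AdamW([torch.zeros(1, requires_grad=True)], lr=1e-6, weight_decay=1e-2)
_sched = optim.lr_scheduler.OneCycleLR(_opt, max_lr=1e-2,
                                       steps_per_epoch=len(train_loader), epochs=EPOCHS)
lrs = []
for _ in range(len(train_loader) * EPOCHS):
    _opt.step()
    _sched.step()
    lrs.append(_sched.get_last_lr()[0])
plt.plot(lrs)
plt.xlabel("iteration")
plt.ylabel("learning rate")
plt.show()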
ignite.metrics.RunningAverage(output_transform=lambda x: x).attach(trainer, "loss")
val_metrics = {"accuracy": ignite.metrics.Accuracy(), "loss": ignite.metrics.Loss(loss)}
evaluator = create_supervised_evaluator(model, metrics=val_metrics, device=DEVICE)
history = History()
@trainer.on(Events.EPOCH_COMPLETED)
def log_validation_results(engine):
train_state = engine.state
epoch = train_state.epoch
max_epochs = train_state.max_epochs
train_loss = train_state.metrics["loss"]
history.append('train loss', train_loss)
evaluator.run(test_loader)
val_metrics = evaluator.state.metrics
val_loss = val_metrics["loss"]
val_acc = val_metrics["accuracy"]
history.append('val loss', val_loss)
history.append('val acc', val_acc)
print("{}/{} - train: loss {:.3f}; val: loss {:.3f} accuracy {:.3f}".format(
epoch, max_epochs, train_loss, val_loss, val_acc))
trainer.run(train_loader, max_epochs=EPOCHS);
1/200 - train: loss 1.980; val: loss 1.370 accuracy 0.613 2/200 - train: loss 1.878; val: loss 1.150 accuracy 0.725 3/200 - train: loss 1.831; val: loss 1.063 accuracy 0.770 4/200 - train: loss 1.756; val: loss 1.005 accuracy 0.794 5/200 - train: loss 1.703; val: loss 0.964 accuracy 0.825 6/200 - train: loss 1.679; val: loss 0.935 accuracy 0.827 7/200 - train: loss 1.680; val: loss 0.928 accuracy 0.839 8/200 - train: loss 1.623; val: loss 0.871 accuracy 0.855 9/200 - train: loss 1.612; val: loss 0.861 accuracy 0.866 10/200 - train: loss 1.610; val: loss 0.881 accuracy 0.860 11/200 - train: loss 1.571; val: loss 0.834 accuracy 0.880 12/200 - train: loss 1.556; val: loss 0.806 accuracy 0.891 13/200 - train: loss 1.540; val: loss 0.790 accuracy 0.890 14/200 - train: loss 1.538; val: loss 0.789 accuracy 0.906 15/200 - train: loss 1.528; val: loss 0.778 accuracy 0.900 16/200 - train: loss 1.532; val: loss 0.793 accuracy 0.897 17/200 - train: loss 1.528; val: loss 0.754 accuracy 0.917 18/200 - train: loss 1.469; val: loss 0.746 accuracy 0.912 19/200 - train: loss 1.453; val: loss 0.736 accuracy 0.923 20/200 - train: loss 1.498; val: loss 0.753 accuracy 0.917 21/200 - train: loss 1.423; val: loss 0.692 accuracy 0.928 22/200 - train: loss 1.440; val: loss 0.693 accuracy 0.931 23/200 - train: loss 1.420; val: loss 0.718 accuracy 0.926 24/200 - train: loss 1.456; val: loss 0.709 accuracy 0.928 25/200 - train: loss 1.411; val: loss 0.741 accuracy 0.917 26/200 - train: loss 1.409; val: loss 0.709 accuracy 0.926 27/200 - train: loss 1.393; val: loss 0.704 accuracy 0.925 28/200 - train: loss 1.388; val: loss 0.694 accuracy 0.928 29/200 - train: loss 1.417; val: loss 0.687 accuracy 0.931 30/200 - train: loss 1.428; val: loss 0.707 accuracy 0.924 31/200 - train: loss 1.397; val: loss 0.678 accuracy 0.936 32/200 - train: loss 1.378; val: loss 0.666 accuracy 0.939 33/200 - train: loss 1.357; val: loss 0.697 accuracy 0.923 34/200 - train: loss 1.406; val: loss 0.675 accuracy 0.934 35/200 - train: loss 1.366; val: loss 0.701 accuracy 0.923 36/200 - train: loss 1.346; val: loss 0.690 accuracy 0.926 37/200 - train: loss 1.369; val: loss 0.646 accuracy 0.942 38/200 - train: loss 1.393; val: loss 0.696 accuracy 0.925 39/200 - train: loss 1.397; val: loss 0.658 accuracy 0.938 40/200 - train: loss 1.425; val: loss 0.747 accuracy 0.905 41/200 - train: loss 1.360; val: loss 0.682 accuracy 0.931 42/200 - train: loss 1.379; val: loss 0.675 accuracy 0.930 43/200 - train: loss 1.413; val: loss 0.701 accuracy 0.923 44/200 - train: loss 1.352; val: loss 0.664 accuracy 0.936 45/200 - train: loss 1.386; val: loss 0.741 accuracy 0.907 46/200 - train: loss 1.391; val: loss 0.686 accuracy 0.930 47/200 - train: loss 1.390; val: loss 0.642 accuracy 0.946 48/200 - train: loss 1.375; val: loss 0.662 accuracy 0.936 49/200 - train: loss 1.390; val: loss 0.655 accuracy 0.942 50/200 - train: loss 1.395; val: loss 0.696 accuracy 0.924 51/200 - train: loss 1.383; val: loss 0.661 accuracy 0.939 52/200 - train: loss 1.358; val: loss 0.634 accuracy 0.950 53/200 - train: loss 1.400; val: loss 0.691 accuracy 0.927 54/200 - train: loss 1.378; val: loss 0.646 accuracy 0.942 55/200 - train: loss 1.379; val: loss 0.670 accuracy 0.930 56/200 - train: loss 1.420; val: loss 0.675 accuracy 0.928 57/200 - train: loss 1.367; val: loss 0.641 accuracy 0.947 58/200 - train: loss 1.362; val: loss 0.654 accuracy 0.942 59/200 - train: loss 1.361; val: loss 0.648 accuracy 0.944 60/200 - train: loss 1.346; val: loss 0.689 accuracy 0.924 61/200 - train: loss 
1.329; val: loss 0.650 accuracy 0.943 62/200 - train: loss 1.392; val: loss 0.637 accuracy 0.949 63/200 - train: loss 1.353; val: loss 0.691 accuracy 0.927 64/200 - train: loss 1.359; val: loss 0.643 accuracy 0.945 65/200 - train: loss 1.329; val: loss 0.676 accuracy 0.929 66/200 - train: loss 1.330; val: loss 0.657 accuracy 0.940 67/200 - train: loss 1.379; val: loss 0.630 accuracy 0.952 68/200 - train: loss 1.338; val: loss 0.695 accuracy 0.923 69/200 - train: loss 1.326; val: loss 0.637 accuracy 0.946 70/200 - train: loss 1.373; val: loss 0.703 accuracy 0.921 71/200 - train: loss 1.312; val: loss 0.631 accuracy 0.947 72/200 - train: loss 1.342; val: loss 0.628 accuracy 0.952 73/200 - train: loss 1.362; val: loss 0.662 accuracy 0.936 74/200 - train: loss 1.325; val: loss 0.649 accuracy 0.945 75/200 - train: loss 1.336; val: loss 0.675 accuracy 0.934 76/200 - train: loss 1.347; val: loss 0.649 accuracy 0.942 77/200 - train: loss 1.325; val: loss 0.622 accuracy 0.953 78/200 - train: loss 1.377; val: loss 0.642 accuracy 0.945 79/200 - train: loss 1.344; val: loss 0.648 accuracy 0.941 80/200 - train: loss 1.319; val: loss 0.629 accuracy 0.952 81/200 - train: loss 1.324; val: loss 0.664 accuracy 0.936 82/200 - train: loss 1.341; val: loss 0.627 accuracy 0.950 83/200 - train: loss 1.326; val: loss 0.658 accuracy 0.940 84/200 - train: loss 1.319; val: loss 0.624 accuracy 0.950 85/200 - train: loss 1.329; val: loss 0.665 accuracy 0.935 86/200 - train: loss 1.366; val: loss 0.640 accuracy 0.945 87/200 - train: loss 1.351; val: loss 0.620 accuracy 0.953 88/200 - train: loss 1.348; val: loss 0.650 accuracy 0.942 89/200 - train: loss 1.303; val: loss 0.622 accuracy 0.953 90/200 - train: loss 1.306; val: loss 0.630 accuracy 0.950 91/200 - train: loss 1.294; val: loss 0.643 accuracy 0.948 92/200 - train: loss 1.318; val: loss 0.627 accuracy 0.951 93/200 - train: loss 1.348; val: loss 0.657 accuracy 0.941 94/200 - train: loss 1.351; val: loss 0.618 accuracy 0.956 95/200 - train: loss 1.311; val: loss 0.651 accuracy 0.941 96/200 - train: loss 1.304; val: loss 0.640 accuracy 0.943 97/200 - train: loss 1.327; val: loss 0.618 accuracy 0.956 98/200 - train: loss 1.322; val: loss 0.644 accuracy 0.949 99/200 - train: loss 1.299; val: loss 0.637 accuracy 0.947 100/200 - train: loss 1.297; val: loss 0.622 accuracy 0.953 101/200 - train: loss 1.334; val: loss 0.609 accuracy 0.958 102/200 - train: loss 1.299; val: loss 0.609 accuracy 0.958 103/200 - train: loss 1.309; val: loss 0.625 accuracy 0.952 104/200 - train: loss 1.288; val: loss 0.629 accuracy 0.948 105/200 - train: loss 1.301; val: loss 0.633 accuracy 0.950 106/200 - train: loss 1.296; val: loss 0.631 accuracy 0.948 107/200 - train: loss 1.321; val: loss 0.609 accuracy 0.958 108/200 - train: loss 1.299; val: loss 0.613 accuracy 0.955 109/200 - train: loss 1.273; val: loss 0.619 accuracy 0.952 110/200 - train: loss 1.286; val: loss 0.668 accuracy 0.935 111/200 - train: loss 1.290; val: loss 0.623 accuracy 0.951 112/200 - train: loss 1.293; val: loss 0.618 accuracy 0.955 113/200 - train: loss 1.313; val: loss 0.615 accuracy 0.955 114/200 - train: loss 1.291; val: loss 0.625 accuracy 0.953 115/200 - train: loss 1.240; val: loss 0.631 accuracy 0.951 116/200 - train: loss 1.286; val: loss 0.613 accuracy 0.958 117/200 - train: loss 1.308; val: loss 0.615 accuracy 0.957 118/200 - train: loss 1.283; val: loss 0.640 accuracy 0.951 119/200 - train: loss 1.275; val: loss 0.598 accuracy 0.961 120/200 - train: loss 1.283; val: loss 0.637 accuracy 0.947 121/200 - 
train: loss 1.269; val: loss 0.608 accuracy 0.962 122/200 - train: loss 1.315; val: loss 0.621 accuracy 0.951 123/200 - train: loss 1.301; val: loss 0.617 accuracy 0.953 124/200 - train: loss 1.268; val: loss 0.604 accuracy 0.961 125/200 - train: loss 1.266; val: loss 0.621 accuracy 0.952 126/200 - train: loss 1.270; val: loss 0.606 accuracy 0.958 127/200 - train: loss 1.275; val: loss 0.600 accuracy 0.962 128/200 - train: loss 1.251; val: loss 0.606 accuracy 0.958 129/200 - train: loss 1.260; val: loss 0.596 accuracy 0.962 130/200 - train: loss 1.259; val: loss 0.610 accuracy 0.957 131/200 - train: loss 1.239; val: loss 0.595 accuracy 0.961 132/200 - train: loss 1.242; val: loss 0.604 accuracy 0.961 133/200 - train: loss 1.239; val: loss 0.601 accuracy 0.960 134/200 - train: loss 1.250; val: loss 0.596 accuracy 0.963 135/200 - train: loss 1.228; val: loss 0.599 accuracy 0.963 136/200 - train: loss 1.252; val: loss 0.597 accuracy 0.964 137/200 - train: loss 1.238; val: loss 0.591 accuracy 0.964 138/200 - train: loss 1.289; val: loss 0.611 accuracy 0.961 139/200 - train: loss 1.236; val: loss 0.595 accuracy 0.962 140/200 - train: loss 1.229; val: loss 0.604 accuracy 0.960 141/200 - train: loss 1.190; val: loss 0.597 accuracy 0.962 142/200 - train: loss 1.221; val: loss 0.593 accuracy 0.965 143/200 - train: loss 1.218; val: loss 0.594 accuracy 0.963 144/200 - train: loss 1.232; val: loss 0.590 accuracy 0.964 145/200 - train: loss 1.197; val: loss 0.594 accuracy 0.965 146/200 - train: loss 1.239; val: loss 0.591 accuracy 0.966 147/200 - train: loss 1.197; val: loss 0.583 accuracy 0.969 148/200 - train: loss 1.224; val: loss 0.590 accuracy 0.964 149/200 - train: loss 1.178; val: loss 0.608 accuracy 0.958 150/200 - train: loss 1.207; val: loss 0.589 accuracy 0.968 151/200 - train: loss 1.221; val: loss 0.587 accuracy 0.967 152/200 - train: loss 1.213; val: loss 0.583 accuracy 0.968 153/200 - train: loss 1.191; val: loss 0.587 accuracy 0.966 154/200 - train: loss 1.202; val: loss 0.593 accuracy 0.968 155/200 - train: loss 1.201; val: loss 0.595 accuracy 0.965 156/200 - train: loss 1.206; val: loss 0.579 accuracy 0.971 157/200 - train: loss 1.186; val: loss 0.580 accuracy 0.970 158/200 - train: loss 1.207; val: loss 0.587 accuracy 0.968 159/200 - train: loss 1.149; val: loss 0.576 accuracy 0.971 160/200 - train: loss 1.181; val: loss 0.580 accuracy 0.968 161/200 - train: loss 1.178; val: loss 0.578 accuracy 0.970 162/200 - train: loss 1.144; val: loss 0.577 accuracy 0.971 163/200 - train: loss 1.132; val: loss 0.582 accuracy 0.969 164/200 - train: loss 1.156; val: loss 0.577 accuracy 0.971 165/200 - train: loss 1.185; val: loss 0.580 accuracy 0.970 166/200 - train: loss 1.170; val: loss 0.572 accuracy 0.974 167/200 - train: loss 1.171; val: loss 0.578 accuracy 0.972 168/200 - train: loss 1.157; val: loss 0.575 accuracy 0.972 169/200 - train: loss 1.130; val: loss 0.571 accuracy 0.973 170/200 - train: loss 1.137; val: loss 0.576 accuracy 0.971 171/200 - train: loss 1.127; val: loss 0.572 accuracy 0.972 172/200 - train: loss 1.165; val: loss 0.572 accuracy 0.974 173/200 - train: loss 1.136; val: loss 0.570 accuracy 0.973 174/200 - train: loss 1.166; val: loss 0.565 accuracy 0.975 175/200 - train: loss 1.156; val: loss 0.576 accuracy 0.971 176/200 - train: loss 1.119; val: loss 0.563 accuracy 0.974 177/200 - train: loss 1.148; val: loss 0.567 accuracy 0.974 178/200 - train: loss 1.138; val: loss 0.566 accuracy 0.975 179/200 - train: loss 1.133; val: loss 0.571 accuracy 0.974 180/200 - train: loss 
1.156; val: loss 0.569 accuracy 0.976 181/200 - train: loss 1.130; val: loss 0.566 accuracy 0.976 182/200 - train: loss 1.142; val: loss 0.563 accuracy 0.976 183/200 - train: loss 1.093; val: loss 0.564 accuracy 0.975 184/200 - train: loss 1.096; val: loss 0.563 accuracy 0.975 185/200 - train: loss 1.145; val: loss 0.566 accuracy 0.977 186/200 - train: loss 1.115; val: loss 0.561 accuracy 0.977 187/200 - train: loss 1.141; val: loss 0.565 accuracy 0.976 188/200 - train: loss 1.127; val: loss 0.562 accuracy 0.976 189/200 - train: loss 1.125; val: loss 0.564 accuracy 0.976 190/200 - train: loss 1.090; val: loss 0.562 accuracy 0.976 191/200 - train: loss 1.085; val: loss 0.566 accuracy 0.976 192/200 - train: loss 1.118; val: loss 0.563 accuracy 0.977 193/200 - train: loss 1.139; val: loss 0.564 accuracy 0.976 194/200 - train: loss 1.104; val: loss 0.562 accuracy 0.976 195/200 - train: loss 1.141; val: loss 0.565 accuracy 0.976 196/200 - train: loss 1.121; val: loss 0.563 accuracy 0.977 197/200 - train: loss 1.084; val: loss 0.566 accuracy 0.976 198/200 - train: loss 1.144; val: loss 0.563 accuracy 0.976 199/200 - train: loss 1.107; val: loss 0.562 accuracy 0.977 200/200 - train: loss 1.127; val: loss 0.565 accuracy 0.977
history.plot_train_val('loss')
history.plot('val acc')