ResNet for CIFAR10 classification using Julia


Configuration

Config
config
Config
  batchsize: Int64 32
  throttle: Int64 20
  lr: Float32 0.001f0
  epochs: Int64 2
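The printed `config` value above suggests a keyword-defined struct. A minimal sketch, assuming the `@with_kw` macro from Parameters.jl (the actual cell may use a different mechanism):

```julia
using Parameters  # assumed dependency, provides @with_kw

# Hyperparameters matching the printed `config` value.
@with_kw struct Config
    batchsize::Int64 = 32     # samples per minibatch
    throttle::Int64  = 20     # seconds between evaluation callbacks
    lr::Float32      = 0.001f0  # learning rate for the optimiser
    epochs::Int64    = 2      # number of training epochs
end

config = Config()
```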

Data


The data only needs to be downloaded once

download_data
false
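`download_data` is a flag guarding the one-time download. A sketch of how the CIFAR10 data might be fetched and loaded with MLDatasets.jl (assumed data source; the exact API depends on the MLDatasets version):

```julia
using MLDatasets  # assumed dependency for CIFAR10

download_data = false  # flip to true on the first run

if download_data
    # Trigger the one-time download of the dataset binaries.
    CIFAR10(split = :train)
    CIFAR10(split = :test)
end

# Features are 32×32×3×N arrays; targets are integer labels in 0:9.
train_x, train_y = CIFAR10(split = :train)[:]
test_x,  test_y  = CIFAR10(split = :test)[:]
```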
train_aug
8-step Augmentor.ImmutablePipeline:
 1.) Either: (50%) Flip the X axis. (50%) No operation.
 2.) Either: (50%) ShearX by ϕ ∈ -5:5 degree. (50%) ShearY by ψ ∈ -5:5 degree.
 3.) Rotate by θ ∈ -15:15 degree
 4.) Crop a 32×32 window around the center
 5.) Zoom by I ∈ {0.9×0.9, 1.0×1.0, 1.1×1.1, 1.2×1.2}
 6.) Split colorant into its color channels
 7.) Permute dimension order to (3, 2, 1)
 8.) Convert eltype to Float32
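The 8-step pipeline printed above corresponds to an Augmentor.jl composition; each printed step maps to one operation. A sketch reconstructing it (step numbers in the comments refer to the printed pipeline):

```julia
using Augmentor

train_aug = FlipX(0.5) |>                       # 1) flip X axis half the time
    Either(ShearX(-5:5), ShearY(-5:5)) |>       # 2) random shear on one axis
    Rotate(-15:15) |>                           # 3) small random rotation
    CropSize(32, 32) |>                         # 4) crop a centered 32×32 window
    Zoom(0.9:0.1:1.2) |>                        # 5) random zoom factor
    SplitChannels() |>                          # 6) colorant → channel array
    PermuteDims((3, 2, 1)) |>                   # 7) WHC layout expected by Flux
    ConvertEltype(Float32)                      # 8) Float32 pixels
```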
collate (generic function with 1 method)
collate (generic function with 2 methods)
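`collate` gains a second method in the next cell, which suggests one method that runs the augmentation pipeline (for training) and one that does not (for evaluation). A hypothetical sketch — the argument shapes, the `0:9` label range, and the use of `augmentbatch!` are all assumptions:

```julia
using Augmentor, Flux

# Training method: augment a batch of images, stack them into a
# 32×32×3×B Float32 array, and one-hot encode the labels.
function collate((imgs, labels), pipeline)
    batch = Array{Float32}(undef, 32, 32, 3, length(imgs))
    augmentbatch!(batch, imgs, pipeline)
    batch, Flux.onehotbatch(labels, 0:9)
end

# Evaluation method: no augmentation, just one-hot encode the labels.
collate((imgs, labels)) = imgs, Flux.onehotbatch(labels, 0:9)
```

The subsequent cells likely wrap the collated data in minibatch iterators, e.g. `Flux.Data.DataLoader` with `batchsize = config.batchsize` and `shuffle = true` for the training set.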

Model

conv_block (generic function with 1 method)
basic_residual (generic function with 1 method)
residual_block (generic function with 1 method)
residual_body (generic function with 1 method)
stem (generic function with 2 methods)
head (generic function with 2 methods)
resnet (generic function with 1 method)
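The helpers above (`conv_block`, `basic_residual`, `residual_block`, …) compose into the ResNet printed below. A hedged sketch of the building blocks in Flux, inferred from the printed model structure — note the actual notebook uses a custom `AddMerge` layer with a learnable scalar, which this sketch replaces with a plain add-then-relu via `Parallel`; the `pad = 1` is also an assumption:

```julia
using Flux

# 3×3 convolution + batch norm, with the activation folded into the norm.
conv_block(ch::Pair; activation = relu) =
    Chain(Conv((3, 3), ch, pad = 1), BatchNorm(ch.second, activation))

# Two conv blocks; the second has no activation, which is applied
# only after the residual merge.
basic_residual(ch::Pair) = Chain(
    conv_block(ch),
    conv_block(ch.second => ch.second, activation = identity),
)

# Residual block: identity shortcut when channel counts match,
# otherwise a 1×1 projection (as in the printed model).
function residual_block(ch::Pair)
    shortcut = ch.first == ch.second ? identity :
        Chain(Conv((1, 1), ch), BatchNorm(ch.second))
    Chain(Parallel(+, basic_residual(ch), shortcut), x -> relu.(x))
end
```

`residual_body`, `stem`, and `head` then stack these: a three-block stem (3→32→32→64 channels), four channel stages (64, 128, 256, 512) with `MaxPool` downsampling between the later stages, and a `GlobalMeanPool`/`Dropout`/`Dense(512, 10)` head.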
model
Chain(
  Chain(Chain(Conv((3, 3), 3=>32), BatchNorm(32, λ = relu)),
        Chain(Conv((3, 3), 32=>32), BatchNorm(32, λ = relu)),
        Chain(Conv((3, 3), 32=>64), BatchNorm(64, λ = relu))),
  Chain(SkipConnection(Chain(Chain(Conv((3, 3), 64=>64), BatchNorm(64, λ = relu)), Chain(Conv((3, 3), 64=>64), BatchNorm(64))), AddMerge(Float32[0.0], identity)),
        SkipConnection(Chain(Chain(Conv((3, 3), 64=>64), BatchNorm(64, λ = relu)), Chain(Conv((3, 3), 64=>64), BatchNorm(64))), AddMerge(Float32[0.0], identity)),
        SkipConnection(Chain(Chain(Conv((3, 3), 64=>128), BatchNorm(128, λ = relu)), Chain(Conv((3, 3), 128=>128), BatchNorm(128))), AddMerge(Float32[0.0], Chain(Conv((1, 1), 64=>128), BatchNorm(128)))),
        SkipConnection(Chain(Chain(Conv((3, 3), 128=>128), BatchNorm(128, λ = relu)), Chain(Conv((3, 3), 128=>128), BatchNorm(128))), AddMerge(Float32[0.0], identity)),
        MaxPool((2, 2), pad = (0, 0, 0, 0), stride = (2, 2)),
        SkipConnection(Chain(Chain(Conv((3, 3), 128=>256), BatchNorm(256, λ = relu)), Chain(Conv((3, 3), 256=>256), BatchNorm(256))), AddMerge(Float32[0.0], Chain(Conv((1, 1), 128=>256), BatchNorm(256)))),
        SkipConnection(Chain(Chain(Conv((3, 3), 256=>256), BatchNorm(256, λ = relu)), Chain(Conv((3, 3), 256=>256), BatchNorm(256))), AddMerge(Float32[0.0], identity)),
        MaxPool((2, 2), pad = (0, 0, 0, 0), stride = (2, 2)),
        SkipConnection(Chain(Chain(Conv((3, 3), 256=>512), BatchNorm(512, λ = relu)), Chain(Conv((3, 3), 512=>512), BatchNorm(512))), AddMerge(Float32[0.0], Chain(Conv((1, 1), 256=>512), BatchNorm(512)))),
        SkipConnection(Chain(Chain(Conv((3, 3), 512=>512), BatchNorm(512, λ = relu)), Chain(Conv((3, 3), 512=>512), BatchNorm(512))), AddMerge(Float32[0.0], identity))),
  Chain(GlobalMeanPool(), flatten, Dropout(0.3), Dense(512, 10)))

Training

loss (generic function with 1 method)
opt
accuracy (generic function with 1 method)
evalcb
(::Flux.var"#throttled#42"{Flux.var"#throttled#38#43"{Bool,Bool,Main.workspace3.var"#11#12",Int64}}) (generic function with 1 method)
do_training
false
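`do_training` gates the slow training loop, mirroring the `download_data` flag. A sketch of how `loss`, `opt`, `accuracy`, and the throttled callback might fit together, using Flux's implicit-parameters `train!` API (which matches the `Flux.throttle` closure printed above); `train_loader`, `test_x`, and `test_y` are assumed names from the Data section:

```julia
using Flux
using Flux: onecold, throttle

# Cross-entropy on raw logits, since the head is a bare Dense(512, 10).
loss(x, y) = Flux.logitcrossentropy(model(x), y)

opt = ADAM(config.lr)

# Fraction of correctly classified samples.
accuracy(x, y) = sum(onecold(model(x)) .== onecold(y)) / size(y, 2)

# Report test accuracy at most once every `config.throttle` seconds.
evalcb = throttle(() -> @show(accuracy(test_x, test_y)), config.throttle)

do_training = false  # flip to true to actually train

if do_training
    for epoch in 1:config.epochs
        Flux.train!(loss, Flux.params(model), train_loader, opt; cb = evalcb)
    end
end
```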