SlideShare a Scribd company logo
1 of 7
Download to read offline
2/27/2019 alexnet slides
http://127.0.0.1:8000/alexnet.slides.html?print-pdf#/ 1/7
AlexNet
AlexNet
Use it on scaled-up 224x224 dimensional Fashion-MNIST with 10 class output.
In [1]: import d2l
from mxnet import autograd, gluon, init, nd
from mxnet.gluon import data as gdata, nn
from mxnet.gluon import loss as gloss
import os
import sys
import time
2/27/2019 alexnet slides
http://127.0.0.1:8000/alexnet.slides.html?print-pdf#/ 2/7
In [2]: net = nn.Sequential()
# Use a larger 11 x 11 window to capture objects. Stride of 4 to reduce size.
# The number of output channels is much larger than that in LeNet.
net.add(nn.Conv2D(96, kernel_size=11, strides=4, activation='relu'),
nn.MaxPool2D(pool_size=3, strides=2),
# Make the convolution window smaller, set padding to 2 for consistent hei
ght/width
nn.Conv2D(256, kernel_size=5, padding=2, activation='relu'),
nn.MaxPool2D(pool_size=3, strides=2),
# Use three successive convolutional layers and a smaller convolution wind
ow.
nn.Conv2D(384, kernel_size=3, padding=1, activation='relu'),
nn.Conv2D(384, kernel_size=3, padding=1, activation='relu'),
nn.Conv2D(256, kernel_size=3, padding=1, activation='relu'),
# Reduce dimensionality
nn.MaxPool2D(pool_size=3, strides=2),
# Expensive dense layer
nn.Dense(4096, activation="relu"), nn.Dropout(0.5),
nn.Dense(4096, activation="relu"), nn.Dropout(0.5),
# Output layer. Since we are using Fashion-MNIST, n = 10
nn.Dense(10))
2/27/2019 alexnet slides
http://127.0.0.1:8000/alexnet.slides.html?print-pdf#/ 3/7
We construct a single-channel data instance with both height and width of 224 to observe
the output shape of each layer. It matches our diagram above.
In [3]: X = nd.random.uniform(shape=(1, 1, 224, 224))
net.initialize()
for layer in net:
X = layer(X)
print(layer.name, 'output shape:t', X.shape)
conv0 output shape: (1, 96, 54, 54)
pool0 output shape: (1, 96, 26, 26)
conv1 output shape: (1, 256, 26, 26)
pool1 output shape: (1, 256, 12, 12)
conv2 output shape: (1, 384, 12, 12)
conv3 output shape: (1, 384, 12, 12)
conv4 output shape: (1, 256, 12, 12)
pool2 output shape: (1, 256, 5, 5)
dense0 output shape: (1, 4096)
dropout0 output shape: (1, 4096)
dense1 output shape: (1, 4096)
dropout1 output shape: (1, 4096)
dense2 output shape: (1, 10)
2/27/2019 alexnet slides
http://127.0.0.1:8000/alexnet.slides.html?print-pdf#/ 4/7
Reading Data
Reading Data
2/27/2019 alexnet slides
http://127.0.0.1:8000/alexnet.slides.html?print-pdf#/ 5/7
In [4]: def load_data_fashion_mnist(batch_size, resize=None, root=os.path.join(
'~', '.mxnet', 'datasets', 'fashion-mnist')):
root = os.path.expanduser(root) # Expand the user path '~'.
transformer = []
if resize:
transformer += [gdata.vision.transforms.Resize(resize)]
transformer += [gdata.vision.transforms.ToTensor()]
transformer = gdata.vision.transforms.Compose(transformer)
mnist_train = gdata.vision.FashionMNIST(root=root, train=True)
mnist_test = gdata.vision.FashionMNIST(root=root, train=False)
num_workers = 0 if sys.platform.startswith('win32') else 4
train_iter = gdata.DataLoader(
mnist_train.transform_first(transformer), batch_size, shuffle=True,
num_workers=num_workers)
test_iter = gdata.DataLoader(
mnist_test.transform_first(transformer), batch_size, shuffle=False,
num_workers=num_workers)
return train_iter, test_iter
batch_size = 128
train_iter, test_iter = load_data_fashion_mnist(batch_size, resize=224)
Downloading /home/ubuntu/.mxnet/datasets/fashion-mnist/train-images-idx3-ubyt
e.gz from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/dat
aset/fashion-mnist/train-images-idx3-ubyte.gz...
Downloading /home/ubuntu/.mxnet/datasets/fashion-mnist/train-labels-idx1-ubyt
e.gz from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/dat
aset/fashion-mnist/train-labels-idx1-ubyte.gz...
Downloading /home/ubuntu/.mxnet/datasets/fashion-mnist/t10k-images-idx3-ubyte.
gz from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/datas
et/fashion-mnist/t10k-images-idx3-ubyte.gz...
Downloading /home/ubuntu/.mxnet/datasets/fashion-mnist/t10k-labels-idx1-ubyte.
gz from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/datas
et/fashion-mnist/t10k-labels-idx1-ubyte.gz...
2/27/2019 alexnet slides
http://127.0.0.1:8000/alexnet.slides.html?print-pdf#/ 6/7
Training
Training
In [5]: def evaluate_accuracy(data_iter, net, ctx):
acc_sum, n = nd.array([0], ctx=ctx), 0
for X, y in data_iter:
# If ctx is the GPU, copy the data to the GPU.
X, y = X.as_in_context(ctx), y.as_in_context(ctx).astype('float32')
acc_sum += (net(X).argmax(axis=1) == y).sum()
n += y.size
return acc_sum.asscalar() / n
def train(net, train_iter, test_iter, batch_size, trainer, ctx,
num_epochs):
print('training on', ctx)
loss = gloss.SoftmaxCrossEntropyLoss()
for epoch in range(num_epochs):
train_l_sum, train_acc_sum, n, start = 0.0, 0.0, 0, time.time()
for X, y in train_iter:
X, y = X.as_in_context(ctx), y.as_in_context(ctx)
with autograd.record():
y_hat = net(X)
l = loss(y_hat, y).sum()
l.backward()
trainer.step(batch_size)
y = y.astype('float32')
train_l_sum += l.asscalar()
train_acc_sum += (y_hat.argmax(axis=1) == y).sum().asscalar()
n += y.size
test_acc = evaluate_accuracy(test_iter, net, ctx)
print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, '
'time %.1f sec'
% (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc,
time.time() - start))
2/27/2019 alexnet slides
http://127.0.0.1:8000/alexnet.slides.html?print-pdf#/ 7/7
Compared to LeNet the main change is the smaller learning rate and much slower progress
due to much larger images.
In [6]: lr, num_epochs, ctx = 0.01, 5, d2l.try_gpu()
net.initialize(force_reinit=True, ctx=ctx, init=init.Xavier())
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': lr})
train(net, train_iter, test_iter, batch_size, trainer, ctx, num_epochs)
training on gpu(0)
epoch 1, loss 1.3052, train acc 0.509, test acc 0.745, time 18.9 sec
epoch 2, loss 0.6477, train acc 0.756, test acc 0.815, time 22.5 sec
epoch 3, loss 0.5331, train acc 0.803, test acc 0.831, time 16.6 sec
epoch 4, loss 0.4651, train acc 0.829, test acc 0.855, time 16.6 sec
epoch 5, loss 0.4276, train acc 0.844, test acc 0.867, time 16.6 sec

More Related Content

Similar to alexnet.pdf

maxbox starter60 machine learning
maxbox starter60 machine learningmaxbox starter60 machine learning
maxbox starter60 machine learningMax Kleiner
 
Accelerating HPC Applications on NVIDIA GPUs with OpenACC
Accelerating HPC Applications on NVIDIA GPUs with OpenACCAccelerating HPC Applications on NVIDIA GPUs with OpenACC
Accelerating HPC Applications on NVIDIA GPUs with OpenACCinside-BigData.com
 
The Ring programming language version 1.8 book - Part 53 of 202
The Ring programming language version 1.8 book - Part 53 of 202The Ring programming language version 1.8 book - Part 53 of 202
The Ring programming language version 1.8 book - Part 53 of 202Mahmoud Samir Fayed
 
Apache MXNet Distributed Training Explained In Depth by Viacheslav Kovalevsky...
Apache MXNet Distributed Training Explained In Depth by Viacheslav Kovalevsky...Apache MXNet Distributed Training Explained In Depth by Viacheslav Kovalevsky...
Apache MXNet Distributed Training Explained In Depth by Viacheslav Kovalevsky...Big Data Spain
 
Pydata DC 2018 (Skorch - A Union of Scikit-learn and PyTorch)
Pydata DC 2018 (Skorch - A Union of Scikit-learn and PyTorch)Pydata DC 2018 (Skorch - A Union of Scikit-learn and PyTorch)
Pydata DC 2018 (Skorch - A Union of Scikit-learn and PyTorch)Thomas Fan
 
Kaggle Winning Solution Xgboost algorithm -- Let us learn from its author
Kaggle Winning Solution Xgboost algorithm -- Let us learn from its authorKaggle Winning Solution Xgboost algorithm -- Let us learn from its author
Kaggle Winning Solution Xgboost algorithm -- Let us learn from its authorVivian S. Zhang
 
python(1).ppt
python(1).pptpython(1).ppt
python(1).ppthamm14
 
Building High-Performance Language Implementations With Low Effort
Building High-Performance Language Implementations With Low EffortBuilding High-Performance Language Implementations With Low Effort
Building High-Performance Language Implementations With Low EffortStefan Marr
 
Idea for ineractive programming language
Idea for ineractive programming languageIdea for ineractive programming language
Idea for ineractive programming languageLincoln Hannah
 
Whats new in ES2019
Whats new in ES2019Whats new in ES2019
Whats new in ES2019chayanikaa
 
Time Series Analysis Sample Code
Time Series Analysis Sample CodeTime Series Analysis Sample Code
Time Series Analysis Sample CodeAiden Wu, FRM
 
Refactoring to Macros with Clojure
Refactoring to Macros with ClojureRefactoring to Macros with Clojure
Refactoring to Macros with ClojureDmitry Buzdin
 
Do snow.rwn
Do snow.rwnDo snow.rwn
Do snow.rwnARUN DN
 
Introduction to cython: example of GCoptimization
Introduction to cython: example of GCoptimizationIntroduction to cython: example of GCoptimization
Introduction to cython: example of GCoptimizationKevin Keraudren
 
The Ring programming language version 1.8 book - Part 12 of 202
The Ring programming language version 1.8 book - Part 12 of 202The Ring programming language version 1.8 book - Part 12 of 202
The Ring programming language version 1.8 book - Part 12 of 202Mahmoud Samir Fayed
 

Similar to alexnet.pdf (20)

Xgboost
XgboostXgboost
Xgboost
 
maxbox starter60 machine learning
maxbox starter60 machine learningmaxbox starter60 machine learning
maxbox starter60 machine learning
 
Accelerating HPC Applications on NVIDIA GPUs with OpenACC
Accelerating HPC Applications on NVIDIA GPUs with OpenACCAccelerating HPC Applications on NVIDIA GPUs with OpenACC
Accelerating HPC Applications on NVIDIA GPUs with OpenACC
 
The Ring programming language version 1.8 book - Part 53 of 202
The Ring programming language version 1.8 book - Part 53 of 202The Ring programming language version 1.8 book - Part 53 of 202
The Ring programming language version 1.8 book - Part 53 of 202
 
Deep Learning for Computer Vision: Software Frameworks (UPC 2016)
Deep Learning for Computer Vision: Software Frameworks (UPC 2016)Deep Learning for Computer Vision: Software Frameworks (UPC 2016)
Deep Learning for Computer Vision: Software Frameworks (UPC 2016)
 
Apache MXNet Distributed Training Explained In Depth by Viacheslav Kovalevsky...
Apache MXNet Distributed Training Explained In Depth by Viacheslav Kovalevsky...Apache MXNet Distributed Training Explained In Depth by Viacheslav Kovalevsky...
Apache MXNet Distributed Training Explained In Depth by Viacheslav Kovalevsky...
 
Pydata DC 2018 (Skorch - A Union of Scikit-learn and PyTorch)
Pydata DC 2018 (Skorch - A Union of Scikit-learn and PyTorch)Pydata DC 2018 (Skorch - A Union of Scikit-learn and PyTorch)
Pydata DC 2018 (Skorch - A Union of Scikit-learn and PyTorch)
 
python.ppt
python.pptpython.ppt
python.ppt
 
Kaggle Winning Solution Xgboost algorithm -- Let us learn from its author
Kaggle Winning Solution Xgboost algorithm -- Let us learn from its authorKaggle Winning Solution Xgboost algorithm -- Let us learn from its author
Kaggle Winning Solution Xgboost algorithm -- Let us learn from its author
 
Python Software Testing in Kytos.io
Python Software Testing in Kytos.ioPython Software Testing in Kytos.io
Python Software Testing in Kytos.io
 
python(1).ppt
python(1).pptpython(1).ppt
python(1).ppt
 
Building High-Performance Language Implementations With Low Effort
Building High-Performance Language Implementations With Low EffortBuilding High-Performance Language Implementations With Low Effort
Building High-Performance Language Implementations With Low Effort
 
Idea for ineractive programming language
Idea for ineractive programming languageIdea for ineractive programming language
Idea for ineractive programming language
 
Whats new in ES2019
Whats new in ES2019Whats new in ES2019
Whats new in ES2019
 
Time Series Analysis Sample Code
Time Series Analysis Sample CodeTime Series Analysis Sample Code
Time Series Analysis Sample Code
 
Seminar PSU 10.10.2014 mme
Seminar PSU 10.10.2014 mmeSeminar PSU 10.10.2014 mme
Seminar PSU 10.10.2014 mme
 
Refactoring to Macros with Clojure
Refactoring to Macros with ClojureRefactoring to Macros with Clojure
Refactoring to Macros with Clojure
 
Do snow.rwn
Do snow.rwnDo snow.rwn
Do snow.rwn
 
Introduction to cython: example of GCoptimization
Introduction to cython: example of GCoptimizationIntroduction to cython: example of GCoptimization
Introduction to cython: example of GCoptimization
 
The Ring programming language version 1.8 book - Part 12 of 202
The Ring programming language version 1.8 book - Part 12 of 202The Ring programming language version 1.8 book - Part 12 of 202
The Ring programming language version 1.8 book - Part 12 of 202
 

Recently uploaded

Application of Residue Theorem to evaluate real integrations.pptx
Application of Residue Theorem to evaluate real integrations.pptxApplication of Residue Theorem to evaluate real integrations.pptx
Application of Residue Theorem to evaluate real integrations.pptx959SahilShah
 
Artificial-Intelligence-in-Electronics (K).pptx
Artificial-Intelligence-in-Electronics (K).pptxArtificial-Intelligence-in-Electronics (K).pptx
Artificial-Intelligence-in-Electronics (K).pptxbritheesh05
 
chaitra-1.pptx fake news detection using machine learning
chaitra-1.pptx  fake news detection using machine learningchaitra-1.pptx  fake news detection using machine learning
chaitra-1.pptx fake news detection using machine learningmisbanausheenparvam
 
Software and Systems Engineering Standards: Verification and Validation of Sy...
Software and Systems Engineering Standards: Verification and Validation of Sy...Software and Systems Engineering Standards: Verification and Validation of Sy...
Software and Systems Engineering Standards: Verification and Validation of Sy...VICTOR MAESTRE RAMIREZ
 
IVE Industry Focused Event - Defence Sector 2024
IVE Industry Focused Event - Defence Sector 2024IVE Industry Focused Event - Defence Sector 2024
IVE Industry Focused Event - Defence Sector 2024Mark Billinghurst
 
Sachpazis Costas: Geotechnical Engineering: A student's Perspective Introduction
Sachpazis Costas: Geotechnical Engineering: A student's Perspective IntroductionSachpazis Costas: Geotechnical Engineering: A student's Perspective Introduction
Sachpazis Costas: Geotechnical Engineering: A student's Perspective IntroductionDr.Costas Sachpazis
 
Introduction to Microprocesso programming and interfacing.pptx
Introduction to Microprocesso programming and interfacing.pptxIntroduction to Microprocesso programming and interfacing.pptx
Introduction to Microprocesso programming and interfacing.pptxvipinkmenon1
 
Gfe Mayur Vihar Call Girls Service WhatsApp -> 9999965857 Available 24x7 ^ De...
Gfe Mayur Vihar Call Girls Service WhatsApp -> 9999965857 Available 24x7 ^ De...Gfe Mayur Vihar Call Girls Service WhatsApp -> 9999965857 Available 24x7 ^ De...
Gfe Mayur Vihar Call Girls Service WhatsApp -> 9999965857 Available 24x7 ^ De...srsj9000
 
main PPT.pptx of girls hostel security using rfid
main PPT.pptx of girls hostel security using rfidmain PPT.pptx of girls hostel security using rfid
main PPT.pptx of girls hostel security using rfidNikhilNagaraju
 
Call Girls Narol 7397865700 Independent Call Girls
Call Girls Narol 7397865700 Independent Call GirlsCall Girls Narol 7397865700 Independent Call Girls
Call Girls Narol 7397865700 Independent Call Girlsssuser7cb4ff
 
Electronically Controlled suspensions system .pdf
Electronically Controlled suspensions system .pdfElectronically Controlled suspensions system .pdf
Electronically Controlled suspensions system .pdfme23b1001
 
Oxy acetylene welding presentation note.
Oxy acetylene welding presentation note.Oxy acetylene welding presentation note.
Oxy acetylene welding presentation note.eptoze12
 
Call Us ≽ 8377877756 ≼ Call Girls In Shastri Nagar (Delhi)
Call Us ≽ 8377877756 ≼ Call Girls In Shastri Nagar (Delhi)Call Us ≽ 8377877756 ≼ Call Girls In Shastri Nagar (Delhi)
Call Us ≽ 8377877756 ≼ Call Girls In Shastri Nagar (Delhi)dollysharma2066
 
Past, Present and Future of Generative AI
Past, Present and Future of Generative AIPast, Present and Future of Generative AI
Past, Present and Future of Generative AIabhishek36461
 
Biology for Computer Engineers Course Handout.pptx
Biology for Computer Engineers Course Handout.pptxBiology for Computer Engineers Course Handout.pptx
Biology for Computer Engineers Course Handout.pptxDeepakSakkari2
 
OSVC_Meta-Data based Simulation Automation to overcome Verification Challenge...
OSVC_Meta-Data based Simulation Automation to overcome Verification Challenge...OSVC_Meta-Data based Simulation Automation to overcome Verification Challenge...
OSVC_Meta-Data based Simulation Automation to overcome Verification Challenge...Soham Mondal
 

Recently uploaded (20)

Application of Residue Theorem to evaluate real integrations.pptx
Application of Residue Theorem to evaluate real integrations.pptxApplication of Residue Theorem to evaluate real integrations.pptx
Application of Residue Theorem to evaluate real integrations.pptx
 
Artificial-Intelligence-in-Electronics (K).pptx
Artificial-Intelligence-in-Electronics (K).pptxArtificial-Intelligence-in-Electronics (K).pptx
Artificial-Intelligence-in-Electronics (K).pptx
 
Design and analysis of solar grass cutter.pdf
Design and analysis of solar grass cutter.pdfDesign and analysis of solar grass cutter.pdf
Design and analysis of solar grass cutter.pdf
 
chaitra-1.pptx fake news detection using machine learning
chaitra-1.pptx  fake news detection using machine learningchaitra-1.pptx  fake news detection using machine learning
chaitra-1.pptx fake news detection using machine learning
 
Software and Systems Engineering Standards: Verification and Validation of Sy...
Software and Systems Engineering Standards: Verification and Validation of Sy...Software and Systems Engineering Standards: Verification and Validation of Sy...
Software and Systems Engineering Standards: Verification and Validation of Sy...
 
IVE Industry Focused Event - Defence Sector 2024
IVE Industry Focused Event - Defence Sector 2024IVE Industry Focused Event - Defence Sector 2024
IVE Industry Focused Event - Defence Sector 2024
 
Sachpazis Costas: Geotechnical Engineering: A student's Perspective Introduction
Sachpazis Costas: Geotechnical Engineering: A student's Perspective IntroductionSachpazis Costas: Geotechnical Engineering: A student's Perspective Introduction
Sachpazis Costas: Geotechnical Engineering: A student's Perspective Introduction
 
Introduction to Microprocesso programming and interfacing.pptx
Introduction to Microprocesso programming and interfacing.pptxIntroduction to Microprocesso programming and interfacing.pptx
Introduction to Microprocesso programming and interfacing.pptx
 
9953056974 Call Girls In South Ex, Escorts (Delhi) NCR.pdf
9953056974 Call Girls In South Ex, Escorts (Delhi) NCR.pdf9953056974 Call Girls In South Ex, Escorts (Delhi) NCR.pdf
9953056974 Call Girls In South Ex, Escorts (Delhi) NCR.pdf
 
Gfe Mayur Vihar Call Girls Service WhatsApp -> 9999965857 Available 24x7 ^ De...
Gfe Mayur Vihar Call Girls Service WhatsApp -> 9999965857 Available 24x7 ^ De...Gfe Mayur Vihar Call Girls Service WhatsApp -> 9999965857 Available 24x7 ^ De...
Gfe Mayur Vihar Call Girls Service WhatsApp -> 9999965857 Available 24x7 ^ De...
 
main PPT.pptx of girls hostel security using rfid
main PPT.pptx of girls hostel security using rfidmain PPT.pptx of girls hostel security using rfid
main PPT.pptx of girls hostel security using rfid
 
Call Girls Narol 7397865700 Independent Call Girls
Call Girls Narol 7397865700 Independent Call GirlsCall Girls Narol 7397865700 Independent Call Girls
Call Girls Narol 7397865700 Independent Call Girls
 
Electronically Controlled suspensions system .pdf
Electronically Controlled suspensions system .pdfElectronically Controlled suspensions system .pdf
Electronically Controlled suspensions system .pdf
 
Oxy acetylene welding presentation note.
Oxy acetylene welding presentation note.Oxy acetylene welding presentation note.
Oxy acetylene welding presentation note.
 
Call Us -/9953056974- Call Girls In Vikaspuri-/- Delhi NCR
Call Us -/9953056974- Call Girls In Vikaspuri-/- Delhi NCRCall Us -/9953056974- Call Girls In Vikaspuri-/- Delhi NCR
Call Us -/9953056974- Call Girls In Vikaspuri-/- Delhi NCR
 
Call Us ≽ 8377877756 ≼ Call Girls In Shastri Nagar (Delhi)
Call Us ≽ 8377877756 ≼ Call Girls In Shastri Nagar (Delhi)Call Us ≽ 8377877756 ≼ Call Girls In Shastri Nagar (Delhi)
Call Us ≽ 8377877756 ≼ Call Girls In Shastri Nagar (Delhi)
 
Past, Present and Future of Generative AI
Past, Present and Future of Generative AIPast, Present and Future of Generative AI
Past, Present and Future of Generative AI
 
young call girls in Rajiv Chowk🔝 9953056974 🔝 Delhi escort Service
young call girls in Rajiv Chowk🔝 9953056974 🔝 Delhi escort Serviceyoung call girls in Rajiv Chowk🔝 9953056974 🔝 Delhi escort Service
young call girls in Rajiv Chowk🔝 9953056974 🔝 Delhi escort Service
 
Biology for Computer Engineers Course Handout.pptx
Biology for Computer Engineers Course Handout.pptxBiology for Computer Engineers Course Handout.pptx
Biology for Computer Engineers Course Handout.pptx
 
OSVC_Meta-Data based Simulation Automation to overcome Verification Challenge...
OSVC_Meta-Data based Simulation Automation to overcome Verification Challenge...OSVC_Meta-Data based Simulation Automation to overcome Verification Challenge...
OSVC_Meta-Data based Simulation Automation to overcome Verification Challenge...
 

alexnet.pdf

  • 1. 2/27/2019 alexnet slides http://127.0.0.1:8000/alexnet.slides.html?print-pdf#/ 1/7 AlexNet AlexNet Use it on scaled-up 224x224 dimensional Fashion-MNIST with 10 class output. In [1]: import d2l from mxnet import autograd, gluon, init, nd from mxnet.gluon import data as gdata, nn from mxnet.gluon import loss as gloss import os import sys import time
  • 2. 2/27/2019 alexnet slides http://127.0.0.1:8000/alexnet.slides.html?print-pdf#/ 2/7 In [2]: net = nn.Sequential() # Use a larger 11 x 11 window to capture objects. Stride of 4 to reduce size. # The number of output channels is much larger than that in LeNet. net.add(nn.Conv2D(96, kernel_size=11, strides=4, activation='relu'), nn.MaxPool2D(pool_size=3, strides=2), # Make the convolution window smaller, set padding to 2 for consistent hei ght/width nn.Conv2D(256, kernel_size=5, padding=2, activation='relu'), nn.MaxPool2D(pool_size=3, strides=2), # Use three successive convolutional layers and a smaller convolution wind ow. nn.Conv2D(384, kernel_size=3, padding=1, activation='relu'), nn.Conv2D(384, kernel_size=3, padding=1, activation='relu'), nn.Conv2D(256, kernel_size=3, padding=1, activation='relu'), # Reduce dimensionality nn.MaxPool2D(pool_size=3, strides=2), # Expensive dense layer nn.Dense(4096, activation="relu"), nn.Dropout(0.5), nn.Dense(4096, activation="relu"), nn.Dropout(0.5), # Output layer. Since we are using Fashion-MNIST, n = 10 nn.Dense(10))
  • 3. 2/27/2019 alexnet slides http://127.0.0.1:8000/alexnet.slides.html?print-pdf#/ 3/7 We construct a single-channel data instance with both height and width of 224 to observe the output shape of each layer. It matches our diagram above. In [3]: X = nd.random.uniform(shape=(1, 1, 224, 224)) net.initialize() for layer in net: X = layer(X) print(layer.name, 'output shape:t', X.shape) conv0 output shape: (1, 96, 54, 54) pool0 output shape: (1, 96, 26, 26) conv1 output shape: (1, 256, 26, 26) pool1 output shape: (1, 256, 12, 12) conv2 output shape: (1, 384, 12, 12) conv3 output shape: (1, 384, 12, 12) conv4 output shape: (1, 256, 12, 12) pool2 output shape: (1, 256, 5, 5) dense0 output shape: (1, 4096) dropout0 output shape: (1, 4096) dense1 output shape: (1, 4096) dropout1 output shape: (1, 4096) dense2 output shape: (1, 10)
  • 5. 2/27/2019 alexnet slides http://127.0.0.1:8000/alexnet.slides.html?print-pdf#/ 5/7 In [4]: def load_data_fashion_mnist(batch_size, resize=None, root=os.path.join( '~', '.mxnet', 'datasets', 'fashion-mnist')): root = os.path.expanduser(root) # Expand the user path '~'. transformer = [] if resize: transformer += [gdata.vision.transforms.Resize(resize)] transformer += [gdata.vision.transforms.ToTensor()] transformer = gdata.vision.transforms.Compose(transformer) mnist_train = gdata.vision.FashionMNIST(root=root, train=True) mnist_test = gdata.vision.FashionMNIST(root=root, train=False) num_workers = 0 if sys.platform.startswith('win32') else 4 train_iter = gdata.DataLoader( mnist_train.transform_first(transformer), batch_size, shuffle=True, num_workers=num_workers) test_iter = gdata.DataLoader( mnist_test.transform_first(transformer), batch_size, shuffle=False, num_workers=num_workers) return train_iter, test_iter batch_size = 128 train_iter, test_iter = load_data_fashion_mnist(batch_size, resize=224) Downloading /home/ubuntu/.mxnet/datasets/fashion-mnist/train-images-idx3-ubyt e.gz from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/dat aset/fashion-mnist/train-images-idx3-ubyte.gz... Downloading /home/ubuntu/.mxnet/datasets/fashion-mnist/train-labels-idx1-ubyt e.gz from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/dat aset/fashion-mnist/train-labels-idx1-ubyte.gz... Downloading /home/ubuntu/.mxnet/datasets/fashion-mnist/t10k-images-idx3-ubyte. gz from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/datas et/fashion-mnist/t10k-images-idx3-ubyte.gz... Downloading /home/ubuntu/.mxnet/datasets/fashion-mnist/t10k-labels-idx1-ubyte. gz from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/datas et/fashion-mnist/t10k-labels-idx1-ubyte.gz...
  • 6. 2/27/2019 alexnet slides http://127.0.0.1:8000/alexnet.slides.html?print-pdf#/ 6/7 Training Training In [5]: def evaluate_accuracy(data_iter, net, ctx): acc_sum, n = nd.array([0], ctx=ctx), 0 for X, y in data_iter: # If ctx is the GPU, copy the data to the GPU. X, y = X.as_in_context(ctx), y.as_in_context(ctx).astype('float32') acc_sum += (net(X).argmax(axis=1) == y).sum() n += y.size return acc_sum.asscalar() / n def train(net, train_iter, test_iter, batch_size, trainer, ctx, num_epochs): print('training on', ctx) loss = gloss.SoftmaxCrossEntropyLoss() for epoch in range(num_epochs): train_l_sum, train_acc_sum, n, start = 0.0, 0.0, 0, time.time() for X, y in train_iter: X, y = X.as_in_context(ctx), y.as_in_context(ctx) with autograd.record(): y_hat = net(X) l = loss(y_hat, y).sum() l.backward() trainer.step(batch_size) y = y.astype('float32') train_l_sum += l.asscalar() train_acc_sum += (y_hat.argmax(axis=1) == y).sum().asscalar() n += y.size test_acc = evaluate_accuracy(test_iter, net, ctx) print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, ' 'time %.1f sec' % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc, time.time() - start))
  • 7. 2/27/2019 alexnet slides http://127.0.0.1:8000/alexnet.slides.html?print-pdf#/ 7/7 Compared to LeNet the main change is the smaller learning rate and much slower progress due to much larger images. In [6]: lr, num_epochs, ctx = 0.01, 5, d2l.try_gpu() net.initialize(force_reinit=True, ctx=ctx, init=init.Xavier()) trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': lr}) train(net, train_iter, test_iter, batch_size, trainer, ctx, num_epochs) training on gpu(0) epoch 1, loss 1.3052, train acc 0.509, test acc 0.745, time 18.9 sec epoch 2, loss 0.6477, train acc 0.756, test acc 0.815, time 22.5 sec epoch 3, loss 0.5331, train acc 0.803, test acc 0.831, time 16.6 sec epoch 4, loss 0.4651, train acc 0.829, test acc 0.855, time 16.6 sec epoch 5, loss 0.4276, train acc 0.844, test acc 0.867, time 16.6 sec