2. Deep Learning Only Possible on Supercomputers
TPU v3
ImageNet SOTA: 90.45% top-1 accuracy, at a cost of 10,000 TPU v3 core-days.
[Figure: number of parameters per model, from 10^7 to 10^13 (roughly x100,000 growth): ResNet-50, DistilBERT, ELMo, BERT-Large, GPT-2, MegatronLM, Turing-NLG, GPT-3, Switch Transformer. X-axis: "Number of parameters". Caption: bring on the compute.]
18. Implementation in NumPy Only
import numpy as np

epochs = 300
batch_size = 32
D_in = 784
H = 100
D_out = 10
learning_rate = 1.0e-06

# create random input and output data
x = np.random.randn(batch_size, D_in)
y = np.random.randn(batch_size, D_out)

# randomly initialize weights
w1 = np.random.randn(D_in, H)
w2 = np.random.randn(H, D_out)

for epoch in range(epochs):
    # forward pass
    h = x.dot(w1)            # h = x * w1
    h_r = np.maximum(h, 0)   # h_r = ReLU(h)
    y_p = h_r.dot(w2)        # y_p = h_r * w2

    # compute sum-of-squares loss and print it
    loss = np.square(y_p - y).sum()
    print(epoch, loss)

    # backward pass: compute gradient of loss with respect to w2
    grad_y_p = 2.0 * (y_p - y)
    grad_w2 = h_r.T.dot(grad_y_p)

    # backward pass: compute gradient of loss with respect to w1
    grad_h_r = grad_y_p.dot(w2.T)
    grad_h = grad_h_r.copy()
    grad_h[h < 0] = 0
    grad_w1 = x.T.dot(grad_h)

    # update weights
    w1 -= learning_rate * grad_w1
    w2 -= learning_rate * grad_w2
$w_1 \leftarrow w_1 - \eta \frac{\partial L}{\partial w_1}, \qquad w_2 \leftarrow w_2 - \eta \frac{\partial L}{\partial w_2}$

$L = \frac{1}{N_O} \sum (y_p - y)^2$

$\frac{\partial L}{\partial w_2} = \frac{\partial L}{\partial y_p} \frac{\partial y_p}{\partial w_2} = \frac{1}{N_O} \, 2 (y_p - y) \, h_r$

$\frac{\partial L}{\partial w_1} = \frac{\partial L}{\partial y_p} \frac{\partial y_p}{\partial h_r} \frac{\partial h_r}{\partial w_1} = \frac{1}{N_O} \, 2 (y_p - y) \, w_2 x$
00_numpy.py
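Hand-derived gradients like the ones above are easy to get wrong, so it is worth checking them against central finite differences. A minimal sketch (the small dimensions and the check loop are illustrative, not part of 00_numpy.py):

import numpy as np

np.random.seed(0)
batch_size, D_in, H, D_out = 4, 6, 5, 3   # tiny sizes so the check is fast
x = np.random.randn(batch_size, D_in)
y = np.random.randn(batch_size, D_out)
w1 = np.random.randn(D_in, H)
w2 = np.random.randn(H, D_out)

def loss_fn(w1, w2):
    h_r = np.maximum(x.dot(w1), 0)
    return np.square(h_r.dot(w2) - y).sum()

# analytic gradient with respect to w1, exactly as in 00_numpy.py
h = x.dot(w1)
h_r = np.maximum(h, 0)
y_p = h_r.dot(w2)
grad_h = (2.0 * (y_p - y)).dot(w2.T)
grad_h[h < 0] = 0
grad_w1 = x.T.dot(grad_h)

# numerical gradient by central differences
eps = 1.0e-6
num_grad_w1 = np.zeros_like(w1)
for i in range(w1.shape[0]):
    for j in range(w1.shape[1]):
        w1[i, j] += eps
        f_plus = loss_fn(w1, w2)
        w1[i, j] -= 2.0 * eps
        f_minus = loss_fn(w1, w2)
        w1[i, j] += eps
        num_grad_w1[i, j] = (f_plus - f_minus) / (2.0 * eps)

# the two gradients should agree to roughly 1e-6
print(np.max(np.abs(grad_w1 - num_grad_w1)))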
19. Introducing PyTorch
import torch

epochs = 300
batch_size = 32
D_in = 784
H = 100
D_out = 10
learning_rate = 1.0e-06

# create random input and output data
x = torch.randn(batch_size, D_in)
y = torch.randn(batch_size, D_out)

# randomly initialize weights
w1 = torch.randn(D_in, H)
w2 = torch.randn(H, D_out)

for epoch in range(epochs):
    # forward pass: compute predicted y
    h = x.mm(w1)
    h_r = h.clamp(min=0)
    y_p = h_r.mm(w2)

    # compute and print loss
    loss = (y_p - y).pow(2).sum().item()
    print(epoch, loss)

    # backward pass: compute gradient of loss with respect to w2
    grad_y_p = 2.0 * (y_p - y)
    grad_w2 = h_r.t().mm(grad_y_p)

    # backward pass: compute gradient of loss with respect to w1
    grad_h_r = grad_y_p.mm(w2.t())
    grad_h = grad_h_r.clone()
    grad_h[h < 0] = 0
    grad_w1 = x.t().mm(grad_h)

    # update weights
    w1 -= learning_rate * grad_w1
    w2 -= learning_rate * grad_w2
NumPy → PyTorch correspondence:

np.random           → torch
np                  → torch
x.dot(w1)           → x.mm(w1)
np.maximum(h, 0)    → h.clamp(min=0)
np.square(y_p - y)  → (y_p - y).pow(2)
copy()              → clone()
01_tensors.py
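One quick way to verify the correspondences above is to run the same computation in both libraries on shared data; a minimal sketch (variable names are illustrative):

import numpy as np
import torch

x_np = np.random.randn(2, 3)
w_np = np.random.randn(3, 4)

# NumPy version
h_np = np.maximum(x_np.dot(w_np), 0)

# PyTorch version of the same computation on the same data
x_t = torch.from_numpy(x_np)
w_t = torch.from_numpy(w_np)
h_t = x_t.mm(w_t).clamp(min=0)

# the results agree to floating-point precision
print(np.allclose(h_np, h_t.numpy()))  # True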
20. Introducing Automatic Differentiation
# randomly initialize weights
w1 = torch.randn(D_in, H)
w2 = torch.randn(H, D_out)

for epoch in range(epochs):
    # forward pass: compute predicted y
    h = x.mm(w1)
    h_r = h.clamp(min=0)
    y_p = h_r.mm(w2)

    # compute and print loss
    loss = (y_p - y).pow(2).sum().item()
    print(epoch, loss)

    # backward pass: compute gradient of loss with respect to w2
    grad_y_p = 2.0 * (y_p - y)
    grad_w2 = h_r.t().mm(grad_y_p)

    # backward pass: compute gradient of loss with respect to w1
    grad_h_r = grad_y_p.mm(w2.t())
    grad_h = grad_h_r.clone()
    grad_h[h < 0] = 0
    grad_w1 = x.t().mm(grad_h)

    # update weights
    w1 -= learning_rate * grad_w1
    w2 -= learning_rate * grad_w2
01_tensors.py → 02_autograd.py
# randomly initialize weights
w1 = torch.randn(D_in, H, requires_grad=True)
w2 = torch.randn(H, D_out, requires_grad=True)

for epoch in range(epochs):
    # forward pass: compute predicted y
    h = x.mm(w1)
    h_r = h.clamp(min=0)
    y_p = h_r.mm(w2)

    # compute and print loss
    loss = (y_p - y).pow(2).sum()
    print(epoch, loss.item())

    # backward pass
    loss.backward()

    with torch.no_grad():
        # update weights
        w1 -= learning_rate * w1.grad
        w2 -= learning_rate * w2.grad

        # reset gradients to zero
        w1.grad.zero_()
        w2.grad.zero_()
$\frac{\partial L}{\partial w_1} = \frac{\partial L}{\partial y_p} \frac{\partial y_p}{\partial h_r} \frac{\partial h_r}{\partial w_1} = \frac{1}{N_O} \, 2 (y_p - y) \, w_2 x$
The gradients are computed automatically.
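To see that autograd really reproduces the hand-written backward pass, the two can be compared directly; a minimal sketch (small random dimensions chosen for illustration):

import torch

torch.manual_seed(0)
x = torch.randn(4, 6)
y = torch.randn(4, 3)
w1 = torch.randn(6, 5, requires_grad=True)
w2 = torch.randn(5, 3, requires_grad=True)

# forward pass, then let autograd fill in w1.grad and w2.grad
h = x.mm(w1)
h_r = h.clamp(min=0)
y_p = h_r.mm(w2)
loss = (y_p - y).pow(2).sum()
loss.backward()

# hand-written backward pass, as in 01_tensors.py
with torch.no_grad():
    grad_y_p = 2.0 * (y_p - y)
    grad_w2 = h_r.t().mm(grad_y_p)
    grad_h = grad_y_p.mm(w2.t())
    grad_h[h < 0] = 0
    grad_w1 = x.t().mm(grad_h)

print(torch.allclose(w1.grad, grad_w1))  # True
print(torch.allclose(w2.grad, grad_w2))  # True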
21. Writing Your Own Activation Function (03_function.py)
import torch

for epoch in range(epochs):
    # forward pass: compute predicted y
    h = x.mm(w1)
    h_r = h.clamp(min=0)
    y_p = h_r.mm(w2)
02_autograd.py
import torch

class ReLU(torch.autograd.Function):
    @staticmethod
    def forward(ctx, input):
        ctx.save_for_backward(input)
        return input.clamp(min=0)

    @staticmethod
    def backward(ctx, grad_output):
        input, = ctx.saved_tensors
        grad_input = grad_output.clone()
        grad_input[input < 0] = 0
        return grad_input

for epoch in range(epochs):
    # forward pass: compute predicted y
    relu = ReLU.apply
    h = x.mm(w1)
    h_r = relu(h)
    y_p = h_r.mm(w2)
[Figure: plot of the ReLU (Rectified Linear Unit) activation, y = f(x) = max(x, 0)]
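A custom torch.autograd.Function can be validated with torch.autograd.gradcheck, which compares the analytic backward against finite differences; a minimal sketch (gradcheck wants double-precision inputs, and the check can in principle fail at points where ReLU is non-differentiable, i.e., exactly zero):

import torch

class ReLU(torch.autograd.Function):  # as defined in 03_function.py
    @staticmethod
    def forward(ctx, input):
        ctx.save_for_backward(input)
        return input.clamp(min=0)

    @staticmethod
    def backward(ctx, grad_output):
        input, = ctx.saved_tensors
        grad_input = grad_output.clone()
        grad_input[input < 0] = 0
        return grad_input

# gradcheck compares the hand-written backward with finite differences;
# it expects double-precision inputs with requires_grad=True
x = torch.randn(8, 5, dtype=torch.double, requires_grad=True)
print(torch.autograd.gradcheck(ReLU.apply, (x,)))  # True if backward is consistent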
22. Using torch.nn (04_nn_module.py)
# create random input and output data
x = torch.randn(batch_size, D_in)
y = torch.randn(batch_size, D_out)

# randomly initialize weights
w1 = torch.randn(D_in, H, requires_grad=True)
w2 = torch.randn(H, D_out, requires_grad=True)

for epoch in range(epochs):
    # forward pass: compute predicted y
    h = x.mm(w1)
    h_r = h.clamp(min=0)
    y_p = h_r.mm(w2)

    # compute and print loss
    loss = (y_p - y).pow(2).sum()
    print(epoch, loss.item())

    # backward pass
    loss.backward()

    with torch.no_grad():
        # update weights
        w1 -= learning_rate * w1.grad
        w2 -= learning_rate * w2.grad

        # reset gradients to zero
        w1.grad.zero_()
        w2.grad.zero_()
02_autograd.py
# create random input and output data
x = torch.randn(batch_size, D_in)
y = torch.randn(batch_size, D_out)

# define model
model = torch.nn.Sequential(
    torch.nn.Linear(D_in, H),
    torch.nn.ReLU(),
    torch.nn.Linear(H, D_out),
)

# define loss function
criterion = torch.nn.MSELoss(reduction='sum')

for epoch in range(epochs):
    # forward pass: compute predicted y
    y_p = model(x)

    # compute and print loss
    loss = criterion(y_p, y)
    print(epoch, loss.item())

    # backward pass
    model.zero_grad()
    loss.backward()

    with torch.no_grad():
        # update weights
        for param in model.parameters():
            param -= learning_rate * param.grad
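Under the hood, each nn.Linear holds a weight and a bias as Parameters and computes y = x W^T + b, which is why model.parameters() gives the training loop everything it needs to update; a minimal sketch of that equivalence (dimensions are illustrative):

import torch

fc = torch.nn.Linear(4, 3)    # illustrative sizes
x = torch.randn(2, 4)

# nn.Linear computes x @ W^T + b with a learnable weight and bias
y1 = fc(x)
y2 = x.mm(fc.weight.t()) + fc.bias
print(torch.allclose(y1, y2))  # True

# parameters() yields exactly the tensors the loop above updates
for name, p in fc.named_parameters():
    print(name, tuple(p.shape))  # weight (3, 4), bias (3,)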
23. Calling an Optimizer (05_optimizer.py)
04_nn_module.py
# define loss function
criterion = torch.nn.MSELoss(reduction='sum')

for epoch in range(epochs):
    # forward pass: compute predicted y
    y_p = model(x)

    # compute and print loss
    loss = criterion(y_p, y)
    print(epoch, loss.item())

    # backward pass
    model.zero_grad()
    loss.backward()

    with torch.no_grad():
        # update weights
        for param in model.parameters():
            param -= learning_rate * param.grad
# define loss function
criterion = torch.nn.MSELoss(reduction='sum')

# define optimizer
optimizer = torch.optim.SGD(model.parameters(),
                            lr=learning_rate)

for epoch in range(epochs):
    # forward pass: compute predicted y
    y_p = model(x)

    # compute and print loss
    loss = criterion(y_p, y)
    print(epoch, loss.item())

    # backward pass
    optimizer.zero_grad()
    loss.backward()

    # update weights
    optimizer.step()
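With the optimizer abstraction, changing the update rule is a one-line edit and the loop body stays the same; a sketch (the stand-in model, Adam, and lr=1e-3 are illustrative choices, not from the slides):

import torch

model = torch.nn.Linear(784, 10)   # stand-in model, for illustration only
x = torch.randn(32, 784)
y = torch.randn(32, 10)
criterion = torch.nn.MSELoss(reduction='sum')

# swapping the update rule is a one-line change
optimizer = torch.optim.Adam(model.parameters(), lr=1.0e-3)
# optimizer = torch.optim.SGD(model.parameters(), lr=1.0e-6, momentum=0.9)

# the loop body is identical regardless of the optimizer chosen
y_p = model(x)
loss = criterion(y_p, y)
optimizer.zero_grad()
loss.backward()
optimizer.step()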
24. Writing Your Own Model (06_mm_module.py)
05_optimizer.py
# create random input and output data
x = torch.randn(batch_size, D_in)
y = torch.randn(batch_size, D_out)

# define model
model = torch.nn.Sequential(
    torch.nn.Linear(D_in, H),
    torch.nn.ReLU(),
    torch.nn.Linear(H, D_out),
)

# define loss function
criterion = torch.nn.MSELoss(reduction='sum')
import torch.nn as nn
import torch.nn.functional as F

class TwoLayerNet(nn.Module):
    def __init__(self, D_in, H, D_out):
        super(TwoLayerNet, self).__init__()
        self.fc1 = nn.Linear(D_in, H)
        self.fc2 = nn.Linear(H, D_out)

    def forward(self, x):
        h = self.fc1(x)
        h_r = F.relu(h)
        y_p = self.fc2(h_r)
        return y_p

# create random input and output data
x = torch.randn(batch_size, D_in)
y = torch.randn(batch_size, D_out)

# define model
model = TwoLayerNet(D_in, H, D_out)

# define loss function
criterion = nn.MSELoss(reduction='sum')
...
(unchanged during training)
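A hand-written nn.Module is a drop-in replacement for the Sequential model: submodules assigned in __init__ register their parameters automatically. A minimal usage sketch (dimensions follow the slides):

import torch
import torch.nn as nn
import torch.nn.functional as F

class TwoLayerNet(nn.Module):   # as defined in 06_mm_module.py
    def __init__(self, D_in, H, D_out):
        super(TwoLayerNet, self).__init__()
        self.fc1 = nn.Linear(D_in, H)
        self.fc2 = nn.Linear(H, D_out)

    def forward(self, x):
        return self.fc2(F.relu(self.fc1(x)))

model = TwoLayerNet(784, 100, 10)
y_p = model(torch.randn(32, 784))
print(y_p.shape)                 # torch.Size([32, 10])

# submodules assigned in __init__ contribute their parameters automatically
for name, p in model.named_parameters():
    print(name, tuple(p.shape))  # fc1.weight, fc1.bias, fc2.weight, fc2.bias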
25. Loading the MNIST Dataset (07_mnist.py)
06_mm_module.py
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets, transforms

# read input data and labels
train_dataset = datasets.MNIST('./data',
                               train=True,
                               download=True,
                               transform=transforms.ToTensor())
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)

for epoch in range(epochs):
    # set model to training mode
    model.train()
    # loop over each batch from the training set
    for batch_idx, (x, y) in enumerate(train_loader):
        # forward pass: compute predicted y
        y_p = model(x)
        ...
import torch.nn as nn
import torch.nn.functional as F

# create random input and output data
x = torch.randn(batch_size, D_in)
y = torch.randn(batch_size, D_out)

for epoch in range(epochs):
    # forward pass: compute predicted y
    y_p = model(x)
    ...
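For completeness, here is one way the elided loop body could be filled in by combining the previous slides; a sketch, assuming CrossEntropyLoss for the integer class labels and flattening the 1x28x28 images to 784 features (the loss choice, learning rate, and epoch count are assumptions, not taken from 07_mnist.py):

import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets, transforms

batch_size = 32
epochs = 3   # illustrative; the earlier slides use 300 on random data

class TwoLayerNet(nn.Module):   # from 06_mm_module.py
    def __init__(self, D_in, H, D_out):
        super(TwoLayerNet, self).__init__()
        self.fc1 = nn.Linear(D_in, H)
        self.fc2 = nn.Linear(H, D_out)

    def forward(self, x):
        return self.fc2(F.relu(self.fc1(x)))

# read input data and labels
train_dataset = datasets.MNIST('./data', train=True, download=True,
                               transform=transforms.ToTensor())
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)

model = TwoLayerNet(784, 100, 10)
criterion = nn.CrossEntropyLoss()   # assumption: classification loss for MNIST
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

for epoch in range(epochs):
    model.train()
    for batch_idx, (x, y) in enumerate(train_loader):
        x = x.view(x.size(0), -1)   # flatten 1x28x28 images to 784 features
        y_p = model(x)
        loss = criterion(y_p, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print(epoch, loss.item())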