DataFest Theano tutorial
September 20, 2015
1 Introduction
This is a basic Theano tutorial, presented at the Moscow Data Fest: http://www.meetup.com/Moscow-Data-Fest/events/224856462/.
You can find the code here: https://github.com/dudevil/datafest-theano-tutorial/.
1.1 Baby steps
In [1]: import numpy as np
        import theano
        import theano.tensor as T
        %pylab inline
        figsize(8, 6)
Populating the interactive namespace from numpy and matplotlib
In [18]: # declare a Theano variable
         a = theano.tensor.lscalar()
         # a = theano.tensor.vector()
         expression = 1 + 2 * a + a ** 2
         f = theano.function(
             [a],
             expression)
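Nothing is evaluated while expression is being built; Theano only records a symbolic graph, and theano.function compiles it into callable code. For intuition, the eager NumPy equivalent of the same polynomial would be (a sketch, not part of the tutorial code):

    def poly_np(a):
        # the same polynomial evaluated immediately: 1 + 2a + a^2 == (1 + a)^2
        return 1 + 2 * a + a ** 2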
In [7]: # f(0)
        result = f(np.arange(-10, 10))
        result
Out[7]: array([  81.,   64.,   49.,   36.,   25.,   16.,    9.,    4.,    1.,
                  0.,    1.,    4.,    9.,   16.,   25.,   36.,   49.,   64.,
                 81.,  100.])
(Calling f with an array only works with the vector variant of a above; the lscalar version accepts a single number, e.g. f(0).)
In [8]: plot(np.arange(-10, 10), result, c=’m’, linewidth=2.)
grid()
In [9]: # shared variables represent internal state
        state = theano.shared(0)
        i = T.iscalar('i')
        # each call returns the current value of state, then applies
        # the update state <- state + i
        accumulator = theano.function([i],
                                      state,
                                      updates=[(state, state + i)])
In [14]: accumulator(5)
Out[14]: array(20)
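Outputs are computed from the pre-update value of state, so repeated calls accumulate. A sketch of the session that presumably produced Out[14], assuming state started at 0 and accumulator(5) had already been called four times:

    # accumulator(5)  -> array(0);  state becomes 5
    # accumulator(5)  -> array(5);  state becomes 10
    # ...
    # accumulator(5)  -> array(20); state becomes 25   (this is Out[14])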
In [15]: state.set_value(-15)
         print state.get_value()
-15
In [19]: state.set_value(0)
         f = theano.function(
             [i],
             expression,
             updates=[(state, state + i)],
             givens={
                 # givens substitutes state for a in the graph,
                 # so f needs no explicit input for a
                 a: state
             }
         )
In [25]: f(1)
Out[25]: array(36)
(Out[25] again reflects earlier calls in the live session: expression is (1 + a) ** 2 with a taken from state, so a result of 36 means state had reached 5 by this call.)
1.2 Data
In [26]: # class 0: points on a noisy parabola; class 1: a Gaussian blob at the origin
         x1 = np.linspace(-1, 1, 100)
         x2 = 1.5 - x1 ** 2 + np.random.normal(scale=0.2, size=100)
         x3 = np.random.normal(scale=0.3, size=100)
         x4 = np.random.normal(scale=0.3, size=100)
         # stack the two classes into a (200, 2) design matrix and shuffle
         permutation = np.random.permutation(np.arange(200))
         x = np.hstack((
             np.vstack((x1, x2)),
             np.vstack((x3, x4)))).T[permutation]
         y = np.concatenate((
             np.zeros_like(x1),
             np.ones_like(x3)))[permutation]
         # needed for pictures later
         xx, yy = np.mgrid[-2:2:.01, -2:2:.01]
         grid_arr = np.c_[xx.ravel(), yy.ravel()]
         def plot_decision(predicts):
             probas = predicts.reshape(xx.shape)
             contour = contourf(xx, yy, probas, 25, cmap="RdBu", vmin=0, vmax=1)
             colorbar(contour)
             scatter(x[:, 0], x[:, 1], c=y, s=50,
                     cmap="RdBu", vmin=-.2, vmax=1.2,
                     edgecolor="white", linewidth=1)
             title("Some cool decision boundary")
             grid()
In [27]: scatter(x[:, 0], x[:, 1], c=y, s=75,
                 cmap="RdBu", vmin=-.2, vmax=1.2,
                 edgecolor="white", linewidth=1)
         title("Toy data")
         grid()
1.3 Logistic regression
In [29]: # allocate variables
         W = theano.shared(
             value=np.zeros((2, 1), dtype=theano.config.floatX),
             name='W',
             borrow=True)  # borrow=True avoids copying the initial value
         b = theano.shared(
             value=np.zeros((1,), dtype=theano.config.floatX),
             name='b',
             borrow=True)
         X = T.matrix('X')
         Y = T.imatrix('Y')
         index = T.lscalar()
         # keep the dataset in shared variables so minibatches can be
         # sliced on the device via givens
         shared_x = theano.shared(x.astype(theano.config.floatX))
         shared_y = theano.shared(y.astype(np.int32)[..., np.newaxis])
In [30]: # define the model: p(y=1 | x) = sigmoid(X.W + b)
         linear = T.dot(X, W) + b
         p_y_given_x = T.nnet.sigmoid(linear)
         y_pred = p_y_given_x > 0.5
         cost = T.nnet.binary_crossentropy(p_y_given_x, Y).mean()
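For reference, a plain-NumPy sketch of what this graph computes once compiled (illustrative helper names, not part of the tutorial code):

    def forward_np(X_np, W_np, b_np):
        # sigmoid of the affine map, matching T.nnet.sigmoid(T.dot(X, W) + b)
        return 1.0 / (1.0 + np.exp(-(X_np.dot(W_np) + b_np)))

    def bce_np(p, y_np):
        # mean binary cross-entropy, matching T.nnet.binary_crossentropy(...).mean()
        return -(y_np * np.log(p) + (1.0 - y_np) * np.log(1.0 - p)).mean()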
In [32]: # give me the gradients
         g_W = T.grad(cost, W)
         g_b = T.grad(cost, b)
         learning_rate = 0.4
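T.grad differentiates the cost symbolically; for sigmoid plus binary cross-entropy the result matches the familiar closed form. A NumPy sketch using forward_np from above (for checking intuition only):

    def grads_np(X_np, y_np, W_np, b_np):
        p = forward_np(X_np, W_np, b_np)      # probabilities, shape (N, 1)
        err = p - y_np                        # residuals
        g_W_np = X_np.T.dot(err) / len(X_np)  # d(mean cross-entropy)/dW
        g_b_np = err.mean(axis=0)             # d(mean cross-entropy)/db
        return g_W_np, g_b_np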
In [33]: batch_size = 4
         updates = [(W, W - learning_rate * g_W),
                    (b, b - 2 * learning_rate * g_b)]  # note: the bias gets twice the step
         train = theano.function(
             [index],
             [cost],
             updates=updates,
             givens={
                 X: shared_x[index * batch_size: (index + 1) * batch_size],
                 Y: shared_y[index * batch_size: (index + 1) * batch_size]
             }
         )
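Each call train(idx) performs one SGD step on one minibatch: givens splices the idx-th slice of the shared data into the graph, so no arrays cross the Python/device boundary per call. Roughly, in eager pseudocode (a sketch):

    # Xb = x[idx * batch_size : (idx + 1) * batch_size]
    # yb = y_col[idx * batch_size : (idx + 1) * batch_size]   # y as an int column
    # c = cost(Xb, yb); W -= learning_rate * g_W; b -= 2 * learning_rate * g_b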
In [34]: ## SGD is love SGD is life
         for epoch_ in xrange(150):
             loss = []
             # note: 100 // batch_size = 25 minibatches, i.e. only the
             # first 100 of the 200 points are ever visited
             for iter_ in xrange(100 // batch_size):
                 loss.append(train(iter_))
             e_loss = np.mean(loss)
             if not epoch_ % 10:
                 print e_loss
0.493502346255
0.147674447402
0.128282895388
0.121076048693
0.11739237421
0.115212956857
0.113809215835
0.112853422221
0.112176679133
0.111683459472
0.111315944784
0.111037287761
0.110823034929
0.110656420058
0.110525636027
In [35]: # p_y_given_x = T.nnet.sigmoid(T.dot(X, W) + b)
         predict_proba = theano.function(
             [X],
             p_y_given_x
         )
         probas = predict_proba(grid_arr)
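Thresholding the probabilities at 0.5 would give hard labels, mirroring y_pred above (a one-line sketch; the plot below uses the raw probabilities instead):

    hard_preds = (probas > 0.5).astype(int)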
In [36]: plot_decision(probas)
1.4 SVM
In [66]: # reset parameters
         W.set_value(np.zeros((2, 1), dtype=theano.config.floatX),
                     borrow=True)
         b.set_value(np.zeros((1,), dtype=theano.config.floatX),
                     borrow=True)
In [67]: # switching to an SVM takes three small changes: labels in {-1, +1},
         # polynomial features, and a hinge loss with L2 regularization on W
         y[y == 0] = -1
         linear = T.dot(X ** 51 + X ** 5 + X ** 2, W) + b
         cost = T.maximum(0, 1 - linear * Y).mean() + 2e-3 * (W ** 2).sum()
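The hinge term is zero for points classified with a margin of at least 1 and grows linearly otherwise; the (W ** 2).sum() term is the usual L2 penalty. A NumPy sketch of the same objective (illustrative names):

    def hinge_cost_np(scores, y_pm1, W_np, l2=2e-3):
        # scores: decision values; y_pm1: labels in {-1, +1}
        margins = np.maximum(0, 1 - scores * y_pm1)
        return margins.mean() + l2 * (W_np ** 2).sum()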
In [71]: # learning_rate = 0.01
         # this code was not changed from above!
         shared_x = theano.shared(x.astype(theano.config.floatX))
         shared_y = theano.shared(y.astype(np.int32)[..., np.newaxis])
         g_W = T.grad(cost, W)
         g_b = T.grad(cost, b)
         updates = [(W, W - learning_rate * g_W),
                    (b, b - 2 * learning_rate * g_b)]
         train = theano.function(
             [index],
             [cost],
             updates=updates,
             givens={
                 X: shared_x[index * batch_size: (index + 1) * batch_size],
                 Y: shared_y[index * batch_size: (index + 1) * batch_size]
             }
         )
         for epoch_ in xrange(150):
             loss = []
             for iter_ in xrange(100 // batch_size):
                 loss.append(train(iter_))
             e_loss = np.mean(loss)
             if not epoch_ % 10:
                 print e_loss
8.07245149444
5.08135669324
2.72128208817
1.32891962237
0.694687232703
0.388649249613
0.235258656813
0.148592129988
0.165618868736
0.165583407441
0.165459371865
0.160225021915
0.160102481692
0.160319361948
0.165628919804
In [64]: predict = theano.function(
             [X],
             linear > 0  # the sign of the decision function gives the class
         )
In [72]: preds = predict(grid_arr)
plot_decision(preds)