Machine Learning Report
(1) What is linearity?
Linearity, simply put, means proportionality. In two dimensions it is written as y = ax + b, and in three dimensions as z = ax + by + c. More generally, the equation of a hyperplane in N-dimensional space is expressed as

$$y = a_0 + a_1 x_1 + \cdots + a_{n-1} x_{n-1} = \sum_{k=0}^{n-1} a_k x_k \qquad (x_0 := 1)$$
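As a small added illustration (not part of the original report code), the hyperplane equation is simply an inner product once the constant feature x_0 = 1 is prepended to the input:

import numpy as np

# Coefficients (a0, a1, a2) of a plane z = a0 + a1*x1 + a2*x2
a = np.array([1.0, 2.0, -0.5])

# Input (x1, x2) = (3, 4) with the constant feature x0 = 1 prepended
x = np.array([1.0, 3.0, 4.0])

y = a @ x  # 1 + 2*3 - 0.5*4 = 5.0
print(y)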
(2) Linear regression model
A linear regression model is one of the models used to solve regression problems. It is a supervised method that learns from labeled training data, and it outputs a linear combination of the input and an m-dimensional parameter vector.
Writing the unknown parameters as $\boldsymbol{\omega} = (\omega_1, \omega_2, \omega_3, \ldots, \omega_m)^T$, the intercept as $\omega_0$, and the predicted value as $\hat{y}$, the model is

$$\hat{y} = \boldsymbol{\omega}^T \boldsymbol{x} + \omega_0 = \sum_{j=1}^{m} \omega_j x_j + \omega_0$$
The parameters of a linear regression model are estimated by the method of least squares: we search for the parameters that minimize the mean squared error on the training data, which, put simply, means finding the point where the gradient is zero. The following function is used:

$$MSE_{train} = \frac{1}{n_{train}} \sum_{i=1}^{n_{train}} \left( \hat{y}_i - y_i \right)^2$$
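Setting the gradient of this MSE to zero gives the normal equation $\hat{\boldsymbol{\omega}} = (X^T X)^{-1} X^T \boldsymbol{y}$. The following is a minimal added sketch (toy data, not from the report) that solves it directly:

import numpy as np

# Toy data: y = 2*x + 1 plus noise
rng = np.random.default_rng(0)
x = rng.uniform(0, 1, size=20)
y = 2 * x + 1 + 0.1 * rng.standard_normal(20)

# Design matrix with a constant column for the intercept omega_0
X = np.column_stack([np.ones_like(x), x])

# Normal equation: the MSE gradient is zero at omega = (X^T X)^{-1} X^T y
omega = np.linalg.solve(X.T @ X, X.T @ y)
print(omega)  # approximately [1, 2]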
(2)-1 Implementing the linear regression model
from sklearn.datasets import load_boston
from pandas import DataFrame
import numpy as np
boston = load_boston()
print(boston['feature_names'])
['CRIM' 'ZN' 'INDUS' 'CHAS' 'NOX' 'RM' 'AGE' 'DIS' 'RAD' 'TAX' 'PTRATIO' 'B'
'LSTAT']
# Contents of the explanatory variables
print(boston['data'])
[[6.3200e-03 1.8000e+01 2.3100e+00 ... 1.5300e+01 3.9690e+02 4.9800e+00]
[2.7310e-02 0.0000e+00 7.0700e+00 ... 1.7800e+01 3.9690e+02 9.1400e+00]
[2.7290e-02 0.0000e+00 7.0700e+00 ... 1.7800e+01 3.9283e+02 4.0300e+00]
...
[6.0760e-02 0.0000e+00 1.1930e+01 ... 2.1000e+01 3.9690e+02 5.6400e+00]
[1.0959e-01 0.0000e+00 1.1930e+01 ... 2.1000e+01 3.9345e+02 6.4800e+00]
[4.7410e-02 0.0000e+00 1.1930e+01 ... 2.1000e+01 3.9690e+02 7.8800e+00]]
# Contents of the target variable
print(boston['target'])
[24. 21.6 34.7 33.4 36.2 28.7 22.9 27.1 16.5 18.9 15. 18.9 21.7 20.4 18.2
19.9 23.1 17.5 20.2 18.2 13.6 19.6 15.2 14.5 15.6 13.9 16.6 14.8 18.4 21.
12.7 14.5 13.2 13.1 13.5 18.9 20. 21. 24.7 30.8 34.9 26.6 25.3 24.7 21.2
19.3 20. 16.6 14.4 19.4 19.7 20.5 25. 23.4 18.9 35.4 24.7 31.6 23.3 19.6
18.7 16. 22.2 25. 33. ...
# Create the DataFrame
# Explanatory variables
df = DataFrame(data=boston.data, columns = boston.feature_names)
# Target variable
df['PRICE'] = np.array(boston.target)
# Features used for the model (number of rooms and % lower-status population)
data = df.loc[:, ['RM', 'LSTAT']].values
# Target values
target = df.loc[:, 'PRICE'].values
# Build and fit the model
from sklearn.linear_model import LinearRegression
model = LinearRegression()
model.fit(data, target)
model.predict([[5, 10]])
array([17.69208377])
From the above, when the number of rooms is 5 and the percentage of lower-status population is 10, the predicted price is 17.69, i.e. about $17,690 (the Boston housing target is in units of $1,000). When implementing machine learning, it is necessary to check that the given data take reasonable values and that there are no outliers.

(3) Nonlinear regression model
The holdout method splits the data into two parts, one for training and one for evaluation, and measures the accuracy of the trained model on the held-out part. How the data are split creates a trade-off between training accuracy and evaluation accuracy, so the method cannot evaluate a model properly unless the data are sufficiently plentiful. Knowing in which situations a particular method is inappropriate, as in this case, is important.
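A minimal sketch of the holdout method (added here; it assumes scikit-learn's train_test_split and reuses the data and target arrays defined in (2)-1):

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

# Hold out 20% of the data for evaluation, train on the remaining 80%
X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.2, random_state=0)

model = LinearRegression().fit(X_train, y_train)
print(model.score(X_test, y_test))  # R^2 on the held-out data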
Cross validation splits the data into several parts, uses one part as validation data and the rest as training data, repeats the training across the different splits, and averages the resulting accuracies. A representative variant is K-fold cross validation: the data are divided into K blocks, one block is used for validation and the remaining K-1 blocks for training, and the accuracy is evaluated over the K rotations. Compared with the holdout method, it does not depend on one particular split of the data, so the variance of the performance estimate can be expected to be smaller. For that reason, even if the accuracy obtained with the holdout method happens to be better than the accuracy obtained with cross validation, the cross-validation figure should be adopted as the performance estimate.
Grid search is a technique used to improve model accuracy by trying every combination of the candidate parameter values.
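A brief added sketch of both techniques, assuming scikit-learn's cross_val_score and GridSearchCV and using Ridge regression's alpha as an example parameter grid:

from sklearn.model_selection import cross_val_score, GridSearchCV
from sklearn.linear_model import Ridge

# 5-fold cross validation: average the score over the 5 held-out folds
scores = cross_val_score(Ridge(), data, target, cv=5)
print(scores.mean())

# Grid search: try every value in the grid, scoring each candidate by cross validation
grid = GridSearchCV(Ridge(), param_grid={'alpha': [0.01, 0.1, 1, 10]}, cv=5)
grid.fit(data, target)
print(grid.best_params_, grid.best_score_)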
(3)-1 Nonlinear regression implementation exercise
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
sns.set()
sns.set_style("darkgrid", {'grid.linestyle': '--'})
sns.set_context("paper")
n = 100
def true_func(x):
    z = 1 - 48*x + 218*x**2 - 315*x**3 + 145*x**4
    return z
def linear_func(x):
    z = x
    return z
data = np.random.rand(n).astype(np.float32)
data = np.sort(data)
target = true_func(data)
noise = 0.5 * np.random.randn(n)
target = target + noise
# reshape to a column vector: the scikit-learn fit() calls below require
# 2-D feature arrays (this reshape is assumed)
data = data.reshape(-1, 1)
plt.scatter(data, target, label='data')
plt.title('NonLinear Regression')
plt.legend(loc=2)
from sklearn.kernel_ridge import KernelRidge
clf = KernelRidge(alpha=0.0002, kernel='rbf')
clf.fit(data, target)
p_kridge = clf.predict(data)
plt.scatter(data, target, color='blue', label='data')
plt.plot(data, p_kridge, color='orange',
linestyle='-', linewidth=3, markersize=6, label='kernel ridge')
plt.legend()
#Ridge
from sklearn.metrics.pairwise import rbf_kernel
from sklearn.linear_model import Ridge

kx = rbf_kernel(X=data, Y=data, gamma=50)
clf = Ridge(alpha=30)
clf.fit(kx, target)
p_ridge = clf.predict(kx)

plt.scatter(data, target, label='data')
for i in range(len(kx)):
    plt.plot(data, kx[i], color='black', linestyle='-', linewidth=1, markersize=3, label='rbf', alpha=0.2)
plt.plot(data, p_ridge, color='green', linestyle='-', linewidth=1, markersize=3, label='ridge regression')
print(clf.score(kx, target))
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression

# Fit polynomial regressions of degree 1 through 10
deg = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
for d in deg:
    regr = Pipeline([
        ('poly', PolynomialFeatures(degree=d)),
        ('linear', LinearRegression())
    ])
    regr.fit(data, target)
    p_poly = regr.predict(data)
    plt.scatter(data, target, label='data')
    plt.plot(data, p_poly, label='polynomial of degree %d' % (d))
#Lasso
from sklearn.metrics.pairwise import rbf_kernel
from sklearn.linear_model import Lasso

kx = rbf_kernel(X=data, Y=data, gamma=5)
lasso_clf = Lasso(alpha=10000, max_iter=1000)
lasso_clf.fit(kx, target)
p_lasso = lasso_clf.predict(kx)
plt.scatter(data, target)
plt.plot(data, p_lasso, color='green', linestyle='-', linewidth=3, markersize=3)
print(lasso_clf.score(kx, target))
from sklearn import model_selection, preprocessing, linear_model, svm
clf_svr = svm.SVR(kernel='rbf', C=1e3, gamma=0.1, epsilon=0.1)
clf_svr.fit(data, target)
y_rbf = clf_svr.fit(data, target).predict(data)
plt.scatter(data, target, color='darkorange', label='data')
plt.plot(data, y_rbf, color='red', label='Support Vector Regression (RBF)')
plt.legend()
plt.show()
from keras.models import Sequential
from keras.layers import Input, Dense, Dropout, BatchNormalization
from keras.wrappers.scikit_learn import KerasRegressor
from keras.callbacks import EarlyStopping, TensorBoard, ModelCheckpoint

cb_cp = ModelCheckpoint('/content/drive/My Drive/study_ai_ml/skl_ml/out/checkpoints/weights.{epoch:02d}-{val_loss:.2f}.hdf5', verbose=1, save_weights_only=True)
cb_tf = TensorBoard(log_dir='/content/drive/My Drive/study_ai_ml/skl_ml/out/tensorBoard', histogram_freq=0)

def relu_reg_model():
    model = Sequential()
    model.add(Dense(10, input_dim=1, activation='relu'))
    model.add(Dense(1000, activation='relu'))
    model.add(Dense(1000, activation='relu'))
    model.add(Dense(1000, activation='relu'))
    model.add(Dense(1000, activation='relu'))
    model.add(Dense(1000, activation='relu'))
    model.add(Dense(1000, activation='relu'))
    model.add(Dense(1000, activation='relu'))
    model.add(Dense(1000, activation='linear'))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model

# x_train/y_train and x_test/y_test are not defined above; a 90/10 split of
# (data, target) is assumed here, consistent with the 18 batches of size 5
# per epoch in the log below
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(data, target, test_size=0.1)

estimator = KerasRegressor(build_fn=relu_reg_model, epochs=100, batch_size=5, verbose=1)
history = estimator.fit(x_train, y_train, callbacks=[cb_cp, cb_tf], validation_data=(x_test, y_test))
Epoch 1/100
18/18 [==============================] - ETA: 0s - loss: 1.4970
Epoch 00001: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.01-1.40.hdf5
18/18 [==============================] - 54s 47ms/step - loss: 1.4970 -
val_loss: 1.4008
Epoch 2/100
18/18 [==============================] - ETA: 0s - loss: 1.0365
Epoch 00002: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.02-1.23.hdf5
18/18 [==============================] - 0s 20ms/step - loss: 1.0365 -
val_loss: 1.2303
Epoch 3/100
18/18 [==============================] - ETA: 0s - loss: 1.0787
Epoch 00003: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.03-0.78.hdf5
18/18 [==============================] - 0s 20ms/step - loss: 1.0787 -
val_loss: 0.7773
Epoch 4/100
13/18 [====================>.........] - ETA: 0s - loss: 0.8801
Epoch 00004: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.04-0.86.hdf5
18/18 [==============================] - 0s 19ms/step - loss: 1.0361 -
val_loss: 0.8603
Epoch 5/100
18/18 [==============================] - ETA: 0s - loss: 1.1011
Epoch 00005: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.05-0.78.hdf5
18/18 [==============================] - 0s 21ms/step - loss: 1.1011 -
val_loss: 0.7806
Epoch 6/100
13/18 [====================>.........] - ETA: 0s - loss: 1.0690
Epoch 00006: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.06-0.87.hdf5
18/18 [==============================] - 1s 37ms/step - loss: 1.0570 -
val_loss: 0.8701
Epoch 7/100
16/18 [=========================>....] - ETA: 0s - loss: 0.9791
Epoch 00007: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.07-0.78.hdf5
18/18 [==============================] - 0s 21ms/step - loss: 0.9688 -
val_loss: 0.7768
Epoch 8/100
18/18 [==============================] - ETA: 0s - loss: 0.9713
Epoch 00008: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.08-0.70.hdf5
18/18 [==============================] - 0s 20ms/step - loss: 0.9713 -
val_loss: 0.6964
Epoch 9/100
13/18 [====================>.........] - ETA: 0s - loss: 0.7272
Epoch 00009: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.09-0.69.hdf5
18/18 [==============================] - 0s 19ms/step - loss: 0.9065 -
val_loss: 0.6937
Epoch 10/100
13/18 [====================>.........] - ETA: 0s - loss: 1.0190
Epoch 00010: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.10-1.59.hdf5
18/18 [==============================] - 0s 22ms/step - loss: 1.2161 -
val_loss: 1.5882
Epoch 11/100
18/18 [==============================] - ETA: 0s - loss: 1.3504
Epoch 00011: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.11-1.05.hdf5
18/18 [==============================] - 0s 22ms/step - loss: 1.3504 -
val_loss: 1.0513
Epoch 12/100
17/18 [===========================>..] - ETA: 0s - loss: 0.8982
Epoch 00012: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.12-0.52.hdf5
18/18 [==============================] - 0s 20ms/step - loss: 0.8753 -
val_loss: 0.5176
Epoch 13/100
18/18 [==============================] - ETA: 0s - loss: 0.7692
Epoch 00013: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.13-0.57.hdf5
18/18 [==============================] - 0s 19ms/step - loss: 0.7692 -
val_loss: 0.5701
Epoch 14/100
17/18 [===========================>..] - ETA: 0s - loss: 1.0020
Epoch 00014: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.14-0.66.hdf5
18/18 [==============================] - 0s 22ms/step - loss: 1.0243 -
val_loss: 0.6622
Epoch 15/100
17/18 [===========================>..] - ETA: 0s - loss: 1.0726
Epoch 00015: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.15-0.54.hdf5
18/18 [==============================] - 0s 21ms/step - loss: 1.0356 -
val_loss: 0.5446
Epoch 16/100
13/18 [====================>.........] - ETA: 0s - loss: 1.0199
Epoch 00016: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.16-0.76.hdf5
18/18 [==============================] - 0s 22ms/step - loss: 1.1688 -
val_loss: 0.7564
Epoch 17/100
13/18 [====================>.........] - ETA: 0s - loss: 0.9960
Epoch 00017: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.17-0.56.hdf5
18/18 [==============================] - 0s 20ms/step - loss: 0.9017 -
val_loss: 0.5562
Epoch 18/100
17/18 [===========================>..] - ETA: 0s - loss: 1.0072
Epoch 00018: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.18-0.57.hdf5
18/18 [==============================] - 0s 19ms/step - loss: 0.9993 -
val_loss: 0.5741
Epoch 19/100
13/18 [====================>.........] - ETA: 0s - loss: 1.1606
Epoch 00019: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.19-0.58.hdf5
18/18 [==============================] - 0s 20ms/step - loss: 1.0023 -
val_loss: 0.5846
Epoch 20/100
13/18 [====================>.........] - ETA: 0s - loss: 0.7868
Epoch 00020: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.20-0.54.hdf5
18/18 [==============================] - 0s 20ms/step - loss: 0.7953 -
val_loss: 0.5443
Epoch 21/100
13/18 [====================>.........] - ETA: 0s - loss: 0.8356
Epoch 00021: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.21-0.48.hdf5
18/18 [==============================] - 0s 23ms/step - loss: 0.8720 -
val_loss: 0.4791
Epoch 22/100
13/18 [====================>.........] - ETA: 0s - loss: 0.7485
Epoch 00022: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.22-0.65.hdf5
18/18 [==============================] - 0s 21ms/step - loss: 0.9276 -
val_loss: 0.6498
Epoch 23/100
18/18 [==============================] - ETA: 0s - loss: 0.8111
Epoch 00023: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.23-0.30.hdf5
18/18 [==============================] - 0s 21ms/step - loss: 0.8111 -
val_loss: 0.2986
Epoch 24/100
18/18 [==============================] - ETA: 0s - loss: 0.6792
Epoch 00024: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.24-0.45.hdf5
18/18 [==============================] - 0s 20ms/step - loss: 0.6792 -
val_loss: 0.4528
Epoch 25/100
18/18 [==============================] - ETA: 0s - loss: 0.7219
Epoch 00025: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.25-0.53.hdf5
18/18 [==============================] - 0s 20ms/step - loss: 0.7219 -
val_loss: 0.5332
Epoch 26/100
13/18 [====================>.........] - ETA: 0s - loss: 0.5944
Epoch 00026: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.26-0.27.hdf5
18/18 [==============================] - 0s 20ms/step - loss: 0.5670 -
val_loss: 0.2667
Epoch 27/100
13/18 [====================>.........] - ETA: 0s - loss: 0.7009
Epoch 00027: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.27-0.48.hdf5
18/18 [==============================] - 1s 49ms/step - loss: 0.6354 -
val_loss: 0.4829
Epoch 28/100
17/18 [===========================>..] - ETA: 0s - loss: 0.6042
Epoch 00028: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.28-0.57.hdf5
18/18 [==============================] - 1s 37ms/step - loss: 0.6184 -
val_loss: 0.5654
Epoch 29/100
16/18 [=========================>....] - ETA: 0s - loss: 0.6234
Epoch 00029: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.29-0.53.hdf5
18/18 [==============================] - 1s 37ms/step - loss: 0.6543 -
val_loss: 0.5276
Epoch 30/100
17/18 [===========================>..] - ETA: 0s - loss: 0.5493
Epoch 00030: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.30-0.44.hdf5
18/18 [==============================] - 1s 35ms/step - loss: 0.6120 -
val_loss: 0.4370
Epoch 31/100
16/18 [=========================>....] - ETA: 0s - loss: 0.5309
Epoch 00031: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.31-0.27.hdf5
18/18 [==============================] - 1s 36ms/step - loss: 0.5201 -
val_loss: 0.2718
Epoch 32/100
18/18 [==============================] - ETA: 0s - loss: 0.6879
Epoch 00032: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.32-0.33.hdf5
18/18 [==============================] - 1s 34ms/step - loss: 0.6879 -
val_loss: 0.3334
Epoch 33/100
18/18 [==============================] - ETA: 0s - loss: 0.4650
Epoch 00033: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.33-0.25.hdf5
18/18 [==============================] - 1s 34ms/step - loss: 0.4650 -
val_loss: 0.2519
Epoch 34/100
15/18 [========================>.....] - ETA: 0s - loss: 0.4204
Epoch 00034: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.34-0.32.hdf5
18/18 [==============================] - 1s 30ms/step - loss: 0.4545 -
val_loss: 0.3152
Epoch 35/100
17/18 [===========================>..] - ETA: 0s - loss: 0.5220
Epoch 00035: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.35-0.21.hdf5
18/18 [==============================] - 1s 36ms/step - loss: 0.5212 -
val_loss: 0.2088
Epoch 36/100
18/18 [==============================] - ETA: 0s - loss: 0.5734
Epoch 00036: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.36-0.43.hdf5
18/18 [==============================] - 1s 32ms/step - loss: 0.5734 -
val_loss: 0.4331
Epoch 37/100
16/18 [=========================>....] - ETA: 0s - loss: 0.4994
Epoch 00037: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.37-0.19.hdf5
18/18 [==============================] - 1s 34ms/step - loss: 0.5175 -
val_loss: 0.1870
Epoch 38/100
16/18 [=========================>....] - ETA: 0s - loss: 0.4138
Epoch 00038: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.38-0.33.hdf5
18/18 [==============================] - 1s 37ms/step - loss: 0.4101 -
val_loss: 0.3313
Epoch 39/100
17/18 [===========================>..] - ETA: 0s - loss: 0.3975
Epoch 00039: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.39-0.14.hdf5
18/18 [==============================] - 1s 33ms/step - loss: 0.4122 -
val_loss: 0.1382
Epoch 40/100
17/18 [===========================>..] - ETA: 0s - loss: 0.4516
Epoch 00040: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.40-0.21.hdf5
18/18 [==============================] - 1s 29ms/step - loss: 0.4309 -
val_loss: 0.2080
Epoch 41/100
15/18 [========================>.....] - ETA: 0s - loss: 0.3911
Epoch 00041: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.41-0.31.hdf5
18/18 [==============================] - 1s 32ms/step - loss: 0.4186 -
val_loss: 0.3148
Epoch 42/100
15/18 [========================>.....] - ETA: 0s - loss: 0.4348
Epoch 00042: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.42-0.47.hdf5
18/18 [==============================] - 0s 23ms/step - loss: 0.4382 -
val_loss: 0.4730
Epoch 43/100
16/18 [=========================>....] - ETA: 0s - loss: 0.3771
Epoch 00043: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.43-0.48.hdf5
18/18 [==============================] - 1s 35ms/step - loss: 0.4101 -
val_loss: 0.4830
Epoch 44/100
17/18 [===========================>..] - ETA: 0s - loss: 0.4165
Epoch 00044: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.44-0.27.hdf5
18/18 [==============================] - 0s 25ms/step - loss: 0.4258 -
val_loss: 0.2687
Epoch 45/100
16/18 [=========================>....] - ETA: 0s - loss: 0.4773
Epoch 00045: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.45-0.45.hdf5
18/18 [==============================] - 0s 20ms/step - loss: 0.4690 -
val_loss: 0.4504
Epoch 46/100
15/18 [========================>.....] - ETA: 0s - loss: 0.4449
Epoch 00046: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.46-0.31.hdf5
18/18 [==============================] - 0s 26ms/step - loss: 0.4446 -
val_loss: 0.3113
Epoch 47/100
16/18 [=========================>....] - ETA: 0s - loss: 0.4729
Epoch 00047: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.47-0.13.hdf5
18/18 [==============================] - 0s 21ms/step - loss: 0.4694 -
val_loss: 0.1278
Epoch 48/100
17/18 [===========================>..] - ETA: 0s - loss: 0.5604
Epoch 00048: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.48-0.24.hdf5
18/18 [==============================] - 0s 23ms/step - loss: 0.5688 -
val_loss: 0.2362
Epoch 49/100
14/18 [======================>.......] - ETA: 0s - loss: 0.4172
Epoch 00049: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.49-0.29.hdf5
18/18 [==============================] - 0s 22ms/step - loss: 0.4269 -
val_loss: 0.2896
Epoch 50/100
16/18 [=========================>....] - ETA: 0s - loss: 0.3632
Epoch 00050: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.50-0.44.hdf5
18/18 [==============================] - 0s 20ms/step - loss: 0.4026 -
val_loss: 0.4428
Epoch 51/100
18/18 [==============================] - ETA: 0s - loss: 0.4762
Epoch 00051: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.51-0.27.hdf5
18/18 [==============================] - 0s 25ms/step - loss: 0.4762 -
val_loss: 0.2732
Epoch 52/100
18/18 [==============================] - ETA: 0s - loss: 0.4814
Epoch 00052: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.52-0.34.hdf5
18/18 [==============================] - 0s 20ms/step - loss: 0.4814 -
val_loss: 0.3373
Epoch 53/100
15/18 [========================>.....] - ETA: 0s - loss: 0.3512
Epoch 00053: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.53-0.34.hdf5
18/18 [==============================] - 1s 29ms/step - loss: 0.4053 -
val_loss: 0.3443
Epoch 54/100
15/18 [========================>.....] - ETA: 0s - loss: 0.3945
Epoch 00054: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.54-0.57.hdf5
18/18 [==============================] - 0s 27ms/step - loss: 0.3830 -
val_loss: 0.5662
Epoch 55/100
16/18 [=========================>....] - ETA: 0s - loss: 0.4613
Epoch 00055: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.55-0.35.hdf5
18/18 [==============================] - 0s 21ms/step - loss: 0.4314 -
val_loss: 0.3521
Epoch 56/100
18/18 [==============================] - ETA: 0s - loss: 0.4694
Epoch 00056: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.56-0.40.hdf5
18/18 [==============================] - 0s 26ms/step - loss: 0.4694 -
val_loss: 0.4006
Epoch 57/100
17/18 [===========================>..] - ETA: 0s - loss: 0.4707
Epoch 00057: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.57-0.49.hdf5
18/18 [==============================] - 1s 63ms/step - loss: 0.4689 -
val_loss: 0.4915
Epoch 58/100
15/18 [========================>.....] - ETA: 0s - loss: 0.3957
Epoch 00058: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.58-0.36.hdf5
18/18 [==============================] - 1s 29ms/step - loss: 0.4063 -
val_loss: 0.3581
Epoch 59/100
16/18 [=========================>....] - ETA: 0s - loss: 0.3688
Epoch 00059: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.59-0.52.hdf5
18/18 [==============================] - 0s 24ms/step - loss: 0.4232 -
val_loss: 0.5172
Epoch 60/100
13/18 [====================>.........] - ETA: 0s - loss: 0.4195
Epoch 00060: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.60-0.31.hdf5
18/18 [==============================] - 0s 19ms/step - loss: 0.4001 -
val_loss: 0.3060
Epoch 61/100
13/18 [====================>.........] - ETA: 0s - loss: 0.3783
Epoch 00061: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.61-0.47.hdf5
18/18 [==============================] - 0s 28ms/step - loss: 0.3397 -
val_loss: 0.4712
Epoch 62/100
18/18 [==============================] - ETA: 0s - loss: 0.3534
Epoch 00062: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.62-0.18.hdf5
18/18 [==============================] - 1s 39ms/step - loss: 0.3534 -
val_loss: 0.1793
Epoch 63/100
14/18 [======================>.......] - ETA: 0s - loss: 0.3766
Epoch 00063: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.63-0.35.hdf5
18/18 [==============================] - 1s 32ms/step - loss: 0.4248 -
val_loss: 0.3517
Epoch 64/100
15/18 [========================>.....] - ETA: 0s - loss: 0.3594
Epoch 00064: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.64-0.23.hdf5
18/18 [==============================] - 0s 26ms/step - loss: 0.3984 -
val_loss: 0.2275
Epoch 65/100
15/18 [========================>.....] - ETA: 0s - loss: 0.3510
Epoch 00065: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.65-0.51.hdf5
18/18 [==============================] - 0s 28ms/step - loss: 0.3611 -
val_loss: 0.5065
Epoch 66/100
15/18 [========================>.....] - ETA: 0s - loss: 0.5084
Epoch 00066: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.66-0.20.hdf5
18/18 [==============================] - 1s 29ms/step - loss: 0.4861 -
val_loss: 0.2032
Epoch 67/100
17/18 [===========================>..] - ETA: 0s - loss: 0.4373
Epoch 00067: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.67-0.44.hdf5
18/18 [==============================] - 0s 22ms/step - loss: 0.4190 -
val_loss: 0.4426
Epoch 68/100
16/18 [=========================>....] - ETA: 0s - loss: 0.4039
Epoch 00068: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.68-0.43.hdf5
18/18 [==============================] - 0s 20ms/step - loss: 0.4032 -
val_loss: 0.4322
Epoch 69/100
17/18 [===========================>..] - ETA: 0s - loss: 0.3376
Epoch 00069: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.69-0.32.hdf5
18/18 [==============================] - 0s 28ms/step - loss: 0.3484 -
val_loss: 0.3179
Epoch 70/100
16/18 [=========================>....] - ETA: 0s - loss: 0.3264
Epoch 00070: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.70-0.51.hdf5
18/18 [==============================] - 0s 21ms/step - loss: 0.3609 -
val_loss: 0.5140
Epoch 71/100
15/18 [========================>.....] - ETA: 0s - loss: 0.4209
Epoch 00071: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.71-0.41.hdf5
18/18 [==============================] - 0s 24ms/step - loss: 0.4237 -
val_loss: 0.4097
Epoch 72/100
16/18 [=========================>....] - ETA: 0s - loss: 0.3472
Epoch 00072: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.72-0.25.hdf5
18/18 [==============================] - 0s 27ms/step - loss: 0.3324 -
val_loss: 0.2460
Epoch 73/100
18/18 [==============================] - ETA: 0s - loss: 0.3513
Epoch 00073: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.73-0.25.hdf5
18/18 [==============================] - 0s 20ms/step - loss: 0.3513 -
val_loss: 0.2493
Epoch 74/100
18/18 [==============================] - ETA: 0s - loss: 0.4415
Epoch 00074: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.74-0.59.hdf5
18/18 [==============================] - 0s 28ms/step - loss: 0.4415 -
val_loss: 0.5935
Epoch 75/100
16/18 [=========================>....] - ETA: 0s - loss: 0.4269
Epoch 00075: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.75-0.22.hdf5
18/18 [==============================] - 1s 30ms/step - loss: 0.4041 -
val_loss: 0.2155
Epoch 76/100
16/18 [=========================>....] - ETA: 0s - loss: 0.4135
Epoch 00076: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.76-0.33.hdf5
18/18 [==============================] - 0s 22ms/step - loss: 0.4064 -
val_loss: 0.3325
Epoch 77/100
13/18 [====================>.........] - ETA: 0s - loss: 0.3699
Epoch 00077: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.77-0.29.hdf5
18/18 [==============================] - 0s 23ms/step - loss: 0.4130 -
val_loss: 0.2863
Epoch 78/100
13/18 [====================>.........] - ETA: 0s - loss: 0.3713
Epoch 00078: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.78-0.35.hdf5
18/18 [==============================] - 0s 20ms/step - loss: 0.3957 -
val_loss: 0.3496
Epoch 79/100
18/18 [==============================] - ETA: 0s - loss: 0.3687
Epoch 00079: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.79-0.26.hdf5
18/18 [==============================] - 0s 22ms/step - loss: 0.3687 -
val_loss: 0.2560
Epoch 80/100
17/18 [===========================>..] - ETA: 0s - loss: 0.4034
Epoch 00080: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.80-0.39.hdf5
18/18 [==============================] - 1s 30ms/step - loss: 0.3998 -
val_loss: 0.3929
Epoch 81/100
14/18 [======================>.......] - ETA: 0s - loss: 0.4312
Epoch 00081: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.81-0.34.hdf5
18/18 [==============================] - 0s 27ms/step - loss: 0.4488 -
val_loss: 0.3411
Epoch 82/100
17/18 [===========================>..] - ETA: 0s - loss: 0.4126
Epoch 00082: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.82-0.26.hdf5
18/18 [==============================] - 0s 21ms/step - loss: 0.4313 -
val_loss: 0.2648
Epoch 83/100
17/18 [===========================>..] - ETA: 0s - loss: 0.4351
Epoch 00083: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.83-0.33.hdf5
18/18 [==============================] - 0s 24ms/step - loss: 0.4272 -
val_loss: 0.3347
Epoch 84/100
18/18 [==============================] - ETA: 0s - loss: 0.4475
Epoch 00084: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.84-0.33.hdf5
18/18 [==============================] - 0s 22ms/step - loss: 0.4475 -
val_loss: 0.3264
Epoch 85/100
18/18 [==============================] - ETA: 0s - loss: 0.3693
Epoch 00085: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.85-0.37.hdf5
18/18 [==============================] - 1s 32ms/step - loss: 0.3693 -
val_loss: 0.3655
Epoch 86/100
17/18 [===========================>..] - ETA: 0s - loss: 0.3580
Epoch 00086: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.86-0.41.hdf5
18/18 [==============================] - 1s 37ms/step - loss: 0.3735 -
val_loss: 0.4125
Epoch 87/100
16/18 [=========================>....] - ETA: 0s - loss: 0.3580
Epoch 00087: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.87-0.19.hdf5
18/18 [==============================] - 1s 32ms/step - loss: 0.3437 -
val_loss: 0.1926
Epoch 88/100
16/18 [=========================>....] - ETA: 0s - loss: 0.4202
Epoch 00088: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.88-0.36.hdf5
18/18 [==============================] - 0s 25ms/step - loss: 0.4028 -
val_loss: 0.3623
Epoch 89/100
17/18 [===========================>..] - ETA: 0s - loss: 0.4340
Epoch 00089: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.89-0.34.hdf5
18/18 [==============================] - 0s 21ms/step - loss: 0.4233 -
val_loss: 0.3403
Epoch 90/100
16/18 [=========================>....] - ETA: 0s - loss: 0.3153
Epoch 00090: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.90-0.40.hdf5
18/18 [==============================] - 1s 29ms/step - loss: 0.3683 -
val_loss: 0.4010
Epoch 91/100
18/18 [==============================] - ETA: 0s - loss: 0.3978
Epoch 00091: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.91-0.39.hdf5
18/18 [==============================] - 1s 32ms/step - loss: 0.3978 -
val_loss: 0.3865
Epoch 92/100
15/18 [========================>.....] - ETA: 0s - loss: 0.4476
Epoch 00092: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.92-0.52.hdf5
18/18 [==============================] - 0s 27ms/step - loss: 0.4495 -
val_loss: 0.5152
Epoch 93/100
16/18 [=========================>....] - ETA: 0s - loss: 0.4892
Epoch 00093: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.93-0.53.hdf5
18/18 [==============================] - 0s 26ms/step - loss: 0.4938 -
val_loss: 0.5289
Epoch 94/100
13/18 [====================>.........] - ETA: 0s - loss: 0.4092
Epoch 00094: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.94-0.17.hdf5
18/18 [==============================] - 0s 19ms/step - loss: 0.4086 -
val_loss: 0.1733
Epoch 95/100
18/18 [==============================] - ETA: 0s - loss: 0.3723
Epoch 00095: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.95-0.27.hdf5
18/18 [==============================] - 1s 32ms/step - loss: 0.3723 -
val_loss: 0.2742
Epoch 96/100
14/18 [======================>.......] - ETA: 0s - loss: 0.2845
Epoch 00096: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.96-0.27.hdf5
18/18 [==============================] - 1s 29ms/step - loss: 0.3450 -
val_loss: 0.2736
Epoch 97/100
18/18 [==============================] - ETA: 0s - loss: 0.3498
Epoch 00097: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.97-0.48.hdf5
18/18 [==============================] - 0s 26ms/step - loss: 0.3498 -
val_loss: 0.4829
Epoch 98/100
18/18 [==============================] - ETA: 0s - loss: 0.3864
Epoch 00098: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.98-0.42.hdf5
18/18 [==============================] - 0s 28ms/step - loss: 0.3864 -
val_loss: 0.4167
Epoch 99/100
18/18 [==============================] - ETA: 0s - loss: 0.3278
Epoch 00099: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.99-0.44.hdf5
18/18 [==============================] - 0s 21ms/step - loss: 0.3278 -
val_loss: 0.4359
Epoch 100/100
17/18 [===========================>..] - ETA: 0s - loss: 0.3740
Epoch 00100: saving model to /content/drive/My
Drive/study_ai_ml/skl_ml/out/checkpoints/weights.100-0.35.hdf5
18/18 [==============================] - 0s 28ms/step - loss: 0.3784 -
val_loss: 0.3542
y_pred = estimator.predict(x_train)
plt.title('NonLinear Regressions via DL by ReLU')
plt.plot(data, target, 'o')
plt.plot(data, true_func(data), '.')
plt.plot(x_train, y_pred, "o", label='predicted: deep learning')
print(lasso_clf.coef_)  # with alpha=10000 the L1 penalty has driven every coefficient to zero
[-0. -0. -0. -0. -0. -0. -0. -0. -0. -0. -0. -0. -0. -0. -0. -0. -0. -0.
-0. -0. -0. -0. -0. -0. -0. -0. -0. -0. -0. -0. -0. -0. -0. -0. -0. -0.
-0. -0. -0. -0. -0. -0. -0. -0. -0. -0. 0. 0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
(4) Logistic regression model
Logistic regression is a supervised classification model. Its output is a number between 0 and 1, which can be interpreted as a probability. Because the output is a probability, a decision can be deferred: we can see how close each output is to 1 or to 0, something a plain discriminant function cannot show.
The sigmoid function is given by

$$\sigma(x) := \frac{1}{1 + e^{-hx}}$$

Its derivative can be written in terms of the sigmoid function itself:

$$\sigma'(x) = h\,\sigma(x)\,(1 - \sigma(x))$$
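A quick numerical check of this identity (an added sketch; the gain h and the test point are arbitrary):

import numpy as np

h = 2.0  # arbitrary gain

def sigmoid(x):
    return 1 / (1 + np.exp(-h * x))

x = 0.7
eps = 1e-6
numerical = (sigmoid(x + eps) - sigmoid(x - eps)) / (2 * eps)  # central difference
analytic = h * sigmoid(x) * (1 - sigmoid(x))
print(numerical, analytic)  # the two values agree to ~1e-10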
Maximum likelihood estimation chooses the parameters that maximize the likelihood function. When searching for those parameters, taking the logarithm of the likelihood makes the differentiation much simpler. It also avoids a numerical problem: the likelihood multiplies many probabilities together, so it becomes an extremely small number that can underflow, and taking the logarithm prevents this. Since the logarithm is monotonically increasing, the point where the log-likelihood is maximal is exactly the point where the likelihood is maximal.
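An added sketch of this point for logistic regression (toy data and an arbitrary parameter pair): taking the log turns the product of Bernoulli probabilities into a sum, which is numerically safer and easier to differentiate.

import numpy as np

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

# Toy data: 1-D inputs with binary labels, and a candidate parameter pair
x = np.array([0.1, 0.4, 0.6, 0.9])
t = np.array([0, 0, 1, 1])
w, b = 5.0, -2.5

p = sigmoid(w * x + b)                           # predicted P(t=1 | x)
likelihood = np.prod(p**t * (1 - p)**(1 - t))    # product of per-sample probabilities; underflows for large datasets
log_likelihood = np.sum(t * np.log(p) + (1 - t) * np.log(1 - p))
print(likelihood, log_likelihood)                # the log form stays well scaled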
(4)-1 Logistic regression implementation exercise
import pandas as pd
from pandas import DataFrame
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
titanic_df = pd.read_csv('/content/drive/My Drive/study_ai_ml/data/titanic_train.csv')
titanic_df.head(5)
# Drop unneeded columns and fill in missing values
titanic_df.drop(['PassengerId', 'Name', 'Ticket', 'Cabin'], axis=1, inplace=True)
titanic_df['AgeFill'] = titanic_df['Age'].fillna(titanic_df['Age'].mean())
data1 = titanic_df.loc[:, ["Fare"]].values
label1 = titanic_df.loc[:,["Survived"]].values
from sklearn.linear_model import LogisticRegression
model=LogisticRegression()
model.fit(data1, label1)
model.predict([[61]])
array([0])
model.predict_proba([[62]])
array([[0.49978123, 0.50021877]])
# Model intercept
print (model.intercept_)
# Model regression coefficient
print (model.coef_)
[-0.94131796]
[[0.01519666]]
w_0 = model.intercept_[0]
w_1 = model.coef_[0,0]
def sigmoid(x):
    return 1 / (1 + np.exp(-(w_1*x + w_0)))

x_range = np.linspace(-1, 500, 3000)
plt.figure(figsize=(9,5))
plt.legend(loc=2)
plt.plot(data1, np.zeros(len(data1)), 'o')
plt.plot(data1, model.predict_proba(data1), 'o')
plt.plot(x_range, sigmoid(x_range), '-')
titanic_df['Gender'] = titanic_df['Sex'].map({'female': 0, 'male': 1}).astype(int)
titanic_df['Pclass_Gender'] = titanic_df['Pclass'] + titanic_df['Gender']
titanic_df = titanic_df.drop(['Pclass', 'Sex', 'Gender', 'Age'], axis=1)
np.random.seed(0)
xmin, xmax = -5, 85
ymin, ymax = 0.5, 4.5
index_notsurvived = titanic_df[titanic_df["Survived"]==0].index
index_survived = titanic_df[titanic_df["Survived"]==1].index
from matplotlib.colors import ListedColormap
fig, ax = plt.subplots()
cm = plt.cm.RdBu
cm_bright = ListedColormap(['#FF0000', '#0000FF'])
sc = ax.scatter(titanic_df.loc[index_notsurvived, 'AgeFill'],
                titanic_df.loc[index_notsurvived, 'Pclass_Gender'] + (np.random.rand(len(index_notsurvived)) - 0.5) * 0.1,
                color='r', label='Not Survived', alpha=0.3)
sc = ax.scatter(titanic_df.loc[index_survived, 'AgeFill'],
                titanic_df.loc[index_survived, 'Pclass_Gender'] + (np.random.rand(len(index_survived)) - 0.5) * 0.1,
                color='b', label='Survived', alpha=0.3)
ax.set_xlabel('AgeFill')
ax.set_ylabel('Pclass_Gender')
ax.set_xlim(xmin, xmax)
ax.set_ylim(ymin, ymax)
ax.legend(bbox_to_anchor=(1.4, 1.03))
data2 = titanic_df.loc[:, ["AgeFill", "Pclass_Gender"]].values
label2 = titanic_df.loc[:,["Survived"]].values
model2 = LogisticRegression()
model2.fit(data2, label2)
model2.predict([[10,1]])
array([1])
model2.predict_proba([[10,1]])
array([[0.03754749, 0.96245251]])
h = 0.02
xmin, xmax = -5, 85
ymin, ymax = 0.5, 4.5
xx, yy = np.meshgrid(np.arange(xmin, xmax, h), np.arange(ymin, ymax, h))
Z = model2.predict_proba(np.c_[xx.ravel(), yy.ravel()])[:, 1]
Z = Z.reshape(xx.shape)
fig, ax = plt.subplots()
levels = np.linspace(0, 1.0)
cm = plt.cm.RdBu
cm_bright = ListedColormap(['#FF0000', '#0000FF'])
#contour = ax.contourf(xx, yy, Z, cmap=cm, levels=levels, alpha=0.5)
sc = ax.scatter(titanic_df.loc[index_notsurvived, 'AgeFill'],
                titanic_df.loc[index_notsurvived, 'Pclass_Gender'] + (np.random.rand(len(index_notsurvived)) - 0.5) * 0.1,
                color='r', label='Not Survived', alpha=0.3)
sc = ax.scatter(titanic_df.loc[index_survived, 'AgeFill'],
                titanic_df.loc[index_survived, 'Pclass_Gender'] + (np.random.rand(len(index_survived)) - 0.5) * 0.1,
                color='b', label='Survived', alpha=0.3)
ax.set_xlabel('AgeFill')
ax.set_ylabel('Pclass_Gender')
ax.set_xlim(xmin, xmax)
ax.set_ylim(ymin, ymax)
#fig.colorbar(contour)
x1 = xmin
x2 = xmax
y1 = -1*(model2.intercept_[0]+model2.coef_[0][0]*xmin)/model2.coef_[0][1]
y2 = -1*(model2.intercept_[0]+model2.coef_[0][0]*xmax)/model2.coef_[0][1]
ax.plot([x1, x2] ,[y1, y2], 'k--')
from sklearn.model_selection import train_test_split
traindata1, testdata1, trainlabel1, testlabel1 = train_test_split(data1, label1, test_size=0.2)
traindata2, testdata2, trainlabel2, testlabel2 = train_test_split(data2, label2, test_size=0.2)
data = titanic_df.loc[:, ].values
label = titanic_df.loc[:, ["Survived"]].values
traindata, testdata, trainlabel, testlabel = train_test_split(data, label, test_size=0.2)
eval_model1=LogisticRegression()
eval_model2=LogisticRegression()
predictor_eval1=eval_model1.fit(traindata1, trainlabel1).predict(testdata1)
predictor_eval2=eval_model2.fit(traindata2, trainlabel2).predict(testdata2)
eval_model1.score(traindata1, trainlabel1)
0.6671348314606742
eval_model1.score(testdata1,testlabel1)
0.7206703910614525
eval_model2.score(traindata2, trainlabel2)
0.773876404494382
eval_model2.score(testdata2,testlabel2)
0.7988826815642458
from sklearn import metrics
print(metrics.classification_report(testlabel1, predictor_eval1))
print(metrics.classification_report(testlabel2, predictor_eval2))
from sklearn.metrics import confusion_matrix
confusion_matrix1=confusion_matrix(testlabel1, predictor_eval1)
confusion_matrix2=confusion_matrix(testlabel2, predictor_eval2)
confusion_matrix1
array([[110, 12],
[ 38, 19]])
confusion_matrix2
array([[100, 21],
[ 15, 43]])
fig = plt.figure(figsize = (7,7))
sns.heatmap(
confusion_matrix1,
vmin=None,
vmax=None,
cmap="Blues",
center=None,
robust=False,
annot=True, fmt='.2g',
annot_kws=None,
linewidths=0,
linecolor='white',
cbar=True,
cbar_kws=None,
cbar_ax=None,
square=True, ax=None,
mask=None)
fig = plt.figure(figsize = (7,7))
sns.heatmap(
confusion_matrix2,
vmin=None,
vmax=None,
cmap="Blues",
center=None,
robust=False,
annot=True, fmt='.2g',
annot_kws=None,
linewidths=0,
linecolor='white',
cbar=True,
cbar_kws=None,
cbar_ax=None,
square=True, ax=None,
mask=None)
import seaborn as sns
sns.set(style="whitegrid")
titanic = sns.load_dataset("titanic")
g = sns.PairGrid(titanic, y_vars="survived",
x_vars=["class", "sex", "who", "alone"],
size=5, aspect=.5)
g.map(sns.pointplot, color=sns.xkcd_rgb["plum"])
g.set(ylim=(0, 1))
sns.despine(fig=g.fig, left=True)
plt.show()
import seaborn as sns
sns.set(style="darkgrid")
df = sns.load_dataset("titanic")
pal = dict(male="#6495ED", female="#F08080")
g = sns.lmplot(x="age", y="survived", col="sex", hue="sex", data=df,
palette=pal, y_jitter=.02, logistic=True)
g.set(xlim=(0, 80), ylim=(-.05, 1.05))
plt.show()
※ What is a confusion matrix?
A confusion matrix summarizes the results of a classification problem along two axes: the actual class and the predicted class. In binary classification the results fall into the following four cases.
・True positive (TP): the actual class is positive and the prediction is positive (correct)
・True negative (TN): the actual class is negative and the prediction is negative (correct)
・False positive (FP): the actual class is negative but the prediction is positive (incorrect)
・False negative (FN): the actual class is positive but the prediction is negative (incorrect)
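The usual evaluation metrics follow directly from these four counts. As an added check against the results above, using the entries of confusion_matrix2 (scikit-learn puts actual classes on rows and predicted classes on columns):

# Entries of confusion_matrix2 = [[100, 21], [15, 43]] shown above
tn, fp, fn, tp = 100, 21, 15, 43

accuracy  = (tp + tn) / (tp + tn + fp + fn)  # 0.7989, matching the test score printed earlier
precision = tp / (tp + fp)                   # of predicted positives, the fraction that are correct
recall    = tp / (tp + fn)                   # of actual positives, the fraction that are found
f1        = 2 * precision * recall / (precision + recall)
print(accuracy, precision, recall, f1)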
(5) Principal component analysis
Principal component analysis (PCA) is a statistical data-analysis technique. It compresses a large number of explanatory variables into a smaller number of indices or composite variables (variables generated from several of the originals) while preserving as much of the information in the data as possible. This is sometimes called "dimensionality reduction", and the resulting composite variables are called "principal components". PCA makes it possible to visualize a whole dataset while retaining most of its information.
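A minimal added sketch of what PCA computes, using numpy instead of scikit-learn: the principal components are the eigenvectors of the covariance matrix of the centered data, ordered by decreasing eigenvalue, and the explained variance ratio is the normalized eigenvalues.

import numpy as np

rng = np.random.default_rng(0)
# Toy data: correlated 3-D points
X = rng.standard_normal((100, 3)) @ np.array([[2.0, 0.0, 0.0],
                                              [0.5, 1.0, 0.0],
                                              [0.0, 0.2, 0.1]])

Xc = X - X.mean(axis=0)                 # center the data
cov = Xc.T @ Xc / (len(Xc) - 1)         # covariance matrix
eigval, eigvec = np.linalg.eigh(cov)    # eigenvalues in ascending order
order = np.argsort(eigval)[::-1]        # sort descending
components = eigvec[:, order]

# Projection onto the first two principal components
Z = Xc @ components[:, :2]
# Explained variance ratio, analogous to pca.explained_variance_ratio_
print(eigval[order] / eigval.sum())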
(5)-1 Principal component analysis implementation exercise
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegressionCV
from sklearn.metrics import confusion_matrix
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
%matplotlib inline
cancer_df = pd.read_csv('/content/drive/My Drive/study_ai_ml/data/cancer.csv')
print('cancer df shape: {}'.format(cancer_df.shape))
cancer df shape: (569, 33)
cancer_df.drop('Unnamed: 32', axis=1, inplace=True)
y = cancer_df.diagnosis.apply(lambda d: 1 if d == 'M' else 0)
X = cancer_df.loc[:, 'radius_mean':]
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
logistic = LogisticRegressionCV(cv=10, random_state=0)
logistic.fit(X_train_scaled, y_train)
print('Train score: {:.3f}'.format(logistic.score(X_train_scaled, y_train)))
print('Test score: {:.3f}'.format(logistic.score(X_test_scaled, y_test)))
print('Confusion matrix:\n{}'.format(confusion_matrix(y_true=y_test, y_pred=logistic.predict(X_test_scaled))))
Train score: 0.988
Test score: 0.972
Confusion matrix:
[[89 1]
[ 3 50]]
pca = PCA(n_components=30)
pca.fit(X_train_scaled)
plt.bar([n for n in range(1, len(pca.explained_variance_ratio_)+1)], pca.explained_variance_ratio_)
pca = PCA(n_components=2)
X_train_pca = pca.fit_transform(X_train_scaled)
print('X_train_pca shape: {}'.format(X_train_pca.shape))
# Contribution ratio (explained variance ratio)
print('explained variance ratio: {}'.format(pca.explained_variance_ratio_))
temp = pd.DataFrame(X_train_pca)
temp['Outcome'] = y_train.values
b = temp[temp['Outcome'] == 0]
m = temp[temp['Outcome'] == 1]
plt.scatter(x=b[0], y=b[1], marker='o')  # benign cases marked with o
plt.scatter(x=m[0], y=m[1], marker='^')  # malignant cases marked with ^
plt.xlabel('PC 1')  # first principal component on the x axis
plt.ylabel('PC 2')  # second principal component on the y axis
X_train_pca shape: (426, 2)
explained variance ratio: [0.43315126 0.19586506]
Text(0, 0.5, 'PC 2')
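(6) Support vector machine (SVM) implementation exercise
The code below trains an SVM by gradient ascent on the dual problem. It starts from a function gen_data that is not defined above; a plausible definition generating two linearly separable Gaussian clusters is assumed here:

import numpy as np

# Assumed definition: two linearly separable classes of 25 points each
def gen_data():
    x0 = np.random.normal(size=50).reshape(-1, 2) - 2.0
    x1 = np.random.normal(size=50).reshape(-1, 2) + 2.0
    X_train = np.concatenate([x0, x1])
    ys_train = np.concatenate([np.zeros(25), np.ones(25)])
    return X_train, ys_train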
X_train, ys_train = gen_data()
plt.scatter(X_train[:, 0], X_train[:, 1], c=ys_train)
t = np.where(ys_train == 1.0, 1.0, -1.0)
n_samples = len(X_train)
K = X_train.dot(X_train.T)
eta1 = 0.01
eta2 = 0.001
n_iter = 500
H = np.outer(t, t) * K
a = np.ones(n_samples)
for _ in range(n_iter):
    grad = 1 - H.dot(a)
    a += eta1 * grad
    a -= eta2 * a.dot(t) * t
    a = np.where(a > 0, a, 0)
index = a > 1e-6
support_vectors = X_train[index]
support_vector_t = t[index]
support_vector_a = a[index]
term2 = K[index][:, index].dot(support_vector_a * support_vector_t)
b = (support_vector_t - term2).mean()
xx0, xx1 = np.meshgrid(np.linspace(-5, 5, 100), np.linspace(-5, 5, 100))
xx = np.array([xx0, xx1]).reshape(2, -1).T
X_test = xx
y_project = np.ones(len(X_test)) * b
for i in range(len(X_test)):
    for a, sv_t, sv in zip(support_vector_a, support_vector_t, support_vectors):
        y_project[i] += a * sv_t * sv.dot(X_test[i])
y_pred = np.sign(y_project)
plt.scatter(X_train[:, 0], X_train[:, 1], c=ys_train)
plt.scatter(support_vectors[:, 0], support_vectors[:, 1],
s=100, facecolors='none', edgecolors='k')
plt.contour(xx0, xx1, y_project.reshape(100, 100), colors='k',
            levels=[-1, 0, 1], alpha=0.5, linestyles=['--', '-', '--'])
plt.quiver(0, 0, 0.1, 0.35, width=0.01, scale=1, color='pink')
# Generate training data (not linearly separable)
factor = .2
n_samples = 50
linspace = np.linspace(0, 2 * np.pi, n_samples // 2 + 1)[:-1]
outer_circ_x = np.cos(linspace)
outer_circ_y = np.sin(linspace)
inner_circ_x = outer_circ_x * factor
inner_circ_y = outer_circ_y * factor
X = np.vstack((np.append(outer_circ_x, inner_circ_x),
np.append(outer_circ_y, inner_circ_y))).T
y = np.hstack([np.zeros(n_samples // 2, dtype=np.intp),
np.ones(n_samples // 2, dtype=np.intp)])
X += np.random.normal(scale=0.15, size=X.shape)
x_train = X
y_train = y
plt.scatter(x_train[:,0], x_train[:,1], c=y_train)
def rbf(u, v):
    sigma = 0.8
    return np.exp(-0.5 * ((u - v)**2).sum() / sigma**2)
X_train = x_train
t = np.where(y_train == 1.0, 1.0, -1.0)
n_samples = len(X_train)
K = np.zeros((n_samples, n_samples))
for i in range(n_samples):
    for j in range(n_samples):
        K[i, j] = rbf(X_train[i], X_train[j])
eta1 = 0.01
eta2 = 0.001
n_iter = 5000
H = np.outer(t, t) * K
a = np.ones(n_samples)
for _ in range(n_iter):
    grad = 1 - H.dot(a)
    a += eta1 * grad
    a -= eta2 * a.dot(t) * t
    a = np.where(a > 0, a, 0)
index = a > 1e-6
support_vectors = X_train[index]
support_vector_t = t[index]
support_vector_a = a[index]
term2 = K[index][:, index].dot(support_vector_a * support_vector_t)
b = (support_vector_t - term2).mean()
xx0, xx1 = np.meshgrid(np.linspace(-1.5, 1.5, 100), np.linspace(-1.5, 1.5, 100))
xx = np.array([xx0, xx1]).reshape(2, -1).T
X_test = xx
y_project = np.ones(len(X_test)) * b
for i in range(len(X_test)):
    for a, sv_t, sv in zip(support_vector_a, support_vector_t, support_vectors):
        y_project[i] += a * sv_t * rbf(X_test[i], sv)
y_pred = np.sign(y_project)
plt.scatter(x_train[:, 0], x_train[:, 1], c=y_train)
plt.scatter(support_vectors[:, 0], support_vectors[:, 1],
s=100, facecolors='none', edgecolors='k')
plt.contourf(xx0, xx1, y_pred.reshape(100, 100), alpha=0.2, levels=np.linspace(0, 1, 3))
plt.contour(xx0, xx1, y_project.reshape(100, 100), colors='k',
            levels=[-1, 0, 1], alpha=0.5, linestyles=['--', '-', '--'])
# Generate training data (overlapping classes)
x0 = np.random.normal(size=50).reshape(-1, 2) - 1.
x1 = np.random.normal(size=50).reshape(-1, 2) + 1.
x_train = np.concatenate([x0, x1])
y_train = np.concatenate([np.zeros(25), np.ones(25)]).astype(np.intp)
plt.scatter(x_train[:, 0], x_train[:, 1], c=y_train)
X_train = x_train
t = np.where(y_train == 1.0, 1.0, -1.0)
n_samples = len(X_train)
K = X_train.dot(X_train.T)
C = 1
eta1 = 0.01
eta2 = 0.001
n_iter = 1000
H = np.outer(t, t) * K
a = np.ones(n_samples)
for _ in range(n_iter):
    grad = 1 - H.dot(a)
    a += eta1 * grad
    a -= eta2 * a.dot(t) * t
    a = np.clip(a, 0, C)
index = a > 1e-8
support_vectors = X_train[index]
support_vector_t = t[index]
support_vector_a = a[index]
term2 = K[index][:, index].dot(support_vector_a * support_vector_t)
b = (support_vector_t - term2).mean()
xx0, xx1 = np.meshgrid(np.linspace(-4, 4, 100), np.linspace(-4, 4, 100))
xx = np.array([xx0, xx1]).reshape(2, -1).T
X_test = xx
y_project = np.ones(len(X_test)) * b
for i in range(len(X_test)):
    for a, sv_t, sv in zip(support_vector_a, support_vector_t, support_vectors):
        y_project[i] += a * sv_t * sv.dot(X_test[i])
y_pred = np.sign(y_project)
plt.scatter(x_train[:, 0], x_train[:, 1], c=y_train)
plt.scatter(support_vectors[:, 0], support_vectors[:, 1],
s=100, facecolors='none', edgecolors='k')
plt.contourf(xx0, xx1, y_pred.reshape(100, 100), alpha=0.2, levels=np.linspace(0, 1, 3))
plt.contour(xx0, xx1, y_project.reshape(100, 100), colors='k',
            levels=[-1, 0, 1], alpha=0.5, linestyles=['--', '-', '--'])