Twitter
http://www.slideshare.net/matsukenbook
MASAKARI Come On! щ( щ)
https://twitter.com/_inundata/status/616658949761302528
Y
G
X N
X N ⇥ p
K
K
p
N ⇥ pxi X
yi
gi
K
p
N
X
xi
N
p
...
...
xj
N ⇥ p
X, Y, G
X
xi
XT
1
XT
i
XT
N
G(x) G
$\mathcal{G} = \{\text{class}_1, \text{class}_2, \cdots, \text{class}_K\}$
k
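$\hat{f}_k(x) = \hat{\beta}_{k0} + \hat{\beta}_k^T x$
$k$, $\ell$: $\hat{f}_k(x) = \hat{f}_\ell(x)$
$\{x : (\hat{\beta}_{k0} - \hat{\beta}_{\ell 0}) + (\hat{\beta}_k - \hat{\beta}_\ell)^T x = 0\}$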
x
🌾
p
p
1
1
k k(x)
x
k Pr(G = k|X = x)
x
k(x) Pr(G = k|X = x) x
$\Pr(G = 1 | X = x) = \dfrac{\exp(\beta_0 + \beta^T x)}{1 + \exp(\beta_0 + \beta^T x)}$
$\Pr(G = 2 | X = x) = \dfrac{1}{1 + \exp(\beta_0 + \beta^T x)}$
$\log\left(\dfrac{p}{1 - p}\right)$
$\operatorname{logit}(\Pr(G = 1 | X = x)) = \beta_0 + \beta^T x = \log\dfrac{\Pr(G = 1 | X = x)}{\Pr(G = 2 | X = x)}$
X1, · · · , Xp
$D = (X_1, X_2)$
$D' = (X_1, X_2, X_1^2, X_2^2, X_1 X_2)$
p(p + 1)/2
$\mathbb{R}^p \mapsto \mathbb{R}^q \ (q > p)$, $h(X)$
sklearn.discriminant_analysis.LinearDiscriminantAnalysis
$D' = (X_1, X_2, X_1^2, X_2^2, X_1 X_2)$
G K
G(x) k
Y
$Y = (Y_1, Y_2, \cdots, Y_k, \cdots, Y_K) = (0, 0, \cdots, 1, \cdots, 0)$
N
Y
N
K
N ⇥ K
k K1 2
$\hat{\mathbf{Y}} = \mathbf{X}(\mathbf{X}^T\mathbf{X})^{-1}\mathbf{X}^T\mathbf{Y}$
$\hat{\mathbf{B}}$
N
K
N
p + 1
p + 1
N
N
p + 1
p + 1
p + 1
p + 1
N
N
K
p + 1
K
X p + 1
x
$\hat{f}(x)^T = (1, x^T)\hat{\mathbf{B}}$
K
K
x
$\hat{G}(x) = \operatorname{argmax}_{k \in \mathcal{G}} \hat{f}_k(x)$
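$\hat{\mathbf{Y}} = \mathbf{X}(\mathbf{X}^T\mathbf{X})^{-1}\mathbf{X}^T\mathbf{Y}$
$\hat{G}(x) = \operatorname{argmax}_{k \in \mathcal{G}} \hat{f}_k(x)$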
E(Yk|X = x) = Pr(G = k|X = x)
ˆfk(x)
E(Yk|X = x) = Pr(G = k|X = x)
[ 0.19942274,
-0.01553064,
0.8161079 ]
$\sum_{k \in \mathcal{G}} \hat{f}_k(x) = 1$
$0 \le \hat{f}_k(x) \le 1$
$\min_{\mathbf{B}} \sum_{i=1}^{N} \left\| y_i - \left[(1, x_i^T)\mathbf{B}\right]^T \right\|^2$
$t_k$
$\hat{G}(x) = \operatorname{argmin}_{k} \left\| \hat{f}(x) - t_k \right\|^2$
K
1 1
K
p + 1
B
ˆf(x) tk
ˆf(x)
p + 1
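$\hat{G}(x) = \operatorname{argmin}_{k} \left\| \hat{f}(x) - t_k \right\|^2$
$\hat{G}(x) = \operatorname{argmax}_{k \in \mathcal{G}} \hat{f}_k(x)$
$\min_{\mathbf{B}} \sum_{i=1}^{N} \left\| y_i - \left[(1, x_i^T)\mathbf{B}\right]^T \right\|^2$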
K K 3
[ 0.3549 0.5517 0.712 0.8062 0.8631 0.9095 0.9458 0.9739 0.9916 1.0000 ]
Pr(G|X)
fk(x) G = k X
$\pi_k$ $k$
$\sum_{k=1}^{K} \pi_k = 1$
$\Pr(G = k | X = x) = \dfrac{f_k(x)\pi_k}{\sum_{\ell=1}^{K} f_\ell(x)\pi_\ell}$
$f_k(x) = \dfrac{1}{(2\pi)^{p/2}|\Sigma_k|^{1/2}} \exp\left(-\dfrac{1}{2}(x - \mu_k)^T \Sigma_k^{-1} (x - \mu_k)\right)$
1
p + 1
p + 1p + 1 p + 1
1
$x^T A x = \sum_{i,j=1}^{n} a_{ij} x_i x_j$
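$\mu = (0, 0)$
$\Sigma = \begin{pmatrix} \sigma_1^2 & \sigma_{12} \\ \sigma_{12} & \sigma_2^2 \end{pmatrix} = \begin{pmatrix} 1 & 0.6 \\ 0.6 & 1 \end{pmatrix}$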
$\Sigma_k = \Sigma, \ \forall k$
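$\log\dfrac{\Pr(G = k | X = x)}{\Pr(G = \ell | X = x)} = \log\dfrac{f_k(x)}{f_\ell(x)} + \log\dfrac{\pi_k}{\pi_\ell}$
$= \log\dfrac{\pi_k}{\pi_\ell} - \dfrac{1}{2}(\mu_k + \mu_\ell)^T \Sigma^{-1} (\mu_k - \mu_\ell) + x^T \Sigma^{-1} (\mu_k - \mu_\ell)$
$= \log\dfrac{f_k(x)\pi_k}{f_\ell(x)\pi_\ell}$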
k `
$\delta_k(x) = x^T \Sigma^{-1} \mu_k - \dfrac{1}{2}\mu_k^T \Sigma^{-1} \mu_k + \log \pi_k$
$G(x) = \operatorname{argmax}_k \delta_k(x)$
$\hat{\pi}_k = N_k / N$
$\hat{\mu}_k = \sum_{g_i = k} x_i / N_k$
$\hat{\Sigma} = \sum_{k=1}^{K} \sum_{g_i = k} (x_i - \hat{\mu}_k)(x_i - \hat{\mu}_k)^T / (N - K)$
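$x^T \hat{\Sigma}^{-1} (\hat{\mu}_2 - \hat{\mu}_1) > \dfrac{1}{2}(\hat{\mu}_2 + \hat{\mu}_1)^T \hat{\Sigma}^{-1} (\hat{\mu}_2 - \hat{\mu}_1) - \log\dfrac{N_2}{N_1}$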
$\delta_1(x) = x^T \Sigma^{-1} \mu_1 - \dfrac{1}{2}\mu_1^T \Sigma^{-1} \mu_1 + \log \pi_1$
$\delta_2(x) = x^T \Sigma^{-1} \mu_2 - \dfrac{1}{2}\mu_2^T \Sigma^{-1} \mu_2 + \log \pi_2$
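$\hat{\pi}_k = N_k / N$
$\hat{\mu}_k = \sum_{g_i = k} x_i / N_k$
$\hat{\Sigma} = \sum_{k=1}^{K} \sum_{g_i = k} (x_i - \hat{\mu}_k)(x_i - \hat{\mu}_k)^T / (N - K)$
$\mu_k = \hat{\mu}_k, \ \Sigma^{-1} = \hat{\Sigma}^{-1}$
$x^T \hat{\Sigma}^{-1} (\hat{\mu}_2 - \hat{\mu}_1) > \dfrac{1}{2}(\hat{\mu}_2 + \hat{\mu}_1)^T \hat{\Sigma}^{-1} (\hat{\mu}_2 - \hat{\mu}_1) - \log\dfrac{N_2}{N_1}$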
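$f_k(x), f_\ell(x)$
$\log\dfrac{\Pr(G = k | X = x)}{\Pr(G = \ell | X = x)} = \log\dfrac{f_k(x)}{f_\ell(x)} + \log\dfrac{\pi_k}{\pi_\ell}$
$= \log\dfrac{\pi_k}{\pi_\ell} - \dfrac{1}{2}(\mu_k + \mu_\ell)^T \Sigma^{-1} (\mu_k - \mu_\ell) + x^T \Sigma^{-1} (\mu_k - \mu_\ell)$
$f_k(x) = \dfrac{1}{(2\pi)^{p/2}|\Sigma_k|^{1/2}} \exp\left(-\dfrac{1}{2}(x - \mu_k)^T \Sigma_k^{-1} (x - \mu_k)\right)$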
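$\delta_k(x) = -\dfrac{1}{2}\log|\Sigma_k| - \dfrac{1}{2}(x - \mu_k)^T \Sigma_k^{-1} (x - \mu_k) + \log \pi_k$
$\delta_k(x) = x^T \Sigma^{-1} \mu_k - \dfrac{1}{2}\mu_k^T \Sigma^{-1} \mu_k + \log \pi_k$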
$(K - 1) \times (p + 1)$
$(K - 1) \times \{p(p + 3)/2 + 1\}$
$\hat{\Sigma}_k(\alpha) = \alpha\hat{\Sigma}_k + (1 - \alpha)\hat{\Sigma}, \quad \alpha \in [0, 1]$
# Regularized discriminant analysis on the vowel data: call FitRda() once per
# alpha in 0, 0.01, ..., 1 and save the first two elements of each result
# (stored as `train` and `test`) to df_result.csv.

# The URL points at CRAN's Archive area, so the package is installed from the
# downloaded source tarball rather than from the regular CRAN index.
url <- "https://cran.r-project.org/src/contrib/Archive/ascrda/ascrda_1.15.tar.gz"
pkgFile <- "ascrda_1.15.tar.gz"
download.file(url = url, destfile = pkgFile)
# Install package
install.packages(c("rda", "sfsmisc", "e1071", "pamr"))
install.packages(pkgs = pkgFile, type = "source", repos = NULL)
# http://www.inside-r.org/packages/cran/ascrda/docs/FitRda
# Fall back to a repository install only if the source install above failed.
if (!requireNamespace("ascrda", quietly = TRUE)) {
  install.packages("ascrda")
}
# library() stops with an error if the package is missing; require() would
# only return FALSE and let the script fail later at the FitRda() call.
library(ascrda)

df_vowel_train <- read.table("../vowel.train.csv", sep = ",", header = TRUE)
df_vowel_test <- read.table("../vowel.test.csv", sep = ",", header = TRUE)
df_vowel_train$row.names <- NULL
df_vowel_test$row.names <- NULL
y <- df_vowel_train$y
y_test <- df_vowel_test$y
# Drop the first remaining column and keep the next ten.
# NOTE(review): presumably column 1 is the class label `y` and the rest are
# the ten predictors; confirm against the CSV header.
feature_cols <- c(FALSE, rep(TRUE, 10))
X <- df_vowel_train[, feature_cols]
X_test <- df_vowel_test[, feature_cols]

# 101 grid points (0.00 to 1.00); the result vectors are sized to match the
# loop instead of being silently grown past their allocated length.
n_alpha <- 101L
a <- 0.01 * (seq_len(n_alpha) - 1)
res_train <- numeric(n_alpha)
res_test <- numeric(n_alpha)
for (i in seq_len(n_alpha)) {
  print(a[i])
  startTime <- proc.time()[3]
  # Pass only the current alpha. The original passed the whole vector `a`,
  # so every call received the partially filled grid, not a single value.
  ans <- FitRda(X, y, X_test, y_test, alpha = a[i])
  endTime <- proc.time()[3]
  print(endTime - startTime)
  # NOTE(review): assumes ans[1] / ans[2] are the train / test results, as
  # the variable names suggest; confirm against the FitRda documentation.
  res_train[i] <- ans[1]
  res_test[i] <- ans[2]
}
df_result <- data.frame(train = res_train, test = res_test)
write.table(df_result, file = "df_result.csv", sep = ",")
$\hat{\Sigma}(\gamma) = \gamma\hat{\Sigma} + (1 - \gamma)\hat{\sigma}^2 \mathbf{I}, \quad \gamma \in [0, 1]$
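$\hat{\Sigma}_k = U_k D_k U_k^T$
$\log|\hat{\Sigma}_k| = \sum_{\ell} \log d_{k\ell}$
$(x - \hat{\mu}_k)^T \hat{\Sigma}_k^{-1} (x - \hat{\mu}_k) = \left[U_k^T (x - \hat{\mu}_k)\right]^T D_k^{-1} \left[U_k^T (x - \hat{\mu}_k)\right]$
$\hat{\Sigma}_k = U_k D_k U_k^T$
$p \times p$, $d_{k\ell}$
$(AB)^{-1} = B^{-1} A^{-1}$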
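$\hat{\pi}_k = N_k / N$
$\hat{\mu}_k = \sum_{g_i = k} x_i / N_k$
$\hat{\Sigma} = \sum_{k=1}^{K} \sum_{g_i = k} (x_i - \hat{\mu}_k)(x_i - \hat{\mu}_k)^T / (N - K)$
$X^* \leftarrow D^{-1/2} U^T X$
$\hat{\Sigma} = U D U^T$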
# X* : sphered data, X* = D^{-1/2} U^T X with covariance S = U D U^T
def sphere(X):
    """Sphere (whiten) the rows of X with the eigendecomposition of their covariance.

    Each row x of X is mapped to D^{-1/2} U^T x, where S = U D U^T is the
    eigendecomposition of the sample covariance S = np.cov(X.T).  The data
    are not centred; only the linear transform is applied.

    Parameters
    ----------
    X : array-like of shape (N, p)
        One observation per row, one variable per column (the covariance is
        taken over the columns of X).

    Returns
    -------
    numpy.ndarray of shape (N, p)
        Transformed rows, whose sample covariance is the identity matrix.

    Raises
    ------
    numpy.linalg.LinAlgError
        If the covariance matrix is not positive definite, so D^{-1/2}
        does not exist.

    Notes
    -----
    The decomposition is computed once with ``np.linalg.eigh`` (the solver
    for symmetric matrices), so eigenvalues and eigenvectors are guaranteed
    to be paired and real.  The order and sign of the output columns follow
    eigh's ascending-eigenvalue convention.
    """
    X = np.asarray(X, dtype=float)
    # atleast_2d keeps the single-variable case (0-d covariance) working.
    S = np.atleast_2d(np.cov(X.T))
    d, U = np.linalg.eigh(S)
    if np.any(d <= 0):
        raise np.linalg.LinAlgError(
            "covariance matrix is not positive definite; cannot sphere")
    # (D^{-1/2} U^T X^T)^T == (X U) with column j scaled by 1 / sqrt(d_j).
    return np.dot(X, U) / np.sqrt(d)
⇡k
D1/2
p K
K 1
p K
$H_{K-1}$
$K > 3$, $K - 1$
$L < K - 1$, $H_L \subseteq H_{K-1}$
L
$Z = a^T X$
a
a
$\max_{a} \dfrac{a^T B a}{a^T W a}$
$(m_1 - m_2)^2$
m1
m2
µ2
µ1
a
$= (a^T(\mu_1 - \mu_2))^2$
$= (a^T(\mu_1 - \mu_2))(a^T(\mu_1 - \mu_2))^T$
$= a^T (\mu_1 - \mu_2)(\mu_1 - \mu_2)^T a$
1
p + 1p + 1
1
1
p + 1 p + 1
1
$\max_{a} \dfrac{a^T B a}{a^T W a}$
$\max_{a} \dfrac{a^T B a}{a^T W a}$
$= a^T B a \times N$
$\max_{a} \dfrac{a^T B a}{a^T W a}$
$\max_{a} \dfrac{a^T B a}{a^T W a}$
a
m1
µ1
x1
x2
x3
y3
y2
y1
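$= \sum_{k=1}^{K} \sum_{g_i = k} (y_i - m_k)^2$
$= \sum_{k=1}^{K} \sum_{g_i = k} (a^T(x_i - \mu_k))^2$
$= \sum_{k=1}^{K} \sum_{g_i = k} (a^T(x_i - \mu_k))(a^T(x_i - \mu_k))^T$
$= \sum_{k=1}^{K} a^T \left( \sum_{g_i = k} (x_i - \mu_k)(x_i - \mu_k)^T \right) a$
$= a^T W a \times N$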
$\max_{a} \dfrac{a^T B a}{a^T W a}$
$\max_{a} a^T B a \ \text{ subject to } \ a^T W a = 1$
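$\dfrac{\partial}{\partial a}\left[a^T B a - \lambda(a^T W a - 1)\right] = 0$
$\dfrac{\partial}{\partial \lambda}\left[a^T B a - \lambda(a^T W a - 1)\right] = 0$
$Ba = \lambda Wa$
$W$: $W^{-1}Ba = \lambda a$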
$\dfrac{\partial}{\partial x} x^T A x = 2Ax$ (for symmetric $A$)
$W^{-1}Ba = \lambda a$
a
m1
m2
µ2
µ1
a
$W$: $W^{-1}Ba = \lambda a$
$M^* = M W^{-1/2}$
$B^* = V^* D_B V^{*T}$
$v_\ell = W^{-1/2} v_\ell^*$
K ⇥ pM
W
WM⇤
B M⇤
B⇤
B
V⇤v⇤
`
$\ell$: $Z_\ell = v_\ell^T X$
p + 1
p ⇥ p
$W^{-1/2} = U D_W^{-1/2} U^{-1}$
$W = U D_W U^{-1}$
K
$M = \{\mu_1, \cdots, \mu_K\}^T$
[-1.23290493 1.00301738]
[-0.30483077 -0.78126293]
v1 =
v2 =
v2
v1
Z1
Z2
$\log \pi_k$
$\pi_k$
統計的学習の基礎 4章 前半

統計的学習の基礎 4章 前半

  • 2.
  • 3.
  • 4.
    MASAKARI Come On!щ( щ) https://twitter.com/_inundata/status/616658949761302528
  • 8.
    Y G X N X N⇥ p K K p
  • 9.
    N ⇥ pxiX yi gi K p N
  • 10.
    X xi N p ... ... xj N ⇥ p X,Y, G X xi XT 1 XT i XT N
  • 12.
    G(x) G G ={class1, class2, · · · , classK}
  • 13.
    k ˆfk(x) = ˆk0+ ˆT k x k ` ˆfk(x) = ˆf`(x) {x : (ˆk0 ˆ`0) + (ˆk ˆ`)T x = 0} x 🌾 p p 1 1
  • 14.
    k k(x) x k Pr(G= k|X = x) x k(x) Pr(G = k|X = x) x
  • 15.
    Pr(G = 1|X= x) = exp( 0 + T x) 1 + exp( 0 + T x) Pr(G = 2|X = x) = 1 1 + exp( 0 + T x) log ✓ p 1 p ◆ logit(Pr(G = 1|X = x)) = 0 + T x = log Pr(G = 1|X = x) Pr(G = 2|X = x)
  • 17.
    X1, · ·· , Xp D = (X1, X2) D0 = (X1, X2, X2 1 , X2 2 , X1X2) p(p + 1)/2 Rp 7! Rq (q > p) h(X)
  • 18.
  • 20.
    G K G(x) k Y Y= (Y1, Y2, · · · , Yk, · · · , YK) = (0, 0, · · · , 1, · · · , 0) N Y N K N ⇥ K k K1 2
  • 21.
    ˆY = X(X T X) 1 XT YˆB N K N p + 1 p + 1 N N p + 1 p + 1 p + 1 p + 1 N N K p + 1 K X p + 1
  • 22.
    x ˆf(x)T = (1, xT )ˆB K K x ˆG(x) = argmax k2G ˆfk(x)
  • 23.
    ˆY = X(X T X) 1 XT Y ˆG(x)= argmax k2G ˆfk(x)
  • 25.
    E(Yk|X = x)= Pr(G = k|X = x) ˆfk(x) E(Yk|X = x) = Pr(G = k|X = x) [ 0.19942274, -0.01553064, 0.8161079 ] X k2G ˆfk(x) = 1 0  ˆfk(x)  1
  • 28.
    min B NX i=1 kyi ⇥ (1, xT i )B ⇤T k2 tk ˆG(x)= argmin k k ˆf(x) tkk2 K 1 1 K p + 1 B ˆf(x) tk ˆf(x) p + 1
  • 29.
    ˆG(x) = argmin k kˆf(x) tkk2 ˆG(x) = argmax k2G ˆfk(x) min B NX i=1 kyi ⇥ (1, xT i )B ⇤T k2
  • 30.
  • 33.
    [ 0.3549 0.55170.712 0.8062 0.8631 0.9095 0.9458 0.9739 0.9916 1.0000 ]
  • 35.
    Pr(G|X) fk(x) G =k X ⇡k k KX k=1 ⇡k = 1 Pr(G = k|X = x) = fk(x)⇡k PK ` f`(x)⇡`
  • 37.
    fk(x) = 1 (2⇡)p/2|⌃k|1/2 exp ✓ 1 2 (x µk)T ⌃1 k (x µk) ◆ 1 p + 1 p + 1p + 1 p + 1 1 xt Ax = nX i,j=1 aijxixj µ = (0, 0) ⌃ = ✓ 2 1 12 12 2 2 ◆ = ✓ 1 0.6 0.6 1 ◆
  • 38.
    ⌃k = ⌃,8k log Pr(G = k|X = x) Pr(G = `|X = x) = log fk(x) f`(x) + log ⇡k ⇡` = log ⇡k ⇡` 1 2 (µk + µ`)T ⌃ 1 (µk + µ`) + xT ⌃ 1 (µk + µ`) = log fk(x)⇡k f`(x)⇡` k `
  • 41.
    k(x) = xT ⌃1 µk 1 2 µT k ⌃ 1 µk + log ⇡k G(x) = argmaxk k(x)
  • 42.
    ˆ⇡k = Nk/N ˆµk= X gi=k xi/Nk ˆ⌃ = KX k=1 X gi=k (xi ˆµk)(xi ˆµk)T /(N K)
  • 43.
    xT ˆ⌃ 1 (ˆµ2ˆµ1) > 1 2 (ˆµ2 + ˆµ1)T ˆ⌃ 1 (ˆµ2 ˆµ1) log N2 N1 1(x) = xT ⌃ 1 µ1 1 2 µT 1 ⌃ 1 µ1 + log ⇡1 2(x) = xT ⌃ 1 µ2 1 2 µT 2 ⌃ 1 µ2 + log ⇡2 ˆ⇡k = Nk/N ˆµk = X gi=k xi/Nk ˆ⌃ = KX k=1 X gi=k (xi ˆµk)(xi ˆµk)T /(N K) µk = ˆµk, ⌃ 1 = ˆ⌃ 1
  • 44.
    xT ˆ⌃ 1 (ˆµ2ˆµ1) > 1 2 (ˆµ2 + ˆµ1)T ˆ⌃ 1 (ˆµ2 ˆµ1) log N2 N1
  • 45.
    fk(x), f`(x) log Pr(G =k|X = x) Pr(G = `|X = x) = log fk(x) f`(x) + log ⇡k ⇡` = log ⇡k ⇡` 1 2 (µk + µ`)T ⌃ 1 (µk + µ`) + xT ⌃ 1 (µk + µ`) fk(x) = 1 (2⇡)p/2|⌃k|1/2 exp ✓ 1 2 (x µk)T ⌃ 1 k (x µk) ◆
  • 46.
    k(x) = 1 2 log |⌃k| 1 2 (xµk)T ⌃ 1 k (x µk) + log ⇡k k(x) = xT ⌃ 1 µk 1 2 µT k ⌃ 1 µk + log ⇡k
  • 48.
    (K 1) ⇥(p + 1) (K 1) ⇥ {p(p + 3)/2 + 1}
  • 50.
    ˆ⌃k(↵) = ↵ˆ⌃k+ (1 ↵)ˆ⌃ ↵ 2 [0, 1]
  • 51.
    url <- "https://cran.r-project.org/src/contrib/Archive/ascrda/ascrda_1.15.tar.gz" pkgFile<- "ascrda_1.15.tar.gz" download.file(url = url, destfile = pkgFile) # Install package install.packages(c("rda", "sfsmisc", "e1071", "pamr")) install.packages(pkgs=pkgFile, type="source", repos=NULL) # http://www.inside-r.org/packages/cran/ascrda/docs/FitRda install.packages("ascrda") require(ascrda) df_vowel_train <- read.table("../vowel.train.csv", sep=",", header=1) df_vowel_test <- read.table("../vowel.test.csv", sep=",", header=1) df_vowel_train$row.names<- NULL df_vowel_test$row.names<- NULL y <- df_vowel_train$y y_test <- df_vowel_test$y X <- df_vowel_train[ ,c(F,T,T,T,T,T,T,T,T,T,T)] X_test <- df_vowel_test[ ,c(F,T,T,T,T,T,T,T,T,T,T)] a <- rep(0, 100) res_train <- rep(0, 100) res_test <- rep(0, 100) for(i in 1:101){ a[i] <- 0.01*(i-1) print (a[i]) startTime <- proc.time()[3] ans <- FitRda(X, y, X_test, y_test, alpha=a) endTime <- proc.time()[3] print(endTime-startTime) res_train[i] <- ans[1] res_test[i] <- ans[2] } df_result <- data.frame(train=res_train, test=res_test) write.table(df_result, file="df_result.csv", sep=",")
  • 53.
    ˆ⌃( ) =ˆ⌃ + (1 )ˆ2 I 2 [0, 1]
  • 54.
    ˆ⌃k = UkDkUT k log|ˆ⌃k| = X ` log dk` (x ˆµk)T ˆ⌃ 1 k (x ˆµk) = ⇥ UT k (x ˆµk) ⇤T D 1 k ⇥ UT k (x ˆµk) ⇤ ˆ⌃k = UkDkUT k p ⇥ p dk` (AB) 1 = B 1 A 1
  • 55.
    ˆ⇡k = Nk/N ˆµk= X gi=k xi/Nk ˆ⌃ = KX k=1 X gi=k (xi ˆµk)(xi ˆµk)T /(N K) X⇤ D 1/2 UT X ˆ⌃ = UDUT X⇤ def  sphere(X):          S  =  np.cov(X.T)          U  =  np.linalg.eig(S)[1]          D  =  np.diag(np.linalg.eigvals(S))          D_rt  =  scipy.linalg.sqrtm(D)          D_rt_inv  =  np.linalg.inv(D_rt)          return  np.dot(D_rt_inv,  np.dot(U.T,  X.T)).T ⇡k D1/2
  • 56.
    p K K 1 pK HK 1
  • 57.
    K > 3K 1 L < K 1 HL ✓ HK 1 L
  • 58.
  • 59.
    (m1 m2)2 m1 m2 µ2 µ1 a = (aT (µ1µ2))2 = (aT (µ1 µ2))(aT (µ1 µ2))T = aT (µ1 µ2)(µ1 µ2)T a 1 p + 1p + 1 1 1 p + 1 p + 1 1 max a aT Ba aT Wa max a aT Ba aT Wa = aT Ba ⇥ N
  • 60.
    max a aT Ba aT Wa max a aT Ba aT Wa a m1 µ1 x1 x2 x3 y3 y2 y1 = KX k=1 X gi=k (yimk)2 = KX k=1 X gi=k (aT (xi µk))2 = KX k=1 X gi=k (aT (xi µk))(aT (xi µk))T = KX k=1 aT 0 @ X gi=k (xi µk)(xi µk)T 1 A a = aT Wa ⇥ N
  • 61.
    max a aT Ba aT Wa max a aT Ba subjectto aT Wa = 1 @ @a [aT Ba + (aT Wa 1)] = 0 @ @ [aT Ba + (aT Wa 1)] = 0 Ba = Wa W W 1 Ba = a @ @x xT Ax = 2Ax
  • 62.
    W 1 Ba =a a m1 m2 µ2 µ1 a W W 1 Ba = a
  • 63.
    M⇤ = MW 1/2 B⇤ =V⇤ DBV⇤T v` = W 1/2 v⇤ ` K ⇥ pM W WM⇤ B M⇤ B⇤ B V⇤v⇤ ` ` Z` = vT ` X p + 1 p ⇥ p W 1/2 = UD 1/2 W U 1 W = UDW U 1 K M = {µ1, · · · , µK}T
  • 65.
  • 66.