はじめてのベイズ推定

E n j o y
D a t a
Science
ver. 150827
Kenta Matsui
Related Files

R
R
5
:
[ ]+[ --internet2]
R
# Show the current working directory
getwd()
# Change the working directory; replace "[パス]" (placeholder) with the real path.
# Straight ASCII quotes are required: typographic quotes are a syntax error in R.
setwd("[パス]")

R Stan
R
Stan
7
# Set the MAKEFLAGS environment variable so that make receives -j4
# (up to four parallel jobs) while the package is being built
Sys.setenv(MAKEFLAGS = "-j4")
# Install rstan together with all of its dependencies
install.packages("rstan", dependencies = TRUE)
RStan Getting Started

y x
x a
9
y[t]:
x1[t]: 1
x2[t]: 2
x3[t]: 3
xn[t]: n
a1[t]: 1
a2[t]: 2
a3[t]: 3
an[t]: n
trend[t]:
y_est = a x + trend

csv
10
サンプルデータは、Webで公開されている、英会話教室の無料体験申し込みのデータを加工したものです。
もとのデータはhttp://xica-inc.com/adelie/sample/data/English.zipからDLできます。
1
1 2
10E1~2
N/A 0
csv
0/1
sample.csv

( )
11
1
x
y
x y 0.3
0.9
1期前のxを
参照
2期前のxを
参照
4期にわたり
残存率0.7で
効果が残る
1期前の
x_w4を
参照
期間ビジネス指標y x x_l1 x_l2 x_st4w x_st4w_l1
w1 50 0
w2 52 0 0
w3 54 100 0 0
w4 70 0 100 0 70
w5 65 0 0 100 49 70
w6 60 0 0 0 34 49
w7 51 0 0 0 0 34
w8 53 100 0 0 100 0
w9 70 100 100 0 170 100
w10 75 100 100 100 219 170
w11 79 100 100 100 253 219
w12 74 0 100 100 153 253
w13 69 0 0 100 83 153
w14 58 0 0 0 34 83
w15 53 0 0 0 0 34
w16 52 0 0 0 0 0
残存率 - - - - 0.7 0.7
yとの相関係数 1 0.32 0.84 0.74 0.85 0.91
ラグとアドストックを考慮した
出稿量の前処理の例

13
$y_t = ad_t + trend_t + \varepsilon_{y,t}$
1 tt trendtrend
t
t a x
t t-1 ( )
$a_{n,t} \approx a_{n,t-1}$
n t t-1
$ad_t = a_{1,t} x_{1,t} + a_{2,t} x_{2,t} + \cdots + a_{n,t} x_{n,t}$
$y_t = a_1 x_{1,t} + a_2 x_{2,t} + \cdots + a_n x_{n,t} + b$
cf.

( OK)
14
$y_t = ad_t + trend_t + \varepsilon_{y,t}$
t
y 0 y
$\varepsilon_{y,t} \sim N(0, \sigma_y)$
$y_t \sim N(ad_t + trend_t, \sigma_y)$
a 0 a
$a_{n,t} = a_{n,t-1} + \varepsilon_{a_n,t}$
$\varepsilon_{a_n,t} \sim N(0, \sigma_{a_n})$
n t t-1
$a_{n,t} \sim N(a_{n,t-1}, \sigma_{a_n})$
$a_{n,t} \approx a_{n,t-1}$

( OK)
15
t t-1
$trend_t - trend_{t-1} = trend_{t-1} - trend_{t-2} + \varepsilon_{trend,t}$
0 trend
$\varepsilon_{trend,t} \sim N(0, \sigma_{trend})$
1 tt trendtrend
a[T,N], trend[T], a[N], y, trend
a[N],b
$ad_t = a_{1,t} x_{1,t} + a_{2,t} x_{2,t} + \cdots + a_{n,t} x_{n,t}$
$trend_t \sim N(2 \cdot trend_{t-1} - trend_{t-2}, \sigma_{trend})$

model.stan
R R
16
# Sample: load the CSV and store its row count in T
d <- read.csv(file = "sample.csv")
T <- nrow(d)
T
[1] 53
R
Related Files

17
# Load the rstan package
library(rstan)

# Read the input data and bundle it into the list handed to Stan
d <- read.csv(file = "sample.csv")
T <- nrow(d)      # number of rows in the CSV
N <- ncol(d) - 1  # number of columns after the first one
data <- list(
  T = T,
  N = N,
  y = d[, 1],         # first column
  x = d[, 2:(N + 1)]  # columns 2..N+1; identical to 1:N+1 because ':' binds tighter than '+'
)

# Print the list to check its contents
data
Stan list
data T,x N,y , x
OK
N/A
csv
> data
$T
[1] 53
$N
[1] 6
$y
[1] 6.51 5.85 6.40 6.89 5.97 2.90 3.06 3.19 2.88 3.29 3.60 4.58 4.31
7.17 5.81 5.26 5.23 5.19 3.16 3.14 3.01 2.95 2.89 3.21 3.46 3.50 3.14
3.44 3.64 3.18 3.14 3.42 3.04 2.92 3.26 2.85 3.59 4.29 4.43 3.92 4.58
4.18 2.92 3.19 3.30 2.96 2.86 3.18 3.21 3.36 3.46 3.25 3.76
$x
バナー広告 TVCM 電車A. 電車B. 雑誌A 雑誌B
1 13.18 0 0 0 0 0
2 11.76 0 0 0 0 0
3 11.45 0 0 0 0 0
4 10.46 0 0 0 0 0
5 9.78 0 0 0 0 0

Stan
18
# Run Stan on model.stan with the prepared data list and keep the result in fit
fit <- stan(
  file = "model.stan",
  data = data,
  iter = 1500,
  chains = 3
)
model.stan fit
file data
iter chains
iter
doParallel
# Install the doParallel package (only needed the first time).
# The package name must be wrapped in straight ASCII quotes; a typographic
# opening quote makes the line unparseable.
install.packages("doParallel", dependencies = TRUE)
run_stan.text 17-29 #
( )

mean se_mean sd X2.5. X25. X50. X75. X97.5. n_eff Rhat
a[1,1] 0.39 0.00 0.08 0.21 0.34 0.40 0.45 0.50 348 1.02
a[1,2] 0.15 0.01 0.15 0.01 0.06 0.11 0.20 0.53 320 1.02
a[1,3] 0.10 0.00 0.08 0.01 0.05 0.08 0.14 0.31 359 1.01
a[1,4] 0.12 0.00 0.09 0.01 0.05 0.09 0.16 0.36 421 1.01
a[1,5] 0.02 0.00 0.02 0.00 0.01 0.01 0.02 0.06 235 1.02
a[1,6] 0.06 0.00 0.07 0.00 0.02 0.04 0.07 0.27 350 1.01
19
# Export the posterior summary so convergence (Rhat < 1.1) can be checked
fit.summary <- data.frame(summary(fit)$summary)
# write.csv() produces a real comma-separated file with a header cell above the
# row names; write.table() defaults to space-separated text whose header row is
# one field short, which does not open cleanly as a .csv file.
write.csv(fit.summary, "summary.csv")
csv
Excel a
mean se_mean sd ,X2.5-X97.5 ,
n_eff Rhat
Rhat<1.1 n_eff (50 )
n_eff Rhat 1.1
Check

20
# Derive the most plausible value (posterior mode) of every parameter
smp <- extract(fit)

# Posterior mode of one parameter: the x position of the kernel-density peak.
# which.max() always returns exactly one index, so a tied maximum can no longer
# produce several values and break the element assignment below.
posterior_mode <- function(draws) {
  dens <- density(draws)
  dens$x[which.max(dens$y)]
}

# a_est[t, i] holds the mode of smp$a[, t, i]; NA-filled until computed
a_est <- matrix(NA_real_, nrow = T, ncol = N)
for (i in seq_len(N)) {
  for (t in seq_len(T)) {
    a_est[t, i] <- posterior_mode(smp$a[, t, i])
  }
}
# Column names a1..aN are generated from N, so they no longer need to be
# edited by hand when the number of x columns changes
colnames(a_est) <- paste0("a", seq_len(N))

# trend_est[t] holds the mode of smp$trend[, t]
trend_est <- matrix(NA_real_, nrow = T, ncol = 1)
for (t in seq_len(T)) {
  trend_est[t] <- posterior_mode(smp$trend[, t])
}
colnames(trend_est) <- "trend"

# Combine all estimates and export them as a real comma-separated file
# (write.table() would emit space-separated text under a .csv name)
parameters_est <- cbind(a_est, trend_est)
write.csv(parameters_est, "parameters.csv")
a
a_est
colnames
x n
an,trend}

000
100
200
300
400
500
600
700
申込者の推移
ベースラインバナー TVCM 電車広告A 電車広告B 雑誌広告A 雑誌広告B
WAIC 0.82
相関係数 0.96
決定係数 0.89
(sample_analysis.xlsx)
21
y = a x + trend
sample
* A
* ROI
バナー広告
TVCM
電車広告A
電車広告B
雑誌広告A
雑誌広告B
広告効率
バナー
87%
TVCM
8%
電車広告A
2%
電車広告B
1% 雑誌広告A
1% 雑誌広告B
1%
貢献の内訳

Stan data, parameters, model
23
// Time-varying regression: y[t] ~ normal(dot_product(a[t], x[t]) + trend[t], s_y)
data {
int<lower=1> T; // number of rows of x / length of y
int<lower=0> N; // number of columns of x
real<lower=0> y[T]; // observed series, declared non-negative
matrix[T,N] x; // explanatory variables, one row per t
}
parameters {
matrix<lower=0>[T,N] a; // coefficients per t and per column of x, non-negative
real<lower=0> trend[T]; // trend term per t, non-negative
real<lower=0, upper=100> s_a[N]; // sd of the step from a[t-1] to a[t], one per column
real<lower=0, upper=100> s_trend; // sd of the trend step
real<lower=0, upper=100> s_y; // sd of y around its mean
}
model {
// Each row of a is centered on the previous row.
// a[1] has no sampling statement here; only its declared bound applies.
for (t in 2:T)
a[t]~normal(a[t-1],s_a);
// trend[t] is centered on the linear extrapolation of the two previous values.
// trend[1] and trend[2] have no sampling statement here.
for (t in 3:T)
trend[t]~normal(2*trend[t-1]-trend[t-2],s_trend);
// Observation: regression part plus trend, with sd s_y
for (t in 1:T)
y[t]~normal(dot_product(a[t],x[t])+trend[t],s_y);
}
data
Int: , real: , vector:
matrix:
;
parameters
<lower=X, upper=X>
model
model.stan *
*generated quantities WAIC

24
trend 0~20
trend1 0-20 model
a2 0.4
a2,1 0.4 model
// Explicit priors: uniform on 0..20 for trend[1], normal(0.4, 5) for a[1,2]
trend[1] ~ uniform(0, 20);
a[1,2] ~ normal(0.4, 5);
Stan cf. BUGS
uniform(α, β) dunif(α, β)
normal(μ, σ) dnorm(μ, 1/σ^2)
binomial(N, p) dbin(p, N)
poisson(λ) dpois(λ)
gamma(ν, β) dgamma(ν, β)
y 0~100 trend
parameters trend 0-100
// Declared bounds restrict every trend[t] to the range 0..100
real<lower=0, upper=100> trend[T];

AR
AR
25
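$ar_t = c_{ar1} \cdot ar_{t-1} + c_{ar2} \cdot ar_{t-2} + \varepsilon_{ar,t}$
$\varepsilon_{ar,t} \sim N(0, \sigma_{ar}) \;\therefore\; ar_t \sim N(c_{ar1} \cdot ar_{t-1} + c_{ar2} \cdot ar_{t-2}, \sigma_{ar})$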
// ar[t] is centered on a weighted sum of its two previous values (weights c_ar)
for (t in 3:T)
ar[t] ~ normal(c_ar[1]*ar[t-1] + c_ar[2]*ar[t-2], s_ar);
// Observation mean is the regression part plus trend[t] plus ar[t]
for (t in 1:T)
y[t] ~ normal(dot_product(a[t],x[t]) + trend[t] + ar[t], s_y);
ar c_ar s_ar parameters
AR model
// AR term: left unconstrained. Its noise is normal with mean 0, so ar[t] must
// be able to go negative; a <lower=0> bound would silently truncate it.
real ar[T];
// Coefficients applied to ar[t-1] and ar[t-2]
real c_ar[2];
// sd of the AR noise
real<lower=0, upper=100> s_ar;
# Posterior mode of ar at each t, appended to the parameter table
ar_est <- matrix(NA_real_, nrow = T, ncol = 1)
for (t in seq_len(T)) {
  dens <- density(smp$ar[, t])
  # which.max() returns a single index even when the density peak is tied
  ar_est[t] <- dens$x[which.max(dens$y)]
}
# Straight ASCII quotes: a typographic opening quote is a syntax error in R
colnames(ar_est) <- "ar"
parameters_est <- cbind(parameters_est, ar_est)
R ar

7 w
26
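$\sum_{k=0}^{6} w_{t-k} \approx 0 \;\Rightarrow\; w_t = -\sum_{k=1}^{6} w_{t-k} + \varepsilon_{w,t}, \quad \varepsilon_{w,t} \sim N(0, \sigma_w)$
$\therefore\; w_t \sim N\left(-\sum_{k=1}^{6} w_{t-k},\ \sigma_w\right)$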
# Posterior mode of w at each t, appended to the parameter table
w_est <- matrix(NA_real_, nrow = T, ncol = 1)
for (t in seq_len(T)) {
  dens <- density(smp$w[, t])
  # which.max() returns a single index even when the density peak is tied
  w_est[t] <- dens$x[which.max(dens$y)]
}
# Straight ASCII quotes: a typographic opening quote is a syntax error in R
colnames(w_est) <- "w"
parameters_est <- cbind(parameters_est, w_est)
R w
// w[t] is centered on minus the sum of its six previous values,
// i.e. seven consecutive values of w sum to roughly zero
for (t in 7:T)
w[t] ~ normal(-w[t-1]-w[t-2]-w[t-3] -w[t-4] -w[t-5] -w[t-6], s_w);
// Observation mean is the regression part plus trend[t] plus w[t]
for (t in 1:T)
y[t] ~ normal(dot_product(a[t],x[t]) + trend[t] + w[t], s_y);
w s_w parameters
model
// Periodic term: left unconstrained. Seven consecutive values must sum to
// about zero, which is impossible unless some of them are negative; a
// <lower=0> bound would force every w[t] towards 0.
real w[T];
// sd of the noise on the periodic term
real<lower=0, upper=100> s_w;

27
0
0.25
0.5
0.75
1
0mm 0.1-5mm 5.1-20mm 20+mm
𝑟𝑎𝑖𝑛 𝑡 = 𝑐 𝑟𝑎𝑖𝑛 ∗ 𝑟𝑎𝑖𝑛_𝑣𝑎𝑙 𝑡
0.0
5.0
10.0
15.0
20.0
25.0
30.0
0 100000 200000 300000
𝑇𝑉𝑡 = 𝑐 𝑇𝑉 ∗ 𝑇𝑉_𝑟𝑒𝑎𝑐ℎ 𝑡
f(x)=k-ab^x

Appendix:
29
RStan
Stan (4):
MCMC
:
Stan (3): Stan
Stan - Analyze IT.
Stan
Hamiltonian Monte Carlo Stan

はじめてのベイズ推定

Recommended

Recommended

More Related Content

Viewers also liked

Viewers also liked (13)

Similar to はじめてのベイズ推定

Similar to はじめてのベイズ推定 (20)

はじめてのベイズ推定