1958
1998
&
𝟐

$\sigma^2 = \frac{1}{n} \sum_{i=1}^{n} (x_i - \mu)^2$
$\sigma = \sqrt{\sigma^2}$

$\sigma^2 = 4$
$\sigma = 2$
$\sigma^2 = 0.57$
$\sigma = 0.75$
$x$   $y$   $x_i - \bar{x}$   $y_i - \bar{y}$   $(x_i - \bar{x}) \cdot (y_i - \bar{y})$

$\bar{x}$
$\bar{y}$
$\operatorname{cov}(X, Y) = \frac{\sum_{i=1}^{n} (x_i - \bar{x}) \cdot (y_i - \bar{y})}{n}$
$-1 \le \frac{\operatorname{cov}(x, y)}{\sigma_x \cdot \sigma_y} \le 1$
$y_i$   $x_i$
$y_i$   $x_i$
$y_i = \beta_0 + \beta_1 x_i + \epsilon_i$
$\beta_0$
$\beta_1$
$\hat{y}_i = \beta_0 + \beta_1 x_i$
$\epsilon_i$
$\hat{y}_i$
$y_i$   $\hat{y}_i$
$\arg\min_{\beta_0, \beta_1} \sum_{i=1}^{n} \epsilon_i^2$
$\arg\min_{\beta_0, \beta_1} \sum_{i=1}^{n} [y_i - (\beta_0 + \beta_1 x_i)]^2$
$\hat{y}_i = \beta_0 + \beta_1 x_i$
$y_i = \hat{y}_i + \epsilon_i$
$\epsilon_i = y_i - \hat{y}_i$
$\beta_1 = \frac{\operatorname{cov}(x, y)}{\sigma_x^2}$
$\beta_0 = \bar{y} - \beta_1 \bar{x}$
⇒
$x$   $y$   $x_i - \bar{x}$   $y_i - \bar{y}$   $(x_i - \bar{x}) \cdot (y_i - \bar{y})$   $(x - \bar{x})^2$
$\bar{x}$   $\bar{y}$
$r_{xy} = \frac{-125}{\sqrt{246 \cdot 78}} = -0.9$
$\beta_0 = 22$
$\beta_1 = -0.71$
$\sum \epsilon^2 = 20.25$
$\hat{y}_i = 22 - 0.71 \cdot x_i$
$\beta_1 = \frac{\operatorname{cov}(x, y)}{\sigma_x^2} = \frac{-125}{246} = -0.51$
$\sum \epsilon^2 = 14.48$
$\hat{y}_i = 19.1 - 0.51 \cdot x_i$
$\beta_0 = \bar{y} - \beta_1 \bar{x} = 14 + 0.51 \cdot 10 = 19.1$
$\hat{y} = 25792 + 9450 \cdot \mathit{YearsExperience}$
$\sum_{i=1}^{n} (y_i - \bar{y})^2$
$\sum_{i=1}^{n} (\hat{y}_i - \bar{y})^2$
$\sum_{i=1}^{n} (y_i - \hat{y}_i)^2$
$R^2$
$R^2$   $r^2$
$0 \le R^2 \le 1$
$\frac{x - \bar{x}}{\sigma_x}$
𝛽1
𝛽1
𝛽1 𝛽1 ≠ 0 𝛽1
𝛽1 ≠ 0
𝑯𝟎: 𝛽1 = 0
𝑯𝟏: 𝛽1 ≠ 0
$\hat{y}_i = \beta_0$
$\hat{y}_i = \beta_0 + \beta_1 X$
$\frac{\beta_i - 0}{SE(\beta_i)}$
$\beta_0$
$\beta_1$
$\frac{\beta_i - 0}{SE(\beta_i)}$
𝐸𝑆𝑆
𝑅𝑆𝑆
$y = \beta_0 + \beta_1 x_1 + \dots + \beta_p x_p + \epsilon_i$
$\hat{y}_i$
$\beta_j$   $x_i$
𝑯𝟎: 𝛽1 = 𝛽2 = ⋯ = 𝛽𝑝 = 0
𝛽𝑗 ≠ 0
𝑯𝟏: ∃𝑗: 𝛽𝑗≠ 0
$\hat{y} = \beta_0$
$\hat{y} = \beta_0 + \beta_i X_i + \dots + \beta_j X_j$
≫ 𝐻0
$2^p$
𝟏. 𝟐𝟔𝟕. 𝟔𝟓𝟎. 𝟔𝟎𝟎. 𝟐𝟐𝟖. 𝟐𝟐𝟗. 𝟒𝟎𝟏. 𝟒𝟗𝟔. 𝟕𝟎𝟑. 𝟐𝟎𝟓. 𝟑𝟕𝟔
𝑦 = 𝛽0
$\widehat{\mathit{price}} = -22.02 + 0.13 \cdot \mathit{Horsepower} + 0.22 \cdot \mathit{Wheelbase}$
𝛽1 ⋅ 𝑥1 𝛽2 ⋅ 𝑥2
𝑦 = 𝛽0 + 𝛽1𝑥1 + ⋯ + 𝛽𝑝𝑥𝑝
$\widehat{\mathit{price}} = -22.02 + 0.13 \cdot \mathit{Horsepower} + 0.22 \cdot \mathit{Wheelbase}$
𝛽1 ⋅ 𝑥1 𝛽2 ⋅ 𝑥2
$x_i = \begin{cases} 1 & \text{if $i$th individual is female} \\ 2 & \text{if $i$th individual is male} \end{cases}$
$\hat{y}_i = \begin{cases} \beta_0 + \beta_1 & \text{if $i$th individual is female} \\ \beta_0 + 2\beta_1 & \text{if $i$th individual is male} \end{cases}$
$x_{i1} = \begin{cases} 1 & \text{if $i$th individual is african} \\ 0 & \text{if $i$th individual is not african} \end{cases}$
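$\hat{y}_i = \beta_0 + \beta_1 x_{i1} + \beta_2 x_{i2} = \begin{cases} \beta_0 + \beta_1 & \text{if $i$th individual is african} \\ \beta_0 + \beta_2 & \text{if $i$th individual is asiatic} \\ \beta_0 & \text{if the $i$th individual is caucasic} \end{cases}$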
$x_{i2} = \begin{cases} 1 & \text{if $i$th individual is asiatic} \\ 0 & \text{if $i$th individual is not asiatic} \end{cases}$
$y = \beta_0 + \beta_1 x_1 + \beta_2 x_1^n + \dots + \epsilon_i$
$p(X) = \frac{e^{\beta_0 + \beta_1 X}}{1 + e^{\beta_0 + \beta_1 X}}$
$p(X) = \frac{e^{\beta_0 + \beta_1 X_1 + \dots + \beta_p X_p}}{1 + e^{\beta_0 + \beta_1 X_1 + \dots + \beta_p X_p}}$
$\hat{\delta}_k(x) = x \cdot \frac{\hat{\mu}_k}{\hat{\sigma}^2} - \frac{\hat{\mu}_k^2}{2 \hat{\sigma}^2} + \log(\hat{\pi}_k)$
$\hat{\pi}_k$
•
•
•
FEW CLUSTERS
No
distinctions
between
observations
Too many distinctions between
observations
TOO MANY CLUSTERS
Find the best balance between
information and homogeneity
of data associated with each cluster
Introduction to Machine Learning with examples in R
Introduction to Machine Learning with examples in R
Introduction to Machine Learning with examples in R
Introduction to Machine Learning with examples in R
Introduction to Machine Learning with examples in R
Introduction to Machine Learning with examples in R
Introduction to Machine Learning with examples in R
Introduction to Machine Learning with examples in R
Introduction to Machine Learning with examples in R
Introduction to Machine Learning with examples in R
Introduction to Machine Learning with examples in R
Introduction to Machine Learning with examples in R
Introduction to Machine Learning with examples in R
Introduction to Machine Learning with examples in R
Introduction to Machine Learning with examples in R
Introduction to Machine Learning with examples in R
Introduction to Machine Learning with examples in R
Introduction to Machine Learning with examples in R
Introduction to Machine Learning with examples in R
Introduction to Machine Learning with examples in R
Introduction to Machine Learning with examples in R
Introduction to Machine Learning with examples in R
Introduction to Machine Learning with examples in R
Introduction to Machine Learning with examples in R
Introduction to Machine Learning with examples in R

Introduction to Machine Learning with examples in R

  • 2.
  • 3.
  • 9.
  • 13.
  • 14.
     𝜎2 = 4 𝜎 =2 𝜎2 = 0.57 𝜎 = 0.75
  • 16.
    𝒙 𝒚 𝒙𝒊− ഥ 𝒙 𝒚𝒊 − ഥ 𝒚 (𝒙𝒊 − ഥ 𝒙) ⋅ (𝒚𝒊 − ഥ 𝒚)  ഥ 𝒙 ഥ 𝒚
  • 17.
    $\operatorname{cov}(X, Y) = \frac{\sum_{i=1}^{n} (x_i - \bar{x}) \cdot (y_i - \bar{y})}{n}$
  • 19.
    $-1 \le \frac{\operatorname{cov}(x, y)}{\sigma_x \cdot \sigma_y} \le 1$
  • 22.
    𝑦𝑖 𝑥𝑖 𝑦𝑖 𝑥𝑖 𝑦𝑖= 𝛽0 + 𝛽1𝑥i + 𝜖𝑖 𝜷𝟎 𝜷𝟏 ො 𝑦𝑖 = 𝛽0 + 𝛽1𝑥i 𝝐𝒊 ො 𝑦𝑖
  • 26.
    𝑦𝑖 ො 𝑦𝑖 𝐚𝐫𝐠 𝒎𝒊𝒏 𝜷𝟎,𝜷𝟏 ෍ 𝒊=𝟏 𝒏 𝝐𝒊 𝟐 arg min 𝛽0, 𝛽1 ෍ 𝑖=1 𝑛 [𝑦𝑖 − (𝛽0 + 𝛽1𝑥𝑖)]2 ො 𝑦𝑖 = 𝛽0 + 𝛽1𝑥𝑖 𝑦𝑖 = ො 𝑦𝑖 + 𝜖𝑖 𝝐𝒊 = 𝒚𝒊 − ෝ 𝒚𝒊 𝜷𝟏 = 𝒄𝒐𝒗 𝒙, 𝒚 𝝈𝒙 𝟐 𝜷𝟎 = ഥ 𝒚 − 𝜷𝟏ഥ 𝒙 ⇒
  • 27.
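    $x$   $y$   $x_i - \bar{x}$   $y_i - \bar{y}$   $(x_i - \bar{x}) \cdot (y_i - \bar{y})$   $(x - \bar{x})^2$   $\bar{x}$   $\bar{y}$   $r_{xy} = \frac{-125}{\sqrt{246 \cdot 78}} = -0.9$   $\beta_0 = 22$   $\beta_1 = -0.71$   $\sum \epsilon^2 = 20.25$   $\hat{y}_i = 22 - 0.71 \cdot x_i$   $\beta_1 = \frac{\operatorname{cov}(x, y)}{\sigma_x^2} = \frac{-125}{246} = -0.51$   $\sum \epsilon^2 = 14.48$   $\hat{y}_i = 19.1 - 0.51 \cdot x_i$   $\beta_0 = \bar{y} - \beta_1 \bar{x} = 14 + 0.51 \cdot 10 = 19.1$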
  • 28.
    ෝ 𝒚 = 𝟐𝟓𝟕𝟗𝟐+ 𝟗𝟒𝟓𝟎 ⋅ 𝒀𝒆𝒂𝒓𝒔𝑬𝒙𝒑𝒆𝒓𝒊𝒆𝒏𝒄𝒆
  • 30.
    $\sum_{i=1}^{n} (y_i - \bar{y})^2$   $\sum_{i=1}^{n} (\hat{y}_i - \bar{y})^2$   $\sum_{i=1}^{n} (y_i - \hat{y}_i)^2$
  • 31.
  • 35.
  • 40.
    𝛽1 𝛽1 𝛽1 𝛽1 ≠0 𝛽1 𝛽1 ≠ 0 𝑯𝟎: 𝛽1 = 0 𝑯𝟏: 𝛽1 ≠ 0 ො 𝑦𝑖 = 𝛽0 ො 𝑦𝑖 = 𝛽0 + 𝛽1𝑋 𝛽𝑖−0 𝑆𝐸(𝛽𝑖)
  • 41.
  • 43.
    𝑦 = 𝛽0+ 𝛽1𝑥1 + ⋯ + 𝛽𝑝𝑥𝑝 + 𝜖𝑖 ො 𝑦𝑖 𝛽𝑗 𝑥𝑖
  • 46.
    𝑯𝟎: 𝛽1 =𝛽2 = ⋯ = 𝛽𝑝 = 0 𝛽𝑗 ≠ 0 𝑯𝟏: ∃𝑗: 𝛽𝑗≠ 0 ො 𝑦 = 𝛽0 ො 𝑦 = 𝛽0 + 𝛽𝑖𝑋𝑖 + … + 𝛽𝑗𝑋𝑗 ≫ 𝐻0
  • 49.
    2𝑝 𝟏. 𝟐𝟔𝟕. 𝟔𝟓𝟎.𝟔𝟎𝟎. 𝟐𝟐𝟖. 𝟐𝟐𝟗. 𝟒𝟎𝟏. 𝟒𝟗𝟔. 𝟕𝟎𝟑. 𝟐𝟎𝟓. 𝟑𝟕𝟔
  • 50.
  • 53.
    ෣ 𝑝𝑟𝑖𝑐𝑒 = −22.02+ 0.13 ⋅ 𝐻𝑜𝑟𝑠𝑒𝑝𝑜𝑤𝑒𝑟 + 0.22 ⋅ 𝑊ℎ𝑒𝑒𝑙𝑏𝑎𝑠𝑒 𝛽1 ⋅ 𝑥1 𝛽2 ⋅ 𝑥2
  • 54.
    𝑦 = 𝛽0+ 𝛽1𝑥1 + ⋯ + 𝛽𝑝𝑥𝑝
  • 55.
    ෣ 𝑝𝑟𝑖𝑐𝑒 = −22.02+ 0.13 ⋅ 𝐻𝑜𝑟𝑠𝑒𝑝𝑜𝑤𝑒𝑟 + 0.22 ⋅ 𝑊ℎ𝑒𝑒𝑙𝑏𝑎𝑠𝑒 𝛽1 ⋅ 𝑥1 𝛽2 ⋅ 𝑥2
  • 58.
    𝑥𝑖 = ቊ 1𝑖𝑓 𝑖 𝑡ℎ 𝑖𝑛𝑑𝑖𝑣𝑖𝑑𝑢𝑎𝑙 𝑖𝑠 𝑓𝑒𝑚𝑎𝑙𝑒 2 𝑖𝑓 𝑖 𝑡ℎ 𝑖𝑛𝑑𝑖𝑣𝑖𝑑𝑢𝑎𝑙 𝑖𝑠 𝑚𝑎𝑙𝑒 ො 𝑦𝑖 = ቊ 𝛽0 + 𝛽1 𝑖𝑓 𝑖𝑡ℎ 𝑖𝑛𝑑𝑖𝑣𝑖𝑑𝑢𝑎𝑙 𝑖𝑠 𝑓𝑒𝑚𝑎𝑙𝑒 𝛽0 + 2𝛽1 𝑖𝑓 𝑖𝑡ℎ 𝑖𝑛𝑑𝑖𝑣𝑖𝑑𝑢𝑎𝑙 𝑖𝑠 𝑚𝑎𝑙𝑒
  • 59.
    𝑥𝑖1 = ቊ 1𝑖𝑓 𝑖𝑡ℎ 𝑖𝑛𝑑𝑖𝑣𝑖𝑑𝑢𝑎𝑙 𝑖𝑠 𝑎𝑓𝑟𝑖𝑐𝑎𝑛 0 𝑖𝑓 𝑖𝑡ℎ 𝑖𝑛𝑑𝑖𝑣𝑖𝑑𝑢𝑎𝑙 𝑖𝑠 𝑛𝑜𝑡 𝑎𝑓𝑟𝑖𝑐𝑎𝑛 ො 𝑦𝑖 = 𝛽0 + 𝛽1𝑥𝑖1 + 𝛽2𝑥𝑖2 = ቐ 𝜷𝟎 + 𝜷𝟏 𝑖𝑓 𝑖𝑡ℎ 𝑖𝑛𝑑𝑖𝑣𝑖𝑑𝑢𝑎𝑙 𝑖𝑠 𝑎𝑓𝑟𝑖𝑐𝑎𝑛 𝜷𝟎 + 𝜷𝟐 𝑖𝑓 𝑖𝑡ℎ 𝑖𝑛𝑑𝑖𝑣𝑖𝑑𝑢𝑎𝑙 𝑖𝑠 𝑎𝑠𝑖𝑎𝑡𝑖𝑐 𝜷𝟎 𝑖𝑓 𝑡ℎ𝑒 𝑖𝑡ℎ 𝑖𝑛𝑑𝑖𝑣𝑖𝑑𝑢𝑎𝑙 𝑖𝑠 𝑐𝑎𝑢𝑐𝑎𝑠𝑖𝑐 𝑥𝑖2 = ቊ 1 𝑖𝑓 𝑖𝑡ℎ 𝑖𝑛𝑑𝑖𝑣𝑖𝑑𝑢𝑎𝑙 𝑖𝑠 𝑎𝑠𝑖𝑎𝑡𝑖𝑐 0 𝑖𝑓 𝑖𝑡ℎ 𝑖𝑛𝑑𝑖𝑣𝑖𝑑𝑢𝑎𝑙 𝑖𝑠 𝑛𝑜𝑡 𝑎𝑠𝑖𝑎𝑡𝑖𝑐
  • 61.
    $y = \beta_0 + \beta_1 x_1 + \beta_2 x_1^n + \dots + \epsilon_i$
  • 68.
    $p(X) = \frac{e^{\beta_0 + \beta_1 X}}{1 + e^{\beta_0 + \beta_1 X}}$
  • 70.
    $p(X) = \frac{e^{\beta_0 + \beta_1 X_1 + \dots + \beta_p X_p}}{1 + e^{\beta_0 + \beta_1 X_1 + \dots + \beta_p X_p}}$
  • 89.
    $\hat{\delta}_k(x) = x \cdot \frac{\hat{\mu}_k}{\hat{\sigma}^2} - \frac{\hat{\mu}_k^2}{2 \hat{\sigma}^2} + \log(\hat{\pi}_k)$   $\hat{\pi}_k$
  • 103.
  • 119.
  • 120.
    Too many distinctions between observations TOO MANY CLUSTERS
  • 121.
    Find the best balance between information and homogeneity of data associated with each cluster