R:k    apply    sesejun@is.ocha.ac.jp         2009/11/19
USPS
ImageName     Class   0,0   0,1   0,2   0,3   0,4img_2_00_02   1       0     0     0     0     0img_2_00_03   1       0   ...
img_3_29_25   img_5_03_31   img_3_06_30   img_3_17_08
k-NN
Apply Family•                                  ,      ,    •   for    •    apply(X, 1,        )    apply(X, 2,        )app...
1> m <- matrix((1:9)**2, nrow=3)   > l <- list(a=1:3, b=4:6)> m                               > l     [,1] [,2] [,3]      ...
2> m <- matrix((1:9)**2, nrow=3)   > l <- list(a=1:3, b=4:6)> m                               > l     [,1] [,2] [,3]      ...
K-NN •> iris.train <- read.table("iris_train.csv", sep=",", header=T)> iris.test <- read.table("iris_test.csv", sep=",", h...
1> iris.train[order(distquery)[1:5],]> iris.train[order(distquery)[1:5],]$Class> knnclasses <- table(iris.train[order(dist...
>   knnpredict <- function(train,class,query,k) {+   diff <- sweep(train,2,query)+   distquery <- apply(diff * diff, 1, su...
> resvec <- c()> for (i in 1:30) {+ pred <- knnpredict(iris.train[1:4], iris.train$Class, t(iris.test[i,1:4]),10)+ resvec ...
31.    IRIS     1.   IRIS                              4    ("Sepal.length","Sepal.width",          "Petal.length","Petal....
Datamining r 4th
Upcoming SlideShare
Loading in …5
×

Datamining r 4th

860 views

Published on

0 Comments
0 Likes
Statistics
Notes
  • Be the first to comment

  • Be the first to like this

No Downloads
Views
Total views
860
On SlideShare
0
From Embeds
0
Number of Embeds
58
Actions
Shares
0
Downloads
4
Comments
0
Likes
0
Embeds 0
No embeds

No notes for slide

Datamining r 4th

  1. 1. R:k apply sesejun@is.ocha.ac.jp 2009/11/19
  2. 2. USPS
  3. 3. ImageName Class 0,0 0,1 0,2 0,3 0,4 | img_2_00_02 1 0 0 0 0 0 | img_2_00_03 1 0 38 22 0 0 | img_2_00_05 1 13 0 64 13 42 | ... | img_0_00_09 -1 34 53 0 38 0 | img_0_00_28 -1 0 64 0 98 93 | img_0_01_08 -1 13 0 0 59 13 | img_0_03_05 -1 34 34 0 0 0
  4. 4. img_3_29_25 img_5_03_31 img_3_06_30 img_3_17_08
  5. 5. k-NN
  6. 6. Apply Family• , , • for • apply(X, 1, ) apply(X, 2, )apply(X, c(1,2), ) lapply(X, ) dataframe sapply(X, ) table sweep(X, M,V) X (M=1) (M=2) (M=c(1,2)) V 7
  7. 7. 1> m <- matrix((1:9)**2, nrow=3) > m [,1] [,2] [,3] [1,] 1 16 49 [2,] 4 25 64 [3,] 9 36 81 > apply(m, 1, sum) [1] 66 93 126 > apply(m, 2, sum) [1] 14 77 194 > apply(m, c(1,2), sqrt) [,1] [,2] [,3] [1,] 1 4 7 [2,] 2 5 8 [3,] 3 6 9 > l <- list(a=1:3, b=4:6) > l $a [1] 1 2 3 $b [1] 4 5 6 > lapply(l, sum) $a [1] 6 $b [1] 15 > sapply(l, sum) a b 6 15 8
  8. 8. 2> m <- matrix((1:9)**2, nrow=3) > m [,1] [,2] [,3] [1,] 1 16 49 [2,] 4 25 64 [3,] 9 36 81 > apply(m, 1, sum) [1] 66 93 126 > apply(m, 2, sum) [1] 14 77 194 > apply(m, c(1,2), sqrt) [,1] [,2] [,3] [1,] 1 4 7 [2,] 2 5 8 [3,] 3 6 9 > l <- list(a=1:3, b=4:6) > l $a [1] 1 2 3 $b [1] 4 5 6 > lapply(l, sum) $a [1] 6 $b [1] 15 > sapply(l, sum) a b 6 15 9
  9. 9. K-NN •> iris.train <- read.table("iris_train.csv", sep=",", header=T)> iris.test <- read.table("iris_test.csv", sep=",", header=T)> q <- iris.test[1,1:4]> diff <- sweep(iris.train[1:4], 2, t(q))> diff * diff> distquery <- apply(diff * diff, 1, sum)> sort(distquery)> order(distquery) 10
  10. 10. 1> iris.train[order(distquery)[1:5],]> iris.train[order(distquery)[1:5],]$Class> knnclasses <- table(iris.train[order(distquery)[1:5],]$Class)> as.factor(table(knnclasses))> sortedtable <- sort(as.factor(table(knnclasses)), decreasing=T)> labels(sortedtable)[1]> predclass <- labels(sortedtable)[1]> predclass == iris.test$Class[1] 11
  11. 11. > knnpredict <- function(train,class,query,k) {+ diff <- sweep(train,2,query)+ distquery <- apply(diff * diff, 1, sum)+ knnclasses <- class[order(distquery)[1:k]]+ sortedtable <- sort(as.factor(table(knnclasses)), decreasing=T)+ labels(sortedtable)[1]+ }> knnpredict(iris.train[1:4], iris.train$Class, t(iris.test[1,1:4]),5)> knnpredict(iris.train[1:4], iris.train$Class, t(iris.test[10,1:4]),1)> for (i in 1:length(rownames(iris.test))) {+ pred <- knnpredict(iris.train[1:4], iris.train$Class, t(iris.test[i,1:4]),10)+ result <- pred == iris.test[i,]$Class+ cat(paste(pred, iris.test[i,]$Class, result, sep="\t"))+ cat("\n")+ } 12
  12. 12. > resvec <- c()> for (i in 1:30) {+ pred <- knnpredict(iris.train[1:4], iris.train$Class, t(iris.test[i,1:4]),10)+ resvec <- append(resvec, pred == iris.test[i,]$Class)+ }> sum(resvec)/length(resvec) 13
  13. 13. 31. IRIS 1. IRIS 4 ("Sepal.length","Sepal.width", "Petal.length","Petal.width") 2. IRIS K-NN2. USPS 1. USPS 5-NN (0-9) 2. K-NN K 3. USPS K-NN K-NN K 4. USPS 14

×