R:     sesejun@is.ocha.ac.jp          2010/10/21
•                                                contacts_train.csv     •     •                               (setwd      ...
> contacts.train<-read.table("contacts_train.csv", header=T,sep=",")> contacts.train   Pred Young Myope Astimatic Tear1   ...
> contacts.train[1,]    Pred Young Myope Astimatic Tear 1     P     Y     Y         Y    N > contacts.train[,2]   [1] Y Y ...
> class(contacts.train)[1] "data.frame"> forecast <- data.frame(date=c("10/1","10/2","10/3"), weather=c("sunny","sunny","r...
> nrow(contacts.train)[1] 10> ncol(contacts.train)[1] 5> rownames(contacts.train) [1] "1" "2" "3" "4" "5" "6" "7"         ...
> contacts.train$Young  [1] Y Y N N Y Y N N N NLevels: N Y> order(contacts.train$Young)  [1] 3 4 7 8 9 10 1 2 5 6> contact...
> library("mvpart")> rpart(Young~., data=contacts.train, method="class")n= 10node), split, n, loss, yval, (yprob)      * d...
IRIS •   http://archive.ics.uci.edu/ml/machine-learning-databases/iris/     iris.data     •               iris.names     • ...
> library("mvpart")> rpart(Class~., data=iris.train, method="class", control=rpart.control(cp=.1))n= 120node), split, n, l...
> iris.dtree<-rpart(Class~., data=iris.train, method="class",control=rpart.control(cp=.1))> plot.new()> plot(iris.dtree,un...
> plot(iris.train$Petal.length, iris.train$Petal.width, pch = c(1,2,3)[unclass(iris.train$Class)])
> iris.test <- read.table("iris_test.csv", sep=",", header=T)> iris.predict <- predict(iris.dtree, iris.test[1:4], type="c...
•    •        •        •        •   rpart       control=rpart.control(cp=.1)   .1    •                                    ...
Upcoming SlideShare
Loading in...5
×

Datamining r 2nd

871

Published on

0 Comments
0 Likes
Statistics
Notes
  • Be the first to comment

  • Be the first to like this

No Downloads
Views
Total Views
871
On Slideshare
0
From Embeds
0
Number of Embeds
1
Actions
Shares
0
Downloads
13
Comments
0
Likes
0
Embeds 0
No embeds

No notes for slide

Datamining r 2nd

  1. 1. R: sesejun@is.ocha.ac.jp 2010/10/21
  2. 2. • contacts_train.csv • • (setwd > > )"Pred","Young","Myope","Astimatic","Tear""P","Y","Y","Y","N""P","Y","Y","N","N""P","N","Y","Y","N""P","N","Y","Y","N""N","Y","Y","Y","Y""N","Y","Y","N","Y""N","N","N","N","Y""N","N","N","N","N""N","N","N","N","Y""N","N","N","N","N" contacts.csv
  3. 3. > contacts.train<-read.table("contacts_train.csv", header=T,sep=",")> contacts.train Pred Young Myope Astimatic Tear1 P Y Y Y N2 P Y Y N N3 P N Y Y N4 P N Y Y N5 N Y Y Y Y6 N Y Y N Y7 N N N N Y8 N N N N N9 N N N N Y10 N N N N N
  4. 4. > contacts.train[1,] Pred Young Myope Astimatic Tear 1 P Y Y Y N > contacts.train[,2] [1] Y Y N N Y Y N N N N Levels: N Y > contacts.train[,"Pred"] [1] P P P P N N N N N N Levels: N P > contacts.train$Pred [1] P P P P N N N N N N Levels: N P> contacts.train[c(-1,-3,-5,-7,-9),] Pred Young Myope Astimatic Tear2 P Y Y N N4 P N Y Y N6 N Y Y N Y8 N N N N N10 N N N N N
  5. 5. > class(contacts.train)[1] "data.frame"> forecast <- data.frame(date=c("10/1","10/2","10/3"), weather=c("sunny","sunny","rain"))> forecast date weather1 10/1 sunny2 10/2 sunny3 10/3 rain> forecast$weather[1] sunny sunny rainLevels: rain sunny> forecast$date[1] 10/1 10/2 10/3
  6. 6. > nrow(contacts.train)[1] 10> ncol(contacts.train)[1] 5> rownames(contacts.train) [1] "1" "2" "3" "4" "5" "6" "7" "8" "9" "10"> colnames(contacts.train)[1] "Pred" "Young" "Myope" "Astimatic" "Tear"> colnames(contacts.train)[2][1] "Young"> colnames(contacts.train)[2] <- "Old"> colnames(contacts.train)[1] "Pred" "Old" "Myope" "Astimatic" "Tear"> colnames(contacts.train)[2] <- "Young"
  7. 7. > contacts.train$Young [1] Y Y N N Y Y N N N NLevels: N Y> order(contacts.train$Young) [1] 3 4 7 8 9 10 1 2 5 6> contacts.train[order(contacts.train$Young),] Pred Young Myope Astimatic Tear3 P N Y Y N4 P N Y Y N7 N N N N Y8 N N N N N9 N N N N Y10 N N N N N1 P Y Y Y N2 P Y Y N N5 N Y Y Y Y6 N Y Y N Y
  8. 8. > library("mvpart")> rpart(Young~., data=contacts.train, method="class")n= 10node), split, n, loss, yval, (yprob) * denotes terminal node1) root 10 4 N (0.6000000 0.4000000) 2) Myope=N 4 0 N (1.0000000 0.0000000) * 3) Myope=Y 6 2 Y (0.3333333 0.6666667) *> rpart(Young~., data=contacts.train, method="class",control=rpart.control(cp=-1))n= 10node), split, n, loss, yval, (yprob) * denotes terminal node1) root 10 4 N (0.6000000 0.4000000) 2) Myope=N 4 0 N (1.0000000 0.0000000) * 3) Myope=Y 6 2 Y (0.3333333 0.6666667) 6) Pred=P 4 2 N (0.5000000 0.5000000) * 7) Pred=N 2 0 Y (0.0000000 1.0000000) *
  9. 9. IRIS • http://archive.ics.uci.edu/ml/machine-learning-databases/iris/ iris.data • iris.names • (setosa, versicolor, virginica) • http://togodb.sel.is.ocha.ac.jp/> iris.train <- read.table("iris_train.csv", sep=",", header=T)> length(rownames(iris.train))[1] 120> length(colnames(iris.train))[1] 5> hist(iris.train$Sepal.length)> hist(iris.train$Petal.length)
  10. 10. > library("mvpart")> rpart(Class~., data=iris.train, method="class", control=rpart.control(cp=.1))n= 120 node), split, n, loss, yval, (yprob) * denotes terminal node 1) root 120 77 Iris-setosa (0.35833333 0.34166667 0.30000000) 2) Petal.length< 2.45 43 0 Iris-setosa (1.00000000 0.00000000 0.00000000) * 3) Petal.length>=2.45 77 36 Iris-versicolor (0.00000000 0.53246753 0.46753247) 6) Petal.length< 4.75 37 1 Iris-versicolor (0.00000000 0.97297297 0.02702703) * 7) Petal.length>=4.75 40 5 Iris-virginica (0.00000000 0.12500000 0.87500000) *
  11. 11. > iris.dtree<-rpart(Class~., data=iris.train, method="class",control=rpart.control(cp=.1))> plot.new()> plot(iris.dtree,uniform=T,margin=0.5)> text(iris.dtree,use.n=T,all.leaves=F)
  12. 12. > plot(iris.train$Petal.length, iris.train$Petal.width, pch = c(1,2,3)[unclass(iris.train$Class)])
  13. 13. > iris.test <- read.table("iris_test.csv", sep=",", header=T)> iris.predict <- predict(iris.dtree, iris.test[1:4], type="class")> iris.predict 2 4 18 34 Iris-setosa Iris-setosa Iris-setosa Iris-setosa...> iris.predict == iris.test$Class [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE TRUE[11] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE[21] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE> sum(iris.predict == iris.test$Class) / length(iris.test$Class)[1] 0.9666667> sum(iris.predict != iris.test$Class) / length(iris.test$Class)[1] 0.03333333
  14. 14. • • • • • rpart control=rpart.control(cp=.1) .1 • 10 • 3 2 3
  1. A particular slide catching your eye?

    Clipping is a handy way to collect important slides you want to go back to later.

×