Your SlideShare is downloading. ×
Rデータフレーム自由自在
Upcoming SlideShare
Loading in...5
×

Thanks for flagging this SlideShare!

Oops! An error has occurred.

×
Saving this for later? Get the SlideShare app to save on your phone or tablet. Read anywhere, anytime – even offline.
Text the download link to your phone
Standard text messaging rates apply

Rデータフレーム自由自在

15,942

Published on

Tsukuba.R #9の発表です

Tsukuba.R #9の発表です

0 Comments
56 Likes
Statistics
Notes
  • Be the first to comment

No Downloads
Views
Total Views
15,942
On Slideshare
0
From Embeds
0
Number of Embeds
10
Actions
Shares
0
Downloads
217
Comments
0
Likes
56
Embeds 0
No embeds

Report content
Flagged as inappropriate Flag as inappropriate
Flag as inappropriate

Select your reason for flagging this presentation as inappropriate.

Cancel
No notes for slide

Transcript

  • 1. R Tsukuba.R #9 (2011/11/12) @a_bicky
  • 2. • Takeshi Arabiki 1 ‣ Twitter: @a_bicky ‣ : id:a_bicky• R• http://d.hatena.ne.jp/a_bicky/
  • 3. • Takeshi Arabiki 1 ‣ Twitter: @a_bicky ‣ : id:a_bicky• R SciPy• http://d.hatena.ne.jp/a_bicky/
  • 4. Osaka.R #4 Tokyo.R #16http://www.slideshare.net/abicky/twitterr http://www.slideshare.net/abicky/r-9034336
  • 5. •• R 8 ,9•••• http://www.amazon.co.jp/gp/product/4431712186
  • 6. reshape2
      > install.packages("reshape2")
      > library(reshape2)
      > head(tips) #
        total_bill  tip    sex smoker day   time size
      1      16.99 1.01 Female     No Sun Dinner    2
      2      10.34 1.66   Male     No Sun Dinner    3
      3      21.01 3.50   Male     No Sun Dinner    3
      4      23.68 3.31   Male     No Sun Dinner    2
      5      24.59 3.61 Female     No Sun Dinner    4
      6      25.29 4.71   Male     No Sun Dinner    4
  • 7. tips total_bill: tip: sex: Male, Female smoker: Yes, No day: Thur, Fri, Sat, Sun time: Lunch, Dinner size:
  • 8. •• • subset • cbind, [, $, [[ • transform, within• • subset • cbind, [, $, [[ • transform, within•• order•
  • 9.
      > class(tips)
      [1] "data.frame"
      > mode(tips) # data.frame list
      [1] "list"
      > head(tips[["total_bill"]]) # list
      [1] 16.99 10.34 21.01 23.68 24.59 25.29
      > head(tips$total_bill) #
      [1] 16.99 10.34 21.01 23.68 24.59 25.29
      > head(tips["total_bill"]) # data.frame
        total_bill
      1      16.99
      2      10.34
      3      21.01
      4      23.68
      5      24.59
      6      25.29
  • 10.
      > head(tips[c("total_bill", "tip")]) #
        total_bill  tip
      1      16.99 1.01
      2      10.34 1.66
      3      21.01 3.50
      4      23.68 3.31
      5      24.59 3.61
      6      25.29 4.71
      > head(tips[[c("total_bill", "tip")]]) #
      Error in .subset2(x, i, exact = exact) : subscript out of bounds
      > tips[[c(1, 2)]] # tips[[1]][[2]]
      [1] 10.34
  • 11. > tips[1:2, 1:2] # total_bill tip1 16.99 1.012 10.34 1.66> tips[1:2, c("total_bill", "tip")] # total_bill tip1 16.99 1.012 10.34 1.66> head(tips[-(1:2), -(1:2)]) # sex smoker day time size3 Male No Sun Dinner 34 Male No Sun Dinner 25 Female No Sun Dinner 46 Male No Sun Dinner 47 Male No Sun Dinner 28 Male No Sun Dinner 4
  • 12. subset> args(subset.data.frame)function (x, subset, select, drop = FALSE, ...)NULL> (tips.vip <- subset(tips, total_bill > 30 & size == 2)) total_bill tip sex smoker day time size84 32.68 5.00 Male Yes Thur Lunch 2174 31.85 3.18 Male Yes Sun Dinner 2176 32.90 3.11 Male Yes Sun Dinner 2180 34.63 3.55 Male Yes Sun Dinner 2185 40.55 3.00 Male Yes Sun Dinner 2238 32.83 1.17 Male Yes Sat Dinner 2> levels(tips.vip$smoker) #[1] "No" "Yes"> levels(droplevels(tips.vip)$smoker) #[1] "Yes"
  • 13. cbind, [, $, [[> head(cbind(tips, type = ifelse(tips$tip < 2, " ", " ")), 3) total_bill tip sex smoker day time size type1 16.99 1.01 Female No Sun Dinner 22 10.34 1.66 Male No Sun Dinner 33 21.01 3.50 Male No Sun Dinner 3> tips$type <- ifelse(tips$tip < 2, " ", " ")> head(tips, 3) total_bill tip sex smoker day time size type1 16.99 1.01 Female No Sun Dinner 22 10.34 1.66 Male No Sun Dinner 33 21.01 3.50 Male No Sun Dinner 3> data(tips) #
  • 14. transform, within> args(transform.data.frame)function (`_data`, ...)NULL> head(transform(tips, type = ifelse(tips$tip < 2, " ", " ")), 3) total_bill tip sex smoker day time size type1 16.99 1.01 Female No Sun Dinner 22 10.34 1.66 Male No Sun Dinner 33 21.01 3.50 Male No Sun Dinner 3> args(within.data.frame)function (data, expr, ...)NULL> head(within(tips, { type <- c() # within+ type[tip < 2] <- " "+ type[tip >= 2] <- " " }), 3) total_bill tip sex smoker day time size type1 16.99 1.01 Female No Sun Dinner 22 10.34 1.66 Male No Sun Dinner 33 21.01 3.50 Male No Sun Dinner 3
  • 15. subset> # subset> head(subset(tips, select = c(tip, sex, smoker)), 1) tip sex smoker1 1.01 Female No> head(subset(tips, select = 2:4), 1) tip sex smoker1 1.01 Female No> head(subset(tips, select = -c(total_bill, size, time, day)), 1) tip sex smoker1 1.01 Female No> head(subset(tips, select = -c(1, 5:7)), 1) tip sex smoker1 1.01 Female No> head(subset(tips, select = c(tip:smoker)), 1) tip sex smoker1 1.01 Female No> head(subset(tips, select = -c(total_bill, day:size)), 1) tip sex smoker1 1.01 Female No
  • 16. [, $, [[> # NULL> tips$size <- NULL> head(tips, 3) total_bill tip sex smoker day time1 16.99 1.01 Female No Sun Dinner2 10.34 1.66 Male No Sun Dinner3 21.01 3.50 Male No Sun Dinner> tips[["time"]] <- NULL> head(tips, 3) total_bill tip sex smoker day1 16.99 1.01 Female No Sun2 10.34 1.66 Male No Sun3 21.01 3.50 Male No Sun> tips["day"] <- NULL; tips[1] <- NULL> head(tips, 3) tip sex smoker1 1.01 Female No2 1.66 Male No3 3.50 Male No> data(tips)
  • 17. transform, within> # NULL> head(transform(tips, total_bill = NULL, size = NULL, time = NULL, day =NULL), 3) tip sex smoker1 1.01 Female No2 1.66 Male No3 3.50 Male No> # rm> head(within(tips, rm(total_bill, size, time, day)), 3) tip sex smoker1 1.01 Female No2 1.66 Male No3 3.50 Male No
  • 18. > head(transform(tips, tip = 10), 3) total_bill tip sex smoker day time size1 16.99 10 Female No Sun Dinner 22 10.34 10 Male No Sun Dinner 33 21.01 10 Male No Sun Dinner 3> head(within(tips, tip <- 10), 3) total_bill tip sex smoker day time size1 16.99 10 Female No Sun Dinner 22 10.34 10 Male No Sun Dinner 33 21.01 10 Male No Sun Dinner 3> tips$tip <- 10> head(tips, 3) total_bill tip sex smoker day time size1 16.99 10 Female No Sun Dinner 22 10.34 10 Male No Sun Dinner 33 21.01 10 Male No Sun Dinner 3> data(tips)
  • 19. order> head(tips[order(tips$sex), ], 4) # total_bill tip sex smoker day time size1 16.99 1.01 Female No Sun Dinner 25 24.59 3.61 Female No Sun Dinner 412 35.26 5.00 Female No Sun Dinner 415 14.83 3.02 Female No Sun Dinner 2> head(tips[order(tips$sex, decreasing = TRUE), ], 4) # total_bill tip sex smoker day time size2 10.34 1.66 Male No Sun Dinner 33 21.01 3.50 Male No Sun Dinner 34 23.68 3.31 Male No Sun Dinner 26 25.29 4.71 Male No Sun Dinner 4> head(tips[order(tips$sex, tips$tip), ], 4) # total_bill tip sex smoker day time size68 3.07 1.00 Female Yes Sat Dinner 193 5.75 1.00 Female Yes Fri Dinner 2112 7.25 1.00 Female No Sat Dinner 11 16.99 1.01 Female No Sun Dinner 2
  • 20. data.frame> (tip <- data.frame(date = sample(seq(as.Date("2011-11-09"), by = "day", len = 4)),+ total_bill = sample(1:4 * 10),+ tip = sample(1:4))) date total_bill tip1 2011-11-10 30 42 2011-11-12 40 23 2011-11-11 10 14 2011-11-09 20 3> #> tip <- tip[order(tip$date), ]> transform(tip, total_bill = cumsum(total_bill), tip = cumsum(tip)) date total_bill tip4 2011-11-09 20 31 2011-11-10 50 73 2011-11-11 60 82 2011-11-12 100 10
  • 21. > head(tips[c("tip", "total_bill", "sex", "size", "time", "day", "smoker")]) tip total_bill sex size time day smoker1 10 16.99 Female 2 Dinner Sun No2 10 10.34 Male 3 Dinner Sun No3 10 21.01 Male 3 Dinner Sun No4 10 23.68 Male 2 Dinner Sun No5 10 24.59 Female 4 Dinner Sun No6 10 25.29 Male 4 Dinner Sun No
  • 22. •• table• xtabs• aggregate• by
  • 23. > args(colSums)function (x, na.rm = FALSE, dims = 1L)NULL> colSums(subset(tips, select = c(total_bill, tip)), na.rm = TRUE)total_bill tip 4827.77 731.58> args(colMeans)function (x, na.rm = FALSE, dims = 1L)NULL> colMeans(subset(tips, select = c(total_bill, tip)), na.rm = TRUE)total_bill tip 19.785943 2.998279> # apply colSums> apply(subset(tips, select = c(total_bill, tip)), 2, sum, na.rm = TRUE)total_bill tip 4827.77 731.58
  • 24. table> args(table)function (..., exclude = if (useNA == "no") c(NA, NaN), useNA = c("no", "ifany", "always"), dnn = list.names(...), deparse.level = 1)NULL> table(subset(tips, select = c(sex, smoker))) smokersex No Yes Female 54 33 Male 97 60> # 4> table(subset(tips, select = c(sex, smoker, day, size))), , day = Fri, size = 1 smokersex No Yes Female 0 0 Male 0 1
  • 25. table> args(addmargins)function (A, margin = seq_along(dim(A)), FUN = sum, quiet = FALSE)NULL> #> addmargins(table(subset(tips, select = c(sex, smoker)))) smokersex No Yes Sum Female 54 33 87 Male 97 60 157 Sum 151 93 244> #> args(prop.table)function (x, margin = NULL)NULL> prop.table(table(subset(tips, select = c(sex, smoker)))) smokersex No Yes Female 0.2213115 0.1352459 Male 0.3975410 0.2459016
  • 26. xtabs> args(xtabs)function (formula = ~., data = parent.frame(), subset, sparse = FALSE, na.action, exclude = c(NA, NaN), drop.unused.levels = FALSE)NULL> #> xtabs(~ sex + smoker, tips) smokersex No Yes Female 54 33 Male 97 60> #> xtabs(cbind(total_bill, tip) ~ sex + smoker, tips), , = total_bill smokersex No Yes Female 977.68 593.27 Male 1919.75 1337.07
  • 27. aggregate> args(aggregate.data.frame)function (x, by, FUN, ..., simplify = TRUE)NULL> # FUN 1> aggregate(tips[c("total_bill", "tip")], tips[c("sex", "day")], sum) sex day total_bill tip1 Female Fri 127.31 25.032 Male Fri 198.57 26.933 Female Sat 551.05 78.454 Male Sat 1227.35 181.955 Female Sun 357.70 60.616 Male Sun 1269.46 186.787 Female Thur 534.89 82.428 Male Thur 561.44 89.41> # formula> aggregate(cbind(total_bill, tip) ~ sex + day, tips, sum) sex day total_bill tip1 Female Fri 127.31 25.03
  • 28. by> args(by)function (data, INDICES, FUN, ..., simplify = TRUE)NULL> # aggregate FUN OK> (ret <- by(tips[c("total_bill", "tip")], tips[c("sex", "day")], range))sex: Femaleday: Fri[1] 1.00 22.75------------------------------------------------------------sex: Maleday: Fri[1] 1.50 40.17> # data.frame> cbind(expand.grid(dimnames(ret)), do.call(rbind, ret)) sex day 1 21 Female Fri 1.00 22.752 Male Fri 1.50 40.17
  • 29. • reshape• merge
  • 30. reshape> args(reshape)function (data, varying = NULL, v.names = NULL, timevar = "time", idvar = "id", ids = 1L:NROW(data), times = seq_along(varying[[1L]]), drop = NULL, direction, new.row.names = NULL, sep = ".", split = if (sep == "") { list(regexp = "[A-Za-z][0-9]", include = TRUE) } else { list(regexp = sep, include = FALSE, fixed = TRUE) })NULL> head(reshape(tips, idvar = c("sex", "smoker", "time", "size"),+ timevar = "day", drop = "total_bill", direction = "wide")) sex smoker time size tip.Sun tip.Sat tip.Thur tip.Fri1 Female No Dinner 2 1.01 2.75 3 3.252 Male No Dinner 3 1.66 3.35 NA NA4 Male No Dinner 2 3.31 4.08 NA 3.505 Female No Dinner 4 3.61 2.45 NA NA6 Male No Dinner 4 4.71 7.58 NA NA17 Female No Dinner 3 1.67 3.07 NA NA
  • 31. reshape
      > # idvar timevar
      > (a <- data.frame(a = c(1:3, 1), b = c(1:3, 1), c = 1:4))
        a b c
      1 1 1 1
      2 2 2 2
      3 3 3 3
      4 1 1 4
      > reshape(a, idvar = "a", timevar = "b", direction = "wide")
        a c.1 c.2 c.3
      1 1   1  NA  NA
      2 2  NA   2  NA
      3 3  NA  NA   3
  • 32. merge> #> (user.type <- data.frame(sex = rep(c("Male", "Female"), each = 2),+ smoker = c("Yes", "No"),+ type = LETTERS[1:4])) sex smoker type1 Male Yes A2 Male No B3 Female Yes C4 Female No D> args(merge.data.frame)function (x, y, by = intersect(names(x), names(y)), by.x = by, by.y = by, all = FALSE, all.x = all, all.y = all, sort = TRUE, suffixes = c(".x", ".y"), incomparables = NULL, ...)NULL> merge(tips, user.type, by = c("sex", "smoker"), sort = FALSE)[54:55, ] sex smoker total_bill tip day time size type54 Female No 10.65 1.50 Thur Lunch 2 D55 Male No 10.27 1.71 Sun Dinner 2 B
  • 33. •• R• reshape2• melt• cast•
  • 34. Excel
  • 35. R> acast(melt(tips, id.var = c("sex", "smoker", "day"), measure.var = "tip"),+ sex + smoker ~ day, sum, margins = TRUE) Fri Sat Sun Thur (all)Female_No 6.25 35.42 46.61 61.49 149.77Female_Yes 18.78 43.03 14.00 18.93 94.74Female_(all) 25.03 78.45 60.61 80.42 244.51Male_No 5.00 104.21 133.96 58.83 302.00Male_Yes 21.93 77.74 52.82 30.58 183.07Male_(all) 26.93 181.95 186.78 89.41 485.07(all)_(all) 51.96 260.40 247.39 169.83 729.58 reshape2
  • 36. reshape2 melt cast melt id> head(tipsm <- melt(tips, measure.vars = c("total_bill", "tip"))) sex smoker day time size variable value1 Female No Sun Dinner 2 total_bill 16.992 Male No Sun Dinner 3 total_bill 10.343 Male No Sun Dinner 3 total_bill 21.014 Male No Sun Dinner 2 total_bill 23.685 Female No Sun Dinner 4 total_bill 24.596 Male No Sun Dinner 4 total_bill 25.29> levels(tipsm$variable)[1] "total_bill" "tip"
  • 37. melt> args(melt.data.frame)function (data, id.vars, measure.vars, variable_name = "variable", na.rm = !preserve.na, preserve.na = TRUE, ...)NULL> # factor id> head(melt(tips), 1)Using sex, smoker, day, time as id variables sex smoker day time variable value1 Female No Sun Dinner total_bill 16.99> # id measure> head(melt(tips, id.vars = c("sex", "smoker", "day", "time", "size")), 1) sex smoker day time size variable value1 Female No Sun Dinner 2 total_bill 16.99> # id measure> head(melt(tips, id.vars = c("sex", "smoker", "day", "time", "size"),+ measure.vars = "tip"), 1) sex smoker day time size variable value1 Female No Sun Dinner 2 tip 1.01
  • 38. castformula fun.aggregate > args(acast) # array acast function (data, formula, fun.aggregate = NULL, ..., margins = NULL, subset = NULL, fill = NULL, drop = TRUE, value_var = guess_value(data)) NULL > args(dcast) # data.frame dcast function (data, formula, fun.aggregate = NULL, ..., margins = NULL, subset = NULL, fill = NULL, drop = TRUE, value_var = guess_value(data)) NULL formula ... . acast hoge ~ fuga ~ piyo ※dcast 1 hoge ~ fuga + piyo
  • 39.
      > tipsm <- melt(tips, measure.vars = c("total_bill", "tip"))
      > acast(tipsm, sex ~ smoker, length)
              No Yes
      Female 108  64
      Male   194 120
      > #
      > acast(tipsm, smoker ~ sex, length)
          Female Male
      No     108  194
      Yes     64  120
      > #
      > acast(tipsm, sex ~ smoker, length, margins = TRUE)
              No Yes (all)
      Female 108  64   172
      Male   194 120   314
      (all)  302 184   486
  • 40. > # size> acast(tipsm, smoker ~ sex + size, length) Female_1 Female_2 Female_3 Female_4 Female_5 Female_6 Male_1 Male_2Male_3No 4 66 18 14 2 4 0 11434Yes 2 48 10 4 0 0 2 8214 Male_4 Male_5 Male_6No 38 4 4Yes 18 4 0> # 3> acast(tipsm, smoker ~ sex ~ size, length), , 1 Female MaleNo 4 0Yes 2 2
  • 41. > # sum> acast(tipsm, sex ~ day, sum) Fri Sat Sun ThurFemale 152.34 629.5 418.31 617.31 total_bill tipMale 225.50 1409.3 1456.24 650.85> # total_bill tip sum> acast(tipsm, sex + variable ~ day, sum) Fri Sat Sun ThurFemale_total_bill 127.31 551.05 357.70 534.89Female_tip 25.03 78.45 60.61 82.42Male_total_bill 198.57 1227.35 1269.46 561.44Male_tip 26.93 181.95 186.78 89.41> # tip sum> acast(tipsm, sex ~ day, sum, subset = .(variable == "tip")) Fri Sat Sun ThurFemale 25.03 78.45 60.61 82.42Male 26.93 181.95 186.78 89.41
  • 42. reshape2 aggregate table xtabs

×