7. reshape2
> install.packages("reshape2")
> library(reshape2)
> head(tips) #
total_bill tip sex smoker day time size
1 16.99 1.01 Female No Sun Dinner 2
2 10.34 1.66 Male No Sun Dinner 3
3 21.01 3.50 Male No Sun Dinner 3
4 23.68 3.31 Male No Sun Dinner 2
5 24.59 3.61 Female No Sun Dinner 4
6 25.29 4.71 Male No Sun Dinner 4
8. tips
total_bill:
tip:
sex: Male, Female
smoker: Yes, No
day: Thur, Fri, Sat, Sun
time: Lunch, Dinner
size:
10. •
•
• subset
• cbind, [, $, [[
• transform, within
•
• subset
• cbind, [, $, [[
• transform, within
•
• order
•
12. > head(tips[c("total_bill", "tip")]) #
total_bill tip
1 16.99 1.01
2 10.34 1.66
3 21.01 3.50
4 23.68 3.31
5 24.59 3.61
6 25.29 4.71
> head(tips[[c("total_bill", "tip")]]) #
Error in .subset2(x, i, exact = exact) : subscript out of bounds
> tips[[c(1, 2)]] # tips[[1]][[2]]
[1] 10.34
13. > tips[1:2, 1:2] #
total_bill tip
1 16.99 1.01
2 10.34 1.66
> tips[1:2, c("total_bill", "tip")] #
total_bill tip
1 16.99 1.01
2 10.34 1.66
> head(tips[-(1:2), -(1:2)]) #
sex smoker day time size
3 Male No Sun Dinner 3
4 Male No Sun Dinner 2
5 Female No Sun Dinner 4
6 Male No Sun Dinner 4
7 Male No Sun Dinner 2
8 Male No Sun Dinner 4
14. subset
> args(subset.data.frame)
function (x, subset, select, drop = FALSE, ...)
NULL
> (tips.vip <- subset(tips, total_bill > 30 & size == 2))
total_bill tip sex smoker day time size
84 32.68 5.00 Male Yes Thur Lunch 2
174 31.85 3.18 Male Yes Sun Dinner 2
176 32.90 3.11 Male Yes Sun Dinner 2
180 34.63 3.55 Male Yes Sun Dinner 2
185 40.55 3.00 Male Yes Sun Dinner 2
238 32.83 1.17 Male Yes Sat Dinner 2
> levels(tips.vip$smoker) #
[1] "No" "Yes"
> levels(droplevels(tips.vip)$smoker) #
[1] "Yes"
15. cbind, [, $, [[
> head(cbind(tips, type = ifelse(tips$tip < 2, " ", " ")), 3)
total_bill tip sex smoker day time size type
1 16.99 1.01 Female No Sun Dinner 2
2 10.34 1.66 Male No Sun Dinner 3
3 21.01 3.50 Male No Sun Dinner 3
> tips$type <- ifelse(tips$tip < 2, " ", " ")
> head(tips, 3)
total_bill tip sex smoker day time size type
1 16.99 1.01 Female No Sun Dinner 2
2 10.34 1.66 Male No Sun Dinner 3
3 21.01 3.50 Male No Sun Dinner 3
> data(tips) #
16. transform, within
> args(transform.data.frame)
function (`_data`, ...)
NULL
> head(transform(tips, type = ifelse(tips$tip < 2, " ", " ")), 3)
total_bill tip sex smoker day time size type
1 16.99 1.01 Female No Sun Dinner 2
2 10.34 1.66 Male No Sun Dinner 3
3 21.01 3.50 Male No Sun Dinner 3
> args(within.data.frame)
function (data, expr, ...)
NULL
> head(within(tips, { type <- c() # within
+ type[tip < 2] <- " "
+ type[tip >= 2] <- " " }), 3)
total_bill tip sex smoker day time size type
1 16.99 1.01 Female No Sun Dinner 2
2 10.34 1.66 Male No Sun Dinner 3
3 21.01 3.50 Male No Sun Dinner 3
17. subset
> # subset
> head(subset(tips, select = c(tip, sex, smoker)), 1)
tip sex smoker
1 1.01 Female No
> head(subset(tips, select = 2:4), 1)
tip sex smoker
1 1.01 Female No
> head(subset(tips, select = -c(total_bill, size, time, day)), 1)
tip sex smoker
1 1.01 Female No
> head(subset(tips, select = -c(1, 5:7)), 1)
tip sex smoker
1 1.01 Female No
> head(subset(tips, select = c(tip:smoker)), 1)
tip sex smoker
1 1.01 Female No
> head(subset(tips, select = -c(total_bill, day:size)), 1)
tip sex smoker
1 1.01 Female No
18. [, $, [[
> # NULL
> tips$size <- NULL
> head(tips, 3)
total_bill tip sex smoker day time
1 16.99 1.01 Female No Sun Dinner
2 10.34 1.66 Male No Sun Dinner
3 21.01 3.50 Male No Sun Dinner
> tips[["time"]] <- NULL
> head(tips, 3)
total_bill tip sex smoker day
1 16.99 1.01 Female No Sun
2 10.34 1.66 Male No Sun
3 21.01 3.50 Male No Sun
> tips["day"] <- NULL; tips[1] <- NULL
> head(tips, 3)
tip sex smoker
1 1.01 Female No
2 1.66 Male No
3 3.50 Male No
> data(tips)
19. transform, within
> # NULL
> head(transform(tips, total_bill = NULL, size = NULL, time = NULL, day =
NULL), 3)
tip sex smoker
1 1.01 Female No
2 1.66 Male No
3 3.50 Male No
> # rm
> head(within(tips, rm(total_bill, size, time, day)), 3)
tip sex smoker
1 1.01 Female No
2 1.66 Male No
3 3.50 Male No
20. > head(transform(tips, tip = 10), 3)
total_bill tip sex smoker day time size
1 16.99 10 Female No Sun Dinner 2
2 10.34 10 Male No Sun Dinner 3
3 21.01 10 Male No Sun Dinner 3
> head(within(tips, tip <- 10), 3)
total_bill tip sex smoker day time size
1 16.99 10 Female No Sun Dinner 2
2 10.34 10 Male No Sun Dinner 3
3 21.01 10 Male No Sun Dinner 3
> tips$tip <- 10
> head(tips, 3)
total_bill tip sex smoker day time size
1 16.99 10 Female No Sun Dinner 2
2 10.34 10 Male No Sun Dinner 3
3 21.01 10 Male No Sun Dinner 3
> data(tips)
21. order
> head(tips[order(tips$sex), ], 4) #
total_bill tip sex smoker day time size
1 16.99 1.01 Female No Sun Dinner 2
5 24.59 3.61 Female No Sun Dinner 4
12 35.26 5.00 Female No Sun Dinner 4
15 14.83 3.02 Female No Sun Dinner 2
> head(tips[order(tips$sex, decreasing = TRUE), ], 4) #
total_bill tip sex smoker day time size
2 10.34 1.66 Male No Sun Dinner 3
3 21.01 3.50 Male No Sun Dinner 3
4 23.68 3.31 Male No Sun Dinner 2
6 25.29 4.71 Male No Sun Dinner 4
> head(tips[order(tips$sex, tips$tip), ], 4) #
total_bill tip sex smoker day time size
68 3.07 1.00 Female Yes Sat Dinner 1
93 5.75 1.00 Female Yes Fri Dinner 2
112 7.25 1.00 Female No Sat Dinner 1
1 16.99 1.01 Female No Sun Dinner 2
22. data.frame
> (tip <- data.frame(date = sample(seq(as.Date("2011-11-09"), by = "day", len = 4)),
+ total_bill = sample(1:4 * 10),
+ tip = sample(1:4)))
date total_bill tip
1 2011-11-10 30 4
2 2011-11-12 40 2
3 2011-11-11 10 1
4 2011-11-09 20 3
> #
> tip <- tip[order(tip$date), ]
> transform(tip, total_bill = cumsum(total_bill), tip = cumsum(tip))
date total_bill tip
4 2011-11-09 20 3
1 2011-11-10 50 7
3 2011-11-11 60 8
2 2011-11-12 100 10
23. > head(tips[c("tip", "total_bill", "sex", "size", "time", "day", "smoker")])
tip total_bill sex size time day smoker
1 10 16.99 Female 2 Dinner Sun No
2 10 10.34 Male 3 Dinner Sun No
3 10 21.01 Male 3 Dinner Sun No
4 10 23.68 Male 2 Dinner Sun No
5 10 24.59 Female 4 Dinner Sun No
6 10 25.29 Male 4 Dinner Sun No
27. table
> args(table)
function (..., exclude = if (useNA == "no") c(NA, NaN), useNA = c("no",
"ifany", "always"), dnn = list.names(...), deparse.level = 1)
NULL
> table(subset(tips, select = c(sex, smoker)))
smoker
sex No Yes
Female 54 33
Male 97 60
> # 4
> table(subset(tips, select = c(sex, smoker, day, size)))
, , day = Fri, size = 1
smoker
sex No Yes
Female 0 0
Male 0 1
28. table
> args(addmargins)
function (A, margin = seq_along(dim(A)), FUN = sum, quiet = FALSE)
NULL
> #
> addmargins(table(subset(tips, select = c(sex, smoker))))
smoker
sex No Yes Sum
Female 54 33 87
Male 97 60 157
Sum 151 93 244
> #
> args(prop.table)
function (x, margin = NULL)
NULL
> prop.table(table(subset(tips, select = c(sex, smoker))))
smoker
sex No Yes
Female 0.2213115 0.1352459
Male 0.3975410 0.2459016
29. xtabs
> args(xtabs)
function (formula = ~., data = parent.frame(), subset, sparse = FALSE,
na.action, exclude = c(NA, NaN), drop.unused.levels = FALSE)
NULL
> #
> xtabs(~ sex + smoker, tips)
smoker
sex No Yes
Female 54 33
Male 97 60
> #
> xtabs(cbind(total_bill, tip) ~ sex + smoker, tips)
, , = total_bill
smoker
sex No Yes
Female 977.68 593.27
Male 1919.75 1337.07
30. aggregate
> args(aggregate.data.frame)
function (x, by, FUN, ..., simplify = TRUE)
NULL
> # FUN 1
> aggregate(tips[c("total_bill", "tip")], tips[c("sex", "day")], sum)
sex day total_bill tip
1 Female Fri 127.31 25.03
2 Male Fri 198.57 26.93
3 Female Sat 551.05 78.45
4 Male Sat 1227.35 181.95
5 Female Sun 357.70 60.61
6 Male Sun 1269.46 186.78
7 Female Thur 534.89 82.42
8 Male Thur 561.44 89.41
> # formula
> aggregate(cbind(total_bill, tip) ~ sex + day, tips, sum)
sex day total_bill tip
1 Female Fri 127.31 25.03
31. by
> args(by)
function (data, INDICES, FUN, ..., simplify = TRUE)
NULL
> # aggregate FUN OK
> (ret <- by(tips[c("total_bill", "tip")], tips[c("sex", "day")], range))
sex: Female
day: Fri
[1] 1.00 22.75
------------------------------------------------------------
sex: Male
day: Fri
[1] 1.50 40.17
> # data.frame
> cbind(expand.grid(dimnames(ret)), do.call(rbind, ret))
sex day 1 2
1 Female Fri 1.00 22.75
2 Male Fri 1.50 40.17
34. reshape
> args(reshape)
function (data, varying = NULL, v.names = NULL, timevar = "time",
idvar = "id", ids = 1L:NROW(data), times = seq_along(varying[[1L]]),
drop = NULL, direction, new.row.names = NULL, sep = ".",
split = if (sep == "") {
list(regexp = "[A-Za-z][0-9]", include = TRUE)
} else {
list(regexp = sep, include = FALSE, fixed = TRUE)
})
NULL
> head(reshape(tips, idvar = c("sex", "smoker", "time", "size"),
+ timevar = "day", drop = "total_bill", direction = "wide"))
sex smoker time size tip.Sun tip.Sat tip.Thur tip.Fri
1 Female No Dinner 2 1.01 2.75 3 3.25
2 Male No Dinner 3 1.66 3.35 NA NA
4 Male No Dinner 2 3.31 4.08 NA 3.50
5 Female No Dinner 4 3.61 2.45 NA NA
6 Male No Dinner 4 4.71 7.58 NA NA
17 Female No Dinner 3 1.67 3.07 NA NA
35. reshape
> # idvar timevar
> (a <- data.frame(a = c(1:3, 1), b = c(1:3, 1), c = 1:4))
a b c
1 1 1 1
2 2 2 2
3 3 3 3
4 1 1 4
> reshape(a, idvar = "a", timevar = "b", direction = "wide")
a c.1 c.2 c.3
1 1 1 NA NA
2 2 NA 2 NA
3 3 NA NA 3
36. merge
> #
> (user.type <- data.frame(sex = rep(c("Male", "Female"), each = 2),
+ smoker = c("Yes", "No"),
+ type = LETTERS[1:4]))
sex smoker type
1 Male Yes A
2 Male No B
3 Female Yes C
4 Female No D
> args(merge.data.frame)
function (x, y, by = intersect(names(x), names(y)), by.x = by,
by.y = by, all = FALSE, all.x = all, all.y = all, sort = TRUE,
suffixes = c(".x", ".y"), incomparables = NULL, ...)
NULL
> merge(tips, user.type, by = c("sex", "smoker"), sort = FALSE)[54:55, ]
sex smoker total_bill tip day time size type
54 Female No 10.65 1.50 Thur Lunch 2 D
55 Male No 10.27 1.71 Sun Dinner 2 B
41. reshape2
melt cast
melt
id
> head(tipsm <- melt(tips, measure.vars = c("total_bill", "tip")))
sex smoker day time size variable value
1 Female No Sun Dinner 2 total_bill 16.99
2 Male No Sun Dinner 3 total_bill 10.34
3 Male No Sun Dinner 3 total_bill 21.01
4 Male No Sun Dinner 2 total_bill 23.68
5 Female No Sun Dinner 4 total_bill 24.59
6 Male No Sun Dinner 4 total_bill 25.29
> levels(tipsm$variable)
[1] "total_bill" "tip"
42. melt
> args(melt.data.frame)
function (data, id.vars, measure.vars, variable_name = "variable",
na.rm = !preserve.na, preserve.na = TRUE, ...)
NULL
> # factor id
> head(melt(tips), 1)
Using sex, smoker, day, time as id variables
sex smoker day time variable value
1 Female No Sun Dinner total_bill 16.99
> # id measure
> head(melt(tips, id.vars = c("sex", "smoker", "day", "time", "size")), 1)
sex smoker day time size variable value
1 Female No Sun Dinner 2 total_bill 16.99
> # id measure
> head(melt(tips, id.vars = c("sex", "smoker", "day", "time", "size"),
+ measure.vars = "tip"), 1)
sex smoker day time size variable value
1 Female No Sun Dinner 2 tip 1.01
43. cast
formula fun.aggregate
> args(acast) # array acast
function (data, formula, fun.aggregate = NULL, ..., margins = NULL,
subset = NULL, fill = NULL, drop = TRUE, value_var = guess_value(data))
NULL
> args(dcast) # data.frame dcast
function (data, formula, fun.aggregate = NULL, ..., margins = NULL,
subset = NULL, fill = NULL, drop = TRUE, value_var = guess_value(data))
NULL
formula
...
.
acast hoge ~ fuga ~ piyo
※dcast 1 hoge ~ fuga + piyo
44. > tipsm <- melt(tips, measure.vars = c("total_bill", "tip"))
> acast(tipsm, sex ~ smoker, length)
No Yes
Female 108 64
Male 194 120
> #
> acast(tipsm, smoker ~ sex, length)
Female Male
No 108 194
Yes 64 120
> #
> acast(tipsm, sex ~ smoker, length, margins = TRUE)
No Yes (all)
Female 108 64 172
Male 194 120 314
(all) 302 184 486