R
    Tsukuba.R #9 (2011/11/12)
                    @a_bicky
• Takeshi Arabiki                         1

    ‣ Twitter: @a_bicky
    ‣        : id:a_bicky

•
                              R

•
                http://d.hatena.ne.jp/a_bicky/
• Takeshi Arabiki                          1

    ‣ Twitter: @a_bicky
    ‣        : id:a_bicky

•
                              R        SciPy



•
                http://d.hatena.ne.jp/a_bicky/
Osaka.R #4                                     Tokyo.R #16




http://www.slideshare.net/abicky/twitterr   http://www.slideshare.net/abicky/r-9034336
•
•             R                    8   ,9

•
•
•
•

    http://www.amazon.co.jp/gp/product/4431712186
reshape2
> install.packages("reshape2")
> library(reshape2)
> head(tips) #
  total_bill tip     sex smoker   day     time size
1      16.99 1.01 Female     No   Sun   Dinner    2
2      10.34 1.66   Male     No   Sun   Dinner    3
3      21.01 3.50   Male     No   Sun   Dinner    3
4      23.68 3.31   Male     No   Sun   Dinner    2
5      24.59 3.61 Female     No   Sun   Dinner    4
6      25.29 4.71   Male     No   Sun   Dinner    4
tips

  total_bill:
  tip:
  sex:          Male, Female
  smoker:                             Yes, No
  day:          Thur, Fri, Sat, Sun
  time:             Lunch, Dinner
  size:
•
•
    •   subset
    •   cbind, [, $, [[
    •   transform, within
•
    •   subset
    •   cbind, [, $, [[
    •   transform, within
•
•                           order
•
> class(tips)
[1] "data.frame"
> mode(tips)     # a data.frame is a list
[1] "list"
> head(tips[["total_bill"]])    # list
[1] 16.99 10.34 21.01 23.68 24.59 25.29
> head(tips$total_bill)         #
[1] 16.99 10.34 21.01 23.68 24.59 25.29
> head(tips["total_bill"])      #       data.frame
  total_bill
1      16.99
2      10.34
3      21.01
4      23.68
5      24.59
6      25.29
> head(tips[c("total_bill", "tip")]) #
  total_bill tip
1      16.99 1.01
2      10.34 1.66
3      21.01 3.50
4      23.68 3.31
5      24.59 3.61
6      25.29 4.71
> head(tips[[c("total_bill", "tip")]]) #
Error in .subset2(x, i, exact = exact) : subscript out of bounds
> tips[[c(1, 2)]] # same as tips[[1]][[2]]
[1] 10.34
> tips[1:2, 1:2]    #
  total_bill tip
1      16.99 1.01
2      10.34 1.66
> tips[1:2, c("total_bill", "tip")]   #
  total_bill tip
1      16.99 1.01
2      10.34 1.66
> head(tips[-(1:2), -(1:2)])    #
     sex smoker day   time size
3   Male     No Sun Dinner    3
4   Male     No Sun Dinner    2
5 Female     No Sun Dinner    4
6   Male     No Sun Dinner    4
7   Male     No Sun Dinner    2
8   Male     No Sun Dinner    4
subset


> args(subset.data.frame)
function (x, subset, select, drop = FALSE, ...)
NULL
> (tips.vip <- subset(tips, total_bill > 30 & size == 2))
     total_bill tip sex smoker day     time size
84        32.68 5.00 Male   Yes Thur Lunch      2
174       31.85 3.18 Male   Yes Sun Dinner      2
176       32.90 3.11 Male   Yes Sun Dinner      2
180       34.63 3.55 Male   Yes Sun Dinner      2
185       40.55 3.00 Male   Yes Sun Dinner      2
238       32.83 1.17 Male   Yes Sat Dinner      2
> levels(tips.vip$smoker) #
[1] "No" "Yes"
> levels(droplevels(tips.vip)$smoker)   #
[1] "Yes"
cbind, [, $, [[


> head(cbind(tips, type = ifelse(tips$tip < 2, "         ", "   ")), 3)
  total_bill tip     sex smoker day   time size     type
1      16.99 1.01 Female     No Sun Dinner    2
2      10.34 1.66   Male     No Sun Dinner      3
3      21.01 3.50   Male     No Sun Dinner      3
> tips$type <- ifelse(tips$tip < 2, "    ", "       ")
> head(tips, 3)
  total_bill tip     sex smoker day   time size     type
1      16.99 1.01 Female     No Sun Dinner    2
2      10.34 1.66   Male     No Sun Dinner      3
3      21.01 3.50   Male     No Sun Dinner      3
> data(tips)   #
transform, within

> args(transform.data.frame)
function (`_data`, ...)
NULL
> head(transform(tips, type = ifelse(tips$tip < 2, "         ", "   ")), 3)
  total_bill tip     sex smoker day   time size       type
1      16.99 1.01 Female     No Sun Dinner    2
2      10.34 1.66   Male     No Sun Dinner        3
3      21.01 3.50   Male     No Sun Dinner        3
> args(within.data.frame)
function (data, expr, ...)
NULL
> head(within(tips, { type <- c() # within
+                     type[tip < 2] <- "      "
+                     type[tip >= 2] <- "    " }), 3)
  total_bill tip     sex smoker day   time size       type
1      16.99 1.01 Female     No Sun Dinner    2
2      10.34 1.66   Male     No Sun Dinner        3
3      21.01 3.50   Male     No Sun Dinner        3
subset

> # subset
> head(subset(tips, select   = c(tip, sex, smoker)), 1)
   tip    sex smoker
1 1.01 Female     No
> head(subset(tips, select   = 2:4), 1)
   tip    sex smoker
1 1.01 Female     No
> head(subset(tips, select   = -c(total_bill, size, time, day)), 1)
   tip    sex smoker
1 1.01 Female     No
> head(subset(tips, select   = -c(1, 5:7)), 1)
   tip    sex smoker
1 1.01 Female     No
> head(subset(tips, select   = c(tip:smoker)), 1)
   tip    sex smoker
1 1.01 Female     No
> head(subset(tips, select   = -c(total_bill, day:size)), 1)
   tip    sex smoker
1 1.01 Female     No
[, $, [[

> # NULL
> tips$size <- NULL
> head(tips, 3)
  total_bill tip      sex smoker day   time
1      16.99 1.01 Female      No Sun Dinner
2      10.34 1.66    Male     No Sun Dinner
3      21.01 3.50    Male     No Sun Dinner
> tips[["time"]] <- NULL
> head(tips, 3)
  total_bill tip      sex smoker day
1      16.99 1.01 Female      No Sun
2      10.34 1.66    Male     No Sun
3      21.01 3.50    Male     No Sun
> tips["day"] <- NULL; tips[1] <- NULL
> head(tips, 3)
   tip    sex smoker
1 1.01 Female     No
2 1.66   Male     No
3 3.50   Male     No
> data(tips)
transform, within


> # NULL
> head(transform(tips, total_bill = NULL, size = NULL, time = NULL,
+                day = NULL), 3)
   tip     sex smoker
1 1.01 Female      No
2 1.66    Male     No
3 3.50    Male     No
> # rm
> head(within(tips, rm(total_bill, size, time, day)), 3)
   tip     sex smoker
1 1.01 Female      No
2 1.66    Male     No
3 3.50    Male     No
> head(transform(tips, tip = 10), 3)
  total_bill tip    sex smoker day   time size
1      16.99 10 Female      No Sun Dinner    2
2      10.34 10    Male     No Sun Dinner    3
3      21.01 10    Male     No Sun Dinner    3
> head(within(tips, tip <- 10), 3)
  total_bill tip    sex smoker day   time size
1      16.99 10 Female      No Sun Dinner    2
2      10.34 10    Male     No Sun Dinner    3
3      21.01 10    Male     No Sun Dinner    3
> tips$tip <- 10
> head(tips, 3)
  total_bill tip    sex smoker day   time size
1      16.99 10 Female      No Sun Dinner    2
2      10.34 10    Male     No Sun Dinner    3
3      21.01 10    Male     No Sun Dinner    3
> data(tips)
order


> head(tips[order(tips$sex), ], 4) #
    total_bill tip      sex smoker day    time size
1        16.99 1.01 Female      No Sun Dinner     2
5        24.59 3.61 Female      No Sun Dinner     4
12       35.26 5.00 Female      No Sun Dinner     4
15       14.83 3.02 Female      No Sun Dinner     2
> head(tips[order(tips$sex, decreasing = TRUE), ], 4)   #
   total_bill tip sex smoker day       time size
2       10.34 1.66 Male      No Sun Dinner     3
3       21.01 3.50 Male      No Sun Dinner     3
4       23.68 3.31 Male      No Sun Dinner     2
6       25.29 4.71 Male      No Sun Dinner     4
> head(tips[order(tips$sex, tips$tip), ], 4) #
     total_bill tip      sex smoker day     time size
68         3.07 1.00 Female     Yes Sat Dinner      1
93         5.75 1.00 Female     Yes Fri Dinner      2
112        7.25 1.00 Female      No Sat Dinner      1
1         16.99 1.01 Female      No Sun Dinner      2
data.frame


> (tip <- data.frame(date = sample(seq(as.Date("2011-11-09"), by = "day", len = 4)),
+                   total_bill = sample(1:4 * 10),
+                   tip = sample(1:4)))
        date total_bill tip
1 2011-11-10         30   4
2 2011-11-12         40   2
3 2011-11-11         10   1
4 2011-11-09         20   3
> #
> tip <- tip[order(tip$date), ]
> transform(tip, total_bill = cumsum(total_bill), tip = cumsum(tip))
        date total_bill tip
4 2011-11-09         20   3
1 2011-11-10         50   7
3 2011-11-11         60   8
2 2011-11-12        100 10
> head(tips[c("tip", "total_bill", "sex", "size", "time", "day", "smoker")])
  tip total_bill    sex size   time day smoker
1 10       16.99 Female    2 Dinner Sun     No
2 10       10.34   Male    3 Dinner Sun     No
3 10       21.01   Male    3 Dinner Sun     No
4 10       23.68   Male    2 Dinner Sun     No
5 10       24.59 Female    4 Dinner Sun     No
6 10       25.29   Male    4 Dinner Sun     No
•
•   table
•   xtabs
•           aggregate
•           by
> args(colSums)
function (x, na.rm = FALSE, dims = 1L)
NULL
> colSums(subset(tips, select = c(total_bill, tip)), na.rm = TRUE)
total_bill         tip
   4827.77      731.58
> args(colMeans)
function (x, na.rm = FALSE, dims = 1L)
NULL
> colMeans(subset(tips, select = c(total_bill, tip)), na.rm = TRUE)
total_bill         tip
 19.785943   2.998279
> # apply gives the same result as colSums
> apply(subset(tips, select = c(total_bill, tip)), 2, sum, na.rm = TRUE)
total_bill         tip
   4827.77      731.58
table

> args(table)
function (..., exclude = if    (useNA == "no") c(NA, NaN), useNA = c("no",
     "ifany", "always"), dnn   = list.names(...), deparse.level = 1)
NULL
> table(subset(tips, select    = c(sex, smoker)))
         smoker
sex       No Yes
  Female 54 33
  Male    97 60
>    # 4
> table(subset(tips, select    = c(sex, smoker, day, size)))
, , day = Fri, size = 1

        smoker
sex      No Yes
  Female 0     0
  Male    0    1
table

> args(addmargins)
function (A, margin = seq_along(dim(A)), FUN = sum, quiet = FALSE)
NULL
> #
> addmargins(table(subset(tips, select = c(sex, smoker))))
        smoker
sex       No Yes Sum
  Female 54 33 87
  Male    97 60 157
  Sum    151 93 244
> #
> args(prop.table)
function (x, margin = NULL)
NULL
> prop.table(table(subset(tips, select = c(sex, smoker))))
        smoker
sex             No       Yes
  Female 0.2213115 0.1352459
  Male   0.3975410 0.2459016
xtabs

> args(xtabs)
function (formula = ~., data = parent.frame(), subset, sparse = FALSE,
     na.action, exclude = c(NA, NaN), drop.unused.levels = FALSE)
NULL
> #
> xtabs(~ sex + smoker, tips)
         smoker
sex       No Yes
  Female 54 33
  Male    97 60
> #
> xtabs(cbind(total_bill, tip) ~ sex + smoker, tips)
, , = total_bill

        smoker
sex           No     Yes
  Female 977.68 593.27
  Male   1919.75 1337.07
aggregate

> args(aggregate.data.frame)
function (x, by, FUN, ..., simplify = TRUE)
NULL
> # FUN              1
> aggregate(tips[c("total_bill", "tip")], tips[c("sex", "day")], sum)
      sex day total_bill     tip
1 Female Fri      127.31 25.03
2    Male Fri     198.57 26.93
3 Female Sat      551.05 78.45
4    Male Sat    1227.35 181.95
5 Female Sun      357.70 60.61
6    Male Sun    1269.46 186.78
7 Female Thur     534.89 82.42
8    Male Thur    561.44 89.41
> # formula
> aggregate(cbind(total_bill, tip) ~ sex + day, tips, sum)
      sex day total_bill     tip
1 Female Fri      127.31 25.03
by

> args(by)
function (data, INDICES, FUN, ..., simplify = TRUE)
NULL
> # aggregate          FUN               OK
> (ret <- by(tips[c("total_bill", "tip")], tips[c("sex", "day")], range))
sex: Female
day: Fri
[1] 1.00 22.75
------------------------------------------------------------
sex: Male
day: Fri
[1] 1.50 40.17


> # data.frame
> cbind(expand.grid(dimnames(ret)), do.call(rbind, ret))
     sex day     1     2
1 Female Fri 1.00 22.75
2   Male Fri 1.50 40.17
•           reshape
•   merge
reshape

> args(reshape)
function (data, varying = NULL, v.names = NULL, timevar = "time",
     idvar = "id", ids = 1L:NROW(data), times = seq_along(varying[[1L]]),
     drop = NULL, direction, new.row.names = NULL, sep = ".",
     split = if (sep == "") {
          list(regexp = "[A-Za-z][0-9]", include = TRUE)
     } else {
          list(regexp = sep, include = FALSE, fixed = TRUE)
     })
NULL
> head(reshape(tips, idvar = c("sex", "smoker", "time", "size"),
+                timevar = "day", drop = "total_bill", direction = "wide"))
        sex smoker   time size tip.Sun tip.Sat tip.Thur tip.Fri
1 Female        No Dinner    2    1.01    2.75        3     3.25
2     Male      No Dinner    3    1.66    3.35       NA       NA
4     Male      No Dinner    2    3.31    4.08       NA     3.50
5 Female        No Dinner    4    3.61    2.45       NA       NA
6     Male      No Dinner    4    4.71    7.58       NA       NA
17 Female       No Dinner    3    1.67    3.07       NA       NA
reshape


> # idvar    timevar
> (a <- data.frame(a = c(1:3, 1), b = c(1:3, 1), c = 1:4))
  a b c
1 1 1 1
2 2 2 2
3 3 3 3
4 1 1 4
> reshape(a, idvar = "a", timevar = "b", direction = "wide")
  a c.1 c.2 c.3
1 1   1 NA NA
2 2 NA    2 NA
3 3 NA NA     3
merge

> #
> (user.type <- data.frame(sex = rep(c("Male", "Female"), each = 2),
+                           smoker = c("Yes", "No"),
+                           type = LETTERS[1:4]))
      sex smoker type
1    Male    Yes    A
2    Male     No    B
3 Female     Yes    C
4 Female      No    D
> args(merge.data.frame)
function (x, y, by = intersect(names(x), names(y)), by.x = by,
     by.y = by, all = FALSE, all.x = all, all.y = all, sort = TRUE,
     suffixes = c(".x", ".y"), incomparables = NULL, ...)
NULL
> merge(tips, user.type, by = c("sex", "smoker"), sort = FALSE)[54:55, ]
       sex smoker total_bill tip day     time size type
54 Female      No      10.65 1.50 Thur Lunch      2   D
55    Male     No      10.27 1.71 Sun Dinner      2   B
•
•   R
•   reshape2
•   melt
•   cast
•
Excel
R

> acast(melt(tips, id.var = c("sex", "smoker", "day"), measure.var = "tip"),
+        sex + smoker ~ day, sum, margins = TRUE)
                Fri    Sat    Sun   Thur (all)
Female_No      6.25 35.42 46.61 61.49 149.77
Female_Yes    18.78 43.03 14.00 18.93 94.74
Female_(all) 25.03 78.45 60.61 80.42 244.51
Male_No        5.00 104.21 133.96 58.83 302.00
Male_Yes      21.93 77.74 52.82 30.58 183.07
Male_(all)    26.93 181.95 186.78 89.41 485.07
(all)_(all) 51.96 260.40 247.39 169.83 729.58




                                                       reshape2
reshape2

  melt                                    cast

 melt
 id
> head(tipsm <- melt(tips, measure.vars = c("total_bill", "tip")))
     sex smoker day    time size   variable value
1 Female     No Sun Dinner     2 total_bill 16.99
2   Male     No Sun Dinner     3 total_bill 10.34
3   Male     No Sun Dinner     3 total_bill 21.01
4   Male     No Sun Dinner     2 total_bill 23.68
5 Female     No Sun Dinner     4 total_bill 24.59
6   Male     No Sun Dinner     4 total_bill 25.29
> levels(tipsm$variable)
[1] "total_bill" "tip"
melt
> args(melt.data.frame)
function (data, id.vars, measure.vars, variable_name = "variable",
     na.rm = !preserve.na, preserve.na = TRUE, ...)
NULL
> #                  factor     id
> head(melt(tips), 1)
Using sex, smoker, day, time as id variables
      sex smoker day   time   variable value
1 Female      No Sun Dinner total_bill 16.99
> # id     measure
> head(melt(tips, id.vars = c("sex", "smoker", "day", "time", "size")), 1)
      sex smoker day   time size   variable value
1 Female      No Sun Dinner    2 total_bill 16.99
> # id      measure
> head(melt(tips, id.vars = c("sex", "smoker", "day", "time", "size"),
+                  measure.vars = "tip"), 1)
      sex smoker day   time size variable value
1 Female      No Sun Dinner    2      tip 1.01
cast
formula                                    fun.aggregate
 > args(acast) #         array                       acast
 function (data, formula, fun.aggregate    = NULL, ..., margins = NULL,
      subset = NULL, fill = NULL, drop =   TRUE, value_var = guess_value(data))
 NULL
 > args(dcast) #         data.frame                          dcast
 function (data, formula, fun.aggregate    = NULL, ..., margins = NULL,
      subset = NULL, fill = NULL, drop =   TRUE, value_var = guess_value(data))
 NULL


     formula
 ...
 .
 acast     hoge ~ fuga ~ piyo
 ※dcast     1                              hoge ~ fuga + piyo
> tipsm <- melt(tips, measure.vars = c("total_bill", "tip"))
> acast(tipsm, sex ~ smoker, length)
         No Yes
Female 108 64
Male    194 120
> #
> acast(tipsm, smoker ~ sex, length)
     Female Male
No      108 194
Yes      64 120
> #
> acast(tipsm, sex ~ smoker, length, margins = TRUE)
         No Yes (all)
Female 108 64     172
Male    194 120   314
(all) 302 184     486
> # size
> acast(tipsm, smoker ~ sex + size, length)
    Female_1 Female_2 Female_3 Female_4 Female_5 Female_6 Male_1 Male_2 Male_3
No         4       66       18       14        2        4      0    114     34
Yes        2       48       10        4        0        0      2     82     14
    Male_4 Male_5 Male_6
No      38      4      4
Yes     18      4      0
> # 3
> acast(tipsm, smoker ~ sex ~ size, length)
, , 1

    Female Male
No       4    0
Yes      2    2
> #             sum
> acast(tipsm, sex ~ day, sum)
          Fri    Sat     Sun   Thur
Female 152.34  629.5  418.31 617.31
Male   225.50 1409.3 1456.24 650.85
(slide callout: each cell is total_bill + tip)
> # total_bill      tip             sum
> acast(tipsm, sex + variable ~ day, sum)
                      Fri      Sat      Sun  Thur
Female_total_bill 127.31 551.05 357.70 534.89
Female_tip          25.03    78.45    60.61 82.42
Male_total_bill   198.57 1227.35 1269.46 561.44
Male_tip            26.93 181.95 186.78 89.41
> #             tip      sum
> acast(tipsm, sex ~ day, sum, subset = .(variable == "tip"))
         Fri    Sat     Sun Thur
Female 25.03 78.45 60.61 82.42
Male   26.93 181.95 186.78 89.41
reshape2   aggregate table   xtabs
Rデータフレーム自由自在

Rデータフレーム自由自在

  • 1.
    R Tsukuba.R #9 (2011/11/12) @a_bicky
  • 2.
    • Takeshi Arabiki 1 ‣ Twitter: @a_bicky ‣ : id:a_bicky • R • http://d.hatena.ne.jp/a_bicky/
  • 3.
    • Takeshi Arabiki 1 ‣ Twitter: @a_bicky ‣ : id:a_bicky • R SciPy • http://d.hatena.ne.jp/a_bicky/
  • 4.
    Osaka.R #4 Tokyo.R #16 http://www.slideshare.net/abicky/twitterr http://www.slideshare.net/abicky/r-9034336
  • 5.
    • • R 8 ,9 • • • • http://www.amazon.co.jp/gp/product/4431712186
  • 7.
    reshape2 > install.packages("reshape2") > library(reshape2) >head(tips) # total_bill tip sex smoker day time size 1 16.99 1.01 Female No Sun Dinner 2 2 10.34 1.66 Male No Sun Dinner 3 3 21.01 3.50 Male No Sun Dinner 3 4 23.68 3.31 Male No Sun Dinner 2 5 24.59 3.61 Female No Sun Dinner 4 6 25.29 4.71 Male No Sun Dinner 4
  • 8.
    tips total_bill: tip: sex: Male, Female smoker: Yes, No day: Thur, Fri, Sat, Sun time: Lunch, Dinner size:
  • 10.
    • • • subset • cbind, [, $, [[ • transform, within • • subset • cbind, [, $, [[ • transform, within • • order •
  • 11.
    > class(tips) [1] "data.frame" >mode(tips) # data.frame list [1] "list" > head(tips[["total_bill"]]) # list [1] 16.99 10.34 21.01 23.68 24.59 25.29 > head(tips$total_bill) # [1] 16.99 10.34 21.01 23.68 24.59 25.29 > head(tips["total_bill"]) # data.frame total_bill 1 16.99 2 10.34 3 21.01 4 23.68 5 24.59 6 25.29
  • 12.
    > head(tips[c("total_bill", "tip")])# total_bill tip 1 16.99 1.01 2 10.34 1.66 3 21.01 3.50 4 23.68 3.31 5 24.59 3.61 6 25.29 4.71 > head(tips[[c("total_bill", "tip")]]) # Error in .subset2(x, i, exact = exact) : subscript out of bounds > tips[[c(1, 2)]] # tips[[1]][[2]] [1] 10.34
  • 13.
    > tips[1:2, 1:2] # total_bill tip 1 16.99 1.01 2 10.34 1.66 > tips[1:2, c("total_bill", "tip")] # total_bill tip 1 16.99 1.01 2 10.34 1.66 > head(tips[-(1:2), -(1:2)]) # sex smoker day time size 3 Male No Sun Dinner 3 4 Male No Sun Dinner 2 5 Female No Sun Dinner 4 6 Male No Sun Dinner 4 7 Male No Sun Dinner 2 8 Male No Sun Dinner 4
  • 14.
    subset > args(subset.data.frame) function (x,subset, select, drop = FALSE, ...) NULL > (tips.vip <- subset(tips, total_bill > 30 & size == 2)) total_bill tip sex smoker day time size 84 32.68 5.00 Male Yes Thur Lunch 2 174 31.85 3.18 Male Yes Sun Dinner 2 176 32.90 3.11 Male Yes Sun Dinner 2 180 34.63 3.55 Male Yes Sun Dinner 2 185 40.55 3.00 Male Yes Sun Dinner 2 238 32.83 1.17 Male Yes Sat Dinner 2 > levels(tips.vip$smoker) # [1] "No" "Yes" > levels(droplevels(tips.vip)$smoker) # [1] "Yes"
  • 15.
    cbind, [, $,[[ > head(cbind(tips, type = ifelse(tips$tip < 2, " ", " ")), 3) total_bill tip sex smoker day time size type 1 16.99 1.01 Female No Sun Dinner 2 2 10.34 1.66 Male No Sun Dinner 3 3 21.01 3.50 Male No Sun Dinner 3 > tips$type <- ifelse(tips$tip < 2, " ", " ") > head(tips, 3) total_bill tip sex smoker day time size type 1 16.99 1.01 Female No Sun Dinner 2 2 10.34 1.66 Male No Sun Dinner 3 3 21.01 3.50 Male No Sun Dinner 3 > data(tips) #
  • 16.
    transform, within > args(transform.data.frame) function(`_data`, ...) NULL > head(transform(tips, type = ifelse(tips$tip < 2, " ", " ")), 3) total_bill tip sex smoker day time size type 1 16.99 1.01 Female No Sun Dinner 2 2 10.34 1.66 Male No Sun Dinner 3 3 21.01 3.50 Male No Sun Dinner 3 > args(within.data.frame) function (data, expr, ...) NULL > head(within(tips, { type <- c() # within + type[tip < 2] <- " " + type[tip >= 2] <- " " }), 3) total_bill tip sex smoker day time size type 1 16.99 1.01 Female No Sun Dinner 2 2 10.34 1.66 Male No Sun Dinner 3 3 21.01 3.50 Male No Sun Dinner 3
  • 17.
    subset > # subset >head(subset(tips, select = c(tip, sex, smoker)), 1) tip sex smoker 1 1.01 Female No > head(subset(tips, select = 2:4), 1) tip sex smoker 1 1.01 Female No > head(subset(tips, select = -c(total_bill, size, time, day)), 1) tip sex smoker 1 1.01 Female No > head(subset(tips, select = -c(1, 5:7)), 1) tip sex smoker 1 1.01 Female No > head(subset(tips, select = c(tip:smoker)), 1) tip sex smoker 1 1.01 Female No > head(subset(tips, select = -c(total_bill, day:size)), 1) tip sex smoker 1 1.01 Female No
  • 18.
    [, $, [[ ># NULL > tips$size <- NULL > head(tips, 3) total_bill tip sex smoker day time 1 16.99 1.01 Female No Sun Dinner 2 10.34 1.66 Male No Sun Dinner 3 21.01 3.50 Male No Sun Dinner > tips[["time"]] <- NULL > head(tips, 3) total_bill tip sex smoker day 1 16.99 1.01 Female No Sun 2 10.34 1.66 Male No Sun 3 21.01 3.50 Male No Sun > tips["day"] <- NULL; tips[1] <- NULL > head(tips, 3) tip sex smoker 1 1.01 Female No 2 1.66 Male No 3 3.50 Male No > data(tips)
  • 19.
    transform, within > #NULL > head(transform(tips, total_bill = NULL, size = NULL, time = NULL, day = NULL), 3) tip sex smoker 1 1.01 Female No 2 1.66 Male No 3 3.50 Male No > # rm > head(within(tips, rm(total_bill, size, time, day)), 3) tip sex smoker 1 1.01 Female No 2 1.66 Male No 3 3.50 Male No
  • 20.
    > head(transform(tips, tip= 10), 3) total_bill tip sex smoker day time size 1 16.99 10 Female No Sun Dinner 2 2 10.34 10 Male No Sun Dinner 3 3 21.01 10 Male No Sun Dinner 3 > head(within(tips, tip <- 10), 3) total_bill tip sex smoker day time size 1 16.99 10 Female No Sun Dinner 2 2 10.34 10 Male No Sun Dinner 3 3 21.01 10 Male No Sun Dinner 3 > tips$tip <- 10 > head(tips, 3) total_bill tip sex smoker day time size 1 16.99 10 Female No Sun Dinner 2 2 10.34 10 Male No Sun Dinner 3 3 21.01 10 Male No Sun Dinner 3 > data(tips)
  • 21.
    order > head(tips[order(tips$sex), ],4) # total_bill tip sex smoker day time size 1 16.99 1.01 Female No Sun Dinner 2 5 24.59 3.61 Female No Sun Dinner 4 12 35.26 5.00 Female No Sun Dinner 4 15 14.83 3.02 Female No Sun Dinner 2 > head(tips[order(tips$sex, decreasing = TRUE), ], 4) # total_bill tip sex smoker day time size 2 10.34 1.66 Male No Sun Dinner 3 3 21.01 3.50 Male No Sun Dinner 3 4 23.68 3.31 Male No Sun Dinner 2 6 25.29 4.71 Male No Sun Dinner 4 > head(tips[order(tips$sex, tips$tip), ], 4) # total_bill tip sex smoker day time size 68 3.07 1.00 Female Yes Sat Dinner 1 93 5.75 1.00 Female Yes Fri Dinner 2 112 7.25 1.00 Female No Sat Dinner 1 1 16.99 1.01 Female No Sun Dinner 2
  • 22.
    data.frame > (tip <-data.frame(date = sample(seq(as.Date("2011-11-09"), by = "day", len = 4)), + total_bill = sample(1:4 * 10), + tip = sample(1:4))) date total_bill tip 1 2011-11-10 30 4 2 2011-11-12 40 2 3 2011-11-11 10 1 4 2011-11-09 20 3 > # > tip <- tip[order(tip$date), ] > transform(tip, total_bill = cumsum(total_bill), tip = cumsum(tip)) date total_bill tip 4 2011-11-09 20 3 1 2011-11-10 50 7 3 2011-11-11 60 8 2 2011-11-12 100 10
  • 23.
    > head(tips[c("tip", "total_bill","sex", "size", "time", "day", "smoker")]) tip total_bill sex size time day smoker 1 10 16.99 Female 2 Dinner Sun No 2 10 10.34 Male 3 Dinner Sun No 3 10 21.01 Male 3 Dinner Sun No 4 10 23.68 Male 2 Dinner Sun No 5 10 24.59 Female 4 Dinner Sun No 6 10 25.29 Male 4 Dinner Sun No
  • 25.
    • • table • xtabs • aggregate • by
  • 26.
    > args(colSums) function (x,na.rm = FALSE, dims = 1L) NULL > colSums(subset(tips, select = c(total_bill, tip)), na.rm = TRUE) total_bill tip 4827.77 731.58 > args(colMeans) function (x, na.rm = FALSE, dims = 1L) NULL > colMeans(subset(tips, select = c(total_bill, tip)), na.rm = TRUE) total_bill tip 19.785943 2.998279 > # apply colSums > apply(subset(tips, select = c(total_bill, tip)), 2, sum, na.rm = TRUE) total_bill tip 4827.77 731.58
  • 27.
    table > args(table) function (...,exclude = if (useNA == "no") c(NA, NaN), useNA = c("no", "ifany", "always"), dnn = list.names(...), deparse.level = 1) NULL > table(subset(tips, select = c(sex, smoker))) smoker sex No Yes Female 54 33 Male 97 60 > # 4 > table(subset(tips, select = c(sex, smoker, day, size))) , , day = Fri, size = 1 smoker sex No Yes Female 0 0 Male 0 1
  • 28.
    table > args(addmargins) function (A,margin = seq_along(dim(A)), FUN = sum, quiet = FALSE) NULL > # > addmargins(table(subset(tips, select = c(sex, smoker)))) smoker sex No Yes Sum Female 54 33 87 Male 97 60 157 Sum 151 93 244 > # > args(prop.table) function (x, margin = NULL) NULL > prop.table(table(subset(tips, select = c(sex, smoker)))) smoker sex No Yes Female 0.2213115 0.1352459 Male 0.3975410 0.2459016
  • 29.
    xtabs > args(xtabs) function (formula= ~., data = parent.frame(), subset, sparse = FALSE, na.action, exclude = c(NA, NaN), drop.unused.levels = FALSE) NULL > # > xtabs(~ sex + smoker, tips) smoker sex No Yes Female 54 33 Male 97 60 > # > xtabs(cbind(total_bill, tip) ~ sex + smoker, tips) , , = total_bill smoker sex No Yes Female 977.68 593.27 Male 1919.75 1337.07
  • 30.
    aggregate > args(aggregate.data.frame) function (x,by, FUN, ..., simplify = TRUE) NULL > # FUN 1 > aggregate(tips[c("total_bill", "tip")], tips[c("sex", "day")], sum) sex day total_bill tip 1 Female Fri 127.31 25.03 2 Male Fri 198.57 26.93 3 Female Sat 551.05 78.45 4 Male Sat 1227.35 181.95 5 Female Sun 357.70 60.61 6 Male Sun 1269.46 186.78 7 Female Thur 534.89 82.42 8 Male Thur 561.44 89.41 > # formula > aggregate(cbind(total_bill, tip) ~ sex + day, tips, sum) sex day total_bill tip 1 Female Fri 127.31 25.03
  • 31.
    by > args(by) function (data,INDICES, FUN, ..., simplify = TRUE) NULL > # aggregate FUN OK > (ret <- by(tips[c("total_bill", "tip")], tips[c("sex", "day")], range)) sex: Female day: Fri [1] 1.00 22.75 ------------------------------------------------------------ sex: Male day: Fri [1] 1.50 40.17 > # data.frame > cbind(expand.grid(dimnames(ret)), do.call(rbind, ret)) sex day 1 2 1 Female Fri 1.00 22.75 2 Male Fri 1.50 40.17
  • 33.
    reshape • merge
  • 34.
    reshape > args(reshape) function (data,varying = NULL, v.names = NULL, timevar = "time", idvar = "id", ids = 1L:NROW(data), times = seq_along(varying[[1L]]), drop = NULL, direction, new.row.names = NULL, sep = ".", split = if (sep == "") { list(regexp = "[A-Za-z][0-9]", include = TRUE) } else { list(regexp = sep, include = FALSE, fixed = TRUE) }) NULL > head(reshape(tips, idvar = c("sex", "smoker", "time", "size"), + timevar = "day", drop = "total_bill", direction = "wide")) sex smoker time size tip.Sun tip.Sat tip.Thur tip.Fri 1 Female No Dinner 2 1.01 2.75 3 3.25 2 Male No Dinner 3 1.66 3.35 NA NA 4 Male No Dinner 2 3.31 4.08 NA 3.50 5 Female No Dinner 4 3.61 2.45 NA NA 6 Male No Dinner 4 4.71 7.58 NA NA 17 Female No Dinner 3 1.67 3.07 NA NA
  • 35.
    reshape > # idvar timevar > (a <- data.frame(a = c(1:3, 1), b = c(1:3, 1), c = 1:4)) a b c 1 1 1 1 2 2 2 2 3 3 3 3 4 1 1 4 > reshape(a, idvar = "a", timevar = "b", direction = "wide") a c.1 c.2 c.3 1 1 1 NA NA 2 2 NA 2 NA 3 3 NA NA 3
  • 36.
    merge > # > (user.type<- data.frame(sex = rep(c("Male", "Female"), each = 2), + smoker = c("Yes", "No"), + type = LETTERS[1:4])) sex smoker type 1 Male Yes A 2 Male No B 3 Female Yes C 4 Female No D > args(merge.data.frame) function (x, y, by = intersect(names(x), names(y)), by.x = by, by.y = by, all = FALSE, all.x = all, all.y = all, sort = TRUE, suffixes = c(".x", ".y"), incomparables = NULL, ...) NULL > merge(tips, user.type, by = c("sex", "smoker"), sort = FALSE)[54:55, ] sex smoker total_bill tip day time size type 54 Female No 10.65 1.50 Thur Lunch 2 D 55 Male No 10.27 1.71 Sun Dinner 2 B
  • 38.
    • • R • reshape2 • melt • cast •
  • 39.
  • 40.
    R > acast(melt(tips, id.var= c("sex", "smoker", "day"), measure.var = "tip"), + sex + smoker ~ day, sum, margins = TRUE) Fri Sat Sun Thur (all) Female_No 6.25 35.42 46.61 61.49 149.77 Female_Yes 18.78 43.03 14.00 18.93 94.74 Female_(all) 25.03 78.45 60.61 80.42 244.51 Male_No 5.00 104.21 133.96 58.83 302.00 Male_Yes 21.93 77.74 52.82 30.58 183.07 Male_(all) 26.93 181.95 186.78 89.41 485.07 (all)_(all) 51.96 260.40 247.39 169.83 729.58 reshape2
  • 41.
    reshape2 melt cast melt id > head(tipsm <- melt(tips, measure.vars = c("total_bill", "tip"))) sex smoker day time size variable value 1 Female No Sun Dinner 2 total_bill 16.99 2 Male No Sun Dinner 3 total_bill 10.34 3 Male No Sun Dinner 3 total_bill 21.01 4 Male No Sun Dinner 2 total_bill 23.68 5 Female No Sun Dinner 4 total_bill 24.59 6 Male No Sun Dinner 4 total_bill 25.29 > levels(tipsm$variable) [1] "total_bill" "tip"
  • 42.
    melt > args(melt.data.frame) function (data,id.vars, measure.vars, variable_name = "variable", na.rm = !preserve.na, preserve.na = TRUE, ...) NULL > # factor id > head(melt(tips), 1) Using sex, smoker, day, time as id variables sex smoker day time variable value 1 Female No Sun Dinner total_bill 16.99 > # id measure > head(melt(tips, id.vars = c("sex", "smoker", "day", "time", "size")), 1) sex smoker day time size variable value 1 Female No Sun Dinner 2 total_bill 16.99 > # id measure > head(melt(tips, id.vars = c("sex", "smoker", "day", "time", "size"), + measure.vars = "tip"), 1) sex smoker day time size variable value 1 Female No Sun Dinner 2 tip 1.01
  • 43.
    cast formula fun.aggregate > args(acast) # array acast function (data, formula, fun.aggregate = NULL, ..., margins = NULL, subset = NULL, fill = NULL, drop = TRUE, value_var = guess_value(data)) NULL > args(dcast) # data.frame dcast function (data, formula, fun.aggregate = NULL, ..., margins = NULL, subset = NULL, fill = NULL, drop = TRUE, value_var = guess_value(data)) NULL formula ... . acast hoge ~ fuga ~ piyo ※dcast 1 hoge ~ fuga + piyo
  • 44.
    > tipsm <-melt(tips, measure.vars = c("total_bill", "tip")) > acast(tipsm, sex ~ smoker, length) No Yes Female 108 64 Male 194 120 > # > acast(tipsm, smoker ~ sex, length) Female Male No 108 194 Yes 64 120 > # > acast(tipsm, sex ~ smoker, length, margins = TRUE) No Yes (all) Female 108 64 172 Male 194 120 314 (all) 302 184 486
  • 45.
    > # size >acast(tipsm, smoker ~ sex + size, length) Female_1 Female_2 Female_3 Female_4 Female_5 Female_6 Male_1 Male_2 Male_3 No 4 66 18 14 2 4 0 114 34 Yes 2 48 10 4 0 0 2 82 14 Male_4 Male_5 Male_6 No 38 4 4 Yes 18 4 0 > # 3 > acast(tipsm, smoker ~ sex ~ size, length) , , 1 Female Male No 4 0 Yes 2 2
  • 46.
    > # sum > acast(tipsm, sex ~ day, sum) Fri Sat Sun Thur Female 152.34 629.5 418.31 617.31 total_bill tip Male 225.50 1409.3 1456.24 650.85 > # total_bill tip sum > acast(tipsm, sex + variable ~ day, sum) Fri Sat Sun Thur Female_total_bill 127.31 551.05 357.70 534.89 Female_tip 25.03 78.45 60.61 82.42 Male_total_bill 198.57 1227.35 1269.46 561.44 Male_tip 26.93 181.95 186.78 89.41 > # tip sum > acast(tipsm, sex ~ day, sum, subset = .(variable == "tip")) Fri Sat Sun Thur Female 25.03 78.45 60.61 82.42 Male 26.93 181.95 186.78 89.41
  • 48.
    reshape2 aggregate table xtabs