Successfully reported this slideshow.

RではじめるTwitter解析

25,435 views

Published on

Rユーザ会 2011 で発表した資料です

Published in: Technology, News & Politics
  • Be the first to comment

RではじめるTwitter解析

  1. 1. RではじめるTwitter解析 Rユーザ会 2011 (2011/11/26) @a_bicky
  2. 2. • Takeshi Arabiki ‣ ‣ Twitter & : @a_bicky & id:a_bicky• R• http://d.hatena.ne.jp/a_bicky/
  3. 3. R Osaka.R #4 Tokyo.R #16 Tsukuba.R #9http://www.slideshare.net/abicky/twitterr http://www.slideshare.net/abicky/r-9034336 http://www.slideshare.net/abicky/r-10128090
  4. 4.
  5. 5. Twitter
  6. 6. Mentionmapp
  7. 7. Mentionmapp
  8. 8. Mentionmapp
  9. 9. http://twilog.org/ http://twitraq.userlocal.jp/http://whotwi.com/ http://tweetstats.com/
  10. 10. http://twilog.org/ http://twitraq.userlocal.jp/ Rhttp://whotwi.com/ http://tweetstats.com/
  11. 11. Twitter•• reshape2• ggplot2•
  12. 12. Twitter•• reshape2• ggplot2•
  13. 13. twitteR twitteR> library(twitteR) # twitteR> # (twitteR 0.99.15 )> Sys.setlocale("LC_TIME", "C")[1] "C"> # @a_bicky 3,200 RT> statuses <- userTimeline("a_bicky", n = 3200)
  14. 14. status> # R5> ls.str(statuses[[1]])created : POSIXct[1:1], format: "2011-11-23 22:16:24"favorited : logi FALSE ↑ UTCid : chr "139467359571296256"initFields : Formal class refMethodDef [package "methods"]with 5 slotsinitialize : Formal class refMethodDef [package "methods"]with 5 slotsreplyToSID : chr(0)replyToSN : chr(0)replyToUID : chr(0)screenName : chr "a_bicky" ! TwitterstatusSource : chr "<a href="http://sites.google.com/site/yorufukurou/" rel="nofollow">YoruFukurou</a>"text : chr " "truncated : logi FALSE ↑
  15. 15. > statusDF <- twListToDF(statuses)> str(statusDF, vec.len = 1)data.frame: 3159 obs. of 10 variables: $ text : chr " " ... ↑ $ favorited : logi FALSE ... $ replyToSN : logi NA ... $ created : POSIXct, format: "2011-11-23 22:16:24" ... $ truncated : logi FALSE ... ↑ UTC $ replyToSID : logi NA ... $ id : chr "139467359571296256" ... $ replyToUID : logi NA ... $ statusSource: chr "<a href="http://sites.google.com/site/yorufukurou/" rel="nofollow">YoruFukurou</a>" ... $ screenName : chr "a_bicky" ...
  16. 16. > wday.abb <- c("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")> statusDF <- within(statusDF, {+ attr(created, "tzone") <- "Asia/Tokyo" # JST+ statusSource <- factor(gsub("<a .*?>(.*?)</a>", "\\1",statusSource)) # HTML+ date <- factor(format(created, "%Y-%m-%d")) #+ hour <- NULL; month <- NULL; year <- NULL; wday <- NULL+ with(as.POSIXlt(created), {+ hour <<- factor(hour) #+ month <<- factor(mon + 1) #+ year <<- factor(year + 1900) #+ wday <<- factor((wday + 6) %% 7, labels = wday.abb) #+ })+ textLength <- nchar(text) #+ # , URL,+ cleanText <- removeSpecialStr(text)+ cleanTextLength <- nchar(cleanText) # URL+ })
  17. 17. > # Twitter> topSources <- names(head(sort(table(statusDF$statusSource),decreasing = TRUE), 5))> statusDF <- within(statusDF, {+ statusSource <- as.character(statusSource)+ statusSource[!statusSource %in% topSources] <- "other"+ #+ statusSource <- factor(statusSource, levels = names(sort(table(statusSource), dec = TRUE)))+ })
  18. 18. Twitter•• reshape2• ggplot2•
  19. 19. reshape2
  20. 20. Excel9 11 ”Twitter for iPhone”, ”YoruFukurou” Sat Mon 12 23
  21. 21. reshape2> library(reshape2)> acast(melt(statusDF, id.vars = c("statusSource", "wday", "month", "hour"),+ measure.vars = c("textLength")),+ month + statusSource ~ wday, mean,+ subset = .(statusSource %in% c("YoruFukurou", "Twitter for iPhone")+ & month %in% 9:11 & hour %in% 12:23+ & wday %in% c("Mon", "Sat", "Sun"))) Mon Sat Sun9_YoruFukurou 43 42.13333 54.764719_Twitter for iPhone 16 27.70000 20.5000010_YoruFukurou 61 41.70175 56.9833310_Twitter for iPhone NaN 27.00000 24.5000011_YoruFukurou 35 41.08197 57.3260911_Twitter for iPhone NaN NaN 32.00000
  22. 22. reshape2> library(reshape2)> acast(melt(statusDF, id.vars = c("statusSource", "wday", "month", "hour"),+ measure.vars = c("textLength")),+ month + statusSource ~ wday, mean,+ subset = .(statusSource %in% c("YoruFukurou", "Twitter for iPhone")+ & month %in% 9:11 & hour %in% 12:23+ & wday %in% c("Mon", "Sat", "Sun"))) Mon Sat Sun9_YoruFukurou 43 42.13333 54.764719_Twitter for iPhone 16 27.70000 20.5000010_YoruFukurou 61 41.70175 56.9833310_Twitter for iPhone NaN 27.00000 24.5000011_YoruFukurou 35 41.08197 57.3260911_Twitter for iPhone NaN NaN 32.00000
  23. 23. reshape2> library(reshape2)> acast(melt(statusDF, id.vars = c("statusSource", "wday", "month", "hour"),+ measure.vars = c("textLength")),+ month + statusSource ~ wday, mean,+ subset = .(statusSource %in% c("YoruFukurou", "Twitter for iPhone")+ & month %in% 9:11 & hour %in% 12:23+ & wday %in% c("Mon", "Sat", "Sun"))) Mon Sat Sun9_YoruFukurou 43 42.13333 54.764719_Twitter for iPhone 16 27.70000 20.5000010_YoruFukurou 61 41.70175 56.9833310_Twitter for iPhone NaN 27.00000 24.5000011_YoruFukurou 35 41.08197 57.3260911_Twitter for iPhone NaN NaN 32.00000 R
  24. 24. reshape2 melt melt cast meltcast> mstatus <- melt(statusDF,+ id.vars = c("statusSource", "wday", "year", "month", "hour", "date"),+ measure.vars = c("textLength", "cleanTextLength"))> mstatus[3157:3162, ] statusSource wday year month hour date variable value3157 web Sun 2011 3 20 2011-03-13 textLength 723158 web Sun 2011 3 16 2011-03-13 textLength 243159 web Sun 2011 3 14 2011-03-13 textLength 823160 YoruFukurou Wed 2011 11 1 2011-11-23 cleanTextLength 873161 YoruFukurou Wed 2011 11 1 2011-11-23 cleanTextLength 143162 YoruFukurou Wed 2011 11 1 2011-11-23 cleanTextLength 21 id
  25. 25. reshape2 cast castformula fun.aggregate> args(acast) # array acastfunction (data, formula, fun.aggregate = NULL, ..., margins = NULL, subset = NULL, fill = NULL, drop = TRUE, value_var = guess_value(data))NULL> args(dcast) # data.frame dcastfunction (data, formula, fun.aggregate = NULL, ..., margins = NULL, subset = NULL, fill = NULL, drop = TRUE, value_var = guess_value(data))NULLformula....acast hoge ~ fuga ~ piyo※dcast 1 hoge ~ fuga + piyo
  26. 26. > #> acast(mstatus, . ~ wday, length, subset = .(variable == "textLength")) ↑ cleanTextLength
  27. 27. > #> acast(mstatus, . ~ wday, length, subset = .(variable == "textLength")) Mon Tue Wed Thu Fri Sat Sun[1,] 408 360 258 294 334 801 704>
  28. 28. > #> acast(mstatus, . ~ wday, length, subset = .(variable == "textLength")) Mon Tue Wed Thu Fri Sat Sun[1,] 408 360 258 294 334 801 704>
  29. 29. > #> acast(mstatus, . ~ wday, length, subset = .(variable == "textLength")) Mon Tue Wed Thu Fri Sat Sun[1,] 408 360 258 294 334 801 704> #> acast(mstatus, hour ~ wday, length, subset = .(variable =="textLength"))
  30. 30. > #> acast(mstatus, . ~ wday, length, subset = .(variable == "textLength")) Mon Tue Wed Thu Fri Sat Sun[1,] 408 360 258 294 334 801 704> #> acast(mstatus, hour ~ wday, length, subset = .(variable =="textLength")) Mon Tue Wed Thu Fri Sat Sun0 65 69 26 46 46 49 401 48 19 11 15 27 44 372 31 24 6 16 17 23 173 27 19 4 11 14 17 104 4 15 1 7 4 5 75 5 11 1 4 3 4 56 4 14 3 6 9 8 1
  31. 31. > #> #> acast(mstatus, hour ~ wday + month, length, subset = .(variable =="textLength"))
  32. 32. > #> #> acast(mstatus, hour ~ wday + month, length, subset = .(variable =="textLength")) Mon_3 Mon_4 Mon_5 Mon_6 Mon_7 Mon_8 Mon_9 Mon_10 Mon_11 Tue_3 Tue_40 3 4 13 3 1 10 7 15 9 4 21 0 0 1 0 1 9 16 12 9 1 02 2 0 0 0 2 7 6 7 7 2 0
  33. 33. > #> #> acast(mstatus, hour ~ wday + month, length, subset = .(variable =="textLength")) Mon_3 Mon_4 Mon_5 Mon_6 Mon_7 Mon_8 Mon_9 Mon_10 Mon_11 Tue_3 Tue_40 3 4 13 3 1 10 7 15 9 4 21 0 0 1 0 1 9 16 12 9 1 02 2 0 0 0 2 7 6 7 7 2 0> # 3> acast(mstatus, hour ~ wday ~ month, length, subset = .(variable =="textLength"))
  34. 34. > #> #> acast(mstatus, hour ~ wday + month, length, subset = .(variable =="textLength")) Mon_3 Mon_4 Mon_5 Mon_6 Mon_7 Mon_8 Mon_9 Mon_10 Mon_11 Tue_3 Tue_40 3 4 13 3 1 10 7 15 9 4 21 0 0 1 0 1 9 16 12 9 1 02 2 0 0 0 2 7 6 7 7 2 0> # 3> acast(mstatus, hour ~ wday ~ month, length, subset = .(variable =="textLength")), , 3 Mon Tue Wed Thu Fri Sat Sun0 3 4 1 0 1 6 41 0 1 3 0 0 0 1
  35. 35. > #> #> acast(mstatus, hour ~ wday + month, length, subset = .(variable =="textLength")) Mon_3 Mon_4 Mon_5 Mon_6 Mon_7 Mon_8 Mon_9 Mon_10 Mon_11 Tue_3 Tue_40 3 4 13 3 1 10 7 15 9 4 21 0 0 1 0 1 9 16 12 9 1 02 2 0 0 0 2 7 6 7 7 2 0> # 3> acast(mstatus, hour ~ wday ~ month, length, subset = .(variable =="textLength")), , 3 3 Mon Tue Wed Thu Fri Sat Sun0 3 4 1 0 1 6 41 0 1 3 0 0 0 1
  36. 36. Twitter reshape2 1> #> dcast(mstatus, statusSource ~ .,+ function(x) list(c(mean = mean(x), sd = sd(x))),+ fill = list(c(mean = NaN, sd = NA)), ←+ subset = .(variable == "textLength"))
  37. 37. Twitter reshape2 1> #> dcast(mstatus, statusSource ~ .,+ function(x) list(c(mean = mean(x), sd = sd(x))),+ fill = list(c(mean = NaN, sd = NA)), ←+ subset = .(variable == "textLength")) statusSource NA1 YoruFukurou 47.51462, 32.579732 web 57.02720, 36.335343 Twitter for iPhone 33.42342, 23.064664 Twitter for Android 28.49048, 20.084575 Hatena 80.00000, 25.942126 other 52.58621, 33.12180>
  38. 38. Twitter reshape2 1> #> dcast(mstatus, statusSource ~ .,+ function(x) list(c(mean = mean(x), sd = sd(x))),+ fill = list(c(mean = NaN, sd = NA)), ←+ subset = .(variable == "textLength")) statusSource NA1 YoruFukurou 47.51462, 32.579732 web 57.02720, 36.335343 Twitter for iPhone 33.42342, 23.064664 Twitter for Android 28.49048, 20.084575 Hatena 80.00000, 25.942126 other 52.58621, 33.12180>
  39. 39. > # t> pc <- unlist(subset(statusDF,+ statusSource %in% c("YoruFukurou", "web"),+ textLength))> sp <- unlist(subset(statusDF,+ grepl("(iPhone|Android)", statusSource),+ textLength))> t.test(sp, pc, var.equal = FALSE) Welch Two Sample t-test !!data: sp and pct = -15.7921, df = 1588.246, p-value < 2.2e-16alternative hypothesis: true difference in means is not equal to 095 percent confidence interval: -19.85334 -15.46645sample estimates:mean of x mean of y 31.83945 49.49935
  40. 40. > # t> pc <- unlist(subset(statusDF,+ statusSource %in% c("YoruFukurou", "web"),+ textLength))> sp <- unlist(subset(statusDF,+ grepl("(iPhone|Android)", statusSource),+ textLength))> t.test(sp, pc, var.equal = FALSE) Welch Two Sample t-test !!data: sp and pct = -15.7921, df = 1588.246, p-value < 2.2e-16alternative hypothesis: true difference in means is not equal to 095 percent confidence interval: -19.85334 -15.46645sample estimates:mean of x mean of y 31.83945 49.49935
  41. 41. > extractScreenNames <- function(text, strict = TRUE) {+ if (strict) {+ # Twitter screen_name+ regex <- "(?:(?<!\\w)([@＠])((?>\\w+))(?![@＠])|[\\s\\S])"+ } else {+ # hoge@example.com+ regex <- "(?:([@＠])(\\w+)|[\\s\\S])"+ }+ screenNames <- gsub(regex, "\\1\\2", text, perl = TRUE)+ unique(unlist(strsplit(substring(screenNames, 2), "[@＠]")))+ }> screenNames <- unlist(lapply(statusDF$text, extractScreenNames))> head(sort(table(screenNames), decreasing = TRUE), 10) # Top 10screenNames naopr __gfx__ hirota_inoue mandy_44 ask_a_lie 105 85 51 47 40 ken_nishi nokuno yokkuns JinJin0613 kanon19_rie 39 39 33 20 20
  42. 42. Twitter•• reshape2• ggplot2•
  43. 43. ggplot2
  44. 44. ggplot2plot(statusDF$wday, col = "blue") ggplot2 qplot(wday, data = statusDF, fill = I("blue"), alpha = I(0.7), xlab = "", ylab = "")
  45. 45. ggplot2qplot(wday, data = statusDF, fill = statusSource, xlab = "", ylab = "")
  46. 46. ggplot2 qplot(wday, data = statusDF, facets = ~ statusSource, fill = I("blue"), alpha = I(0.7), xlab = "", ylab = "")qplot(wday, data = statusDF, fill = statusSource, xlab = "", ylab = "")
  47. 47. ggplot2 qplot(wday, data = statusDF, facets = ~ statusSource, fill = I("blue"), alpha = I(0.7), xlab = "", ylab = "")qplot(wday, data = statusDF, fill = statusSource, xlab = "", ylab = "")
  48. 48. qplot ggplot2> args(qplot)function (x, y = NULL, z = NULL, ..., data, facets = . ~ ., margins =FALSE, geom = "auto", stat = list(NULL), position = list(NULL), xlim = c(NA, NA), ylim = c(NA, NA), log = "", main = NULL, xlab = deparse(substitute(x)), ylab = deparse(substitute(y)), asp = NA)NULL
  49. 49. qplot geom geomarea:bar:histogram:line:point:
  50. 50. qplot geom geomarea:bar:histogram:line:point: qplot(as.integer(wday), data = statusDF, geom = "area", stat = "bin", fill = statusSource, xlab = "", ylab = "", binwidth = 1)
  51. 51. qplot geom geomarea:bar:histogram:line:point: qplot(wday, data = statusDF, geom = "bar", stat = "bin", fill = statusSource, xlab = "", ylab = "")
  52. 52. qplot geom geomarea:bar:histogram:line:point: qplot(as.integer(wday), data = statusDF, geom = "line", stat = "bin", colour = statusSource, xlab = "", ylab = "", binwidth = 1)
  53. 53. qplot geom geomarea:bar:histogram:line:point: qplot(wday, data = statusDF, geom = "point", stat = "bin", colour = statusSource, xlab = "", ylab = "")
  54. 54. qplot position position geomdodge :fill : 1jitter :stack :
  55. 55. qplot position position geomdodge :fill : 1jitter :stack : qplot(wday, data = statusDF, fill = statusSource, position = "dodge", xlab = "", ylab = "")
  56. 56. qplot position position geomdodge :fill : 1jitter :stack : qplot(wday, data = statusDF, fill = statusSource, position = "fill", xlab = "", ylab = "")
  57. 57. qplot position position geomdodge :fill : 1jitter :stack : qplot(wday, data = statusDF, fill = statusSource, position = "jitter", xlab = "", ylab = "")
  58. 58. qplot position position geomdodge :fill : 1jitter :stack : qplot(wday, data = statusDF, fill = statusSource, position = "stack", xlab = "", ylab = "")
  59. 59. qplot facets facets geom~ : 1 ~ 2: 1, 2※reshape2 1 ~ 2 + 3
  60. 60. qplot facets facets geom~ : 1 ~ 2: 1, 2※reshape2 1 ~ 2 + 3 qplot(wday, data = statusDF, xlab = "", ylab = "", facets = ~ statusSource)
  61. 61. qplot facets facets geom~ : 1 ~ 2: 1, 2※reshape2 1 ~ 2 + 3 qplot(wday, data = statusDF, xlab = "", ylab = "", facets = month ~ statusSource)
  62. 62. qplotalpha :colour (color) :fill :linetype :size :colour, fill, linetype statusSource fill = I("blue") I (AsIs)
  63. 63. qplotalpha :colour (color) :fill :linetype :size : qplot(wday, data = statusDF, xlab = "", ylab = "", alpha = as.integer(wday))
  64. 64. qplotalpha :colour (color) :fill :linetype :size : qplot(wday, data = statusDF, xlab = "", ylab = "", colour = statusSource)
  65. 65. qplotalpha :colour (color) :fill :linetype :size : qplot(wday, data = statusDF, xlab = "", ylab = "", fill = statusSource)
  66. 66. qplotalpha :colour (color) :fill :linetype :size : qplot(wday, data = statusDF, xlab = "", ylab = "", linetype = statusSource, colour = statusSource)
  67. 67. whotwi http://whotwi.com/
  68. 68. whotwi http://whotwi.com/
  69. 69. whotwi> # Twitter> # melt cast xtabs> cnt <- as.data.frame(xtabs(~ hour + wday + statusSource, statusDF))> head(cnt, 3) hour wday statusSource Freq1 0 Mon YoruFukurou 482 1 Mon YoruFukurou 383 2 Mon YoruFukurou 25
  70. 70. whotwi> # Twitter> # melt cast xtabs> cnt <- as.data.frame(xtabs(~ hour + wday + statusSource, statusDF))> head(cnt, 3) hour wday statusSource Freq1 0 Mon YoruFukurou 482 1 Mon YoruFukurou 383 2 Mon YoruFukurou 25> freqSources <- by(cnt, cnt[c("hour", "wday")], function(df) {+ #+ freqSource <- with(df, statusSource[order(Freq, decreasing = TRUE)[1]])+ cbind(df[1, c("hour", "wday")], freqSource)+ })> freqSources <- do.call(rbind, freqSources)> head(freqSources, 3) hour wday freqSource1 0 Mon YoruFukurou2 1 Mon YoruFukurou3 2 Mon YoruFukurou
  71. 71. whotwi> #> cntSum <- as.data.frame(xtabs(Freq ~ hour + wday, cnt))> head(cntSum, 3) hour wday Freq1 0 Mon 652 1 Mon 483 2 Mon 31
  72. 72. whotwi> #> cntSum <- as.data.frame(xtabs(Freq ~ hour + wday, cnt))> head(cntSum, 3) hour wday Freq1 0 Mon 652 1 Mon 483 2 Mon 31> #> data <- merge(cntSum, freqSources)> #> data$wday <- factor(data$wday, levels = rev(levels(data$wday)))> #> data$Freq <- log2(data$Freq)
  73. 73. whotwi> #> cntSum <- as.data.frame(xtabs(Freq ~ hour + wday, cnt))> head(cntSum, 3) hour wday Freq1 0 Mon 652 1 Mon 483 2 Mon 31> #> data <- merge(cntSum, freqSources)> #> data$wday <- factor(data$wday, levels = rev(levels(data$wday)))> #> data$Freq <- log2(data$Freq)> p <- qplot(hour, wday, data = data, xlab = "", ylab = "",+ geom = "point", colour = freqSource, size = Freq)> p # print(p)
  74. 74. whotwi> #> cntSum <- as.data.frame(xtabs(Freq ~ hour + wday, cnt))> head(cntSum, 3) hour wday Freq1 0 Mon 652 1 Mon 483 2 Mon 31> #> data <- merge(cntSum, freqSources)> #> data$wday <- factor(data$wday, levels = rev(levels(data$wday)))> #> data$Freq <- log2(data$Freq)> p <- qplot(hour, wday, data = data, xlab = "", ylab = "",+ geom = "point", colour = freqSource, size = Freq)> p # print(p)
  75. 75. whotwi
  76. 76. whotwi
  77. 77. whotwi> # whotwi theme> theme_whotwi <- function() {+ opts( #+ panel.background = theme_rect(fill = NA, colour = NA),+ #+ legend.key = theme_rect(fill = NA, colour = NA),+ #+ axis.ticks = theme_segment(colour = NA))+ }> p2 <- p + theme_whotwi() + scale_size(legend = FALSE) +scale_colour_hue(name = "")> p2
  78. 78. whotwi> # whotwi theme> theme_whotwi <- function() {+ opts( #+ panel.background = theme_rect(fill = NA, colour = NA),+ #+ legend.key = theme_rect(fill = NA, colour = NA),+ #+ axis.ticks = theme_segment(colour = NA))+ }> p2 <- p + theme_whotwi() + scale_size(legend = FALSE) +scale_colour_hue(name = "")> p2
  79. 79. whotwi
  80. 80. whotwi
  81. 81. whotwi
  82. 82. whotwi PC
  83. 83. whotwiPC PC
  84. 84. Twitter•• reshape2• ggplot2•
  85. 85. TweetSentiments
  86. 86. TweetSentimentsR
  87. 87. 1. RMeCab2.3.
  88. 88. RMeCab MeCab R> library(RMeCab)> (docDF(data.frame(" "), column = 1, type = 1))number of extracted terms = 5now making a data frame. wait a while! TERM POS1 POS2 Row11 12 13 14 25 2
  89. 89. http://www.lr.pi.titech.ac.jp/~takamura/pndic_ja.html : : :1 : : :0.999995 : : :0.999979 : : :0.999979 : : :0.999645 : : :0.999486 : : :0.999314...
  90. 90. > #> pndic <- read.table("http://www.lr.pi.titech.ac.jp/~takamura/pubs/pn_ja.dic",+ sep = ":",+ col.names = c("term", "kana", "pos", "value"),+ colClasses = c("character", "character", "factor","numeric"),+ fileEncoding = "Shift_JIS")> #> #> pndic2 <- aggregate(value ~ term + pos, pndic, mean)
  91. 91. > # pndic> pos <- unique(pndic2$pos)> tweetDF <- docDF(statusDF, column = "cleanText", type = 1, pos = pos)number of extracted terms = 7164now making a data frame. wait a while!> tweetDF[2900:2904, 1:5] TERM POS1 POS2 Row1 Row22900 0 02901 0 02902 0 02903 0 02904 0 0> # pndic> tweetDF <- subset(tweetDF, TERM %in% pndic2$term)> #> tweetDF <- merge(tweetDF, pndic2, by.x = c("TERM", "POS1"), by.y = c("term", "pos"))
  92. 92. > #> score <- colSums(tweetDF[4:(ncol(tweetDF) - 1)] * tweetDF$value)> #> sum(score > 0)[1] 117
  93. 93. > #> score <- colSums(tweetDF[4:(ncol(tweetDF) - 1)] * tweetDF$value)> #> sum(score > 0)[1] 117
  94. 94. > #> score <- colSums(tweetDF[4:(ncol(tweetDF) - 1)] * tweetDF$value)> #> sum(score > 0)[1] 117> #> sum(score < 0)[1] 2765
  95. 95. > #> score <- colSums(tweetDF[4:(ncol(tweetDF) - 1)] * tweetDF$value)> #> sum(score > 0)[1] 117> #> sum(score < 0)[1] 2765
  96. 96. > #> score <- colSums(tweetDF[4:(ncol(tweetDF) - 1)] * tweetDF$value)> #> sum(score > 0)[1] 117> #> sum(score < 0)[1] 2765> #> sum(score == 0)[1] 277
  97. 97. > #> score <- colSums(tweetDF[4:(ncol(tweetDF) - 1)] * tweetDF$value)> #> sum(score > 0)[1] 117> #> sum(score < 0)[1] 2765> #> sum(score == 0)[1] 277
  98. 98. > #> score <- colSums(tweetDF[4:(ncol(tweetDF) - 1)] * tweetDF$value)> #> sum(score > 0)[1] 117> #> sum(score < 0)[1] 2765> #> sum(score == 0)[1] 277
  99. 99. > table(ifelse(pndic$value > 0, "positive",+ ifelse(pndic$value == 0, "neutral", "negative")))negative neutral positive 49983 20 5122
  100. 100. > table(ifelse(pndic$value > 0, "positive",+ ifelse(pndic$value == 0, "neutral", "negative")))negative neutral positive 49983 20 5122
  101. 101. > m <- mean(score)> #> tweetType <- factor(ifelse(score > m, "positive",+ ifelse(score == m, "neutral", "negative")),+ levels = c("positive", "neutral", "negative"))> table(tweetType)tweetTypepositive neutral negative 1912 0 1247
  102. 102. > statusDF$tweetType <- droplevels(tweetType)> #> qplot(month, data = statusDF,+ geom = "bar", fill = tweetType, position = "fill")
  103. 103. > statusDF$tweetType <- droplevels(tweetType)> #> qplot(month, data = statusDF,+ geom = "bar", fill = tweetType, position = "fill")
  104. 104. > statusDF$tweetType <- droplevels(tweetType)> #> qplot(month, data = statusDF,+ geom = "bar", fill = tweetType, position = "fill")
  105. 105. > statusDF$tweetType <- droplevels(tweetType)> #> qplot(month, data = statusDF,+ geom = "bar", fill = tweetType, position = "fill")
  106. 106. twitteR• RJSONIO•• ID status ID• fav favorited TRUE• truncated TRUE• DM• status character factor
  107. 107. twitteR• RJSONIO•• ID status ID• fav favorited TRUE• truncated TRUE• DM• status character factor
  108. 108. OAuth ” ” twitteR -
  109. 109. • twitteR• reshape2 R• ggplot2• RMeCab R
  110. 110. • twitteR• reshape2 R• ggplot2• RMeCab R• PC•
  111. 111. https://github.com/abicky/rjpusers2011_abicky
  112. 112. status> statuses[[1]]$text[1] " "> statuses[[1]]$getText() #[1] " "> #> statuses[[1]]$text <- " "> statuses[[1]]$getText()[1] " "> statuses[[1]]$setText("ggrks") #> statuses[[1]]$getText()[1] "ggrks"> #> statuses[[1]]$getCreated()[1] "2011-11-23 22:16:24 UTC"
  113. 113. removeSpecialStrremoveSpecialStr <- function(text) { removeURL(removeHashTag(removeScreenName(text)))}
  114. 114. removeScreenName removeScreenName <- function(text, strict = TRUE) { if (strict) { regex <- "(?<!\\w)[@＠](?>\\w+)(?![@＠])" } else { regex <- "[@＠]\\w+" } gsub(regex, "", text, perl = TRUE)}
  115. 115. removeURL removeURL <- function(text, strict = TRUE) { if (strict) { regex <- "(?<![-.\\w#@=!\"/])https?://(?:[^:]+:.+@)?(?:[0-9A-Za-z][-0-9A-Za-z]*(?<!-)\\.)+[A-za-z]+(?:/[-\\w#%=+,.?!&~]*)*" } else { regex <- "https?://[-\\w#%=+,.?!&~/]+" } gsub(regex, "", text, perl = TRUE)}
  116. 116. removeHashTag removeHashTag <- function(text, strict = TRUE) { delimiters <- "\\s,.\u3000-\u3002\uFF01\uFF1F" # cf. http://nobu666.com/2011/07/13/914.html validJa <- "\u3041-\u3094\u3099-\u309C\u30A1-\u30FA\u30FC\u3400-\uD7A3\uFF10-\uFF19\uFF21-\uFF3A\uFF41-\uFF5A\uFF66-\uFF9E" if (strict) { regex <- sprintf("(^|[%s])(?:([#＃](?>[0-9]+)(?!\\w))|[#＃][\\w%s]+)", delimiters, validJa, validJa) } else { regex <- sprintf("[#＃][^%s]+", delimiters) } gsub(regex, "\\1\\2", text, perl = TRUE)}

×