Data Visualization

1 of 98

http://nycdatascience.com/part4_en/
Data Visualization

2 of 98

http://nycdatascience.com/part4_en/
Data Visualization

3 of 98

http://nycdatascience.com/part4_en/
Data Visualization

4 of 98

http://nycdatascience.com/part4_en/

# Read the whitespace-delimited file data/anscombe.txt, taking column names
# from its first line, then drop the first column and preview the top rows.
data <- read.table('data/anscombe.txt', header = TRUE)
data <- data[, -1]
head(data)

  x1 x2 x3 x4   y1   y2    y3   y4
1 10 10 10  8 8.04 9.14  7.46 6.58
2  8  8  8  8 6.95 8.14  6.77 5.76
3 13 13 13  8 7.58 8.74 12.74 7.71
4  9  9  9  8 8.81 8.77  7.11 8.84
5 11 11 11  8 8.33 9.26  7.81 8.47
6 14 14 14  8 9.96 8.10  8.84 7.04
Data Visualization

5 of 98

http://nycdatascience.com/part4_en/

# Mean of every column of `data`.
colMeans(data)

x1 x2 x3 x4 y1 y2 y3 y4
9.0 9.0 9.0 9.0 7.5 7.5 7.5 7.5

# Correlate each of columns 1-4 with the column four positions to its right
# (1 with 5, 2 with 6, ...). vapply() pins the result to exactly one numeric
# value per pair instead of letting the return type depend on the input.
vapply(1:4, function(i) cor(data[, i], data[, i + 4]), numeric(1))

[1] 0.816 0.816 0.816 0.817
Data Visualization

6 of 98

http://nycdatascience.com/part4_en/
Data Visualization

7 of 98

http://nycdatascience.com/part4_en/
Data Visualization

8 of 98

http://nycdatascience.com/part4_en/
Data Visualization

9 of 98

http://nycdatascience.com/part4_en/
Data Visualization

10 of 98

http://nycdatascience.com/part4_en/
Data Visualization

11 of 98

http://nycdatascience.com/part4_en/

# Scatter plot of `dist` (y axis) against `speed` (x axis) from the cars data frame.
plot(cars$dist~cars$speed)
Data Visualization

12 of 98

http://nycdatascience.com/part4_en/

# Plot the `dist` values in row order, joined by a line (type = 'l').
plot(cars$dist,type='l')
Data Visualization

13 of 98

http://nycdatascience.com/part4_en/

# Plot the `dist` values in row order as vertical lines (type = 'h').
plot(cars$dist,type='h')
Data Visualization

14 of 98

http://nycdatascience.com/part4_en/

# Histogram of the `dist` column of the cars data frame.
hist(cars$dist)
Data Visualization

15 of 98

http://nycdatascience.com/part4_en/

library(lattice)
# Draw 50 values from {1, 2, 3} with replacement, then chart how often each
# value occurred.
num <- sample(1:3, size = 50, replace = TRUE)
barchart(table(num))
Data Visualization

16 of 98

http://nycdatascience.com/part4_en/

# Lattice quantile plot of 100 random draws from a standard normal distribution.
qqmath(rnorm(100))
Data Visualization

17 of 98

http://nycdatascience.com/part4_en/

# Strip plot of sepal length, conditioned on species; layout = c(1, 3) stacks
# the three panels in a single column.
stripplot(
  ~ Sepal.Length | Species,
  data = iris,
  layout = c(1, 3)
)
Data Visualization

18 of 98

http://nycdatascience.com/part4_en/

# Kernel density of sepal length with one curve per species in a shared panel;
# the individual observations are not drawn along the axis.
densityplot(
  ~ Sepal.Length,
  groups = Species,
  data = iris,
  plot.points = FALSE
)
Data Visualization

19 of 98

http://nycdatascience.com/part4_en/

# Box-and-whisker plot of sepal length, one horizontal box per species.
bwplot(
  Species ~ Sepal.Length,
  data = iris
)
Data Visualization

20 of 98

http://nycdatascience.com/part4_en/

# Scatter plot of sepal width against sepal length, with points grouped
# (and therefore styled) by species.
xyplot(
  Sepal.Width ~ Sepal.Length,
  groups = Species,
  data = iris
)
Data Visualization

21 of 98

http://nycdatascience.com/part4_en/

# Scatter-plot matrix of the first four columns of iris.
splom(iris[1:4])
Data Visualization

22 of 98

http://nycdatascience.com/part4_en/

# Histogram of sepal length, one panel per species, stacked in a single column.
histogram(
  ~ Sepal.Length | Species,
  data = iris,
  layout = c(1, 3)
)
Data Visualization

23 of 98

http://nycdatascience.com/part4_en/

library(plyr)
# Surface function evaluated on the grid below: the product of a sine of a
# quadratic form in x and y and a cosine involving exp(y).
func3d <- function(x, y) {
  sin(x^2 / 2 - y^2 / 4) * cos(2 * x - exp(y))
}
# 30 evenly spaced values on [0, 2] for each axis (length.out spelled out
# rather than relying on partial matching of `length`).
vec1 <- vec2 <- seq(0, 2, length.out = 30)
# Every (x, y) combination of the two axes, one combination per row.
para <- expand.grid(x = vec1, y = vec2)
# Call func3d once per row of `para`, passing the row's columns as arguments.
result6 <- mdply(.data = para, .fun = func3d)
Data Visualization

24 of 98

http://nycdatascience.com/part4_en/

library(lattice)
# 3-D wireframe of column V1 of result6 over the (x, y) grid. Axes show tick
# labels instead of arrows, the surface is colour-draped, and no colour key
# is drawn.
wireframe(V1 ~ x * y, data = result6,
          scales = list(arrows = FALSE),
          drape = TRUE, colorkey = FALSE)
Data Visualization

25 of 98

http://nycdatascience.com/part4_en/

library(ggplot2)
# Start from the data and the x/y mapping, then add a point layer to get a
# scatter plot of hwy against cty.
p <- ggplot(data = mpg, mapping = aes(x = cty, y = hwy))
p <- p + geom_point()
print(p)
Data Visualization

26 of 98

http://nycdatascience.com/part4_en/

# Print a text description of the plot object p (its data, mapping, faceting and layers).
summary(p)

data: manufacturer, model, displ, year, cyl, trans, drv, cty, hwy, fl, class [234x11]
mapping: x = cty, y = hwy
faceting: facet_null()
-----------------------------------
geom_point: na.rm = FALSE
stat_identity:
position_identity: (width = NULL, height = NULL)
Data Visualization

27 of 98

http://nycdatascience.com/part4_en/

# Scatter plot of hwy against cty; the colour mapping is set on the plot
# itself, with year converted to a factor so it is treated as discrete.
p <- ggplot(
  data = mpg,
  mapping = aes(x = cty, y = hwy, colour = factor(year))
) +
  geom_point()
print(p)
Data Visualization

28 of 98

http://nycdatascience.com/part4_en/

# Same plot-level colour mapping as a scatter by year, but with a smoothing
# layer instead of points, so the smoother inherits the colour grouping.
p <- ggplot(
  data = mpg,
  mapping = aes(x = cty, y = hwy, colour = factor(year))
) +
  geom_smooth()
print(p)
Data Visualization

29 of 98

http://nycdatascience.com/part4_en/

# The colour mapping lives only in the point layer, so the smoother is fitted
# to all observations together rather than once per year.
p <- ggplot(data = mpg, mapping = aes(x = cty, y = hwy)) +
  geom_point(mapping = aes(colour = factor(year))) +
  geom_smooth()
Data Visualization

30 of 98

http://nycdatascience.com/part4_en/
Data Visualization

31 of 98

http://nycdatascience.com/part4_en/

# Points coloured by year plus one overall smoother; the manual colour scale
# replaces the default palette with 'blue2' and 'red4'.
p <- ggplot(data = mpg, mapping = aes(x = cty, y = hwy)) +
  geom_point(mapping = aes(colour = factor(year))) +
  geom_smooth() +
  scale_colour_manual(values = c('blue2', 'red4'))
Data Visualization

32 of 98

http://nycdatascience.com/part4_en/
Data Visualization

33 of 98

http://nycdatascience.com/part4_en/

# As before (coloured points, smoother, manual palette), now split into one
# facet per year with the facets arranged in a single column.
p <- ggplot(data = mpg, mapping = aes(x = cty, y = hwy)) +
  geom_point(mapping = aes(colour = factor(year))) +
  geom_smooth() +
  scale_colour_manual(values = c('blue2', 'red4')) +
  facet_wrap(~ year, ncol = 1)
Data Visualization

34 of 98

http://nycdatascience.com/part4_en/
Data Visualization

35 of 98

http://nycdatascience.com/part4_en/

# Bubble chart of hwy against cty: colour encodes vehicle class, point size
# encodes displacement, and each year gets its own facet (single column).
p <- ggplot(data = mpg, mapping = aes(x = cty, y = hwy)) +
  # Semi-transparent, jittered points so overlapping observations stay visible.
  geom_point(aes(colour = class, size = displ),
             alpha = 0.5, position = "jitter") +
  geom_smooth() +
  # Map displacement onto point sizes between 4 and 10.
  scale_size_continuous(range = c(4, 10)) +
  facet_wrap(~ year, ncol = 1) +
  # opts(title = ...) no longer exists in ggplot2; the plot title is set
  # through labs() together with the axis and legend titles.
  labs(title = 'Vehicle model and fuel consumption',
       y = 'Highway miles per gallon',
       x = 'Urban miles per gallon',
       size = 'Displacement',
       colour = 'Model')
Data Visualization

36 of 98

http://nycdatascience.com/part4_en/
Data Visualization

37 of 98

http://nycdatascience.com/part4_en/

# Jittered, semi-transparent points coloured by year and sized by
# displacement, with an overall smoother, a two-colour manual palette and
# point sizes scaled to the range 4-10.
p <- ggplot(data = mpg, mapping = aes(x = cty, y = hwy)) +
  geom_point(
    aes(colour = factor(year), size = displ),
    alpha = 0.5,
    position = "jitter"
  ) +
  stat_smooth() +
  scale_colour_manual(values = c('steelblue', 'red4')) +
  scale_size_continuous(range = c(4, 10))
Data Visualization

38 of 98

http://nycdatascience.com/part4_en/
Data Visualization

39 of 98

http://nycdatascience.com/part4_en/

library(ggplot2)
# Histogram of sepal length using geom_histogram()'s default binning.
p <- ggplot(data = iris, mapping = aes(x = Sepal.Length))
p <- p + geom_histogram()
print(p)
Data Visualization

40 of 98

http://nycdatascience.com/part4_en/

# Histogram of sepal length with explicit bin width and bar styling.
p <- ggplot(iris, aes(x = Sepal.Length)) +
  geom_histogram(
    binwidth = 0.1,    # width of each bin
    fill = 'skyblue',  # bar fill colour
    color = 'black'    # bar border colour
  )
Data Visualization

41 of 98

http://nycdatascience.com/part4_en/
Data Visualization

42 of 98

http://nycdatascience.com/part4_en/

# Density-scaled histogram of sepal length with a dashed kernel density curve
# on top; adjust = 2 doubles the default bandwidth for a smoother curve.
# NOTE(review): recent ggplot2 releases deprecate the ..density.. notation in
# favour of after_stat(density); kept as written here.
p <- ggplot(iris, aes(x = Sepal.Length)) +
  geom_histogram(
    aes(y = ..density..),
    fill = 'skyblue',
    colour = 'black'
  ) +
  geom_density(colour = 'black', linetype = 2, adjust = 2)
Data Visualization

43 of 98

http://nycdatascience.com/part4_en/
Data Visualization

44 of 98

http://nycdatascience.com/part4_en/

# Density-scaled histogram overlaid with three kernel density curves that
# differ only in bandwidth multiplier: 0.5 (solid), 1 (dashed), 2 (dotted).
p <- ggplot(iris, aes(x = Sepal.Length)) +
  # y is mapped to the computed density so bars and curves share one scale.
  geom_histogram(
    aes(y = ..density..),
    fill = 'gray60',
    colour = 'gray'
  ) +
  geom_density(colour = 'black', linetype = 1, adjust = 0.5) +
  geom_density(colour = 'black', linetype = 2, adjust = 1) +
  geom_density(colour = 'black', linetype = 3, adjust = 2)
Data Visualization

45 of 98

http://nycdatascience.com/part4_en/
Data Visualization

46 of 98

http://nycdatascience.com/part4_en/

# Overlaid kernel density curves of sepal length, one semi-transparent filled
# curve per species.
# NOTE(review): this line was cut off after color='gra in the slide export; it
# is completed with the shortest valid call (color = 'gray') - confirm the
# exact colour and any further layers against the original deck.
p <- ggplot(iris, aes(x = Sepal.Length, fill = Species)) +
  geom_density(alpha = 0.5, color = 'gray')
print(p)
Data Visualization

47 of 98

http://nycdatascience.com/part4_en/

# Box plot of sepal length per species, each box filled by its species.
p <- ggplot(iris, aes(x = Species, y = Sepal.Length, fill = Species))
p <- p + geom_boxplot()
print(p)
Data Visualization

48 of 98

http://nycdatascience.com/part4_en/

# Violin plot of sepal length per species, each violin filled by its species.
p <- ggplot(iris, aes(x = Species, y = Sepal.Length, fill = Species))
p <- p + geom_violin()
print(p)
Data Visualization

49 of 98

http://nycdatascience.com/part4_en/

# Gray, semi-transparent violins with the individual observations drawn on
# top as dots binned along y and stacked symmetrically around the centre;
# the dots keep the per-species fill from the plot-level mapping.
p <- ggplot(iris, aes(x = Species, y = Sepal.Length, fill = Species)) +
  geom_violin(fill = 'gray', alpha = 0.5) +
  geom_dotplot(binaxis = "y", stackdir = "center")
print(p)
Data Visualization

50 of 98

http://nycdatascience.com/part4_en/
Data Visualization

51 of 98

http://nycdatascience.com/part4_en/

# Bar chart counting the rows of mpg in each class.
p <- ggplot(mpg, aes(x = class))
p <- p + geom_bar()
print(p)
Data Visualization

52 of 98

http://nycdatascience.com/part4_en/

# Convert year to a factor so it can drive a discrete fill scale.
mpg$year <- factor(mpg$year)
# Bar chart of counts per class, filled by year, with black bar outlines.
p <- ggplot(mpg, aes(x = class, fill = year)) +
  geom_bar(colour = 'black')
Data Visualization

53 of 98

http://nycdatascience.com/part4_en/
Data Visualization

54 of 98

http://nycdatascience.com/part4_en/

# Counts per class filled by year, with the bars for each year placed side by
# side (dodged) instead of stacked.
p <- ggplot(mpg, aes(x = class, fill = year)) +
  geom_bar(
    colour = 'black',
    position = position_dodge()
  )
Data Visualization

55 of 98

http://nycdatascience.com/part4_en/
Data Visualization

56 of 98

http://nycdatascience.com/part4_en/

# Pie chart: a single full-width stacked bar of class counts whose count (y)
# axis is mapped to the angle by the polar coordinate system.
p <- ggplot(mpg, aes(x = factor(1), fill = factor(class))) +
  geom_bar(width = 1) +
  coord_polar(theta = "y")
Data Visualization

57 of 98

http://nycdatascience.com/part4_en/
Data Visualization

58 of 98

http://nycdatascience.com/part4_en/

set.seed(1)
# Simulate 100 wind directions between 0 and 360 and cut them into 16 intervals.
dir <- cut_interval(runif(100, min = 0, max = 360), n = 16)
# Simulate 100 wind speeds from a gamma distribution (shape 15) and cut them
# into 4 intensity classes.
mag <- cut_interval(rgamma(100, shape = 15), n = 4)
sample <- data.frame(dir = dir, mag = mag)
# Direction goes on the x axis, frequency on the y axis and speed class on the
# fill colour; coord_polar() then transforms the stacked bars to polar
# coordinates.
p <- ggplot(sample, aes(x = dir, fill = mag)) +
  geom_bar() +
  coord_polar()
Data Visualization

59 of 98

http://nycdatascience.com/part4_en/
Data Visualization

60 of 98

http://nycdatascience.com/part4_en/
Data Visualization

61 of 98

http://nycdatascience.com/part4_en/
Data Visualization

62 of 98

http://nycdatascience.com/part4_en/

# Read data/soft_impact.csv, taking column names from its first line.
data <- read.csv('data/soft_impact.csv', header = TRUE)
library(reshape2)
# Reshape to long form: Year stays as the identifier and every other column
# becomes a (variable, value) pair. id.vars is spelled out rather than relying
# on partial matching of `id`.
data.melt <- melt(data, id.vars = 'Year')
# Stacked area chart rescaled to proportions by position_fill(), one band per
# variable, with thin black outlines and a ColorBrewer fill palette.
p <- ggplot(data.melt, aes(x = Year, y = value,
                           group = variable, fill = variable)) +
  geom_area(color = 'black', size = 0.3,
            position = position_fill()) +
  scale_fill_brewer()
Data Visualization

63 of 98

http://nycdatascience.com/part4_en/
Data Visualization

64 of 98

http://nycdatascience.com/part4_en/
Data Visualization

65 of 98

http://nycdatascience.com/part4_en/

# Plain scatter plot of hwy against cty.
p <- ggplot(data = mpg, mapping = aes(x = cty, y = hwy))
p <- p + geom_point()
print(p)
Data Visualization

66 of 98

http://nycdatascience.com/part4_en/

# Convert year to a factor so the colour scale is discrete.
mpg$year <- factor(mpg$year)
# Scatter plot of hwy against cty with points coloured by year.
p <- ggplot(data = mpg, aes(x = cty, y = hwy)) +
  geom_point(aes(colour = year))
print(p)
Data Visualization

67 of 98

http://nycdatascience.com/part4_en/

# Convert year to a factor so it can be mapped to discrete colours and shapes.
mpg$year <- factor(mpg$year)
# Scatter plot of hwy against cty; year is encoded twice, by colour and shape.
p <- ggplot(data = mpg, aes(x = cty, y = hwy)) +
  geom_point(aes(colour = year, shape = year))
print(p)
Data Visualization

68 of 98

http://nycdatascience.com/part4_en/

# Scatter plot of hwy against cty coloured by year, with semi-transparent
# points.
# NOTE(review): the slide export cut this line off after `position`; it is
# completed as position = "jitter", which is how the same point layer is
# written in the snippet that follows it in this deck - confirm against the
# original slide.
p <- ggplot(data = mpg, aes(x = cty, y = hwy)) +
  geom_point(aes(color = year), alpha = 0.5, position = "jitter")
print(p)
Data Visualization

69 of 98

http://nycdatascience.com/part4_en/

# Jittered, semi-transparent points coloured by year, plus a single linear
# model fit through all observations.
p <- ggplot(data = mpg, aes(x = cty, y = hwy)) +
  geom_point(
    aes(colour = year),
    alpha = 0.5,
    position = "jitter"
  ) +
  geom_smooth(method = 'lm')
print(p)
Data Visualization

70 of 98

http://nycdatascience.com/part4_en/

# Jittered points coloured by year and sized by displacement (sizes scaled to
# the range 4-10), plus a single linear model fit.
p <- ggplot(data = mpg, aes(x = cty, y = hwy)) +
  geom_point(
    aes(colour = year, size = displ),
    alpha = 0.5,
    position = "jitter"
  ) +
  geom_smooth(method = 'lm') +
  scale_size_continuous(range = c(4, 10))
Data Visualization

71 of 98

http://nycdatascience.com/part4_en/
Data Visualization

72 of 98

http://nycdatascience.com/part4_en/

# Jittered points coloured by class and sized by displacement, with a
# smoother, drawn separately for each year in a single column of facets.
p <- ggplot(data = mpg, aes(x = cty, y = hwy)) +
  geom_point(
    aes(colour = class, size = displ),
    alpha = 0.5,
    position = "jitter"
  ) +
  geom_smooth() +
  scale_size_continuous(range = c(4, 10)) +
  facet_wrap(~ year, ncol = 1)
Data Visualization

73 of 98

http://nycdatascience.com/part4_en/
Data Visualization

74 of 98

http://nycdatascience.com/part4_en/
Data Visualization

75 of 98

http://nycdatascience.com/part4_en/
Data Visualization

76 of 98

http://nycdatascience.com/part4_en/
Data Visualization

77 of 98

http://nycdatascience.com/part4_en/
Data Visualization

78 of 98

http://nycdatascience.com/part4_en/
Data Visualization

79 of 98

http://nycdatascience.com/part4_en/
Data Visualization

80 of 98

http://nycdatascience.com/part4_en/
Data Visualization

81 of 98

http://nycdatascience.com/part4_en/

# One fill colour per plotted row: 'steelblue' where unemploy is below 8000,
# 'red4' otherwise. The column is indexed as a plain vector so ifelse()
# returns a character vector whether `economics` is a data.frame or a tibble
# (two-index subsetting of a tibble keeps a one-column table).
fillcolor <- ifelse(economics$unemploy[440:470] < 8000, 'steelblue', 'red4')
# Bar chart of unemploy by date for rows 440-470, using the values as bar
# heights (stat = 'identity') and the precomputed colours as fills.
p <- ggplot(economics[440:470, ], aes(x = date, y = unemploy)) +
  geom_bar(stat = 'identity',
           fill = fillcolor)
Data Visualization

82 of 98

http://nycdatascience.com/part4_en/
Data Visualization

83 of 98

http://nycdatascience.com/part4_en/

# For rows 300-470 of economics: a thin vertical line from 0 up to psavert at
# each date, capped with a dark red point, on the black-and-white theme.
p <- ggplot(
  economics[300:470, ],
  aes(x = date, ymax = psavert, ymin = 0)
) +
  geom_linerange(colour = 'grey20', size = 0.5) +
  geom_point(aes(y = psavert), colour = 'red4') +
  theme_bw()
Data Visualization

84 of 98

http://nycdatascience.com/part4_en/
Data Visualization

85 of 98

http://nycdatascience.com/part4_en/

# One colour per row of economics: 'steelblue' for dates strictly between
# 1980-01-01 and 1990-01-01, 'red4' for everything else. The bounds are
# converted to Date explicitly instead of relying on implicit coercion of the
# character strings.
fill.color <- ifelse(economics$date > as.Date('1980-01-01') &
                       economics$date < as.Date('1990-01-01'),
                     'steelblue', 'red4')
p <- ggplot(economics, aes(x = date, ymax = psavert, ymin = 0)) +
  geom_linerange(color = fill.color, size = 0.9) +
  # annotate() draws the label exactly once; geom_text() with constant
  # aesthetics would redraw it for every row of `economics`.
  # NOTE(review): the label text "1980'" looks like a truncated "1980's" -
  # kept as written; confirm the intended wording.
  annotate("text", x = as.Date("1985-01-01"), y = 13, label = "1980'") +
  theme_bw()
Data Visualization

86 of 98

http://nycdatascience.com/part4_en/
Data Visualization

87 of 98

http://nycdatascience.com/part4_en/
Data Visualization

88 of 98

http://nycdatascience.com/part4_en/
Data Visualization

89 of 98

http://nycdatascience.com/part4_en/
Data Visualization

90 of 98

http://nycdatascience.com/part4_en/

library(ggplot2)
# Outline data for the world map, as a data frame with long/lat/group columns.
world <- map_data("world")
worldmap <- ggplot(world, aes(x = long, y = lat, group = group)) +
  geom_path(color = 'gray10', size = 0.3) +
  # Single '*' marker at longitude 114, latitude 30. annotate() draws it once;
  # a geom_point() layer with constant x/y would inherit `world` and draw one
  # identical point per row of the map data.
  annotate("point", x = 114, y = 30, size = 10, shape = '*') +
  # Graticule: latitude breaks every 30 degrees, longitude every 45 degrees.
  scale_y_continuous(breaks = (-2:2) * 30) +
  scale_x_continuous(breaks = (-4:4) * 45) +
  # Orthographic projection with the orientation c(30, 120, 0).
  coord_map("ortho", orientation = c(30, 120, 0)) +
  # Gray grid lines on a white panel, with all axis decoration removed.
  theme(panel.grid.major = element_line(colour = "gray50"),
        panel.background = element_rect(fill = "white"),
        axis.text = element_blank(),
        axis.ticks = element_blank(),
        axis.title = element_blank())
Data Visualization

91 of 98

http://nycdatascience.com/part4_en/
Data Visualization

92 of 98

http://nycdatascience.com/part4_en/

# State outlines; map_data('state') identifies states by lower-case region name.
map <- map_data('state')
# Copy USArrests with lower-case column names and a lower-case `region` column
# built from the row names, so rows can be matched to the map regions.
arrests <- USArrests
names(arrests) <- tolower(names(arrests))
arrests$region <- tolower(rownames(USArrests))
# Choropleth: each state polygon is filled by its `murder` value on a
# white-to-red gradient.
usmap <- ggplot(data = arrests) +
  geom_map(map = map, aes(map_id = region, fill = murder), color = 'gray40') +
  # geom_map() does not set axis limits itself, so extend them to the map extent.
  expand_limits(x = map$long, y = map$lat) +
  scale_fill_continuous(high = 'red2', low = 'white') +
  theme_bw() +
  # Remove grid, panel background and axis decoration; place the legend at
  # relative position (0.95, 0.28) on a white background.
  theme(panel.grid.major = element_blank(),
        panel.background = element_blank(),
        axis.text = element_blank(),
        axis.ticks = element_blank(),
        axis.title = element_blank(),
        legend.position = c(0.95, 0.28),
        legend.background = element_rect(fill = "white", colour = "white")) +
  # NOTE(review): the slide export cut this call off at coord_map('mercat;
  # completed as the Mercator projection - confirm no further arguments or
  # layers followed in the original deck.
  coord_map('mercator')
Data Visualization

93 of 98

http://nycdatascience.com/part4_en/
Data Visualization

94 of 98

http://nycdatascience.com/part4_en/

library(ggmap)
library(XML)
# URL of the HTML page to scrape.
webpage <-'http://data.earthquake.cn/datashare/globeEarthquake_csn.html'
# Parse the HTML tables on the page, keeping text columns as character.
tables <- readHTMLTable(webpage,stringsAsFactors = FALSE)
# Use the sixth parsed table and keep only its columns 1, 3 and 4.
raw <- tables[[6]]
data <- raw[,c(1,3,4)]
# NOTE(review): 'lan' is plotted on the y axis below, so it presumably holds latitude - confirm.
names(data) <- c('date','lan','lon')
# Convert the scraped strings to numeric coordinates and to Date objects.
data$lan <- as.numeric(data$lan)
data$lon <- as.numeric(data$lon)
data$date <- as.Date(data$date, "%Y-%m-%d")
# Read the map data from Google via the ggmap package and mark the data prepared above on the map.
# NOTE(review): the next line is cut off after extent=' in this copy of the slides, so the
# extent value, the closing parenthesis and the trailing `+` are missing - restore them from
# the original deck before running.
earthquake <- ggmap(get_googlemap(center = 'china', zoom=4,maptype='terrain'),extent='
geom_point(data=data,aes(x=lon,y=lan),colour = 'red',alpha=0.7)+
theme(legend.position = "none")
Data Visualization

95 of 98

http://nycdatascience.com/part4_en/
Data Visualization

96 of 98

http://nycdatascience.com/part4_en/

library(googleVis)
library(WDI)
# Fetch data with WDI() for the listed country codes, then build a motion chart keyed by
# country and year with EN.ATM.CO2E.KT on the x axis and NY.GDP.MKTP.CD on the y axis.
# NOTE(review): the WDI() line below is truncated in this copy of the slides (the country
# vector is cut mid-string and any remaining arguments are lost) - restore it from the
# original deck before running.
DF <- WDI(country=c("CN","RU","BR","ZA","IN",'DE','AU','CA','FR','IT','JP','MX','GB','
M <- gvisMotionChart(DF, idvar="country", timevar="year",
xvar='EN.ATM.CO2E.KT',
yvar='NY.GDP.MKTP.CD')
plot(M)
Data Visualization

97 of 98

http://nycdatascience.com/part4_en/
Data Visualization

98 of 98

http://nycdatascience.com/part4_en/

R class 5 -data visualization

  • 1.
    Data Visualization 1 of98 http://nycdatascience.com/part4_en/
  • 2.
    Data Visualization 2 of98 http://nycdatascience.com/part4_en/
  • 3.
    Data Visualization 3 of98 http://nycdatascience.com/part4_en/
  • 4.
    Data Visualization 4 of98 http://nycdatascience.com/part4_en/ data <- read.table('data/anscombe.txt',T) data <- data[,-1] head(data) 1 2 3 4 5 6 x1 10 8 13 9 11 14 x2 10 8 13 9 11 14 x3 x4 y1 y2 y3 y4 10 8 8.04 9.14 7.46 6.58 8 8 6.95 8.14 6.77 5.76 13 8 7.58 8.74 12.74 7.71 9 8 8.81 8.77 7.11 8.84 11 8 8.33 9.26 7.81 8.47 14 8 9.96 8.10 8.84 7.04
  • 5.
    Data Visualization 5 of98 http://nycdatascience.com/part4_en/ colMeans(data) x1 x2 x3 x4 y1 y2 y3 y4 9.0 9.0 9.0 9.0 7.5 7.5 7.5 7.5 sapply(1:4,function(x) cor(data[,x],data[,x+4])) [1] 0.816 0.816 0.816 0.817
  • 6.
    Data Visualization 6 of98 http://nycdatascience.com/part4_en/
  • 7.
    Data Visualization 7 of98 http://nycdatascience.com/part4_en/
  • 8.
    Data Visualization 8 of98 http://nycdatascience.com/part4_en/
  • 9.
    Data Visualization 9 of98 http://nycdatascience.com/part4_en/
  • 10.
    Data Visualization 10 of98 http://nycdatascience.com/part4_en/
  • 11.
    Data Visualization 11 of98 http://nycdatascience.com/part4_en/ plot(cars$dist~cars$speed)
  • 12.
    Data Visualization 12 of98 http://nycdatascience.com/part4_en/ plot(cars$dist,type='l')
  • 13.
    Data Visualization 13 of98 http://nycdatascience.com/part4_en/ plot(cars$dist,type='h')
  • 14.
    Data Visualization 14 of98 http://nycdatascience.com/part4_en/ hist(cars$dist)
  • 15.
    Data Visualization 15 of98 http://nycdatascience.com/part4_en/ library(lattice) num <- sample(1:3,size=50,replace=T) barchart(table(num))
  • 16.
    Data Visualization 16 of98 http://nycdatascience.com/part4_en/ qqmath(rnorm(100))
  • 17.
    Data Visualization 17 of98 http://nycdatascience.com/part4_en/ stripplot(~ Sepal.Length | Species, data = iris,layout=c(1,3))
  • 18.
    Data Visualization 18 of98 http://nycdatascience.com/part4_en/ densityplot(~ Sepal.Length, groups=Species, data = iris,plot.points=FALSE)
  • 19.
    Data Visualization 19 of98 http://nycdatascience.com/part4_en/ bwplot(Species~ Sepal.Length, data = iris)
  • 20.
    Data Visualization 20 of98 http://nycdatascience.com/part4_en/ xyplot(Sepal.Width~ Sepal.Length, groups=Species, data = iris)
  • 21.
    Data Visualization 21 of98 http://nycdatascience.com/part4_en/ splom(iris[1:4])
  • 22.
    Data Visualization 22 of98 http://nycdatascience.com/part4_en/ histogram(~ Sepal.Length | Species, data = iris,layout=c(1,3))
  • 23.
    Data Visualization 23 of98 http://nycdatascience.com/part4_en/ library(plyr) func3d <- function(x,y) { sin(x^2/2 - y^2/4) * cos(2*x - exp(y)) } vec1 <- vec2 <- seq(0,2,length=30) para <- expand.grid(x=vec1,y=vec2) result6 <- mdply(.data=para,.fun=func3d)
  • 24.
    Data Visualization 24 of98 http://nycdatascience.com/part4_en/ library(lattice) wireframe(V1~x*y,data=result6,scales = list(arrows = FALSE), drape = TRUE, colorkey = F)
  • 25.
    Data Visualization 25 of98 http://nycdatascience.com/part4_en/ library(ggplot2) p <- ggplot(data=mpg,mapping=aes(x=cty,y=hwy)) + geom_point() print(p)
  • 26.
    Data Visualization 26 of98 http://nycdatascience.com/part4_en/ summary(p) data: manufacturer, model, displ, year, cyl, trans, drv, cty, hwy, fl, class [234x11] mapping: x = cty, y = hwy faceting: facet_null() ----------------------------------geom_point: na.rm = FALSE stat_identity: position_identity: (width = NULL, height = NULL)
  • 27.
    Data Visualization 27 of98 http://nycdatascience.com/part4_en/ p <- ggplot(data=mpg,mapping=aes(x=cty,y=hwy,colour=factor(year))) p <- p + geom_point() print(p)
  • 28.
    Data Visualization 28 of98 http://nycdatascience.com/part4_en/ p <- ggplot(data=mpg,mapping=aes(x=cty,y=hwy,colour=factor(year))) p <- p + geom_smooth() print(p)
  • 29.
    Data Visualization 29 of98 http://nycdatascience.com/part4_en/ p <- ggplot(data=mpg,mapping=aes(x=cty,y=hwy)) + geom_point(aes(colour=factor(year))) + geom_smooth()
  • 30.
    Data Visualization 30 of98 http://nycdatascience.com/part4_en/
  • 31.
    Data Visualization 31 of98 http://nycdatascience.com/part4_en/ p <- ggplot(data=mpg,mapping=aes(x=cty,y=hwy)) + geom_point(aes(colour=factor(year))) + geom_smooth() + scale_color_manual(values=c('blue2','red4'))
  • 32.
    Data Visualization 32 of98 http://nycdatascience.com/part4_en/
  • 33.
    Data Visualization 33 of98 http://nycdatascience.com/part4_en/ p <- ggplot(data=mpg,mapping=aes(x=cty,y=hwy)) + geom_point(aes(colour=factor(year))) + geom_smooth() + scale_color_manual(values=c('blue2','red4')) + facet_wrap(~ year,ncol=1)
  • 34.
    Data Visualization 34 of98 http://nycdatascience.com/part4_en/
  • 35.
    Data Visualization 35 of98 http://nycdatascience.com/part4_en/ p <- ggplot(data=mpg, mapping=aes(x=cty,y=hwy)) + geom_point(aes(colour=class,size=displ), alpha=0.5,position = "jitter") + geom_smooth() + scale_size_continuous(range = c(4, 10)) + facet_wrap(~ year,ncol=1) + opts(title='Vehicle model and fuel consumption') + labs(y='Highway miles per gallon', x='Urban miles per gallon', size='Displacement', colour = 'Model')
  • 36.
    Data Visualization 36 of98 http://nycdatascience.com/part4_en/
  • 37.
    Data Visualization 37 of98 http://nycdatascience.com/part4_en/ p <- ggplot(data=mpg, mapping=aes(x=cty,y=hwy)) + geom_point(aes(colour=factor(year),size=displ), alpha=0.5,position = "jitter")+ stat_smooth()+ scale_color_manual(values =c('steelblue','red4'))+ scale_size_continuous(range = c(4, 10))
  • 38.
    Data Visualization 38 of98 http://nycdatascience.com/part4_en/
  • 39.
    Data Visualization 39 of98 http://nycdatascience.com/part4_en/ library(ggplot2) p <- ggplot(data=iris,aes(x=Sepal.Length))+ geom_histogram() print(p)
  • 40.
    Data Visualization 40 of98 http://nycdatascience.com/part4_en/ p <- ggplot(iris,aes(x=Sepal.Length))+ geom_histogram(binwidth=0.1, # Set the group gap fill='skyblue', # Set the fill color colour='black') # Set the border color
  • 41.
    Data Visualization 41 of98 http://nycdatascience.com/part4_en/
  • 42.
    Data Visualization 42 of98 http://nycdatascience.com/part4_en/ p <- ggplot(iris,aes(x=Sepal.Length)) + geom_histogram(aes(y=..density..), fill='skyblue', color='black') + geom_density(color='black', linetype=2,adjust=2)
  • 43.
    Data Visualization 43 of98 http://nycdatascience.com/part4_en/
  • 44.
    Data Visualization 44 of98 http://nycdatascience.com/part4_en/ p <- ggplot(iris,aes(x=Sepal.Length)) + geom_histogram(aes(y=..density..), # Note: set y to relative frequency fill='gray60', color='gray') + geom_density(color='black',linetype=1,adjust=0.5) + geom_density(color='black',linetype=2,adjust=1) + geom_density(color='black',linetype=3,adjust=2)
  • 45.
    Data Visualization 45 of98 http://nycdatascience.com/part4_en/
  • 46.
    Data Visualization 46 of98 http://nycdatascience.com/part4_en/ p <- ggplot(iris,aes(x=Sepal.Length,fill=Species)) + geom_density(alpha=0.5,color='gra print(p)
  • 47.
    Data Visualization 47 of98 http://nycdatascience.com/part4_en/ p <- ggplot(iris,aes(x=Species,y=Sepal.Length,fill=Species)) + geom_boxplot() print(p)
  • 48.
    Data Visualization 48 of98 http://nycdatascience.com/part4_en/ p <- ggplot(iris,aes(x=Species,y=Sepal.Length,fill=Species)) + geom_violin() print(p)
  • 49.
    Data Visualization 49 of98 http://nycdatascience.com/part4_en/ p <- ggplot(iris,aes(x=Species,y=Sepal.Length, fill=Species)) + geom_violin(fill='gray',alpha=0.5) + geom_dotplot(binaxis = "y", stackdir = "center") print(p)
  • 50.
    Data Visualization 50 of98 http://nycdatascience.com/part4_en/
  • 51.
    Data Visualization 51 of98 http://nycdatascience.com/part4_en/ p <- ggplot(mpg,aes(x=class)) + geom_bar() print(p)
  • 52.
    Data Visualization 52 of98 http://nycdatascience.com/part4_en/ mpg$year <- factor(mpg$year) p <- ggplot(mpg,aes(x=class,fill=year)) + geom_bar(color='black')
  • 53.
    Data Visualization 53 of98 http://nycdatascience.com/part4_en/
  • 54.
    Data Visualization 54 of98 http://nycdatascience.com/part4_en/ p <- ggplot(mpg,aes(x=class,fill=year)) + geom_bar(color='black', position=position_dodge())
  • 55.
    Data Visualization 55 of98 http://nycdatascience.com/part4_en/
  • 56.
    Data Visualization 56 of98 http://nycdatascience.com/part4_en/ p <- ggplot(mpg, aes(x = factor(1), fill = factor(class))) + geom_bar(width = 1)+ coord_polar(theta = "y")
  • 57.
    Data Visualization 57 of98 http://nycdatascience.com/part4_en/
  • 58.
    Data Visualization 58 of98 http://nycdatascience.com/part4_en/ set.seed(1) # Randomly generate 100 wind directions, and divide them into 16 intervals. dir <- cut_interval(runif(100,0,360),n=16) # Randomly generate 100 wind speed, and divide them into 4 intensities. mag <- cut_interval(rgamma(100,15),4) sample <- data.frame(dir=dir,mag=mag) # Map wind direction to X-axie, frequency to Y-axie and speed to fill colors. Transfor p <- ggplot(sample,aes(x=dir,fill=mag)) + geom_bar()+ coord_polar()
  • 59.
    Data Visualization 59 of98 http://nycdatascience.com/part4_en/
  • 60.
    Data Visualization 60 of98 http://nycdatascience.com/part4_en/
  • 61.
    Data Visualization 61 of98 http://nycdatascience.com/part4_en/
  • 62.
    Data Visualization 62 of98 http://nycdatascience.com/part4_en/ data <- read.csv('data/soft_impact.csv',T) library(reshape2) data.melt <- melt(data,id='Year') p <- ggplot(data.melt,aes(x=Year,y=value, group=variable,fill=variable)) + geom_area(color='black',size=0.3, position=position_fill()) + scale_fill_brewer()
  • 63.
    Data Visualization 63 of98 http://nycdatascience.com/part4_en/
  • 64.
    Data Visualization 64 of98 http://nycdatascience.com/part4_en/
  • 65.
    Data Visualization 65 of98 http://nycdatascience.com/part4_en/ p <- ggplot(data=mpg,aes(x=cty,y=hwy)) + geom_point() print(p)
  • 66.
    Data Visualization 66 of98 http://nycdatascience.com/part4_en/ mpg$year <- factor(mpg$year) p <- ggplot(data=mpg,aes(x=cty,y=hwy)) + geom_point(aes(color=year)) print(p)
  • 67.
    Data Visualization 67 of98 http://nycdatascience.com/part4_en/ mpg$year <- factor(mpg$year) p <- ggplot(data=mpg,aes(x=cty,y=hwy)) + geom_point(aes(color=year,shape=year)) print(p)
  • 68.
    Data Visualization 68 of98 http://nycdatascience.com/part4_en/ p <- ggplot(data=mpg,aes(x=cty,y=hwy)) + geom_point(aes(color=year),alpha=0.5,position print(p)
  • 69.
    Data Visualization 69 of98 http://nycdatascience.com/part4_en/ p <- ggplot(data=mpg,aes(x=cty,y=hwy)) + geom_point(aes(color=year),alpha=0.5,position = "jitter") + geom_smooth(method='lm') print(p)
  • 70.
    Data Visualization 70 of98 http://nycdatascience.com/part4_en/ p <- ggplot(data=mpg,aes(x=cty,y=hwy)) + geom_point(aes(color=year,size=displ),alpha=0.5,position = "jitter") + geom_smooth(method='lm') + scale_size_continuous(range = c(4, 10))
  • 71.
    Data Visualization 71 of98 http://nycdatascience.com/part4_en/
  • 72.
    Data Visualization 72 of98 http://nycdatascience.com/part4_en/ p <- ggplot(data=mpg,aes(x=cty,y=hwy)) + geom_point(aes(colour=class,size=displ), alpha=0.5,position = "jitter") + geom_smooth() + scale_size_continuous(range = c(4, 10)) + facet_wrap(~ year,ncol=1)
  • 73.
    Data Visualization 73 of98 http://nycdatascience.com/part4_en/
  • 74.
    Data Visualization 74 of98 http://nycdatascience.com/part4_en/
  • 75.
    Data Visualization 75 of98 http://nycdatascience.com/part4_en/
  • 76.
    Data Visualization 76 of98 http://nycdatascience.com/part4_en/
  • 77.
    Data Visualization 77 of98 http://nycdatascience.com/part4_en/
  • 78.
    Data Visualization 78 of98 http://nycdatascience.com/part4_en/
  • 79.
    Data Visualization 79 of98 http://nycdatascience.com/part4_en/
  • 80.
    Data Visualization 80 of98 http://nycdatascience.com/part4_en/
  • 81.
    Data Visualization 81 of98 http://nycdatascience.com/part4_en/ fillcolor <- ifelse(economics[440:470,'unemploy']<8000,'steelblue','red4') p <- ggplot(economics[440:470,],aes(x=date,y=unemploy)) + geom_bar(stat='identity', fill=fillcolor)
  • 82.
    Data Visualization 82 of98 http://nycdatascience.com/part4_en/
  • 83.
    Data Visualization 83 of98 http://nycdatascience.com/part4_en/ p <- ggplot(economics[300:470,],aes(x=date,ymax=psavert,ymin=0)) + geom_linerange(color='grey20',size=0.5) + geom_point(aes(y=psavert),color='red4') + theme_bw()
  • 84.
    Data Visualization 84 of98 http://nycdatascience.com/part4_en/
  • 85.
    Data Visualization 85 of98 http://nycdatascience.com/part4_en/ fill.color <- ifelse(economics$date > '1980-01-01' & economics$date < '1990-01-01', 'steelblue','red4') p <- ggplot(economics,aes(x=date,ymax=psavert,ymin=0)) + geom_linerange(color=fill.color,size=0.9) + geom_text(aes(x=as.Date("1985-01-01",'%Y-%m-%d'),y=13),label="1980'") + theme_bw()
  • 86.
    Data Visualization 86 of98 http://nycdatascience.com/part4_en/
  • 87.
    Data Visualization 87 of98 http://nycdatascience.com/part4_en/
  • 88.
    Data Visualization 88 of98 http://nycdatascience.com/part4_en/
  • 89.
    Data Visualization 89 of98 http://nycdatascience.com/part4_en/
  • 90.
    Data Visualization 90 of98 http://nycdatascience.com/part4_en/ library(ggplot2) world <- map_data("world") worldmap <- ggplot(world, aes(x=long, y=lat, group=group)) + geom_path(color='gray10',size=0.3) + geom_point(x=114,y=30,size=10,shape='*') + scale_y_continuous(breaks=(-2:2) * 30) + scale_x_continuous(breaks=(-4:4) * 45) + coord_map("ortho", orientation=c(30, 120, 0)) + theme(panel.grid.major = element_line(colour = "gray50"), panel.background = element_rect(fill = "white"), axis.text=element_blank(), axis.ticks=element_blank(), axis.title=element_blank())
  • 91.
    Data Visualization 91 of98 http://nycdatascience.com/part4_en/
  • 92.
    Data Visualization 92 of98 http://nycdatascience.com/part4_en/ map <- map_data('state') arrests <- USArrests names(arrests) <- tolower(names(arrests)) arrests$region <- tolower(rownames(USArrests)) usmap <- ggplot(data=arrests) + geom_map(map =map,aes(map_id = region,fill = murder),color='gray40' ) + expand_limits(x = map$long, y = map$lat) + scale_fill_continuous(high='red2',low='white') + theme_bw() + theme(panel.grid.major = element_blank(), panel.background = element_blank(), axis.text=element_blank(), axis.ticks=element_blank(), axis.title=element_blank(), legend.position = c(0.95,0.28), legend.background=element_rect(fill="white", colour="white"))+ coord_map('mercat
  • 93.
    Data Visualization 93 of98 http://nycdatascience.com/part4_en/
  • 94.
    Data Visualization 94 of98 http://nycdatascience.com/part4_en/ library(ggmap) library(XML) webpage <-'http://data.earthquake.cn/datashare/globeEarthquake_csn.html' tables <- readHTMLTable(webpage,stringsAsFactors = FALSE) raw <- tables[[6]] data <- raw[,c(1,3,4)] names(data) <- c('date','lan','lon') data$lan <- as.numeric(data$lan) data$lon <- as.numeric(data$lon) data$date <- as.Date(data$date, "%Y-%m-%d") #Read the map data from Google by the ggmap package, and mark the previous data on the earthquake <- ggmap(get_googlemap(center = 'china', zoom=4,maptype='terrain'),extent=' geom_point(data=data,aes(x=lon,y=lan),colour = 'red',alpha=0.7)+ theme(legend.position = "none")
  • 95.
    Data Visualization 95 of98 http://nycdatascience.com/part4_en/
  • 96.
    Data Visualization 96 of98 http://nycdatascience.com/part4_en/ library(googleVis) library(WDI) DF <- WDI(country=c("CN","RU","BR","ZA","IN",'DE','AU','CA','FR','IT','JP','MX','GB',' M <- gvisMotionChart(DF, idvar="country", timevar="year", xvar='EN.ATM.CO2E.KT', yvar='NY.GDP.MKTP.CD') plot(M)
  • 97.
    Data Visualization 97 of98 http://nycdatascience.com/part4_en/
  • 98.
    Data Visualization 98 of98 http://nycdatascience.com/part4_en/