SlideShare a Scribd company logo
1 of 98
Download to read offline
Data Visualization

1 of 98

http://nycdatascience.com/part4_en/
Data Visualization

2 of 98

http://nycdatascience.com/part4_en/
Data Visualization

3 of 98

http://nycdatascience.com/part4_en/
Data Visualization

4 of 98

http://nycdatascience.com/part4_en/

# Read the whitespace-delimited table (presumably Anscombe's quartet, going by
# the file name), treating the first line as column names.
# `header = TRUE` is spelled out: `T` is an ordinary variable that can be
# reassigned, and a named argument documents intent.
data <- read.table('data/anscombe.txt', header = TRUE)
# Drop the first column.
data <- data[, -1]
# Show the first six rows.
head(data)

1
2
3
4
5
6

x1
10
8
13
9
11
14

x2
10
8
13
9
11
14

x3 x4
y1
y2
y3
y4
10 8 8.04 9.14 7.46 6.58
8 8 6.95 8.14 6.77 5.76
13 8 7.58 8.74 12.74 7.71
9 8 8.81 8.77 7.11 8.84
11 8 8.33 9.26 7.81 8.47
14 8 9.96 8.10 8.84 7.04
Data Visualization

5 of 98

http://nycdatascience.com/part4_en/

# Mean of every column of `data`.
colMeans(data)

x1 x2 x3 x4 y1 y2 y3 y4
9.0 9.0 9.0 9.0 7.5 7.5 7.5 7.5

# Correlation between column i and column i + 4 for i = 1..4 (each x column
# against the matching y column). `vapply` guarantees a numeric vector of
# length 4; `sapply` would silently change its return type on unexpected input.
vapply(1:4, function(x) cor(data[, x], data[, x + 4]), numeric(1))

[1] 0.816 0.816 0.816 0.817
Data Visualization

6 of 98

http://nycdatascience.com/part4_en/
Data Visualization

7 of 98

http://nycdatascience.com/part4_en/
Data Visualization

8 of 98

http://nycdatascience.com/part4_en/
Data Visualization

9 of 98

http://nycdatascience.com/part4_en/
Data Visualization

10 of 98

http://nycdatascience.com/part4_en/
Data Visualization

11 of 98

http://nycdatascience.com/part4_en/

# Formula interface: plot `dist` (y) against `speed` (x) from the built-in
# `cars` data as a scatter plot.
plot(cars$dist~cars$speed)
Data Visualization

12 of 98

http://nycdatascience.com/part4_en/

# Plot `dist` against its index, joined by lines (type = 'l').
plot(cars$dist,type='l')
Data Visualization

13 of 98

http://nycdatascience.com/part4_en/

# Plot `dist` against its index as vertical lines (type = 'h').
plot(cars$dist,type='h')
Data Visualization

14 of 98

http://nycdatascience.com/part4_en/

# Histogram of `dist` using the default binning.
hist(cars$dist)
Data Visualization

15 of 98

http://nycdatascience.com/part4_en/

library(lattice)
# Draw 50 values from {1, 2, 3} with replacement (no seed is set, so the
# counts differ between runs) and show the frequency of each value.
# `TRUE` replaces the reassignable shorthand `T`.
num <- sample(1:3, size = 50, replace = TRUE)
barchart(table(num))
Data Visualization

16 of 98

http://nycdatascience.com/part4_en/

# Lattice Q-Q plot of 100 standard-normal random draws.
qqmath(rnorm(100))
Data Visualization

17 of 98

http://nycdatascience.com/part4_en/

# Strip plot of Sepal.Length, one panel per Species, laid out as
# 1 column x 3 rows.
stripplot(~ Sepal.Length | Species, data = iris,layout=c(1,3))
Data Visualization

18 of 98

http://nycdatascience.com/part4_en/

# Density of Sepal.Length with one curve per Species in a single panel;
# plot.points=FALSE suppresses the individual observations.
densityplot(~ Sepal.Length, groups=Species, data = iris,plot.points=FALSE)
Data Visualization

19 of 98

http://nycdatascience.com/part4_en/

# Box-and-whisker plot of Sepal.Length for each Species (Species on the y-axis).
bwplot(Species~ Sepal.Length, data = iris)
Data Visualization

20 of 98

http://nycdatascience.com/part4_en/

# Scatter plot of Sepal.Width against Sepal.Length, with points grouped by
# Species.
xyplot(Sepal.Width~ Sepal.Length, groups=Species, data = iris)
Data Visualization

21 of 98

http://nycdatascience.com/part4_en/

# Scatter-plot matrix of the first four columns of `iris`.
splom(iris[1:4])
Data Visualization

22 of 98

http://nycdatascience.com/part4_en/

# Lattice histogram of Sepal.Length, one panel per Species, laid out as
# 1 column x 3 rows.
histogram(~ Sepal.Length | Species, data = iris,layout=c(1,3))
Data Visualization

23 of 98

http://nycdatascience.com/part4_en/

library(plyr)
# Surface function used for the 3-D demo.
#   x, y: numeric values (vectorised through the arithmetic and sin/cos/exp).
#   Returns sin(x^2/2 - y^2/4) * cos(2x - exp(y)).
func3d <- function(x, y) {
  sine_arg <- x^2 / 2 - y^2 / 4
  cosine_arg <- 2 * x - exp(y)
  sin(sine_arg) * cos(cosine_arg)
}
# 30 evenly spaced values on [0, 2] for each axis. `length.out` is written in
# full; the original `length=` relied on partial argument matching.
vec1 <- vec2 <- seq(0, 2, length.out = 30)
# Every (x, y) combination of the two axes: a 900-row grid.
para <- expand.grid(x = vec1, y = vec2)
# Call func3d once per grid row; the result is later plotted using the columns
# x, y and V1.
result6 <- mdply(.data = para, .fun = func3d)
Data Visualization

24 of 98

http://nycdatascience.com/part4_en/

library(lattice)
# 3-D wireframe of V1 over the (x, y) grid in `result6`.
#   scales$arrows = FALSE : draw tick marks and labels instead of axis arrows
#   drape = TRUE          : colour the surface by height
#   colorkey = FALSE      : no colour legend (`FALSE` replaces the
#                           reassignable shorthand `F`)
wireframe(V1 ~ x * y, data = result6, scales = list(arrows = FALSE),
          drape = TRUE, colorkey = FALSE)
Data Visualization

25 of 98

http://nycdatascience.com/part4_en/

library(ggplot2)
# Base plot object: `cty` on x, `hwy` on y, from ggplot2's `mpg` data.
p <- ggplot(mpg, aes(x = cty, y = hwy))
# Add a point layer, then render.
p <- p + geom_point()
print(p)
Data Visualization

26 of 98

http://nycdatascience.com/part4_en/

# Print a text description of the plot object `p`.
summary(p)

data: manufacturer, model, displ, year, cyl, trans, drv, cty, hwy, fl, class [234x11]
mapping: x = cty, y = hwy
faceting: facet_null()
-----------------------------------
geom_point: na.rm = FALSE
stat_identity:
position_identity: (width = NULL, height = NULL)
Data Visualization

27 of 98

http://nycdatascience.com/part4_en/

# Scatter plot with colour mapped to `year` (as a factor) at the plot level,
# so every layer inherits the grouping.
p <- ggplot(mpg, aes(x = cty, y = hwy, colour = factor(year))) +
  geom_point()
print(p)
Data Visualization

28 of 98

http://nycdatascience.com/part4_en/

# Same plot-level mapping as the coloured scatter plot, but drawn as smoothed
# trend lines: one smoother per `factor(year)` level, because colour is
# mapped globally.
p <- ggplot(mpg, aes(x = cty, y = hwy, colour = factor(year))) +
  geom_smooth()
print(p)
Data Visualization

29 of 98

http://nycdatascience.com/part4_en/

# Colour is mapped only inside the point layer, so the points are coloured by
# year while the smoother is fitted to all observations together.
p <- ggplot(mpg, aes(x = cty, y = hwy))
p <- p + geom_point(aes(colour = factor(year)))
p <- p + geom_smooth()
Data Visualization

30 of 98

http://nycdatascience.com/part4_en/
Data Visualization

31 of 98

http://nycdatascience.com/part4_en/

# Points coloured by year plus one overall smoother; the two year levels are
# given explicit colours instead of the default palette.
p <- ggplot(mpg, aes(x = cty, y = hwy))
p <- p + geom_point(aes(colour = factor(year)))
p <- p + geom_smooth()
p <- p + scale_color_manual(values = c("blue2", "red4"))
Data Visualization

32 of 98

http://nycdatascience.com/part4_en/
Data Visualization

33 of 98

http://nycdatascience.com/part4_en/

# As the manually coloured plot, split into one panel per year stacked in a
# single column; the smoother is then fitted within each panel.
p <- ggplot(mpg, aes(x = cty, y = hwy))
p <- p + geom_point(aes(colour = factor(year)))
p <- p + geom_smooth()
p <- p + scale_color_manual(values = c("blue2", "red4"))
p <- p + facet_wrap(~ year, ncol = 1)
Data Visualization

34 of 98

http://nycdatascience.com/part4_en/
Data Visualization

35 of 98

http://nycdatascience.com/part4_en/

# Complete example: jittered, semi-transparent points coloured by vehicle
# class and sized by displacement, a smoother, one panel per year, and
# readable titles.
# The title is set through labs(): opts() was deprecated in ggplot2 0.9.1 and
# is defunct in current releases, so the original call fails there.
p <- ggplot(data = mpg, mapping = aes(x = cty, y = hwy)) +
  geom_point(aes(colour = class, size = displ),
             alpha = 0.5, position = "jitter") +
  geom_smooth() +
  # Point sizes range from 4 to 10 across the values of `displ`.
  scale_size_continuous(range = c(4, 10)) +
  facet_wrap(~ year, ncol = 1) +
  labs(title = 'Vehicle model and fuel consumption',
       y = 'Highway miles per gallon',
       x = 'Urban miles per gallon',
       size = 'Displacement',
       colour = 'Model')
Data Visualization

36 of 98

http://nycdatascience.com/part4_en/
Data Visualization

37 of 98

http://nycdatascience.com/part4_en/

# Jittered, half-transparent points: colour encodes year, size encodes
# displacement. A single smoother is added on top.
p <- ggplot(mpg, aes(x = cty, y = hwy))
p <- p + geom_point(
  aes(colour = factor(year), size = displ),
  alpha = 0.5,
  position = "jitter"
)
p <- p + stat_smooth()
p <- p + scale_color_manual(values = c("steelblue", "red4"))
p <- p + scale_size_continuous(range = c(4, 10))
Data Visualization

38 of 98

http://nycdatascience.com/part4_en/
Data Visualization

39 of 98

http://nycdatascience.com/part4_en/

library(ggplot2)
# Histogram of Sepal.Length using ggplot2's default binning.
p <- ggplot(iris, aes(x = Sepal.Length))
p <- p + geom_histogram()
print(p)
Data Visualization

40 of 98

http://nycdatascience.com/part4_en/

# Histogram of Sepal.Length with an explicit bin width and custom colours.
p <- ggplot(iris, aes(x = Sepal.Length))
p <- p + geom_histogram(
  binwidth = 0.1,    # width of each bin, in the units of Sepal.Length
  fill = "skyblue",  # bar fill colour
  colour = "black"   # bar outline colour
)
Data Visualization

41 of 98

http://nycdatascience.com/part4_en/
Data Visualization

42 of 98

http://nycdatascience.com/part4_en/

# Histogram on the density scale with a kernel density estimate overlaid.
# y = ..density.. rescales bar heights so they are comparable with the curve.
# NOTE(review): recent ggplot2 releases prefer after_stat(density) over the
# ..density.. notation; kept as in the original.
p <- ggplot(iris, aes(x = Sepal.Length))
p <- p + geom_histogram(aes(y = ..density..),
                        fill = "skyblue", color = "black")
# Dashed curve (linetype 2); adjust = 2 doubles the default bandwidth.
p <- p + geom_density(color = "black", linetype = 2, adjust = 2)
Data Visualization

43 of 98

http://nycdatascience.com/part4_en/
Data Visualization

44 of 98

http://nycdatascience.com/part4_en/

# Density-scaled histogram with three kernel density curves that differ only
# in bandwidth multiplier, to show the effect of `adjust`.
p <- ggplot(iris, aes(x = Sepal.Length))
# y = ..density.. puts the bars on the same scale as the density curves.
p <- p + geom_histogram(aes(y = ..density..), fill = "gray60", color = "gray")
p <- p + geom_density(color = "black", linetype = 1, adjust = 0.5)  # solid
p <- p + geom_density(color = "black", linetype = 2, adjust = 1)    # dashed
p <- p + geom_density(color = "black", linetype = 3, adjust = 2)    # dotted
Data Visualization

45 of 98

http://nycdatascience.com/part4_en/
Data Visualization

46 of 98

http://nycdatascience.com/part4_en/

p <- ggplot(iris,aes(x=Sepal.Length,fill=Species)) + geom_density(alpha=0.5,color='gra
print(p)
Data Visualization

47 of 98

http://nycdatascience.com/part4_en/

# Box plot of Sepal.Length for each Species, with boxes filled by Species.
p <- ggplot(iris, aes(x = Species, y = Sepal.Length, fill = Species))
p <- p + geom_boxplot()
print(p)
Data Visualization

48 of 98

http://nycdatascience.com/part4_en/

# Violin plot of Sepal.Length for each Species, filled by Species.
p <- ggplot(iris, aes(x = Species, y = Sepal.Length, fill = Species))
p <- p + geom_violin()
print(p)
Data Visualization

49 of 98

http://nycdatascience.com/part4_en/

# Grey, half-transparent violins with the individual observations drawn on top
# as a dot plot binned along y and stacked symmetrically about the centre.
# The constant fill on the violin overrides the plot-level fill = Species
# mapping; the dots still use it.
p <- ggplot(iris, aes(x = Species, y = Sepal.Length, fill = Species))
p <- p + geom_violin(fill = "gray", alpha = 0.5)
p <- p + geom_dotplot(binaxis = "y", stackdir = "center")
print(p)
Data Visualization

50 of 98

http://nycdatascience.com/part4_en/
Data Visualization

51 of 98

http://nycdatascience.com/part4_en/

# Bar chart counting the rows of `mpg` in each vehicle class.
p <- ggplot(mpg, aes(x = class))
p <- p + geom_bar()
print(p)
Data Visualization

52 of 98

http://nycdatascience.com/part4_en/

# Treat year as categorical so it can drive a discrete fill scale.
mpg$year <- factor(mpg$year)
# Counts per class, with bars split by year (stacked, the default position)
# and outlined in black.
p <- ggplot(mpg, aes(x = class, fill = year))
p <- p + geom_bar(color = "black")
Data Visualization

53 of 98

http://nycdatascience.com/part4_en/
Data Visualization

54 of 98

http://nycdatascience.com/part4_en/

# Counts per class with the year bars placed side by side instead of stacked.
p <- ggplot(mpg, aes(x = class, fill = year))
p <- p + geom_bar(color = "black", position = position_dodge())
Data Visualization

55 of 98

http://nycdatascience.com/part4_en/
Data Visualization

56 of 98

http://nycdatascience.com/part4_en/

# Pie chart: one full-width stacked bar (constant x) filled by class, then
# drawn in polar coordinates with the y (count) axis mapped to the angle.
p <- ggplot(mpg, aes(x = factor(1), fill = factor(class)))
p <- p + geom_bar(width = 1)
p <- p + coord_polar(theta = "y")
Data Visualization

57 of 98

http://nycdatascience.com/part4_en/
Data Visualization

58 of 98

http://nycdatascience.com/part4_en/

# Fix the RNG so the simulated data are reproducible.
set.seed(1)
# Simulate 100 wind directions on [0, 360] and cut them into 16 equal-width
# intervals.
dir <- cut_interval(runif(100, min = 0, max = 360), n = 16)
# Simulate 100 wind speeds (gamma, shape 15) and cut them into 4 equal-width
# intensity classes.
mag <- cut_interval(rgamma(100, shape = 15), n = 4)
sample <- data.frame(dir = dir, mag = mag)
# Map wind direction to the x-axis, frequency to the y-axis and speed class to
# the fill colour, then switch to polar coordinates.
p <- ggplot(sample, aes(x = dir, fill = mag))
p <- p + geom_bar()
p <- p + coord_polar()
Data Visualization

59 of 98

http://nycdatascience.com/part4_en/
Data Visualization

60 of 98

http://nycdatascience.com/part4_en/
Data Visualization

61 of 98

http://nycdatascience.com/part4_en/
Data Visualization

62 of 98

http://nycdatascience.com/part4_en/

# Read the CSV with a header row. `header = TRUE` is spelled out rather than
# relying on the reassignable shorthand `T` passed positionally.
data <- read.csv('data/soft_impact.csv', header = TRUE)
library(reshape2)
# Reshape to long format: one row per (Year, variable) pair with the
# measurement in `value`. `id.vars` is written in full; the original `id=`
# relied on partial argument matching.
data.melt <- melt(data, id.vars = 'Year')
# Stacked area chart normalised to proportions (position_fill), one band per
# variable, with thin black outlines and a ColorBrewer fill palette.
p <- ggplot(data.melt, aes(x = Year, y = value,
                           group = variable, fill = variable)) +
  geom_area(color = 'black', size = 0.3,
            position = position_fill()) +
  scale_fill_brewer()
Data Visualization

63 of 98

http://nycdatascience.com/part4_en/
Data Visualization

64 of 98

http://nycdatascience.com/part4_en/
Data Visualization

65 of 98

http://nycdatascience.com/part4_en/

# Plain scatter plot of `hwy` against `cty`.
p <- ggplot(mpg, aes(x = cty, y = hwy))
p <- p + geom_point()
print(p)
Data Visualization

66 of 98

http://nycdatascience.com/part4_en/

# Make year categorical so colour uses a discrete scale.
mpg$year <- factor(mpg$year)
# Scatter plot with point colour encoding year.
p <- ggplot(mpg, aes(x = cty, y = hwy))
p <- p + geom_point(aes(color = year))
print(p)
Data Visualization

67 of 98

http://nycdatascience.com/part4_en/

# Make year categorical so colour and shape use discrete scales.
mpg$year <- factor(mpg$year)
# Scatter plot where year is encoded by both point colour and point shape.
p <- ggplot(mpg, aes(x = cty, y = hwy))
p <- p + geom_point(aes(color = year, shape = year))
print(p)
Data Visualization

68 of 98

http://nycdatascience.com/part4_en/

p <- ggplot(data=mpg,aes(x=cty,y=hwy)) + geom_point(aes(color=year),alpha=0.5,position
print(p)
Data Visualization

69 of 98

http://nycdatascience.com/part4_en/

# Jittered, half-transparent points coloured by year, with a single linear
# regression line fitted to all points.
p <- ggplot(mpg, aes(x = cty, y = hwy))
p <- p + geom_point(aes(color = year), alpha = 0.5, position = "jitter")
p <- p + geom_smooth(method = "lm")
print(p)
Data Visualization

70 of 98

http://nycdatascience.com/part4_en/

# As the linear-fit scatter plot, with point size additionally encoding
# displacement; sizes are scaled to the range 4-10.
p <- ggplot(mpg, aes(x = cty, y = hwy))
p <- p + geom_point(
  aes(color = year, size = displ),
  alpha = 0.5,
  position = "jitter"
)
p <- p + geom_smooth(method = "lm")
p <- p + scale_size_continuous(range = c(4, 10))
Data Visualization

71 of 98

http://nycdatascience.com/part4_en/
Data Visualization

72 of 98

http://nycdatascience.com/part4_en/

# Points coloured by vehicle class and sized by displacement, with a default
# smoother, drawn in one panel per year stacked in a single column.
p <- ggplot(mpg, aes(x = cty, y = hwy))
p <- p + geom_point(
  aes(colour = class, size = displ),
  alpha = 0.5,
  position = "jitter"
)
p <- p + geom_smooth()
p <- p + scale_size_continuous(range = c(4, 10))
p <- p + facet_wrap(~ year, ncol = 1)
Data Visualization

73 of 98

http://nycdatascience.com/part4_en/
Data Visualization

74 of 98

http://nycdatascience.com/part4_en/
Data Visualization

75 of 98

http://nycdatascience.com/part4_en/
Data Visualization

76 of 98

http://nycdatascience.com/part4_en/
Data Visualization

77 of 98

http://nycdatascience.com/part4_en/
Data Visualization

78 of 98

http://nycdatascience.com/part4_en/
Data Visualization

79 of 98

http://nycdatascience.com/part4_en/
Data Visualization

80 of 98

http://nycdatascience.com/part4_en/
Data Visualization

81 of 98

http://nycdatascience.com/part4_en/

# Rows 440-470 of `economics`, built once and reused for both the colours and
# the plot so the two cannot drift apart.
econ_subset <- economics[440:470, ]
# One colour per row: blue below 8000 unemployed, dark red otherwise.
# The column is extracted with `$` so this is a plain vector whether
# `economics` is a data.frame or a tibble; `economics[rows, 'unemploy']`
# only collapses to a vector on a plain data.frame.
fillcolor <- ifelse(econ_subset$unemploy < 8000, 'steelblue', 'red4')
# Bars whose height is the recorded value (stat = 'identity'), filled with
# the per-row colours computed above.
p <- ggplot(econ_subset, aes(x = date, y = unemploy)) +
  geom_bar(stat = 'identity',
           fill = fillcolor)
Data Visualization

82 of 98

http://nycdatascience.com/part4_en/
Data Visualization

83 of 98

http://nycdatascience.com/part4_en/

# Vertical line from 0 up to `psavert` for each date in rows 300-470 of
# `economics`, with a dark-red point marking the value at the top.
p <- ggplot(economics[300:470, ], aes(x = date, ymax = psavert, ymin = 0))
p <- p + geom_linerange(color = "grey20", size = 0.5)
p <- p + geom_point(aes(y = psavert), color = "red4")
p <- p + theme_bw()
Data Visualization

84 of 98

http://nycdatascience.com/part4_en/
Data Visualization

85 of 98

http://nycdatascience.com/part4_en/

# Colour each observation: blue for dates strictly between 1980-01-01 and
# 1990-01-01, dark red otherwise. The bounds are converted to Date explicitly
# instead of relying on the implicit character-to-Date coercion.
fill.color <- ifelse(economics$date > as.Date('1980-01-01') &
                       economics$date < as.Date('1990-01-01'),
                     'steelblue', 'red4')
p <- ggplot(economics, aes(x = date, ymax = psavert, ymin = 0)) +
  geom_linerange(color = fill.color, size = 0.9) +
  # A single label at (1985-01-01, 13). annotate() draws it once; geom_text()
  # with constant aesthetics would inherit the full data set and draw the
  # same label once per row, giving heavy, over-plotted text.
  annotate("text",
           x = as.Date("1985-01-01", format = '%Y-%m-%d'), y = 13,
           label = "1980'") +
  theme_bw()
Data Visualization

86 of 98

http://nycdatascience.com/part4_en/
Data Visualization

87 of 98

http://nycdatascience.com/part4_en/
Data Visualization

88 of 98

http://nycdatascience.com/part4_en/
Data Visualization

89 of 98

http://nycdatascience.com/part4_en/
Data Visualization

90 of 98

http://nycdatascience.com/part4_en/

library(ggplot2)
# Outline data for the world map as a data frame with long/lat/group columns.
world <- map_data("world")

# Draw the outlines as paths (one per `group`), mark the location
# (long 114, lat 30) with a large '*', and project orthographically.
worldmap <- ggplot(world, aes(x = long, y = lat, group = group))
worldmap <- worldmap + geom_path(color = "gray10", size = 0.3)
worldmap <- worldmap + geom_point(x = 114, y = 30, size = 10, shape = "*")
# Graticule: latitude breaks every 30 degrees (-60..60), longitude breaks
# every 45 degrees (-180..180).
worldmap <- worldmap + scale_y_continuous(breaks = (-2:2) * 30)
worldmap <- worldmap + scale_x_continuous(breaks = (-4:4) * 45)
worldmap <- worldmap + coord_map("ortho", orientation = c(30, 120, 0))
# Grey grid lines on a white panel, with all axis text, ticks and titles
# removed.
worldmap <- worldmap + theme(
  panel.grid.major = element_line(colour = "gray50"),
  panel.background = element_rect(fill = "white"),
  axis.text = element_blank(),
  axis.ticks = element_blank(),
  axis.title = element_blank()
)
Data Visualization

91 of 98

http://nycdatascience.com/part4_en/
Data Visualization

92 of 98

http://nycdatascience.com/part4_en/

map <- map_data('state')
arrests <- USArrests
names(arrests) <- tolower(names(arrests))
arrests$region <- tolower(rownames(USArrests))
usmap <- ggplot(data=arrests) +
geom_map(map =map,aes(map_id = region,fill = murder),color='gray40' ) +
expand_limits(x = map$long, y = map$lat) +
scale_fill_continuous(high='red2',low='white') +
theme_bw() +
theme(panel.grid.major = element_blank(),
panel.background = element_blank(),
axis.text=element_blank(),
axis.ticks=element_blank(),
axis.title=element_blank(),
legend.position = c(0.95,0.28),
legend.background=element_rect(fill="white", colour="white"))+ coord_map('mercat
Data Visualization

93 of 98

http://nycdatascience.com/part4_en/
Data Visualization

94 of 98

http://nycdatascience.com/part4_en/

library(ggmap)
library(XML)
webpage <-'http://data.earthquake.cn/datashare/globeEarthquake_csn.html'
tables <- readHTMLTable(webpage,stringsAsFactors = FALSE)
raw <- tables[[6]]
data <- raw[,c(1,3,4)]
names(data) <- c('date','lan','lon')
data$lan <- as.numeric(data$lan)
data$lon <- as.numeric(data$lon)
data$date <- as.Date(data$date, "%Y-%m-%d")
#Read the map data from Google by the ggmap package, and mark the previous data on the
earthquake <- ggmap(get_googlemap(center = 'china', zoom=4,maptype='terrain'),extent='
geom_point(data=data,aes(x=lon,y=lan),colour = 'red',alpha=0.7)+
theme(legend.position = "none")
Data Visualization

95 of 98

http://nycdatascience.com/part4_en/
Data Visualization

96 of 98

http://nycdatascience.com/part4_en/

library(googleVis)
library(WDI)
DF <- WDI(country=c("CN","RU","BR","ZA","IN",'DE','AU','CA','FR','IT','JP','MX','GB','
M <- gvisMotionChart(DF, idvar="country", timevar="year",
xvar='EN.ATM.CO2E.KT',
yvar='NY.GDP.MKTP.CD')
plot(M)
Data Visualization

97 of 98

http://nycdatascience.com/part4_en/
Data Visualization

98 of 98

http://nycdatascience.com/part4_en/

More Related Content

What's hot

What's hot (9)

Data Visualization With R
Data Visualization With RData Visualization With R
Data Visualization With R
 
Data flow vs. procedural programming: How to put your algorithms into Flink
Data flow vs. procedural programming: How to put your algorithms into FlinkData flow vs. procedural programming: How to put your algorithms into Flink
Data flow vs. procedural programming: How to put your algorithms into Flink
 
QMC: Undergraduate Workshop, Tutorial on 'R' Software - Yawen Guan, Feb 26, 2...
QMC: Undergraduate Workshop, Tutorial on 'R' Software - Yawen Guan, Feb 26, 2...QMC: Undergraduate Workshop, Tutorial on 'R' Software - Yawen Guan, Feb 26, 2...
QMC: Undergraduate Workshop, Tutorial on 'R' Software - Yawen Guan, Feb 26, 2...
 
Maps
MapsMaps
Maps
 
R studio
R studio R studio
R studio
 
Gate-Cs 2010
Gate-Cs 2010Gate-Cs 2010
Gate-Cs 2010
 
DocEng2010 Bilauca Healy - A New Model for Automated Table Layout
DocEng2010 Bilauca Healy - A New Model for Automated Table LayoutDocEng2010 Bilauca Healy - A New Model for Automated Table Layout
DocEng2010 Bilauca Healy - A New Model for Automated Table Layout
 
Report
ReportReport
Report
 
Gis (model questions)777
Gis (model questions)777Gis (model questions)777
Gis (model questions)777
 

Viewers also liked

Viewers also liked (20)

Visualization Methods Overview Presentation Cambridge University Eppler Septe...
Visualization Methods Overview Presentation Cambridge University Eppler Septe...Visualization Methods Overview Presentation Cambridge University Eppler Septe...
Visualization Methods Overview Presentation Cambridge University Eppler Septe...
 
Data Visualization with R
Data Visualization with RData Visualization with R
Data Visualization with R
 
Ranking and Diversity in Recommendations - RecSys Stammtisch at SoundCloud, B...
Ranking and Diversity in Recommendations - RecSys Stammtisch at SoundCloud, B...Ranking and Diversity in Recommendations - RecSys Stammtisch at SoundCloud, B...
Ranking and Diversity in Recommendations - RecSys Stammtisch at SoundCloud, B...
 
Data Analaytics.04. Data visualization
Data Analaytics.04. Data visualizationData Analaytics.04. Data visualization
Data Analaytics.04. Data visualization
 
5 R Tutorial Data Visualization
5 R Tutorial Data Visualization5 R Tutorial Data Visualization
5 R Tutorial Data Visualization
 
Machine Learning Exploration, R, and Data Visualization
Machine Learning Exploration, R, and Data VisualizationMachine Learning Exploration, R, and Data Visualization
Machine Learning Exploration, R, and Data Visualization
 
DATA VISUALIZATION WITH R PACKAGES
DATA VISUALIZATION WITH R PACKAGESDATA VISUALIZATION WITH R PACKAGES
DATA VISUALIZATION WITH R PACKAGES
 
Data visualization with R
Data visualization with RData visualization with R
Data visualization with R
 
Data Visualization in R
Data Visualization in RData Visualization in R
Data Visualization in R
 
Application of online data analytics to a continuous process polybutene unit
Application of online data analytics to a continuous process polybutene unitApplication of online data analytics to a continuous process polybutene unit
Application of online data analytics to a continuous process polybutene unit
 
Data management services outsourcing – data mining, data entry and data proce...
Data management services outsourcing – data mining, data entry and data proce...Data management services outsourcing – data mining, data entry and data proce...
Data management services outsourcing – data mining, data entry and data proce...
 
RMySQL Tutorial For Beginners
RMySQL Tutorial For BeginnersRMySQL Tutorial For Beginners
RMySQL Tutorial For Beginners
 
Use r tutorial part1, introduction to sparkr
Use r tutorial part1, introduction to sparkrUse r tutorial part1, introduction to sparkr
Use r tutorial part1, introduction to sparkr
 
20170126 big data processing
20170126 big data processing20170126 big data processing
20170126 big data processing
 
Data Mining: Data processing
Data Mining: Data processingData Mining: Data processing
Data Mining: Data processing
 
Machine Learning for Recommender Systems MLSS 2015 Sydney
Machine Learning for Recommender Systems MLSS 2015 SydneyMachine Learning for Recommender Systems MLSS 2015 Sydney
Machine Learning for Recommender Systems MLSS 2015 Sydney
 
Deep Learning for Recommender Systems - Budapest RecSys Meetup
Deep Learning for Recommender Systems  - Budapest RecSys MeetupDeep Learning for Recommender Systems  - Budapest RecSys Meetup
Deep Learning for Recommender Systems - Budapest RecSys Meetup
 
Data processing cycle
Data processing cycleData processing cycle
Data processing cycle
 
Ebook - The Guide to Master Data Management
Ebook - The Guide to Master Data Management Ebook - The Guide to Master Data Management
Ebook - The Guide to Master Data Management
 
DATA PROCESSING CYCLE
DATA PROCESSING CYCLEDATA PROCESSING CYCLE
DATA PROCESSING CYCLE
 

Similar to R class 5 -data visualization

Similar to R class 5 -data visualization (20)

Time Series Analysis and Mining with R
Time Series Analysis and Mining with RTime Series Analysis and Mining with R
Time Series Analysis and Mining with R
 
Introduction to R
Introduction to RIntroduction to R
Introduction to R
 
Metric-learn, a Scikit-learn compatible package
Metric-learn, a Scikit-learn compatible packageMetric-learn, a Scikit-learn compatible package
Metric-learn, a Scikit-learn compatible package
 
Python grass
Python grassPython grass
Python grass
 
Tech talk ggplot2
Tech talk   ggplot2Tech talk   ggplot2
Tech talk ggplot2
 
Informatics Practices (new) solution CBSE 2021, Compartment, improvement ex...
Informatics Practices (new) solution CBSE  2021, Compartment,  improvement ex...Informatics Practices (new) solution CBSE  2021, Compartment,  improvement ex...
Informatics Practices (new) solution CBSE 2021, Compartment, improvement ex...
 
R data mining-Time Series Analysis with R
R data mining-Time Series Analysis with RR data mining-Time Series Analysis with R
R data mining-Time Series Analysis with R
 
ggtimeseries-->ggplot2 extensions
ggtimeseries-->ggplot2 extensions ggtimeseries-->ggplot2 extensions
ggtimeseries-->ggplot2 extensions
 
R programming language
R programming languageR programming language
R programming language
 
Linear Logistic regession_Practical.pptx
Linear Logistic regession_Practical.pptxLinear Logistic regession_Practical.pptx
Linear Logistic regession_Practical.pptx
 
Kaggle Winning Solution Xgboost algorithm -- Let us learn from its author
Kaggle Winning Solution Xgboost algorithm -- Let us learn from its authorKaggle Winning Solution Xgboost algorithm -- Let us learn from its author
Kaggle Winning Solution Xgboost algorithm -- Let us learn from its author
 
Seminar PSU 10.10.2014 mme
Seminar PSU 10.10.2014 mmeSeminar PSU 10.10.2014 mme
Seminar PSU 10.10.2014 mme
 
Perm winter school 2014.01.31
Perm winter school 2014.01.31Perm winter school 2014.01.31
Perm winter school 2014.01.31
 
Inria Tech Talk - La classification de données complexes avec MASSICCC
Inria Tech Talk - La classification de données complexes avec MASSICCCInria Tech Talk - La classification de données complexes avec MASSICCC
Inria Tech Talk - La classification de données complexes avec MASSICCC
 
ML .pptx
ML .pptxML .pptx
ML .pptx
 
Next Generation Solutions with Neo4j
Next Generation Solutions with Neo4jNext Generation Solutions with Neo4j
Next Generation Solutions with Neo4j
 
Regression and Classification with R
Regression and Classification with RRegression and Classification with R
Regression and Classification with R
 
The Web map stack on Django
The Web map stack on DjangoThe Web map stack on Django
The Web map stack on Django
 
R Language Introduction
R Language IntroductionR Language Introduction
R Language Introduction
 
Goal Based Data Production with Sim Simeonov
Goal Based Data Production with Sim SimeonovGoal Based Data Production with Sim Simeonov
Goal Based Data Production with Sim Simeonov
 

More from Vivian S. Zhang

Wikipedia: Tuned Predictions on Big Data
Wikipedia: Tuned Predictions on Big DataWikipedia: Tuned Predictions on Big Data
Wikipedia: Tuned Predictions on Big Data
Vivian S. Zhang
 
Streaming Python on Hadoop
Streaming Python on HadoopStreaming Python on Hadoop
Streaming Python on Hadoop
Vivian S. Zhang
 
Max Kuhn's talk on R machine learning
Max Kuhn's talk on R machine learningMax Kuhn's talk on R machine learning
Max Kuhn's talk on R machine learning
Vivian S. Zhang
 
Introducing natural language processing(NLP) with r
Introducing natural language processing(NLP) with rIntroducing natural language processing(NLP) with r
Introducing natural language processing(NLP) with r
Vivian S. Zhang
 

More from Vivian S. Zhang (20)

Why NYC DSA.pdf
Why NYC DSA.pdfWhy NYC DSA.pdf
Why NYC DSA.pdf
 
Career services workshop- Roger Ren
Career services workshop- Roger RenCareer services workshop- Roger Ren
Career services workshop- Roger Ren
 
Nycdsa wordpress guide book
Nycdsa wordpress guide bookNycdsa wordpress guide book
Nycdsa wordpress guide book
 
We're so skewed_presentation
We're so skewed_presentationWe're so skewed_presentation
We're so skewed_presentation
 
Wikipedia: Tuned Predictions on Big Data
Wikipedia: Tuned Predictions on Big DataWikipedia: Tuned Predictions on Big Data
Wikipedia: Tuned Predictions on Big Data
 
A Hybrid Recommender with Yelp Challenge Data
A Hybrid Recommender with Yelp Challenge Data A Hybrid Recommender with Yelp Challenge Data
A Hybrid Recommender with Yelp Challenge Data
 
Kaggle Top1% Solution: Predicting Housing Prices in Moscow
Kaggle Top1% Solution: Predicting Housing Prices in Moscow Kaggle Top1% Solution: Predicting Housing Prices in Moscow
Kaggle Top1% Solution: Predicting Housing Prices in Moscow
 
Data mining with caret package
Data mining with caret packageData mining with caret package
Data mining with caret package
 
Xgboost
XgboostXgboost
Xgboost
 
Streaming Python on Hadoop
Streaming Python on HadoopStreaming Python on Hadoop
Streaming Python on Hadoop
 
Xgboost
XgboostXgboost
Xgboost
 
Nyc open-data-2015-andvanced-sklearn-expanded
Nyc open-data-2015-andvanced-sklearn-expandedNyc open-data-2015-andvanced-sklearn-expanded
Nyc open-data-2015-andvanced-sklearn-expanded
 
Nycdsa ml conference slides march 2015
Nycdsa ml conference slides march 2015 Nycdsa ml conference slides march 2015
Nycdsa ml conference slides march 2015
 
THE HACK ON JERSEY CITY CONDO PRICES explore trends in public data
THE HACK ON JERSEY CITY CONDO PRICES explore trends in public dataTHE HACK ON JERSEY CITY CONDO PRICES explore trends in public data
THE HACK ON JERSEY CITY CONDO PRICES explore trends in public data
 
Max Kuhn's talk on R machine learning
Max Kuhn's talk on R machine learningMax Kuhn's talk on R machine learning
Max Kuhn's talk on R machine learning
 
Winning data science competitions, presented by Owen Zhang
Winning data science competitions, presented by Owen ZhangWinning data science competitions, presented by Owen Zhang
Winning data science competitions, presented by Owen Zhang
 
Using Machine Learning to aid Journalism at the New York Times
Using Machine Learning to aid Journalism at the New York TimesUsing Machine Learning to aid Journalism at the New York Times
Using Machine Learning to aid Journalism at the New York Times
 
Introducing natural language processing(NLP) with r
Introducing natural language processing(NLP) with rIntroducing natural language processing(NLP) with r
Introducing natural language processing(NLP) with r
 
Bayesian models in r
Bayesian models in rBayesian models in r
Bayesian models in r
 
Natural Language Processing(SupStat Inc)
Natural Language Processing(SupStat Inc)Natural Language Processing(SupStat Inc)
Natural Language Processing(SupStat Inc)
 

Recently uploaded

Making and Justifying Mathematical Decisions.pdf
Making and Justifying Mathematical Decisions.pdfMaking and Justifying Mathematical Decisions.pdf
Making and Justifying Mathematical Decisions.pdf
Chris Hunter
 
Activity 01 - Artificial Culture (1).pdf
Activity 01 - Artificial Culture (1).pdfActivity 01 - Artificial Culture (1).pdf
Activity 01 - Artificial Culture (1).pdf
ciinovamais
 
The basics of sentences session 2pptx copy.pptx
The basics of sentences session 2pptx copy.pptxThe basics of sentences session 2pptx copy.pptx
The basics of sentences session 2pptx copy.pptx
heathfieldcps1
 
Russian Escort Service in Delhi 11k Hotel Foreigner Russian Call Girls in Delhi
Russian Escort Service in Delhi 11k Hotel Foreigner Russian Call Girls in DelhiRussian Escort Service in Delhi 11k Hotel Foreigner Russian Call Girls in Delhi
Russian Escort Service in Delhi 11k Hotel Foreigner Russian Call Girls in Delhi
kauryashika82
 

Recently uploaded (20)

Class 11th Physics NEET formula sheet pdf
Class 11th Physics NEET formula sheet pdfClass 11th Physics NEET formula sheet pdf
Class 11th Physics NEET formula sheet pdf
 
Energy Resources. ( B. Pharmacy, 1st Year, Sem-II) Natural Resources
Energy Resources. ( B. Pharmacy, 1st Year, Sem-II) Natural ResourcesEnergy Resources. ( B. Pharmacy, 1st Year, Sem-II) Natural Resources
Energy Resources. ( B. Pharmacy, 1st Year, Sem-II) Natural Resources
 
How to Give a Domain for a Field in Odoo 17
How to Give a Domain for a Field in Odoo 17How to Give a Domain for a Field in Odoo 17
How to Give a Domain for a Field in Odoo 17
 
Making and Justifying Mathematical Decisions.pdf
Making and Justifying Mathematical Decisions.pdfMaking and Justifying Mathematical Decisions.pdf
Making and Justifying Mathematical Decisions.pdf
 
Activity 01 - Artificial Culture (1).pdf
Activity 01 - Artificial Culture (1).pdfActivity 01 - Artificial Culture (1).pdf
Activity 01 - Artificial Culture (1).pdf
 
This PowerPoint helps students to consider the concept of infinity.
This PowerPoint helps students to consider the concept of infinity.This PowerPoint helps students to consider the concept of infinity.
This PowerPoint helps students to consider the concept of infinity.
 
TỔNG ÔN TẬP THI VÀO LỚP 10 MÔN TIẾNG ANH NĂM HỌC 2023 - 2024 CÓ ĐÁP ÁN (NGỮ Â...
TỔNG ÔN TẬP THI VÀO LỚP 10 MÔN TIẾNG ANH NĂM HỌC 2023 - 2024 CÓ ĐÁP ÁN (NGỮ Â...TỔNG ÔN TẬP THI VÀO LỚP 10 MÔN TIẾNG ANH NĂM HỌC 2023 - 2024 CÓ ĐÁP ÁN (NGỮ Â...
TỔNG ÔN TẬP THI VÀO LỚP 10 MÔN TIẾNG ANH NĂM HỌC 2023 - 2024 CÓ ĐÁP ÁN (NGỮ Â...
 
The basics of sentences session 2pptx copy.pptx
The basics of sentences session 2pptx copy.pptxThe basics of sentences session 2pptx copy.pptx
The basics of sentences session 2pptx copy.pptx
 
Explore beautiful and ugly buildings. Mathematics helps us create beautiful d...
Explore beautiful and ugly buildings. Mathematics helps us create beautiful d...Explore beautiful and ugly buildings. Mathematics helps us create beautiful d...
Explore beautiful and ugly buildings. Mathematics helps us create beautiful d...
 
Unit-IV; Professional Sales Representative (PSR).pptx
Unit-IV; Professional Sales Representative (PSR).pptxUnit-IV; Professional Sales Representative (PSR).pptx
Unit-IV; Professional Sales Representative (PSR).pptx
 
psychiatric nursing HISTORY COLLECTION .docx
psychiatric  nursing HISTORY  COLLECTION  .docxpsychiatric  nursing HISTORY  COLLECTION  .docx
psychiatric nursing HISTORY COLLECTION .docx
 
PROCESS RECORDING FORMAT.docx
PROCESS      RECORDING        FORMAT.docxPROCESS      RECORDING        FORMAT.docx
PROCESS RECORDING FORMAT.docx
 
ComPTIA Overview | Comptia Security+ Book SY0-701
ComPTIA Overview | Comptia Security+ Book SY0-701ComPTIA Overview | Comptia Security+ Book SY0-701
ComPTIA Overview | Comptia Security+ Book SY0-701
 
Basic Civil Engineering first year Notes- Chapter 4 Building.pptx
Basic Civil Engineering first year Notes- Chapter 4 Building.pptxBasic Civil Engineering first year Notes- Chapter 4 Building.pptx
Basic Civil Engineering first year Notes- Chapter 4 Building.pptx
 
Holdier Curriculum Vitae (April 2024).pdf
Holdier Curriculum Vitae (April 2024).pdfHoldier Curriculum Vitae (April 2024).pdf
Holdier Curriculum Vitae (April 2024).pdf
 
Z Score,T Score, Percential Rank and Box Plot Graph
Z Score,T Score, Percential Rank and Box Plot GraphZ Score,T Score, Percential Rank and Box Plot Graph
Z Score,T Score, Percential Rank and Box Plot Graph
 
Russian Escort Service in Delhi 11k Hotel Foreigner Russian Call Girls in Delhi
Russian Escort Service in Delhi 11k Hotel Foreigner Russian Call Girls in DelhiRussian Escort Service in Delhi 11k Hotel Foreigner Russian Call Girls in Delhi
Russian Escort Service in Delhi 11k Hotel Foreigner Russian Call Girls in Delhi
 
Food Chain and Food Web (Ecosystem) EVS, B. Pharmacy 1st Year, Sem-II
Food Chain and Food Web (Ecosystem) EVS, B. Pharmacy 1st Year, Sem-IIFood Chain and Food Web (Ecosystem) EVS, B. Pharmacy 1st Year, Sem-II
Food Chain and Food Web (Ecosystem) EVS, B. Pharmacy 1st Year, Sem-II
 
Sociology 101 Demonstration of Learning Exhibit
Sociology 101 Demonstration of Learning ExhibitSociology 101 Demonstration of Learning Exhibit
Sociology 101 Demonstration of Learning Exhibit
 
Key note speaker Neum_Admir Softic_ENG.pdf
Key note speaker Neum_Admir Softic_ENG.pdfKey note speaker Neum_Admir Softic_ENG.pdf
Key note speaker Neum_Admir Softic_ENG.pdf
 

R class 5 -data visualization

  • 1. Data Visualization 1 of 98 http://nycdatascience.com/part4_en/
  • 2. Data Visualization 2 of 98 http://nycdatascience.com/part4_en/
  • 3. Data Visualization 3 of 98 http://nycdatascience.com/part4_en/
  • 4. Data Visualization 4 of 98 http://nycdatascience.com/part4_en/ data <- read.table('data/anscombe.txt',T) data <- data[,-1] head(data) 1 2 3 4 5 6 x1 10 8 13 9 11 14 x2 10 8 13 9 11 14 x3 x4 y1 y2 y3 y4 10 8 8.04 9.14 7.46 6.58 8 8 6.95 8.14 6.77 5.76 13 8 7.58 8.74 12.74 7.71 9 8 8.81 8.77 7.11 8.84 11 8 8.33 9.26 7.81 8.47 14 8 9.96 8.10 8.84 7.04
  • 5. Data Visualization 5 of 98 http://nycdatascience.com/part4_en/ colMeans(data) x1 x2 x3 x4 y1 y2 y3 y4 9.0 9.0 9.0 9.0 7.5 7.5 7.5 7.5 sapply(1:4,function(x) cor(data[,x],data[,x+4])) [1] 0.816 0.816 0.816 0.817
  • 6. Data Visualization 6 of 98 http://nycdatascience.com/part4_en/
  • 7. Data Visualization 7 of 98 http://nycdatascience.com/part4_en/
  • 8. Data Visualization 8 of 98 http://nycdatascience.com/part4_en/
  • 9. Data Visualization 9 of 98 http://nycdatascience.com/part4_en/
  • 10. Data Visualization 10 of 98 http://nycdatascience.com/part4_en/
  • 11. Data Visualization 11 of 98 http://nycdatascience.com/part4_en/ plot(cars$dist~cars$speed)
  • 12. Data Visualization 12 of 98 http://nycdatascience.com/part4_en/ plot(cars$dist,type='l')
  • 13. Data Visualization 13 of 98 http://nycdatascience.com/part4_en/ plot(cars$dist,type='h')
  • 14. Data Visualization 14 of 98 http://nycdatascience.com/part4_en/ hist(cars$dist)
  • 15. Data Visualization 15 of 98 http://nycdatascience.com/part4_en/ library(lattice) num <- sample(1:3,size=50,replace=T) barchart(table(num))
  • 16. Data Visualization 16 of 98 http://nycdatascience.com/part4_en/ qqmath(rnorm(100))
  • 17. Data Visualization 17 of 98 http://nycdatascience.com/part4_en/ stripplot(~ Sepal.Length | Species, data = iris,layout=c(1,3))
  • 18. Data Visualization 18 of 98 http://nycdatascience.com/part4_en/ densityplot(~ Sepal.Length, groups=Species, data = iris,plot.points=FALSE)
  • 19. Data Visualization 19 of 98 http://nycdatascience.com/part4_en/ bwplot(Species~ Sepal.Length, data = iris)
  • 20. Data Visualization 20 of 98 http://nycdatascience.com/part4_en/ xyplot(Sepal.Width~ Sepal.Length, groups=Species, data = iris)
  • 21. Data Visualization 21 of 98 http://nycdatascience.com/part4_en/ splom(iris[1:4])
  • 22. Data Visualization 22 of 98 http://nycdatascience.com/part4_en/ histogram(~ Sepal.Length | Species, data = iris,layout=c(1,3))
  • 23. Data Visualization 23 of 98 http://nycdatascience.com/part4_en/ library(plyr) func3d <- function(x,y) { sin(x^2/2 - y^2/4) * cos(2*x - exp(y)) } vec1 <- vec2 <- seq(0,2,length=30) para <- expand.grid(x=vec1,y=vec2) result6 <- mdply(.data=para,.fun=func3d)
  • 24. Data Visualization 24 of 98 http://nycdatascience.com/part4_en/ library(lattice) wireframe(V1~x*y,data=result6,scales = list(arrows = FALSE), drape = TRUE, colorkey = F)
  • 25. Data Visualization 25 of 98 http://nycdatascience.com/part4_en/ library(ggplot2) p <- ggplot(data=mpg,mapping=aes(x=cty,y=hwy)) + geom_point() print(p)
  • 26. Data Visualization 26 of 98 http://nycdatascience.com/part4_en/ summary(p) data: manufacturer, model, displ, year, cyl, trans, drv, cty, hwy, fl, class [234x11] mapping: x = cty, y = hwy faceting: facet_null() ----------------------------------geom_point: na.rm = FALSE stat_identity: position_identity: (width = NULL, height = NULL)
  • 27. Data Visualization 27 of 98 http://nycdatascience.com/part4_en/ p <- ggplot(data=mpg,mapping=aes(x=cty,y=hwy,colour=factor(year))) p <- p + geom_point() print(p)
  • 28. Data Visualization 28 of 98 http://nycdatascience.com/part4_en/ p <- ggplot(data=mpg,mapping=aes(x=cty,y=hwy,colour=factor(year))) p <- p + geom_smooth() print(p)
  • 29. Data Visualization 29 of 98 http://nycdatascience.com/part4_en/ p <- ggplot(data=mpg,mapping=aes(x=cty,y=hwy)) + geom_point(aes(colour=factor(year))) + geom_smooth()
  • 30. Data Visualization 30 of 98 http://nycdatascience.com/part4_en/
  • 31. Data Visualization 31 of 98 http://nycdatascience.com/part4_en/ p <- ggplot(data=mpg,mapping=aes(x=cty,y=hwy)) + geom_point(aes(colour=factor(year))) + geom_smooth() + scale_color_manual(values=c('blue2','red4'))
  • 32. Data Visualization 32 of 98 http://nycdatascience.com/part4_en/
  • 33. Data Visualization 33 of 98 http://nycdatascience.com/part4_en/ p <- ggplot(data=mpg,mapping=aes(x=cty,y=hwy)) + geom_point(aes(colour=factor(year))) + geom_smooth() + scale_color_manual(values=c('blue2','red4')) + facet_wrap(~ year,ncol=1)
  • 34. Data Visualization 34 of 98 http://nycdatascience.com/part4_en/
  • 35. Data Visualization 35 of 98 http://nycdatascience.com/part4_en/ p <- ggplot(data=mpg, mapping=aes(x=cty,y=hwy)) + geom_point(aes(colour=class,size=displ), alpha=0.5,position = "jitter") + geom_smooth() + scale_size_continuous(range = c(4, 10)) + facet_wrap(~ year,ncol=1) + opts(title='Vehicle model and fuel consumption') + labs(y='Highway miles per gallon', x='Urban miles per gallon', size='Displacement', colour = 'Model')
  • 36. Data Visualization 36 of 98 http://nycdatascience.com/part4_en/
  • 37. Data Visualization 37 of 98 http://nycdatascience.com/part4_en/ p <- ggplot(data=mpg, mapping=aes(x=cty,y=hwy)) + geom_point(aes(colour=factor(year),size=displ), alpha=0.5,position = "jitter")+ stat_smooth()+ scale_color_manual(values =c('steelblue','red4'))+ scale_size_continuous(range = c(4, 10))
  • 38. Data Visualization 38 of 98 http://nycdatascience.com/part4_en/
  • 39. Data Visualization 39 of 98 http://nycdatascience.com/part4_en/ library(ggplot2) p <- ggplot(data=iris,aes(x=Sepal.Length))+ geom_histogram() print(p)
  • 40. Data Visualization 40 of 98 http://nycdatascience.com/part4_en/ p <- ggplot(iris,aes(x=Sepal.Length))+ geom_histogram(binwidth=0.1, # Set the bin width fill='skyblue', # Set the fill color colour='black') # Set the border color
  • 41. Data Visualization 41 of 98 http://nycdatascience.com/part4_en/
  • 42. Data Visualization 42 of 98 http://nycdatascience.com/part4_en/ p <- ggplot(iris,aes(x=Sepal.Length)) + geom_histogram(aes(y=..density..), fill='skyblue', color='black') + geom_density(color='black', linetype=2,adjust=2)
  • 43. Data Visualization 43 of 98 http://nycdatascience.com/part4_en/
  • 44. Data Visualization 44 of 98 http://nycdatascience.com/part4_en/ p <- ggplot(iris,aes(x=Sepal.Length)) + geom_histogram(aes(y=..density..), # Note: set y to relative frequency fill='gray60', color='gray') + geom_density(color='black',linetype=1,adjust=0.5) + geom_density(color='black',linetype=2,adjust=1) + geom_density(color='black',linetype=3,adjust=2)
  • 45. Data Visualization 45 of 98 http://nycdatascience.com/part4_en/
  • 46. Data Visualization 46 of 98 http://nycdatascience.com/part4_en/ p <- ggplot(iris,aes(x=Sepal.Length,fill=Species)) + geom_density(alpha=0.5,color='gra print(p)
  • 47. Data Visualization 47 of 98 http://nycdatascience.com/part4_en/ p <- ggplot(iris,aes(x=Species,y=Sepal.Length,fill=Species)) + geom_boxplot() print(p)
  • 48. Data Visualization 48 of 98 http://nycdatascience.com/part4_en/ p <- ggplot(iris,aes(x=Species,y=Sepal.Length,fill=Species)) + geom_violin() print(p)
  • 49. Data Visualization 49 of 98 http://nycdatascience.com/part4_en/ p <- ggplot(iris,aes(x=Species,y=Sepal.Length, fill=Species)) + geom_violin(fill='gray',alpha=0.5) + geom_dotplot(binaxis = "y", stackdir = "center") print(p)
  • 50. Data Visualization 50 of 98 http://nycdatascience.com/part4_en/
  • 51. Data Visualization 51 of 98 http://nycdatascience.com/part4_en/ p <- ggplot(mpg,aes(x=class)) + geom_bar() print(p)
  • 52. Data Visualization 52 of 98 http://nycdatascience.com/part4_en/ mpg$year <- factor(mpg$year) p <- ggplot(mpg,aes(x=class,fill=year)) + geom_bar(color='black')
  • 53. Data Visualization 53 of 98 http://nycdatascience.com/part4_en/
  • 54. Data Visualization 54 of 98 http://nycdatascience.com/part4_en/ p <- ggplot(mpg,aes(x=class,fill=year)) + geom_bar(color='black', position=position_dodge())
  • 55. Data Visualization 55 of 98 http://nycdatascience.com/part4_en/
  • 56. Data Visualization 56 of 98 http://nycdatascience.com/part4_en/ p <- ggplot(mpg, aes(x = factor(1), fill = factor(class))) + geom_bar(width = 1)+ coord_polar(theta = "y")
  • 57. Data Visualization 57 of 98 http://nycdatascience.com/part4_en/
  • 58. Data Visualization 58 of 98 http://nycdatascience.com/part4_en/ set.seed(1) # Randomly generate 100 wind directions, and divide them into 16 intervals. dir <- cut_interval(runif(100,0,360),n=16) # Randomly generate 100 wind speeds, and divide them into 4 intensities. mag <- cut_interval(rgamma(100,15),4) sample <- data.frame(dir=dir,mag=mag) # Map wind direction to X-axis, frequency to Y-axis and speed to fill colors. Transfor p <- ggplot(sample,aes(x=dir,fill=mag)) + geom_bar()+ coord_polar()
  • 59. Data Visualization 59 of 98 http://nycdatascience.com/part4_en/
  • 60. Data Visualization 60 of 98 http://nycdatascience.com/part4_en/
  • 61. Data Visualization 61 of 98 http://nycdatascience.com/part4_en/
  • 62. Data Visualization 62 of 98 http://nycdatascience.com/part4_en/ data <- read.csv('data/soft_impact.csv',T) library(reshape2) data.melt <- melt(data,id='Year') p <- ggplot(data.melt,aes(x=Year,y=value, group=variable,fill=variable)) + geom_area(color='black',size=0.3, position=position_fill()) + scale_fill_brewer()
  • 63. Data Visualization 63 of 98 http://nycdatascience.com/part4_en/
  • 64. Data Visualization 64 of 98 http://nycdatascience.com/part4_en/
  • 65. Data Visualization 65 of 98 http://nycdatascience.com/part4_en/ p <- ggplot(data=mpg,aes(x=cty,y=hwy)) + geom_point() print(p)
  • 66. Data Visualization 66 of 98 http://nycdatascience.com/part4_en/ mpg$year <- factor(mpg$year) p <- ggplot(data=mpg,aes(x=cty,y=hwy)) + geom_point(aes(color=year)) print(p)
  • 67. Data Visualization 67 of 98 http://nycdatascience.com/part4_en/ mpg$year <- factor(mpg$year) p <- ggplot(data=mpg,aes(x=cty,y=hwy)) + geom_point(aes(color=year,shape=year)) print(p)
  • 68. Data Visualization 68 of 98 http://nycdatascience.com/part4_en/ p <- ggplot(data=mpg,aes(x=cty,y=hwy)) + geom_point(aes(color=year),alpha=0.5,position print(p)
  • 69. Data Visualization 69 of 98 http://nycdatascience.com/part4_en/ p <- ggplot(data=mpg,aes(x=cty,y=hwy)) + geom_point(aes(color=year),alpha=0.5,position = "jitter") + geom_smooth(method='lm') print(p)
  • 70. Data Visualization 70 of 98 http://nycdatascience.com/part4_en/ p <- ggplot(data=mpg,aes(x=cty,y=hwy)) + geom_point(aes(color=year,size=displ),alpha=0.5,position = "jitter") + geom_smooth(method='lm') + scale_size_continuous(range = c(4, 10))
  • 71. Data Visualization 71 of 98 http://nycdatascience.com/part4_en/
  • 72. Data Visualization 72 of 98 http://nycdatascience.com/part4_en/ p <- ggplot(data=mpg,aes(x=cty,y=hwy)) + geom_point(aes(colour=class,size=displ), alpha=0.5,position = "jitter") + geom_smooth() + scale_size_continuous(range = c(4, 10)) + facet_wrap(~ year,ncol=1)
  • 73. Data Visualization 73 of 98 http://nycdatascience.com/part4_en/
  • 74. Data Visualization 74 of 98 http://nycdatascience.com/part4_en/
  • 75. Data Visualization 75 of 98 http://nycdatascience.com/part4_en/
  • 76. Data Visualization 76 of 98 http://nycdatascience.com/part4_en/
  • 77. Data Visualization 77 of 98 http://nycdatascience.com/part4_en/
  • 78. Data Visualization 78 of 98 http://nycdatascience.com/part4_en/
  • 79. Data Visualization 79 of 98 http://nycdatascience.com/part4_en/
  • 80. Data Visualization 80 of 98 http://nycdatascience.com/part4_en/
  • 81. Data Visualization 81 of 98 http://nycdatascience.com/part4_en/ fillcolor <- ifelse(economics[440:470,'unemploy']<8000,'steelblue','red4') p <- ggplot(economics[440:470,],aes(x=date,y=unemploy)) + geom_bar(stat='identity', fill=fillcolor)
  • 82. Data Visualization 82 of 98 http://nycdatascience.com/part4_en/
  • 83. Data Visualization 83 of 98 http://nycdatascience.com/part4_en/ p <- ggplot(economics[300:470,],aes(x=date,ymax=psavert,ymin=0)) + geom_linerange(color='grey20',size=0.5) + geom_point(aes(y=psavert),color='red4') + theme_bw()
  • 84. Data Visualization 84 of 98 http://nycdatascience.com/part4_en/
  • 85. Data Visualization 85 of 98 http://nycdatascience.com/part4_en/ fill.color <- ifelse(economics$date > '1980-01-01' & economics$date < '1990-01-01', 'steelblue','red4') p <- ggplot(economics,aes(x=date,ymax=psavert,ymin=0)) + geom_linerange(color=fill.color,size=0.9) + geom_text(aes(x=as.Date("1985-01-01",'%Y-%m-%d'),y=13),label="1980'") + theme_bw()
  • 86. Data Visualization 86 of 98 http://nycdatascience.com/part4_en/
  • 87. Data Visualization 87 of 98 http://nycdatascience.com/part4_en/
  • 88. Data Visualization 88 of 98 http://nycdatascience.com/part4_en/
  • 89. Data Visualization 89 of 98 http://nycdatascience.com/part4_en/
  • 90. Data Visualization 90 of 98 http://nycdatascience.com/part4_en/ library(ggplot2) world <- map_data("world") worldmap <- ggplot(world, aes(x=long, y=lat, group=group)) + geom_path(color='gray10',size=0.3) + geom_point(x=114,y=30,size=10,shape='*') + scale_y_continuous(breaks=(-2:2) * 30) + scale_x_continuous(breaks=(-4:4) * 45) + coord_map("ortho", orientation=c(30, 120, 0)) + theme(panel.grid.major = element_line(colour = "gray50"), panel.background = element_rect(fill = "white"), axis.text=element_blank(), axis.ticks=element_blank(), axis.title=element_blank())
  • 91. Data Visualization 91 of 98 http://nycdatascience.com/part4_en/
  • 92. Data Visualization 92 of 98 http://nycdatascience.com/part4_en/ map <- map_data('state') arrests <- USArrests names(arrests) <- tolower(names(arrests)) arrests$region <- tolower(rownames(USArrests)) usmap <- ggplot(data=arrests) + geom_map(map =map,aes(map_id = region,fill = murder),color='gray40' ) + expand_limits(x = map$long, y = map$lat) + scale_fill_continuous(high='red2',low='white') + theme_bw() + theme(panel.grid.major = element_blank(), panel.background = element_blank(), axis.text=element_blank(), axis.ticks=element_blank(), axis.title=element_blank(), legend.position = c(0.95,0.28), legend.background=element_rect(fill="white", colour="white"))+ coord_map('mercat
  • 93. Data Visualization 93 of 98 http://nycdatascience.com/part4_en/
  • 94. Data Visualization 94 of 98 http://nycdatascience.com/part4_en/ library(ggmap) library(XML) webpage <-'http://data.earthquake.cn/datashare/globeEarthquake_csn.html' tables <- readHTMLTable(webpage,stringsAsFactors = FALSE) raw <- tables[[6]] data <- raw[,c(1,3,4)] names(data) <- c('date','lan','lon') data$lan <- as.numeric(data$lan) data$lon <- as.numeric(data$lon) data$date <- as.Date(data$date, "%Y-%m-%d") #Read the map data from Google by the ggmap package, and mark the previous data on the earthquake <- ggmap(get_googlemap(center = 'china', zoom=4,maptype='terrain'),extent=' geom_point(data=data,aes(x=lon,y=lan),colour = 'red',alpha=0.7)+ theme(legend.position = "none")
  • 95. Data Visualization 95 of 98 http://nycdatascience.com/part4_en/
  • 96. Data Visualization 96 of 98 http://nycdatascience.com/part4_en/ library(googleVis) library(WDI) DF <- WDI(country=c("CN","RU","BR","ZA","IN",'DE','AU','CA','FR','IT','JP','MX','GB',' M <- gvisMotionChart(DF, idvar="country", timevar="year", xvar='EN.ATM.CO2E.KT', yvar='NY.GDP.MKTP.CD') plot(M)
  • 97. Data Visualization 97 of 98 http://nycdatascience.com/part4_en/
  • 98. Data Visualization 98 of 98 http://nycdatascience.com/part4_en/