SlideShare a Scribd company logo
1 of 31
Download to read offline
Chapter-02-R-Tutorial file:///C:/Users/rk215/Documents/R-Tutorial.html#Learning_R_to_do_D...
1 of 31 10/18/2020, 4:30 AM
https://www.r-project.org/
Essential tools:
RGUI Basic R processing,
RScript to run batch scripts,
RCMD (to install in Unix/Linux) variants
RStudio is a compelling tool – though defer RStudio until you know R very well –
tools can limit you – it is a bad idea to start with RStudio to learn the
language, IMHO.
Reference Sites: (that I often use; don’t leave home without them)
https://www.r-bloggers.com
https://nabble.com/
http://rfunction.com
https://stackoverflow.com/
https://stats.stackexchange.com/
https://www.datasciencemadesimple.com/
http://www.r-tutor.com/
There are thousands, if not more, of useful R sites you can learn from.
Again, learn in order to do what you want to get done… otherwise you will be sucked into
the vortex.
…
Chapter-02-R-Tutorial file:///C:/Users/rk215/Documents/R-Tutorial.html#Learning_R_to_do_D...
2 of 31 10/18/2020, 4:30 AM
ls()
## character(0)
X<-5
7->Y
ifelse(X<Y,'X is Less than Y', 'X is atleast equal to Y')
## [1] "X is Less than Y"
vec<-1:13
is.vector(vec)
## [1] TRUE
vec[4]
## [1] 4
by2<-seq(1,13,2)
(xy2<-seq(1,13,2))
## [1] 1 3 5 7 9 11 13
xy2[4]
## [1] 7
Chapter-02-R-Tutorial file:///C:/Users/rk215/Documents/R-Tutorial.html#Learning_R_to_do_D...
3 of 31 10/18/2020, 4:30 AM
is.vector(xy2[4])
## [1] TRUE
length(xy2[4])
## [1] 1
vec[vec %in% by2]
## [1] 1 3 5 7 9 11 13
(xyeven<-seq(0,13,2))
## [1] 0 2 4 6 8 10 12
length(vec)
## [1] 13
mean(vec)
## [1] 7
sd(vec)
## [1] 3.89444
sum(vec)
## [1] 91
cumprod(vec)
## [1] 1 2 6 24 120 720
## [7] 5040 40320 362880 3628800 39916800 479001600
## [13] 6227020800
Chapter-02-R-Tutorial file:///C:/Users/rk215/Documents/R-Tutorial.html#Learning_R_to_do_D...
4 of 31 10/18/2020, 4:30 AM
L<-list(X=5,reason="I like 5")
L
## $X
## [1] 5
##
## $reason
## [1] "I like 5"
# Build a 2 x 5 matrix from five zeros followed by the values 1..5.
# matrix() fills column by column unless byrow = TRUE, so the zeros occupy
# the first cells going down the columns and 1..5 fill the remaining ones.
# (Earlier error fixed: mx now holds the intended values, not ALL zeros.)
mx <- matrix(c(rep(0, 5), 1:5), nrow = 2, ncol = 5)
mx
## [,1] [,2] [,3] [,4] [,5]
## [1,] 0 0 0 2 4
## [2,] 0 0 1 3 5
mxbyr<-matrix(c(rep(0,5),seq(1:5)),nrow=2,ncol=5,byrow=TRUE)
mxbyr
## [,1] [,2] [,3] [,4] [,5]
## [1,] 0 0 0 0 0
## [2,] 1 2 3 4 5
# Two-row data frame with a numeric column `Pop` and a character column
# `Continent`. Built with data.frame() directly: the original structure()
# call named the list elements `population`/`continents` and then overrode
# them through `.Names`, so those inner names never reached the result.
# NOTE(review): the units of `Pop` are not stated in this document.
dd <- data.frame(
  Pop = c(4.560667108, 1.275920972),
  Continent = c('Asia', 'Africa'),
  stringsAsFactors = FALSE
)
dd
## Pop Continent
## 1 4.560667 Asia
## 2 1.275921 Africa
Chapter-02-R-Tutorial file:///C:/Users/rk215/Documents/R-Tutorial.html#Learning_R_to_do_D...
5 of 31 10/18/2020, 4:30 AM
# Append one observation as a one-row data frame so every column keeps its
# type. Appending c(4.1570842, 'Oceania') instead would coerce the number to
# a string (c() builds a single character vector), turning the whole Pop
# column into character.
dd <- rbind(dd, data.frame(Pop = 4.1570842, Continent = 'Oceania',
                           stringsAsFactors = FALSE))
dd
## Pop Continent
## 1 4.560667108 Asia
## 2 1.275920972 Africa
## 3 4.1570842 Oceania
# Add a numeric density column, one value per existing row.
dd <- cbind(dd, density = c(100, 36, 4))
# Append a placeholder row with every column spelled out. The shorter form
# rbind(dd, c(0, 'pangea')) passes a two-element character vector that is
# recycled across the three columns, which also turns density into character.
dd <- rbind(dd, data.frame(Pop = 0, Continent = 'pangea', density = 0,
                           stringsAsFactors = FALSE))
dd
## Pop Continent density
## 1 4.560667108 Asia 100
## 2 1.275920972 Africa 36
## 3 4.1570842 Oceania 4
## 4 0 pangea 0
which(dd$Pop==0)
## [1] 4
# Keep every row whose Pop is not 0. A logical mask is used instead of
# dd[-which(...), ] because a negative index built from an empty which()
# result selects zero rows, i.e. it would silently empty the data frame when
# no row matches. Rows with a missing Pop are kept, as which() also did.
dd <- dd[is.na(dd$Pop) | dd$Pop != 0, ]
dd
## Pop Continent density
## 1 4.560667108 Asia 100
## 2 1.275920972 Africa 36
## 3 4.1570842 Oceania 4
# One reference observation per species: leg count, a 0/1 can-fly flag,
# a height and a colour.
birds <- data.frame(
  nlegs = rep(2, 5),
  can_fly = c(0, 1, 1, 0, 1),
  height = c(25, 40, 20, 150, 10),
  color = c('black', 'black', 'blue', 'black', 'brown')
)
# Same table with the species label attached as a fifth column.
species_labels <- c('chicken', 'vulture', 'parrot', 'ostrich', 'sparrow')
birds2 <- cbind(birds, species = species_labels)
birds2
Chapter-02-R-Tutorial file:///C:/Users/rk215/Documents/R-Tutorial.html#Learning_R_to_do_D...
6 of 31 10/18/2020, 4:30 AM
## nlegs can_fly height color species
## 1 2 0 25 black chicken
## 2 2 1 40 black vulture
## 3 2 1 20 blue parrot
## 4 2 0 150 black ostrich
## 5 2 1 10 brown sparrow
…
# Candidate colours to draw from for each species.
chickencolors <- c('black', 'white', 'red', 'mixed')
vulturecolors <- c('grey', 'black', 'white')
parrotcolors <- c('teal', 'green', 'blue', 'mixed', 'pink')
ostrichcolors <- c('grey', 'black')
sparrowcolors <- c('dark cement', 'brown')

# Simulated heights: draw 10 normal variates around each species' mean and
# keep 5 of them. No seed is set here, so the values differ from run to run.
hchicken <- sample(rnorm(10, 25, 6), 5)
hvulture <- sample(rnorm(10, 40, 4), 5)
hparrot <- sample(rnorm(10, 20, 2), 5)
hostrich <- sample(rnorm(10, 150, 20), 5)
hsparrow <- sample(rnorm(10, 10, 1), 5)

# Build one block of simulated observations for a single species.
#   species : label repeated on every row
#   can_fly : 0/1 flag repeated on every row
#   height  : numeric vector, one value per row to generate
#   colors  : colours to sample from (with replacement)
# Returns a data frame with the same five columns as birds2.
simulate_species <- function(species, can_fly, height, colors) {
  n <- length(height)
  data.frame(
    nlegs = rep(2, n),
    can_fly = rep(can_fly, n),
    height = height,
    color = sample(colors, n, replace = TRUE),
    species = rep(species, n)
  )
}

# Stack the five reference birds and the simulated rows for each species.
cdset <- rbind(
  birds2,
  simulate_species('chicken', 0, hchicken, chickencolors),
  simulate_species('vulture', 1, hvulture, vulturecolors),
  simulate_species('parrot', 1, hparrot, parrotcolors),
  simulate_species('ostrich', 0, hostrich, ostrichcolors),
  simulate_species('sparrow', 1, hsparrow, sparrowcolors)
)
cdset # just print out the contents
Chapter-02-R-Tutorial file:///C:/Users/rk215/Documents/R-Tutorial.html#Learning_R_to_do_D...
7 of 31 10/18/2020, 4:30 AM
## nlegs can_fly height color species
## 1 2 0 25.000000 black chicken
## 2 2 1 40.000000 black vulture
## 3 2 1 20.000000 blue parrot
## 4 2 0 150.000000 black ostrich
## 5 2 1 10.000000 brown sparrow
## 6 2 0 21.795787 red chicken
## 7 2 0 39.459162 mixed chicken
## 8 2 0 22.981968 black chicken
## 9 2 0 17.744720 black chicken
## 10 2 0 25.911222 mixed chicken
## 11 2 1 39.016163 white vulture
## 12 2 1 40.037789 white vulture
## 13 2 1 42.251693 grey vulture
## 14 2 1 39.014589 grey vulture
## 15 2 1 38.475420 white vulture
## 16 2 1 20.316044 mixed parrot
## 17 2 1 22.712721 teal parrot
## 18 2 1 22.840455 mixed parrot
## 19 2 1 14.934359 blue parrot
## 20 2 1 21.195914 blue parrot
## 21 2 0 160.085412 black ostrich
## 22 2 0 140.594205 black ostrich
## 23 2 0 174.088029 grey ostrich
## 24 2 0 157.684178 grey ostrich
## 25 2 0 135.249085 grey ostrich
## 26 2 1 9.295639 dark cement sparrow
## 27 2 1 11.266186 dark cement sparrow
## 28 2 1 9.336063 brown sparrow
## 29 2 1 10.169087 brown sparrow
## 30 2 1 11.060101 brown sparrow
dim(cdset) # what are the dimensions
## [1] 30 5
nrow(cdset) # number of rows
## [1] 30
ncol(cdset) # number of columns
## [1] 5
names(cdset) # data.frames have names matrices dont
Chapter-02-R-Tutorial file:///C:/Users/rk215/Documents/R-Tutorial.html#Learning_R_to_do_D...
8 of 31 10/18/2020, 4:30 AM
## [1] "nlegs" "can_fly" "height" "color" "species"
head(cdset)
## nlegs can_fly height color species
## 1 2 0 25.00000 black chicken
## 2 2 1 40.00000 black vulture
## 3 2 1 20.00000 blue parrot
## 4 2 0 150.00000 black ostrich
## 5 2 1 10.00000 brown sparrow
## 6 2 0 21.79579 red chicken
tail(cdset)
## nlegs can_fly height color species
## 25 2 0 135.249085 grey ostrich
## 26 2 1 9.295639 dark cement sparrow
## 27 2 1 11.266186 dark cement sparrow
## 28 2 1 9.336063 brown sparrow
## 29 2 1 10.169087 brown sparrow
## 30 2 1 11.060101 brown sparrow
row.names(cdset)
## [1] "1" "2" "3" "4" "5" "6" "7" "8" "9" "10" "11" "12" "13" "1
4" "15"
## [16] "16" "17" "18" "19" "20" "21" "22" "23" "24" "25" "26" "27" "28" "2
9" "30"
cdset[1,3]# just one cell
## [1] 25
cdset[1,] # entire observation
## nlegs can_fly height color species
## 1 2 0 25 black chicken
cdset[,3]# entire column or the feature
Chapter-02-R-Tutorial file:///C:/Users/rk215/Documents/R-Tutorial.html#Learning_R_to_do_D...
9 of 31 10/18/2020, 4:30 AM
## [1] 25.000000 40.000000 20.000000 150.000000 10.000000 21.795787
## [7] 39.459162 22.981968 17.744720 25.911222 39.016163 40.037789
## [13] 42.251693 39.014589 38.475420 20.316044 22.712721 22.840455
## [19] 14.934359 21.195914 160.085412 140.594205 174.088029 157.684178
## [25] 135.249085 9.295639 11.266186 9.336063 10.169087 11.060101
# Review just the sparrow data: entire observations (all columns).
cdset[cdset$species == 'sparrow', ]
## nlegs can_fly height color species
## 5 2 1 10.000000 brown sparrow
## 26 2 1 9.295639 dark cement sparrow
## 27 2 1 11.266186 dark cement sparrow
## 28 2 1 9.336063 brown sparrow
## 29 2 1 10.169087 brown sparrow
## 30 2 1 11.060101 brown sparrow
cdset[cdset$species=='sparrow',c(1,3,5)]# just some of the columns
## nlegs height species
## 5 2 10.000000 sparrow
## 26 2 9.295639 sparrow
## 27 2 11.266186 sparrow
## 28 2 9.336063 sparrow
## 29 2 10.169087 sparrow
## 30 2 11.060101 sparrow
cdset[cdset$species=='sparrow',c('nlegs','species')]# or by column names
## nlegs species
## 5 2 sparrow
## 26 2 sparrow
## 27 2 sparrow
## 28 2 sparrow
## 29 2 sparrow
## 30 2 sparrow
# Filter OUT some columns: sparrow rows, every column except `species`.
# A logical mask is used rather than -which(...): if the name were absent,
# -which() would yield an empty index and drop every column.
cdset[cdset$species == 'sparrow', names(cdset) != 'species']
Chapter-02-R-Tutorial file:///C:/Users/rk215/Documents/R-Tutorial.html#Learning_R_to_do_D...
10 of 31 10/18/2020, 4:30 AM
## nlegs can_fly height color
## 5 2 1 10.000000 brown
## 26 2 1 9.295639 dark cement
## 27 2 1 11.266186 dark cement
## 28 2 1 9.336063 brown
## 29 2 1 10.169087 brown
## 30 2 1 11.060101 brown
# Sparrow rows without the `nlegs` and `species` columns. The logical mask
# keeps all columns when none of the listed names is present, whereas
# -which(...) would then select no columns at all.
cdset[cdset$species == 'sparrow', !(names(cdset) %in% c('nlegs', 'species'))]
## can_fly height color
## 5 1 10.000000 brown
## 26 1 9.295639 dark cement
## 27 1 11.266186 dark cement
## 28 1 9.336063 brown
## 29 1 10.169087 brown
## 30 1 11.060101 brown
…
…
…
lapply(1:3,FUN=function(x)x*x) -> exl
exl
## [[1]]
## [1] 1
##
## [[2]]
## [1] 4
##
## [[3]]
## [1] 9
Chapter-02-R-Tutorial file:///C:/Users/rk215/Documents/R-Tutorial.html#Learning_R_to_do_D...
11 of 31 10/18/2020, 4:30 AM
mx
## [,1] [,2] [,3] [,4] [,5]
## [1,] 0 0 0 2 4
## [2,] 0 0 1 3 5
apply(mx,2,sd)
## [1] 0.0000000 0.0000000 0.7071068 0.7071068 0.7071068
(mx1<-sapply(mx,FUN=function(x)x+1))
## [1] 1 1 1 1 1 2 3 4 5 6
…
…
# Zero-padded identifiers: products P01..P99 and customers C01..C22.
prodidlist <- sprintf("P%02d", 1:99)
cidlist <- sprintf("C%02d", 1:22)
# Example basket for DID 1 / customer C01: between 1 and 20 distinct products
# drawn at random. The outer parentheses print the result as well as
# assigning it.
(df <- data.frame(DID = 1, CID = "C01",
                  PID = sample(prodidlist, sample(1:20, 1), replace = FALSE),
                  stringsAsFactors = FALSE))
## DID CID PID
## 1 1 C01 P88
## 2 1 C01 P53
## 3 1 C01 P86
## 4 1 C01 P90
## 5 1 C01 P21
## 6 1 C01 P34
(sample(prodidlist,sample(1:20,1),replace=F))
## [1] "P75" "P34" "P40" "P27" "P71" "P06" "P72"
Chapter-02-R-Tutorial file:///C:/Users/rk215/Documents/R-Tutorial.html#Learning_R_to_do_D...
12 of 31 10/18/2020, 4:30 AM
# Simulate purchases for DID values 1..50. For each DID a random, non-empty
# subset of the customer ids is drawn, and each chosen customer gets between
# 1 and 20 distinct products. All per-customer data frames are stacked into
# one long table with columns DID, CID, PID.
# NOTE(review): DID looks like a day/date id -- confirm with the author.
mdf <- do.call('rbind', lapply(1:50, FUN = function(x) {
  # Local name avoids shadowing the global cidlist inside the closure.
  chosen_cids <- sample(cidlist, sample(seq_along(cidlist), 1), replace = FALSE)
  do.call('rbind', lapply(chosen_cids, FUN = function(cid) {
    data.frame(DID = x, CID = cid,
               PID = sample(prodidlist, sample(1:20, 1), replace = FALSE))
  }))
}))
# Persist the simulated purchases as an unquoted CSV with a header row and
# no row names.
write.table(mdf,
            file = 'purchases.csv',
            sep = ',', row.names = FALSE,
            col.names = TRUE,
            quote = FALSE)
head(mdf)
## DID CID PID
## 1 1 C19 P19
## 2 1 C19 P33
## 3 1 C19 P91
## 4 1 C19 P78
## 5 1 C19 P66
## 6 1 C19 P64
nrow(mdf)
## [1] 6436
…
# Read the simulated purchases back from disk (header row, comma separated).
rmdf <- read.csv('purchases.csv', header = TRUE, sep = ',')
# Load the Titanic training data straight from its URL. The address must be
# one unbroken string; a line break inside the literal would become part of
# the URL.
titanic <- read.csv("http://christianherta.de/lehre/dataScience/machineLearning/data/titanic-train.csv", header = TRUE)
head(titanic)
Chapter-02-R-Tutorial file:///C:/Users/rk215/Documents/R-Tutorial.html#Learning_R_to_do_D...
13 of 31 10/18/2020, 4:30 AM
## PassengerId Survived Pclass
## 1 1 0 3
## 2 2 1 1
## 3 3 1 3
## 4 4 1 1
## 5 5 0 3
## 6 6 0 3
## Name Sex Age SibSp Pa
rch
## 1 Braund, Mr. Owen Harris male 22 1
0
## 2 Cumings, Mrs. John Bradley (Florence Briggs Thayer) female 38 1
0
## 3 Heikkinen, Miss. Laina female 26 0
0
## 4 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35 1
0
## 5 Allen, Mr. William Henry male 35 0
0
## 6 Moran, Mr. James male NA 0
0
## Ticket Fare Cabin Embarked
## 1 A/5 21171 7.2500 S
## 2 PC 17599 71.2833 C85 C
## 3 STON/O2. 3101282 7.9250 S
## 4 113803 53.1000 C123 S
## 5 373450 8.0500 S
## 6 330877 8.4583 Q
dim(titanic)
## [1] 891 12
table(mdf==rmdf)
##
## TRUE
## 19308
# Cumulative product of the dimensions: first the row count, then
# rows * columns, i.e. the total number of elements. All of them match
# (see the TRUE count above), as they should.
cumprod(dim(mdf))
Chapter-02-R-Tutorial file:///C:/Users/rk215/Documents/R-Tutorial.html#Learning_R_to_do_D...
14 of 31 10/18/2020, 4:30 AM
## [1] 6436 19308
nrow(rmdf)* ncol(rmdf)
## [1] 19308
quantmod::getSymbols(c("IBM","SPY"),from='2020-01-01')
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## 'getSymbols' currently uses auto.assign=TRUE by default, but will
## use auto.assign=FALSE in 0.5-0. You will still be able to use
## 'loadSymbols' to automatically load data. getOption("getSymbols.env")
## and getOption("getSymbols.auto.assign") will still be checked for
## alternate defaults.
##
## This message is shown once per session and may be disabled by setting
## options("getSymbols.warning4.0"=FALSE). See ?getSymbols for details.
## [1] "IBM" "SPY"
dim(IBM)
## [1] 201 6
#dim(JNJ)
#quantmod::getSymbols(c("SPY"),from='2020-01-01')
head(IBM)
Chapter-02-R-Tutorial file:///C:/Users/rk215/Documents/R-Tutorial.html#Learning_R_to_do_D...
15 of 31 10/18/2020, 4:30 AM
## IBM.Open IBM.High IBM.Low IBM.Close IBM.Volume IBM.Adjusted
## 2020-01-02 135.00 135.92 134.77 135.42 3148600 130.5377
## 2020-01-03 133.57 134.86 133.56 134.34 2373700 129.4967
## 2020-01-06 133.42 134.24 133.20 134.10 2425500 129.2654
## 2020-01-07 133.69 134.96 133.40 134.19 3090800 129.3521
## 2020-01-08 134.51 135.86 133.92 135.31 4346000 130.4317
## 2020-01-09 135.74 136.79 135.31 136.74 3730600 131.8102
head(IBM$IBM.Adjusted)
## IBM.Adjusted
## 2020-01-02 130.5377
## 2020-01-03 129.4967
## 2020-01-06 129.2654
## 2020-01-07 129.3521
## 2020-01-08 130.4317
## 2020-01-09 131.8102
# Previous-day adjusted close: the first value repeated once, followed by
# every value except the last. head(x, -1) replaces the hard-coded 200 so
# the lag stays correct whatever number of rows getSymbols() returned.
# (Earlier error fixed.)
DIBM <- c(head(IBM$IBM.Adjusted, 1), head(IBM$IBM.Adjusted, -1))
# Daily return in percent: (today / previous day - 1) * 100; the first entry
# is 0 by construction. Results are kept at full precision (earlier error
# fixed). Note that the printed index comes from DIBM, so each return is
# labelled with the previous day's date (2020-01-02 appears twice).
head((dailyIBMReturns <- (((as.numeric(IBM$IBM.Adjusted) / DIBM) - 1) * 100)))
## IBM.Adjusted
## 2020-01-02 0.00000000
## 2020-01-02 -0.79751719
## 2020-01-03 -0.17863237
## 2020-01-06 0.06709531
## 2020-01-07 0.83464824
## 2020-01-08 1.05682335
dailyReturnIBM<-dailyIBMReturns#(IBM[[6]]/DIBM)-1
head(dailyReturnIBM)
## IBM.Adjusted
## 2020-01-02 0.00000000
## 2020-01-02 -0.79751719
## 2020-01-03 -0.17863237
## 2020-01-06 0.06709531
## 2020-01-07 0.83464824
## 2020-01-08 1.05682335
# Previous-day adjusted close for SPY: the first value repeated once,
# followed by every value except the last. head(x, -1) replaces the
# hard-coded 200 so the lag does not depend on the number of rows downloaded.
# (Earlier error fixed.)
DSPY <- c(head(SPY$SPY.Adjusted, 1), head(SPY$SPY.Adjusted, -1))
# Daily return in percent: (today / previous day - 1) * 100; the first entry
# is 0 by construction. Results are kept at full precision (earlier error
# fixed).
head((dailySPYReturns <- (((as.numeric(SPY$SPY.Adjusted) / DSPY) - 1) * 100)))
Chapter-02-R-Tutorial file:///C:/Users/rk215/Documents/R-Tutorial.html#Learning_R_to_do_D...
16 of 31 10/18/2020, 4:30 AM
## SPY.Adjusted
## 2020-01-02 0.0000000
## 2020-01-02 -0.7572182
## 2020-01-03 0.3815075
## 2020-01-06 -0.2811862
## 2020-01-07 0.5329669
## 2020-01-08 0.6780544
dailyReturnSPY<-dailySPYReturns#(SPY[[6]]/DSPY)-1
lmModel<-lm(dailyReturnIBM~dailyReturnSPY)
summary(lmModel)
##
## Call:
## lm(formula = dailyReturnIBM ~ dailyReturnSPY)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.7497 -0.7827 -0.1046 0.6906 7.0070
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.04934 0.09647 -0.511 0.61
## dailyReturnSPY 1.02923 0.04193 24.547 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.367 on 199 degrees of freedom
## Multiple R-squared: 0.7517, Adjusted R-squared: 0.7505
## F-statistic: 602.6 on 1 and 199 DF, p-value: < 2.2e-16
# Stack the two return series in a 2 x 1 layout. par() returns the previous
# settings, which are restored afterwards so later plots are unaffected.
oldPar <- par(mfrow = c(2, 1))
# The graphical parameter for colour is `col`; `color` is not one.
plot(dailyReturnIBM, col = 'black')
plot(dailyReturnSPY, col = 'blue')
par(oldPar)
Chapter-02-R-Tutorial file:///C:/Users/rk215/Documents/R-Tutorial.html#Learning_R_to_do_D...
17 of 31 10/18/2020, 4:30 AM
#abline(dailyReturnIBM,dailyReturnSPY)
cor(dailyReturnIBM,dailyReturnSPY)
## SPY.Adjusted
## IBM.Adjusted 0.8670258
#plot(dailyReturnIBM,pch=3,color='black')
#points(dailyReturnSPY,pch=3,color='blue')
library(sqldf)
Chapter-02-R-Tutorial file:///C:/Users/rk215/Documents/R-Tutorial.html#Learning_R_to_do_D...
18 of 31 10/18/2020, 4:30 AM
## Loading required package: gsubfn
## Loading required package: proto
## Loading required package: RSQLite
purchases<-mdf
rpt01<-sqldf('select DID,count(distinct(CID)) from mdf group by DID')
head(rpt01)
## DID count(distinct(CID))
## 1 1 14
## 2 2 19
## 3 3 6
## 4 4 16
## 5 5 16
## 6 6 9
tail(rpt01)
## DID count(distinct(CID))
## 45 45 22
## 46 46 15
## 47 47 15
## 48 48 19
## 49 49 10
## 50 50 10
sqldf('select distinct CID from mdf where DID=50')
## CID
## 1 C06
## 2 C21
## 3 C03
## 4 C11
## 5 C04
## 6 C17
## 7 C08
## 8 C14
## 9 C18
## 10 C15
Chapter-02-R-Tutorial file:///C:/Users/rk215/Documents/R-Tutorial.html#Learning_R_to_do_D...
19 of 31 10/18/2020, 4:30 AM
Chapter-02-R-Tutorial file:///C:/Users/rk215/Documents/R-Tutorial.html#Learning_R_to_do_D...
20 of 31 10/18/2020, 4:30 AM
-- For each product pair (P1, P2) bought by the same customer (cid) under the
-- same did, with P1 < P2 so every pair is counted once:
--   condProb = 100 * (rows where P1 and P2 occur together) / (rows containing P1)
-- The count is multiplied by 100 before dividing, matching the query string
-- actually executed through sqldf: both operands are integer counts, and
-- SQLite integer division truncates, so a plain ratio would come out as 0.
SELECT A.p1,
       A.p2,
       ( A.p1p2c * 100 ) / B.p1c AS condProb
FROM   (SELECT apid     P1,
               bpid     P2,
               Count(*) P1P2C
        FROM   (SELECT A.did AS ADID,
                       A.cid AS ACID,
                       A.pid AS APID,
                       B.did AS BDID,
                       B.cid AS BCID,
                       B.pid AS BPID
                FROM   purchases A
                       JOIN purchases B
                         ON A.cid = B.cid
                            AND A.did = B.did
                            AND A.pid < B.pid) X
        GROUP  BY apid,
                  bpid) A
       JOIN (SELECT pid      AS P1,
                    Count(*) P1C
             FROM   purchases
             GROUP  BY pid) B
         ON A.p1 = B.p1
ORDER  BY condprob DESC;
# Query text passed to sqldf(): pair counts per (P1, P2) times 100, divided
# by the row count of P1. The literal contains two intentional line breaks
# (after "A.DID as" and after "purchases B on"); every other break would
# split an identifier or keyword and make the SQL invalid.
sqlstr <- "select A.P1,A.P2, (A.P1P2C*100)/B.P1C as condProb from ( Select APID P1,BPID P2,count(*) P1P2C from ( select A.DID as
ADID, A.CID as ACID , A.PID as APID , B.DID as BDID, B.CID as BCID , B.PID as BPID from purchases A join purchases B on
A.CID=B.CID AND A.DID=B.DID AND A.PID < B.PID ) X group by APID,BPID ) A join (select PID as P1, count(*) P1C from purchases group by PID) B on A.P1=B.P1 order by condProb desc"
sqlstr
## [1] "select A.P1,A.P2, (A.P1P2C*100)/B.P1C as condProb from ( Select APID
P1,BPID P2,count(*) P1P2C from ( select A.DID as\nADID, A.CID as ACID , A.P
ID as APID , B.DID as BDID, B.CID as BCID , B.PID as BPID from purchases A j
oin purchases B on\nA.CID=B.CID AND A.DID=B.DID AND A.PID < B.PID ) X group
by APID,BPID ) A join (select PID as P1, count(*) P1C from purchases group b
y PID) B on A.P1=B.P1 order by condProb desc"
Chapter-02-R-Tutorial file:///C:/Users/rk215/Documents/R-Tutorial.html#Learning_R_to_do_D...
21 of 31 10/18/2020, 4:30 AM
condprob<-sqldf(sqlstr)
#condprob
head(condprob)
## P1 P2 condProb
## 1 P27 P61 31
## 2 P66 P97 31
## 3 P04 P92 29
## 4 P27 P53 29
## 5 P27 P95 29
## 6 P52 P79 29
require(rpart)
## Loading required package: rpart
require(rpart.plot)
## Loading required package: rpart.plot
require(klaR)
## Loading required package: klaR
## Loading required package: MASS
Chapter-02-R-Tutorial file:///C:/Users/rk215/Documents/R-Tutorial.html#Learning_R_to_do_D...
22 of 31 10/18/2020, 4:30 AM
# Fixed seed so the train/test split is reproducible.
set.seed(43)
# Draw 20 distinct row indices of cdset for training; the remaining rows form
# the test set. seq_len(nrow(cdset)) replaces the hard-coded 1:30 so the
# split follows the actual number of rows.
tridx <- sample(seq_len(nrow(cdset)), 20, replace = FALSE)
trdata <- cdset[tridx, ]
tstdata <- cdset[-tridx, ]
# Classification tree predicting species from all other columns; minsplit = 2
# lets rpart attempt a split on nodes holding as few as two observations.
trmodel.rpart <- rpart(species ~ ., data = trdata, minsplit = 2)
rpart.plot(trmodel.rpart)
#compare this to
table(trdata$species)/nrow(trdata)
##
## chicken ostrich parrot sparrow vulture
## 0.20 0.25 0.15 0.20 0.20
predicted.trmodel.rpart<-predict(trmodel.rpart,trdata[,-5],type='class')
table(trdata[,5],predicted.trmodel.rpart)
Chapter-02-R-Tutorial file:///C:/Users/rk215/Documents/R-Tutorial.html#Learning_R_to_do_D...
23 of 31 10/18/2020, 4:30 AM
## predicted.trmodel.rpart
## chicken ostrich parrot sparrow vulture
## chicken 4 0 0 0 0
## ostrich 0 5 0 0 0
## parrot 0 0 3 0 0
## sparrow 0 0 0 4 0
## vulture 0 0 0 0 4
# removing colors that are present in test but not in train -- in a small
# data set the tree cannot process levels it has not seen during training
tstdatnw<-tstdata[tstdata$color %in% trdata$color,]
tstdatnw
## nlegs can_fly height color species
## 10 2 0 25.91122 mixed chicken
## 11 2 1 39.01616 white vulture
## 15 2 1 38.47542 white vulture
## 18 2 1 22.84045 mixed parrot
## 20 2 1 21.19591 blue parrot
## 25 2 0 135.24908 grey ostrich
## 29 2 1 10.16909 brown sparrow
## 30 2 1 11.06010 brown sparrow
predicted.tstdatnw.rpart<-predict(trmodel.rpart,tstdatnw[,-5],type='class')
table(tstdatnw[,5],predicted.tstdatnw.rpart)
## predicted.tstdatnw.rpart
## chicken ostrich parrot sparrow vulture
## chicken 1 0 0 0 0
## ostrich 0 1 0 0 0
## parrot 0 0 2 0 0
## sparrow 0 0 0 2 0
## vulture 0 0 0 0 2
# caret reads a table as predictions in rows and reference (true) classes in
# columns, so the predicted labels go first. The true labels are converted to
# a factor with the same levels as the predictions so the table stays square
# even if a class is missing from the test rows.
caret::confusionMatrix(table(
  Prediction = predicted.tstdatnw.rpart,
  Reference = factor(tstdatnw[, 5], levels = levels(predicted.tstdatnw.rpart))
))
Chapter-02-R-Tutorial file:///C:/Users/rk215/Documents/R-Tutorial.html#Learning_R_to_do_D...
24 of 31 10/18/2020, 4:30 AM
## Confusion Matrix and Statistics
##
## predicted.tstdatnw.rpart
## chicken ostrich parrot sparrow vulture
## chicken 1 0 0 0 0
## ostrich 0 1 0 0 0
## parrot 0 0 2 0 0
## sparrow 0 0 0 2 0
## vulture 0 0 0 0 2
##
## Overall Statistics
##
## Accuracy : 1
## 95% CI : (0.6306, 1)
## No Information Rate : 0.25
## P-Value [Acc > NIR] : 1.526e-05
##
## Kappa : 1
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: chicken Class: ostrich Class: parrot Class: s
parrow
## Sensitivity 1.000 1.000 1.00
1.00
## Specificity 1.000 1.000 1.00
1.00
## Pos Pred Value 1.000 1.000 1.00
1.00
## Neg Pred Value 1.000 1.000 1.00
1.00
## Prevalence 0.125 0.125 0.25
0.25
## Detection Rate 0.125 0.125 0.25
0.25
## Detection Prevalence 0.125 0.125 0.25
0.25
## Balanced Accuracy 1.000 1.000 1.00
1.00
## Class: vulture
## Sensitivity 1.00
## Specificity 1.00
## Pos Pred Value 1.00
## Neg Pred Value 1.00
## Prevalence 0.25
## Detection Rate 0.25
## Detection Prevalence 0.25
## Balanced Accuracy 1.00
Chapter-02-R-Tutorial file:///C:/Users/rk215/Documents/R-Tutorial.html#Learning_R_to_do_D...
25 of 31 10/18/2020, 4:30 AM
tstdatnw[,5]
## [1] "chicken" "vulture" "vulture" "parrot" "parrot" "ostrich" "sparrow"
## [8] "sparrow"
predicted.tstdatnw.rpart
## 10 11 15 18 20 25 29 30
## chicken vulture vulture parrot parrot ostrich sparrow sparrow
## Levels: chicken ostrich parrot sparrow vulture
table(as.character(tstdatnw[,5]),as.character(predicted.tstdatnw.rpart))
##
## chicken ostrich parrot sparrow vulture
## chicken 1 0 0 0 0
## ostrich 0 1 0 0 0
## parrot 0 0 2 0 0
## sparrow 0 0 0 2 0
## vulture 0 0 0 0 2
# caret reads a table as predictions in rows and reference (true) classes in
# columns, so the predicted labels go first. The true labels are converted to
# a factor with the same levels as the predictions so the table stays square
# even if a class is missing from the test rows.
caret::confusionMatrix(table(
  Prediction = predicted.tstdatnw.rpart,
  Reference = factor(tstdatnw[, 5], levels = levels(predicted.tstdatnw.rpart))
))
Chapter-02-R-Tutorial file:///C:/Users/rk215/Documents/R-Tutorial.html#Learning_R_to_do_D...
26 of 31 10/18/2020, 4:30 AM
## Confusion Matrix and Statistics
##
## predicted.tstdatnw.rpart
## chicken ostrich parrot sparrow vulture
## chicken 1 0 0 0 0
## ostrich 0 1 0 0 0
## parrot 0 0 2 0 0
## sparrow 0 0 0 2 0
## vulture 0 0 0 0 2
##
## Overall Statistics
##
## Accuracy : 1
## 95% CI : (0.6306, 1)
## No Information Rate : 0.25
## P-Value [Acc > NIR] : 1.526e-05
##
## Kappa : 1
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: chicken Class: ostrich Class: parrot Class: s
parrow
## Sensitivity 1.000 1.000 1.00
1.00
## Specificity 1.000 1.000 1.00
1.00
## Pos Pred Value 1.000 1.000 1.00
1.00
## Neg Pred Value 1.000 1.000 1.00
1.00
## Prevalence 0.125 0.125 0.25
0.25
## Detection Rate 0.125 0.125 0.25
0.25
## Detection Prevalence 0.125 0.125 0.25
0.25
## Balanced Accuracy 1.000 1.000 1.00
1.00
## Class: vulture
## Sensitivity 1.00
## Specificity 1.00
## Pos Pred Value 1.00
## Neg Pred Value 1.00
## Prevalence 0.25
## Detection Rate 0.25
## Detection Prevalence 0.25
## Balanced Accuracy 1.00
Chapter-02-R-Tutorial file:///C:/Users/rk215/Documents/R-Tutorial.html#Learning_R_to_do_D...
27 of 31 10/18/2020, 4:30 AM
…
cdset
## nlegs can_fly height color species
## 1 2 0 25.000000 black chicken
## 2 2 1 40.000000 black vulture
## 3 2 1 20.000000 blue parrot
## 4 2 0 150.000000 black ostrich
## 5 2 1 10.000000 brown sparrow
## 6 2 0 21.795787 red chicken
## 7 2 0 39.459162 mixed chicken
## 8 2 0 22.981968 black chicken
## 9 2 0 17.744720 black chicken
## 10 2 0 25.911222 mixed chicken
## 11 2 1 39.016163 white vulture
## 12 2 1 40.037789 white vulture
## 13 2 1 42.251693 grey vulture
## 14 2 1 39.014589 grey vulture
## 15 2 1 38.475420 white vulture
## 16 2 1 20.316044 mixed parrot
## 17 2 1 22.712721 teal parrot
## 18 2 1 22.840455 mixed parrot
## 19 2 1 14.934359 blue parrot
## 20 2 1 21.195914 blue parrot
## 21 2 0 160.085412 black ostrich
## 22 2 0 140.594205 black ostrich
## 23 2 0 174.088029 grey ostrich
## 24 2 0 157.684178 grey ostrich
## 25 2 0 135.249085 grey ostrich
## 26 2 1 9.295639 dark cement sparrow
## 27 2 1 11.266186 dark cement sparrow
## 28 2 1 9.336063 brown sparrow
## 29 2 1 10.169087 brown sparrow
## 30 2 1 11.060101 brown sparrow
klaR::partimat(Species~.,data=iris,method="lda")
Chapter-02-R-Tutorial file:///C:/Users/rk215/Documents/R-Tutorial.html#Learning_R_to_do_D...
28 of 31 10/18/2020, 4:30 AM
cat ("the sample proportions are")
## the sample proportions are
table(cdset$species)
##
## chicken ostrich parrot sparrow vulture
## 6 6 6 6 6
table(cdset$species)/sum(table(cdset$species))
Chapter-02-R-Tutorial file:///C:/Users/rk215/Documents/R-Tutorial.html#Learning_R_to_do_D...
29 of 31 10/18/2020, 4:30 AM
##
## chicken ostrich parrot sparrow vulture
## 0.2 0.2 0.2 0.2 0.2
cat ("the training set proportions are")
## the training set proportions are
table(trdata$species)
##
## chicken ostrich parrot sparrow vulture
## 4 5 3 4 4
table(trdata$species)/sum(table(trdata$species))
##
## chicken ostrich parrot sparrow vulture
## 0.20 0.25 0.15 0.20 0.20
cat ("the test set proportions are")
## the test set proportions are
table(tstdata$species)
##
## chicken ostrich parrot sparrow vulture
## 2 1 3 2 2
table(tstdata$species)/sum(table(tstdata$species))
##
## chicken ostrich parrot sparrow vulture
## 0.2 0.1 0.3 0.2 0.2
Chapter-02-R-Tutorial file:///C:/Users/rk215/Documents/R-Tutorial.html#Learning_R_to_do_D...
30 of 31 10/18/2020, 4:30 AM
…
Chapter-02-R-Tutorial file:///C:/Users/rk215/Documents/R-Tutorial.html#Learning_R_to_do_D...
31 of 31 10/18/2020, 4:30 AM

More Related Content

What's hot

Palestra sobre Collections com Python
Palestra sobre Collections com PythonPalestra sobre Collections com Python
Palestra sobre Collections com Pythonpugpe
 
Association Rule Mining with R
Association Rule Mining with RAssociation Rule Mining with R
Association Rule Mining with RYanchang Zhao
 
Sangam 19 - Successful Applications on Autonomous
Sangam 19 - Successful Applications on AutonomousSangam 19 - Successful Applications on Autonomous
Sangam 19 - Successful Applications on AutonomousConnor McDonald
 
Sangam 19 - PLSQL still the coolest
Sangam 19 - PLSQL still the coolestSangam 19 - PLSQL still the coolest
Sangam 19 - PLSQL still the coolestConnor McDonald
 
Java EE 8新機能解説 -Bean Validation 2.0編-
Java EE 8新機能解説 -Bean Validation 2.0編-Java EE 8新機能解説 -Bean Validation 2.0編-
Java EE 8新機能解説 -Bean Validation 2.0編-Masatoshi Tada
 
Visualization of Supervised Learning with {arules} + {arulesViz}
Visualization of Supervised Learning with {arules} + {arulesViz}Visualization of Supervised Learning with {arules} + {arulesViz}
Visualization of Supervised Learning with {arules} + {arulesViz}Takashi J OZAKI
 
Simple Ways To Be A Better Programmer (OSCON 2007)
Simple Ways To Be A Better Programmer (OSCON 2007)Simple Ways To Be A Better Programmer (OSCON 2007)
Simple Ways To Be A Better Programmer (OSCON 2007)Michael Schwern
 
Clustering com numpy e cython
Clustering com numpy e cythonClustering com numpy e cython
Clustering com numpy e cythonAnderson Dantas
 
Regression and Classification with R
Regression and Classification with RRegression and Classification with R
Regression and Classification with RYanchang Zhao
 
Seistech SQL code
Seistech SQL codeSeistech SQL code
Seistech SQL codeSimon Hoyle
 
Caching and tuning fun for high scalability @ LOAD2012
Caching and tuning fun for high scalability @ LOAD2012Caching and tuning fun for high scalability @ LOAD2012
Caching and tuning fun for high scalability @ LOAD2012Wim Godden
 
Super Advanced Python –act1
Super Advanced Python –act1Super Advanced Python –act1
Super Advanced Python –act1Ke Wei Louis
 
Sangam 19 - Analytic SQL
Sangam 19 - Analytic SQLSangam 19 - Analytic SQL
Sangam 19 - Analytic SQLConnor McDonald
 
python高级内存管理
python高级内存管理python高级内存管理
python高级内存管理rfyiamcool
 
Do snow.rwn
Do snow.rwnDo snow.rwn
Do snow.rwnARUN DN
 
Parallel Computing With Dask - PyDays 2017
Parallel Computing With Dask - PyDays 2017Parallel Computing With Dask - PyDays 2017
Parallel Computing With Dask - PyDays 2017Christian Aichinger
 

What's hot (20)

R57.Php
R57.PhpR57.Php
R57.Php
 
Nop2
Nop2Nop2
Nop2
 
Palestra sobre Collections com Python
Palestra sobre Collections com PythonPalestra sobre Collections com Python
Palestra sobre Collections com Python
 
Association Rule Mining with R
Association Rule Mining with RAssociation Rule Mining with R
Association Rule Mining with R
 
Sangam 19 - Successful Applications on Autonomous
Sangam 19 - Successful Applications on AutonomousSangam 19 - Successful Applications on Autonomous
Sangam 19 - Successful Applications on Autonomous
 
Sangam 19 - PLSQL still the coolest
Sangam 19 - PLSQL still the coolestSangam 19 - PLSQL still the coolest
Sangam 19 - PLSQL still the coolest
 
Java EE 8新機能解説 -Bean Validation 2.0編-
Java EE 8新機能解説 -Bean Validation 2.0編-Java EE 8新機能解説 -Bean Validation 2.0編-
Java EE 8新機能解説 -Bean Validation 2.0編-
 
Visualization of Supervised Learning with {arules} + {arulesViz}
Visualization of Supervised Learning with {arules} + {arulesViz}Visualization of Supervised Learning with {arules} + {arulesViz}
Visualization of Supervised Learning with {arules} + {arulesViz}
 
Simple Ways To Be A Better Programmer (OSCON 2007)
Simple Ways To Be A Better Programmer (OSCON 2007)Simple Ways To Be A Better Programmer (OSCON 2007)
Simple Ways To Be A Better Programmer (OSCON 2007)
 
Five
FiveFive
Five
 
Clustering com numpy e cython
Clustering com numpy e cythonClustering com numpy e cython
Clustering com numpy e cython
 
Regression and Classification with R
Regression and Classification with RRegression and Classification with R
Regression and Classification with R
 
Seistech SQL code
Seistech SQL codeSeistech SQL code
Seistech SQL code
 
Caching and tuning fun for high scalability @ LOAD2012
Caching and tuning fun for high scalability @ LOAD2012Caching and tuning fun for high scalability @ LOAD2012
Caching and tuning fun for high scalability @ LOAD2012
 
Super Advanced Python –act1
Super Advanced Python –act1Super Advanced Python –act1
Super Advanced Python –act1
 
Quick reference for solr
Quick reference for solrQuick reference for solr
Quick reference for solr
 
Sangam 19 - Analytic SQL
Sangam 19 - Analytic SQLSangam 19 - Analytic SQL
Sangam 19 - Analytic SQL
 
python高级内存管理
python高级内存管理python高级内存管理
python高级内存管理
 
Do snow.rwn
Do snow.rwnDo snow.rwn
Do snow.rwn
 
Parallel Computing With Dask - PyDays 2017
Parallel Computing With Dask - PyDays 2017Parallel Computing With Dask - PyDays 2017
Parallel Computing With Dask - PyDays 2017
 

Similar to Chapter 2: R tutorial Handbook for Data Science and Machine Learning Practitioners

Practical Introduction to Web scraping using R
Practical Introduction to Web scraping using RPractical Introduction to Web scraping using R
Practical Introduction to Web scraping using RRsquared Academy
 
Writing Readable Code with Pipes
Writing Readable Code with PipesWriting Readable Code with Pipes
Writing Readable Code with PipesRsquared Academy
 
Nsd, il tuo compagno di viaggio quando Domino va in crash
Nsd, il tuo compagno di viaggio quando Domino va in crashNsd, il tuo compagno di viaggio quando Domino va in crash
Nsd, il tuo compagno di viaggio quando Domino va in crashFabio Pignatti
 
Descriptive analytics in r programming language
Descriptive analytics in r programming languageDescriptive analytics in r programming language
Descriptive analytics in r programming languageAshwini Mathur
 
第3回 データフレームの基本操作 その1(解答付き)
第3回 データフレームの基本操作 その1(解答付き)第3回 データフレームの基本操作 その1(解答付き)
第3回 データフレームの基本操作 その1(解答付き)Wataru Shito
 
Read/Import data from flat/delimited files into R
Read/Import data from flat/delimited files into RRead/Import data from flat/delimited files into R
Read/Import data from flat/delimited files into RRsquared Academy
 
Just in time (series) - KairosDB
Just in time (series) - KairosDBJust in time (series) - KairosDB
Just in time (series) - KairosDBVictor Anjos
 
R Matrix Math Quick Reference
R Matrix Math Quick ReferenceR Matrix Math Quick Reference
R Matrix Math Quick ReferenceMark Niemann-Ross
 
Beyond php - it's not (just) about the code
Beyond php - it's not (just) about the codeBeyond php - it's not (just) about the code
Beyond php - it's not (just) about the codeWim Godden
 
Beyond PHP - it's not (just) about the code
Beyond PHP - it's not (just) about the codeBeyond PHP - it's not (just) about the code
Beyond PHP - it's not (just) about the codeWim Godden
 
Beyond php - it's not (just) about the code
Beyond php - it's not (just) about the codeBeyond php - it's not (just) about the code
Beyond php - it's not (just) about the codeWim Godden
 
第5回 様々なファイル形式の読み込みとデータの書き出し(解答付き)
第5回 様々なファイル形式の読み込みとデータの書き出し(解答付き)第5回 様々なファイル形式の読み込みとデータの書き出し(解答付き)
第5回 様々なファイル形式の読み込みとデータの書き出し(解答付き)Wataru Shito
 
Webinar: The Whys and Hows of Predictive Modelling
Webinar: The Whys and Hows of Predictive Modelling Webinar: The Whys and Hows of Predictive Modelling
Webinar: The Whys and Hows of Predictive Modelling Edureka!
 
วิเคราะห์อภิมานด้วยโมเดลสมการเชิงโครงสร้างด้วยโปรแกรม R: กรณีศึกษาภาวะผู้นำทา...
วิเคราะห์อภิมานด้วยโมเดลสมการเชิงโครงสร้างด้วยโปรแกรม R: กรณีศึกษาภาวะผู้นำทา...วิเคราะห์อภิมานด้วยโมเดลสมการเชิงโครงสร้างด้วยโปรแกรม R: กรณีศึกษาภาวะผู้นำทา...
วิเคราะห์อภิมานด้วยโมเดลสมการเชิงโครงสร้างด้วยโปรแกรม R: กรณีศึกษาภาวะผู้นำทา...Twatchai Tangutairuang
 
Shell Scripting
Shell ScriptingShell Scripting
Shell Scriptingdcarneir
 
Beware: Sharp Tools
Beware: Sharp ToolsBeware: Sharp Tools
Beware: Sharp Toolschrismdp
 
Brief Lecture on Text Mining and Social Network Analysis with R, by Deolu Ade...
Brief Lecture on Text Mining and Social Network Analysis with R, by Deolu Ade...Brief Lecture on Text Mining and Social Network Analysis with R, by Deolu Ade...
Brief Lecture on Text Mining and Social Network Analysis with R, by Deolu Ade...Deolu Adeleye
 
Introduction to R
Introduction to RIntroduction to R
Introduction to RStacy Irwin
 

Similar to Chapter 2: R tutorial Handbook for Data Science and Machine Learning Practitioners (20)

Introduction to tibbles
Introduction to tibblesIntroduction to tibbles
Introduction to tibbles
 
Practical Introduction to Web scraping using R
Practical Introduction to Web scraping using RPractical Introduction to Web scraping using R
Practical Introduction to Web scraping using R
 
Writing Readable Code with Pipes
Writing Readable Code with PipesWriting Readable Code with Pipes
Writing Readable Code with Pipes
 
Nsd, il tuo compagno di viaggio quando Domino va in crash
Nsd, il tuo compagno di viaggio quando Domino va in crashNsd, il tuo compagno di viaggio quando Domino va in crash
Nsd, il tuo compagno di viaggio quando Domino va in crash
 
Descriptive analytics in r programming language
Descriptive analytics in r programming languageDescriptive analytics in r programming language
Descriptive analytics in r programming language
 
第3回 データフレームの基本操作 その1(解答付き)
第3回 データフレームの基本操作 その1(解答付き)第3回 データフレームの基本操作 その1(解答付き)
第3回 データフレームの基本操作 その1(解答付き)
 
Read/Import data from flat/delimited files into R
Read/Import data from flat/delimited files into RRead/Import data from flat/delimited files into R
Read/Import data from flat/delimited files into R
 
Just in time (series) - KairosDB
Just in time (series) - KairosDBJust in time (series) - KairosDB
Just in time (series) - KairosDB
 
R Matrix Math Quick Reference
R Matrix Math Quick ReferenceR Matrix Math Quick Reference
R Matrix Math Quick Reference
 
Beyond php - it's not (just) about the code
Beyond php - it's not (just) about the codeBeyond php - it's not (just) about the code
Beyond php - it's not (just) about the code
 
Beyond PHP - it's not (just) about the code
Beyond PHP - it's not (just) about the codeBeyond PHP - it's not (just) about the code
Beyond PHP - it's not (just) about the code
 
Beyond php - it's not (just) about the code
Beyond php - it's not (just) about the codeBeyond php - it's not (just) about the code
Beyond php - it's not (just) about the code
 
第5回 様々なファイル形式の読み込みとデータの書き出し(解答付き)
第5回 様々なファイル形式の読み込みとデータの書き出し(解答付き)第5回 様々なファイル形式の読み込みとデータの書き出し(解答付き)
第5回 様々なファイル形式の読み込みとデータの書き出し(解答付き)
 
Webinar: The Whys and Hows of Predictive Modelling
Webinar: The Whys and Hows of Predictive Modelling Webinar: The Whys and Hows of Predictive Modelling
Webinar: The Whys and Hows of Predictive Modelling
 
วิเคราะห์อภิมานด้วยโมเดลสมการเชิงโครงสร้างด้วยโปรแกรม R: กรณีศึกษาภาวะผู้นำทา...
วิเคราะห์อภิมานด้วยโมเดลสมการเชิงโครงสร้างด้วยโปรแกรม R: กรณีศึกษาภาวะผู้นำทา...วิเคราะห์อภิมานด้วยโมเดลสมการเชิงโครงสร้างด้วยโปรแกรม R: กรณีศึกษาภาวะผู้นำทา...
วิเคราะห์อภิมานด้วยโมเดลสมการเชิงโครงสร้างด้วยโปรแกรม R: กรณีศึกษาภาวะผู้นำทา...
 
Shell Scripting
Shell ScriptingShell Scripting
Shell Scripting
 
Beware: Sharp Tools
Beware: Sharp ToolsBeware: Sharp Tools
Beware: Sharp Tools
 
Brief Lecture on Text Mining and Social Network Analysis with R, by Deolu Ade...
Brief Lecture on Text Mining and Social Network Analysis with R, by Deolu Ade...Brief Lecture on Text Mining and Social Network Analysis with R, by Deolu Ade...
Brief Lecture on Text Mining and Social Network Analysis with R, by Deolu Ade...
 
Introduction to R
Introduction to RIntroduction to R
Introduction to R
 
Graphics in R
Graphics in RGraphics in R
Graphics in R
 

More from Raman Kannan

Essays on-civic-responsibilty
Essays on-civic-responsibiltyEssays on-civic-responsibilty
Essays on-civic-responsibiltyRaman Kannan
 
M12 boosting-part02
M12 boosting-part02M12 boosting-part02
M12 boosting-part02Raman Kannan
 
M10 gradient descent
M10 gradient descentM10 gradient descent
M10 gradient descentRaman Kannan
 
M08 BiasVarianceTradeoff
M08 BiasVarianceTradeoffM08 BiasVarianceTradeoff
M08 BiasVarianceTradeoffRaman Kannan
 
Chapter 04-discriminant analysis
Chapter 04-discriminant analysisChapter 04-discriminant analysis
Chapter 04-discriminant analysisRaman Kannan
 
Augmented 11022020-ieee
Augmented 11022020-ieeeAugmented 11022020-ieee
Augmented 11022020-ieeeRaman Kannan
 
Chapter 02-logistic regression
Chapter 02-logistic regressionChapter 02-logistic regression
Chapter 02-logistic regressionRaman Kannan
 
Chapter01 introductory handbook
Chapter01 introductory handbookChapter01 introductory handbook
Chapter01 introductory handbookRaman Kannan
 
A voyage-inward-02
A voyage-inward-02A voyage-inward-02
A voyage-inward-02Raman Kannan
 
Evaluating classifierperformance ml-cs6923
Evaluating classifierperformance ml-cs6923Evaluating classifierperformance ml-cs6923
Evaluating classifierperformance ml-cs6923Raman Kannan
 
A data scientist's study plan
A data scientist's study planA data scientist's study plan
A data scientist's study planRaman Kannan
 
Cognitive Assistants
Cognitive AssistantsCognitive Assistants
Cognitive AssistantsRaman Kannan
 
Essay on-data-analysis
Essay on-data-analysisEssay on-data-analysis
Essay on-data-analysisRaman Kannan
 
How to-run-ols-diagnostics-02
How to-run-ols-diagnostics-02How to-run-ols-diagnostics-02
How to-run-ols-diagnostics-02Raman Kannan
 
Sdr dodd frankbirdseyeview
Sdr dodd frankbirdseyeviewSdr dodd frankbirdseyeview
Sdr dodd frankbirdseyeviewRaman Kannan
 

More from Raman Kannan (20)

Essays on-civic-responsibilty
Essays on-civic-responsibiltyEssays on-civic-responsibilty
Essays on-civic-responsibilty
 
M12 boosting-part02
M12 boosting-part02M12 boosting-part02
M12 boosting-part02
 
M10 gradient descent
M10 gradient descentM10 gradient descent
M10 gradient descent
 
M06 tree
M06 treeM06 tree
M06 tree
 
M07 svm
M07 svmM07 svm
M07 svm
 
M08 BiasVarianceTradeoff
M08 BiasVarianceTradeoffM08 BiasVarianceTradeoff
M08 BiasVarianceTradeoff
 
Chapter 05 k nn
Chapter 05 k nnChapter 05 k nn
Chapter 05 k nn
 
Chapter 04-discriminant analysis
Chapter 04-discriminant analysisChapter 04-discriminant analysis
Chapter 04-discriminant analysis
 
M03 nb-02
M03 nb-02M03 nb-02
M03 nb-02
 
Augmented 11022020-ieee
Augmented 11022020-ieeeAugmented 11022020-ieee
Augmented 11022020-ieee
 
Chapter 02-logistic regression
Chapter 02-logistic regressionChapter 02-logistic regression
Chapter 02-logistic regression
 
Chapter01 introductory handbook
Chapter01 introductory handbookChapter01 introductory handbook
Chapter01 introductory handbook
 
A voyage-inward-02
A voyage-inward-02A voyage-inward-02
A voyage-inward-02
 
Evaluating classifierperformance ml-cs6923
Evaluating classifierperformance ml-cs6923Evaluating classifierperformance ml-cs6923
Evaluating classifierperformance ml-cs6923
 
A data scientist's study plan
A data scientist's study planA data scientist's study plan
A data scientist's study plan
 
Cognitive Assistants
Cognitive AssistantsCognitive Assistants
Cognitive Assistants
 
Essay on-data-analysis
Essay on-data-analysisEssay on-data-analysis
Essay on-data-analysis
 
Joy of Unix
Joy of UnixJoy of Unix
Joy of Unix
 
How to-run-ols-diagnostics-02
How to-run-ols-diagnostics-02How to-run-ols-diagnostics-02
How to-run-ols-diagnostics-02
 
Sdr dodd frankbirdseyeview
Sdr dodd frankbirdseyeviewSdr dodd frankbirdseyeview
Sdr dodd frankbirdseyeview
 

Recently uploaded

Aminabad Call Girl Agent 9548273370 , Call Girls Service Lucknow
Aminabad Call Girl Agent 9548273370 , Call Girls Service LucknowAminabad Call Girl Agent 9548273370 , Call Girls Service Lucknow
Aminabad Call Girl Agent 9548273370 , Call Girls Service Lucknowmakika9823
 
RA-11058_IRR-COMPRESS Do 198 series of 1998
RA-11058_IRR-COMPRESS Do 198 series of 1998RA-11058_IRR-COMPRESS Do 198 series of 1998
RA-11058_IRR-COMPRESS Do 198 series of 1998YohFuh
 
(PARI) Call Girls Wanowrie ( 7001035870 ) HI-Fi Pune Escorts Service
(PARI) Call Girls Wanowrie ( 7001035870 ) HI-Fi Pune Escorts Service(PARI) Call Girls Wanowrie ( 7001035870 ) HI-Fi Pune Escorts Service
(PARI) Call Girls Wanowrie ( 7001035870 ) HI-Fi Pune Escorts Serviceranjana rawat
 
VIP High Class Call Girls Jamshedpur Anushka 8250192130 Independent Escort Se...
VIP High Class Call Girls Jamshedpur Anushka 8250192130 Independent Escort Se...VIP High Class Call Girls Jamshedpur Anushka 8250192130 Independent Escort Se...
VIP High Class Call Girls Jamshedpur Anushka 8250192130 Independent Escort Se...Suhani Kapoor
 
PKS-TGC-1084-630 - Stage 1 Proposal.pptx
PKS-TGC-1084-630 - Stage 1 Proposal.pptxPKS-TGC-1084-630 - Stage 1 Proposal.pptx
PKS-TGC-1084-630 - Stage 1 Proposal.pptxPramod Kumar Srivastava
 
Saket, (-DELHI )+91-9654467111-(=)CHEAP Call Girls in Escorts Service Saket C...
Saket, (-DELHI )+91-9654467111-(=)CHEAP Call Girls in Escorts Service Saket C...Saket, (-DELHI )+91-9654467111-(=)CHEAP Call Girls in Escorts Service Saket C...
Saket, (-DELHI )+91-9654467111-(=)CHEAP Call Girls in Escorts Service Saket C...Sapana Sha
 
VIP High Profile Call Girls Amravati Aarushi 8250192130 Independent Escort Se...
VIP High Profile Call Girls Amravati Aarushi 8250192130 Independent Escort Se...VIP High Profile Call Girls Amravati Aarushi 8250192130 Independent Escort Se...
VIP High Profile Call Girls Amravati Aarushi 8250192130 Independent Escort Se...Suhani Kapoor
 
High Class Call Girls Noida Sector 39 Aarushi 🔝8264348440🔝 Independent Escort...
High Class Call Girls Noida Sector 39 Aarushi 🔝8264348440🔝 Independent Escort...High Class Call Girls Noida Sector 39 Aarushi 🔝8264348440🔝 Independent Escort...
High Class Call Girls Noida Sector 39 Aarushi 🔝8264348440🔝 Independent Escort...soniya singh
 
定制英国白金汉大学毕业证(UCB毕业证书) 成绩单原版一比一
定制英国白金汉大学毕业证(UCB毕业证书)																			成绩单原版一比一定制英国白金汉大学毕业证(UCB毕业证书)																			成绩单原版一比一
定制英国白金汉大学毕业证(UCB毕业证书) 成绩单原版一比一ffjhghh
 
Call Girls In Mahipalpur O9654467111 Escorts Service
Call Girls In Mahipalpur O9654467111  Escorts ServiceCall Girls In Mahipalpur O9654467111  Escorts Service
Call Girls In Mahipalpur O9654467111 Escorts ServiceSapana Sha
 
Invezz.com - Grow your wealth with trading signals
Invezz.com - Grow your wealth with trading signalsInvezz.com - Grow your wealth with trading signals
Invezz.com - Grow your wealth with trading signalsInvezz1
 
Log Analysis using OSSEC sasoasasasas.pptx
Log Analysis using OSSEC sasoasasasas.pptxLog Analysis using OSSEC sasoasasasas.pptx
Log Analysis using OSSEC sasoasasasas.pptxJohnnyPlasten
 
Predicting Employee Churn: A Data-Driven Approach Project Presentation
Predicting Employee Churn: A Data-Driven Approach Project PresentationPredicting Employee Churn: A Data-Driven Approach Project Presentation
Predicting Employee Churn: A Data-Driven Approach Project PresentationBoston Institute of Analytics
 
VIP Call Girls in Amravati Aarohi 8250192130 Independent Escort Service Amravati
VIP Call Girls in Amravati Aarohi 8250192130 Independent Escort Service AmravatiVIP Call Girls in Amravati Aarohi 8250192130 Independent Escort Service Amravati
VIP Call Girls in Amravati Aarohi 8250192130 Independent Escort Service AmravatiSuhani Kapoor
 
Full night 🥵 Call Girls Delhi New Friends Colony {9711199171} Sanya Reddy ✌️o...
Full night 🥵 Call Girls Delhi New Friends Colony {9711199171} Sanya Reddy ✌️o...Full night 🥵 Call Girls Delhi New Friends Colony {9711199171} Sanya Reddy ✌️o...
Full night 🥵 Call Girls Delhi New Friends Colony {9711199171} Sanya Reddy ✌️o...shivangimorya083
 
From idea to production in a day – Leveraging Azure ML and Streamlit to build...
From idea to production in a day – Leveraging Azure ML and Streamlit to build...From idea to production in a day – Leveraging Azure ML and Streamlit to build...
From idea to production in a day – Leveraging Azure ML and Streamlit to build...Florian Roscheck
 
Indian Call Girls in Abu Dhabi O5286O24O8 Call Girls in Abu Dhabi By Independ...
Indian Call Girls in Abu Dhabi O5286O24O8 Call Girls in Abu Dhabi By Independ...Indian Call Girls in Abu Dhabi O5286O24O8 Call Girls in Abu Dhabi By Independ...
Indian Call Girls in Abu Dhabi O5286O24O8 Call Girls in Abu Dhabi By Independ...dajasot375
 
Building on a FAIRly Strong Foundation to Connect Academic Research to Transl...
Building on a FAIRly Strong Foundation to Connect Academic Research to Transl...Building on a FAIRly Strong Foundation to Connect Academic Research to Transl...
Building on a FAIRly Strong Foundation to Connect Academic Research to Transl...Jack DiGiovanna
 
100-Concepts-of-AI by Anupama Kate .pptx
100-Concepts-of-AI by Anupama Kate .pptx100-Concepts-of-AI by Anupama Kate .pptx
100-Concepts-of-AI by Anupama Kate .pptxAnupama Kate
 
B2 Creative Industry Response Evaluation.docx
B2 Creative Industry Response Evaluation.docxB2 Creative Industry Response Evaluation.docx
B2 Creative Industry Response Evaluation.docxStephen266013
 

Recently uploaded (20)

Aminabad Call Girl Agent 9548273370 , Call Girls Service Lucknow
Aminabad Call Girl Agent 9548273370 , Call Girls Service LucknowAminabad Call Girl Agent 9548273370 , Call Girls Service Lucknow
Aminabad Call Girl Agent 9548273370 , Call Girls Service Lucknow
 
RA-11058_IRR-COMPRESS Do 198 series of 1998
RA-11058_IRR-COMPRESS Do 198 series of 1998RA-11058_IRR-COMPRESS Do 198 series of 1998
RA-11058_IRR-COMPRESS Do 198 series of 1998
 
(PARI) Call Girls Wanowrie ( 7001035870 ) HI-Fi Pune Escorts Service
(PARI) Call Girls Wanowrie ( 7001035870 ) HI-Fi Pune Escorts Service(PARI) Call Girls Wanowrie ( 7001035870 ) HI-Fi Pune Escorts Service
(PARI) Call Girls Wanowrie ( 7001035870 ) HI-Fi Pune Escorts Service
 
VIP High Class Call Girls Jamshedpur Anushka 8250192130 Independent Escort Se...
VIP High Class Call Girls Jamshedpur Anushka 8250192130 Independent Escort Se...VIP High Class Call Girls Jamshedpur Anushka 8250192130 Independent Escort Se...
VIP High Class Call Girls Jamshedpur Anushka 8250192130 Independent Escort Se...
 
PKS-TGC-1084-630 - Stage 1 Proposal.pptx
PKS-TGC-1084-630 - Stage 1 Proposal.pptxPKS-TGC-1084-630 - Stage 1 Proposal.pptx
PKS-TGC-1084-630 - Stage 1 Proposal.pptx
 
Saket, (-DELHI )+91-9654467111-(=)CHEAP Call Girls in Escorts Service Saket C...
Saket, (-DELHI )+91-9654467111-(=)CHEAP Call Girls in Escorts Service Saket C...Saket, (-DELHI )+91-9654467111-(=)CHEAP Call Girls in Escorts Service Saket C...
Saket, (-DELHI )+91-9654467111-(=)CHEAP Call Girls in Escorts Service Saket C...
 
VIP High Profile Call Girls Amravati Aarushi 8250192130 Independent Escort Se...
VIP High Profile Call Girls Amravati Aarushi 8250192130 Independent Escort Se...VIP High Profile Call Girls Amravati Aarushi 8250192130 Independent Escort Se...
VIP High Profile Call Girls Amravati Aarushi 8250192130 Independent Escort Se...
 
High Class Call Girls Noida Sector 39 Aarushi 🔝8264348440🔝 Independent Escort...
High Class Call Girls Noida Sector 39 Aarushi 🔝8264348440🔝 Independent Escort...High Class Call Girls Noida Sector 39 Aarushi 🔝8264348440🔝 Independent Escort...
High Class Call Girls Noida Sector 39 Aarushi 🔝8264348440🔝 Independent Escort...
 
定制英国白金汉大学毕业证(UCB毕业证书) 成绩单原版一比一
定制英国白金汉大学毕业证(UCB毕业证书)																			成绩单原版一比一定制英国白金汉大学毕业证(UCB毕业证书)																			成绩单原版一比一
定制英国白金汉大学毕业证(UCB毕业证书) 成绩单原版一比一
 
Call Girls In Mahipalpur O9654467111 Escorts Service
Call Girls In Mahipalpur O9654467111  Escorts ServiceCall Girls In Mahipalpur O9654467111  Escorts Service
Call Girls In Mahipalpur O9654467111 Escorts Service
 
Invezz.com - Grow your wealth with trading signals
Invezz.com - Grow your wealth with trading signalsInvezz.com - Grow your wealth with trading signals
Invezz.com - Grow your wealth with trading signals
 
Log Analysis using OSSEC sasoasasasas.pptx
Log Analysis using OSSEC sasoasasasas.pptxLog Analysis using OSSEC sasoasasasas.pptx
Log Analysis using OSSEC sasoasasasas.pptx
 
Predicting Employee Churn: A Data-Driven Approach Project Presentation
Predicting Employee Churn: A Data-Driven Approach Project PresentationPredicting Employee Churn: A Data-Driven Approach Project Presentation
Predicting Employee Churn: A Data-Driven Approach Project Presentation
 
VIP Call Girls in Amravati Aarohi 8250192130 Independent Escort Service Amravati
VIP Call Girls in Amravati Aarohi 8250192130 Independent Escort Service AmravatiVIP Call Girls in Amravati Aarohi 8250192130 Independent Escort Service Amravati
VIP Call Girls in Amravati Aarohi 8250192130 Independent Escort Service Amravati
 
Full night 🥵 Call Girls Delhi New Friends Colony {9711199171} Sanya Reddy ✌️o...
Full night 🥵 Call Girls Delhi New Friends Colony {9711199171} Sanya Reddy ✌️o...Full night 🥵 Call Girls Delhi New Friends Colony {9711199171} Sanya Reddy ✌️o...
Full night 🥵 Call Girls Delhi New Friends Colony {9711199171} Sanya Reddy ✌️o...
 
From idea to production in a day – Leveraging Azure ML and Streamlit to build...
From idea to production in a day – Leveraging Azure ML and Streamlit to build...From idea to production in a day – Leveraging Azure ML and Streamlit to build...
From idea to production in a day – Leveraging Azure ML and Streamlit to build...
 
Indian Call Girls in Abu Dhabi O5286O24O8 Call Girls in Abu Dhabi By Independ...
Indian Call Girls in Abu Dhabi O5286O24O8 Call Girls in Abu Dhabi By Independ...Indian Call Girls in Abu Dhabi O5286O24O8 Call Girls in Abu Dhabi By Independ...
Indian Call Girls in Abu Dhabi O5286O24O8 Call Girls in Abu Dhabi By Independ...
 
Building on a FAIRly Strong Foundation to Connect Academic Research to Transl...
Building on a FAIRly Strong Foundation to Connect Academic Research to Transl...Building on a FAIRly Strong Foundation to Connect Academic Research to Transl...
Building on a FAIRly Strong Foundation to Connect Academic Research to Transl...
 
100-Concepts-of-AI by Anupama Kate .pptx
100-Concepts-of-AI by Anupama Kate .pptx100-Concepts-of-AI by Anupama Kate .pptx
100-Concepts-of-AI by Anupama Kate .pptx
 
B2 Creative Industry Response Evaluation.docx
B2 Creative Industry Response Evaluation.docxB2 Creative Industry Response Evaluation.docx
B2 Creative Industry Response Evaluation.docx
 

Chapter 2: R tutorial Handbook for Data Science and Machine Learning Practitioners

  • 2. https://www.r-project.org/ Essential tools: RGUI Basic R processing, RScript to run batch scripts, RCMD (to install in Unix/Linux) variants RStudio is a compelling tool – though defer RStudio until you know R very well – tools are limiting you – bad idea to start with RStudio to learn the language, IMHO. Reference Sites: (that I often use, don’t leave home without it) https://www.r-bloggers.com https://nabble.com/ http://rfunction.com https://stackoverflow.com/ https://stats.stackexchange.com/ https://www.datasciencemadesimple.com/ http://www.r-tutor.com/ There are thousands if not more, useful R sites you can learn from Again to do what you want to get done…otherwise you will be sucked into vortex.. … Chapter-02-R-Tutorial file:///C:/Users/rk215/Documents/R-Tutorial.html#Learning_R_to_do_D... 2 of 31 10/18/2020, 4:30 AM
  • 3. ls() ## character(0) X<-5 7->Y ifelse(X<Y,'X is Less than Y', 'X is atleast equal to Y') ## [1] "X is Less than Y" vec<-1:13 is.vector(vec) ## [1] TRUE vec[4] ## [1] 4 by2<-seq(1,13,2) (xy2<-seq(1,13,2)) ## [1] 1 3 5 7 9 11 13 xy2[4] ## [1] 7 Chapter-02-R-Tutorial file:///C:/Users/rk215/Documents/R-Tutorial.html#Learning_R_to_do_D... 3 of 31 10/18/2020, 4:30 AM
  • 4. is.vector(xy2[4]) ## [1] TRUE length(xy2[4]) ## [1] 1 vec[vec %in% by2] ## [1] 1 3 5 7 9 11 13 (xyeven<-seq(0,13,2)) ## [1] 0 2 4 6 8 10 12 length(vec) ## [1] 13 mean(vec) ## [1] 7 sd(vec) ## [1] 3.89444 sum(vec) ## [1] 91 cumprod(vec) ## [1] 1 2 6 24 120 720 ## [7] 5040 40320 362880 3628800 39916800 479001600 ## [13] 6227020800 Chapter-02-R-Tutorial file:///C:/Users/rk215/Documents/R-Tutorial.html#Learning_R_to_do_D... 4 of 31 10/18/2020, 4:30 AM
  • 5. L<-list(X=5,reason="I like 5") L ## $X ## [1] 5 ## ## $reason ## [1] "I like 5" mx<-matrix(c(rep(0,5),seq(1:5)),nrow=2,ncol=5) # fixed the error now the mx should have correct values not ALL zeros mx ## [,1] [,2] [,3] [,4] [,5] ## [1,] 0 0 0 2 4 ## [2,] 0 0 1 3 5 mxbyr<-matrix(c(rep(0,5),seq(1:5)),nrow=2,ncol=5,byrow=TRUE) mxbyr ## [,1] [,2] [,3] [,4] [,5] ## [1,] 0 0 0 0 0 ## [2,] 1 2 3 4 5 dd <- structure(list( population = c(4.560667108, 1.275920972) ,continents = c('Asia', 'Africa')) ,.Names = c("Pop", "Continent") ,row.names = c(NA, -2L) ,class = "data.frame") dd ## Pop Continent ## 1 4.560667 Asia ## 2 1.275921 Africa Chapter-02-R-Tutorial file:///C:/Users/rk215/Documents/R-Tutorial.html#Learning_R_to_do_D... 5 of 31 10/18/2020, 4:30 AM
  • 6. dd<-rbind(dd,c(4.1570842,'Oceania')) dd ## Pop Continent ## 1 4.560667108 Asia ## 2 1.275920972 Africa ## 3 4.1570842 Oceania dd<-cbind(dd,density=c(100,36,4)) dd<-rbind(dd,c(0,'pangea')) dd ## Pop Continent density ## 1 4.560667108 Asia 100 ## 2 1.275920972 Africa 36 ## 3 4.1570842 Oceania 4 ## 4 0 pangea 0 which(dd$Pop==0) ## [1] 4 dd<-dd[-which(dd$Pop==0),] dd ## Pop Continent density ## 1 4.560667108 Asia 100 ## 2 1.275920972 Africa 36 ## 3 4.1570842 Oceania 4 birds<-data.frame(nlegs=rep(2,5),can_fly=c(0,1,1,0,1),height=c(25,40,20,150, 10), color=c('black','black','blue','black','brown')) birds2<-cbind(birds,c('chicken','vulture','parrot','ostrich','sparrow')) names(birds2)<-c('nlegs','can_fly','height','color','species') birds2 Chapter-02-R-Tutorial file:///C:/Users/rk215/Documents/R-Tutorial.html#Learning_R_to_do_D... 6 of 31 10/18/2020, 4:30 AM
  • 7. ## nlegs can_fly height color species ## 1 2 0 25 black chicken ## 2 2 1 40 black vulture ## 3 2 1 20 blue parrot ## 4 2 0 150 black ostrich ## 5 2 1 10 brown sparrow … chickencolors<-c('black','white','red','mixed') vulturecolors<-c('grey','black','white') parrotcolors<-c('teal','green','blue','mixed','pink') ostrichcolors<-c('grey','black') sparrowcolors<-c('dark cement','brown') hchicken<-sample(rnorm(10,25,6),5) hvulture<-sample(rnorm(10,40,4),5) hparrot<-sample(rnorm(10,20,2),5) hostrich<-sample(rnorm(10,150,20),5) hsparrow<-sample(rnorm(10,10,1),5) cdset<-rbind(birds2,data.frame(nlegs=rep(2,5),can_fly=rep(0,5), height=hchic ken, color=sample(chickencolors,5,replace=T),species=rep('chicken',5)), data.frame(nlegs=rep(2,5),can_fly=rep(1,5), height=hvulture, color=sample(vulturecolors,5,replace=T),species=rep('vulture',5)), data.frame(nlegs=rep(2,5),can_fly=rep(1,5), height=hparrot, color=sample(parrotcolors,5,replace=T),species=rep('parrot',5)), data.frame(nlegs=rep(2,5),can_fly=rep(0,5), height=hostrich, color=sample(ostrichcolors,5,replace=T),species=rep('ostrich',5)), data.frame(nlegs=rep(2,5),can_fly=rep(1,5), height=hsparrow, color=sample(sparrowcolors,5,replace=T),species=rep('sparrow',5))) cdset # just print out the contents Chapter-02-R-Tutorial file:///C:/Users/rk215/Documents/R-Tutorial.html#Learning_R_to_do_D... 7 of 31 10/18/2020, 4:30 AM
  • 8. ## nlegs can_fly height color species ## 1 2 0 25.000000 black chicken ## 2 2 1 40.000000 black vulture ## 3 2 1 20.000000 blue parrot ## 4 2 0 150.000000 black ostrich ## 5 2 1 10.000000 brown sparrow ## 6 2 0 21.795787 red chicken ## 7 2 0 39.459162 mixed chicken ## 8 2 0 22.981968 black chicken ## 9 2 0 17.744720 black chicken ## 10 2 0 25.911222 mixed chicken ## 11 2 1 39.016163 white vulture ## 12 2 1 40.037789 white vulture ## 13 2 1 42.251693 grey vulture ## 14 2 1 39.014589 grey vulture ## 15 2 1 38.475420 white vulture ## 16 2 1 20.316044 mixed parrot ## 17 2 1 22.712721 teal parrot ## 18 2 1 22.840455 mixed parrot ## 19 2 1 14.934359 blue parrot ## 20 2 1 21.195914 blue parrot ## 21 2 0 160.085412 black ostrich ## 22 2 0 140.594205 black ostrich ## 23 2 0 174.088029 grey ostrich ## 24 2 0 157.684178 grey ostrich ## 25 2 0 135.249085 grey ostrich ## 26 2 1 9.295639 dark cement sparrow ## 27 2 1 11.266186 dark cement sparrow ## 28 2 1 9.336063 brown sparrow ## 29 2 1 10.169087 brown sparrow ## 30 2 1 11.060101 brown sparrow dim(cdset) # what are the dimensions ## [1] 30 5 nrow(cdset) # number of rows ## [1] 30 ncol(cdset) # number of columns ## [1] 5 names(cdset) # data.frames have names matrices dont Chapter-02-R-Tutorial file:///C:/Users/rk215/Documents/R-Tutorial.html#Learning_R_to_do_D... 8 of 31 10/18/2020, 4:30 AM
  • 9. ## [1] "nlegs" "can_fly" "height" "color" "species" head(cdset) ## nlegs can_fly height color species ## 1 2 0 25.00000 black chicken ## 2 2 1 40.00000 black vulture ## 3 2 1 20.00000 blue parrot ## 4 2 0 150.00000 black ostrich ## 5 2 1 10.00000 brown sparrow ## 6 2 0 21.79579 red chicken tail(cdset) ## nlegs can_fly height color species ## 25 2 0 135.249085 grey ostrich ## 26 2 1 9.295639 dark cement sparrow ## 27 2 1 11.266186 dark cement sparrow ## 28 2 1 9.336063 brown sparrow ## 29 2 1 10.169087 brown sparrow ## 30 2 1 11.060101 brown sparrow row.names(cdset) ## [1] "1" "2" "3" "4" "5" "6" "7" "8" "9" "10" "11" "12" "13" "1 4" "15" ## [16] "16" "17" "18" "19" "20" "21" "22" "23" "24" "25" "26" "27" "28" "2 9" "30" cdset[1,3]# just one cell ## [1] 25 cdset[1,] # entire observation ## nlegs can_fly height color species ## 1 2 0 25 black chicken cdset[,3]# entire column or the feature Chapter-02-R-Tutorial file:///C:/Users/rk215/Documents/R-Tutorial.html#Learning_R_to_do_D... 9 of 31 10/18/2020, 4:30 AM
  • 10. ## [1] 25.000000 40.000000 20.000000 150.000000 10.000000 21.795787 ## [7] 39.459162 22.981968 17.744720 25.911222 39.016163 40.037789 ## [13] 42.251693 39.014589 38.475420 20.316044 22.712721 22.840455 ## [19] 14.934359 21.195914 160.085412 140.594205 174.088029 157.684178 ## [25] 135.249085 9.295639 11.266186 9.336063 10.169087 11.060101 cdset[cdset$species=='sparrow',]# review just the sparrow data.entire observ ations ## nlegs can_fly height color species ## 5 2 1 10.000000 brown sparrow ## 26 2 1 9.295639 dark cement sparrow ## 27 2 1 11.266186 dark cement sparrow ## 28 2 1 9.336063 brown sparrow ## 29 2 1 10.169087 brown sparrow ## 30 2 1 11.060101 brown sparrow cdset[cdset$species=='sparrow',c(1,3,5)]# just some of the columns ## nlegs height species ## 5 2 10.000000 sparrow ## 26 2 9.295639 sparrow ## 27 2 11.266186 sparrow ## 28 2 9.336063 sparrow ## 29 2 10.169087 sparrow ## 30 2 11.060101 sparrow cdset[cdset$species=='sparrow',c('nlegs','species')]# or by column names ## nlegs species ## 5 2 sparrow ## 26 2 sparrow ## 27 2 sparrow ## 28 2 sparrow ## 29 2 sparrow ## 30 2 sparrow cdset[cdset$species=='sparrow',-which(names(cdset)=='species')]# filter OUT some columns Chapter-02-R-Tutorial file:///C:/Users/rk215/Documents/R-Tutorial.html#Learning_R_to_do_D... 10 of 31 10/18/2020, 4:30 AM
  • 11. ## nlegs can_fly height color ## 5 2 1 10.000000 brown ## 26 2 1 9.295639 dark cement ## 27 2 1 11.266186 dark cement ## 28 2 1 9.336063 brown ## 29 2 1 10.169087 brown ## 30 2 1 11.060101 brown cdset[cdset$species=='sparrow',-which(names(cdset)%in%c('nlegs','species'))] ## can_fly height color ## 5 1 10.000000 brown ## 26 1 9.295639 dark cement ## 27 1 11.266186 dark cement ## 28 1 9.336063 brown ## 29 1 10.169087 brown ## 30 1 11.060101 brown … … … lapply(1:3,FUN=function(x)x*x) -> exl exl ## [[1]] ## [1] 1 ## ## [[2]] ## [1] 4 ## ## [[3]] ## [1] 9 Chapter-02-R-Tutorial file:///C:/Users/rk215/Documents/R-Tutorial.html#Learning_R_to_do_D... 11 of 31 10/18/2020, 4:30 AM
  • 12. mx ## [,1] [,2] [,3] [,4] [,5] ## [1,] 0 0 0 2 4 ## [2,] 0 0 1 3 5 apply(mx,2,sd) ## [1] 0.0000000 0.0000000 0.7071068 0.7071068 0.7071068 (mx1<-sapply(mx,FUN=function(x)x+1)) ## [1] 1 1 1 1 1 2 3 4 5 6 … … prodidlist<-c(paste("P0",1:9,sep=''),paste("P",10:99,sep='')) cidlist<-c(paste("C0",1:9,sep=''),paste("C",10:22,sep='')) (df<-data.frame(DID=1,CID="C01", PID=sample(prodidlist,sample(1:20,1),replace=F), stringsAsFactors=F)) ## DID CID PID ## 1 1 C01 P88 ## 2 1 C01 P53 ## 3 1 C01 P86 ## 4 1 C01 P90 ## 5 1 C01 P21 ## 6 1 C01 P34 (sample(prodidlist,sample(1:20,1),replace=F)) ## [1] "P75" "P34" "P40" "P27" "P71" "P06" "P72" Chapter-02-R-Tutorial file:///C:/Users/rk215/Documents/R-Tutorial.html#Learning_R_to_do_D... 12 of 31 10/18/2020, 4:30 AM
  • 13. mdf<-do.call('rbind',lapply(1:50,FUN=function(x) { cidlist<-sample(cidlist,sample(1:length(cidlist),1),replace=F) dfi<-do.call('rbind',lapply(cidlist,FUN= function(cid)data.frame(DID=x,CID=cid, PID=sample(prodidlist,sample(1:20,1),replace=F)))) } )) write.table(mdf, file='purchases.csv', sep=',',row.names=F, col.names=T, quote=F) head(mdf) ## DID CID PID ## 1 1 C19 P19 ## 2 1 C19 P33 ## 3 1 C19 P91 ## 4 1 C19 P78 ## 5 1 C19 P66 ## 6 1 C19 P64 nrow(mdf) ## [1] 6436 … read.csv('purchases.csv',head=T,sep=',')->rmdf titanic<-read.csv("http://christianherta.de/lehre/dataScience/machineLearnin g/data/titanic-train.csv",header=T) head(titanic) Chapter-02-R-Tutorial file:///C:/Users/rk215/Documents/R-Tutorial.html#Learning_R_to_do_D... 13 of 31 10/18/2020, 4:30 AM
  • 14. ## PassengerId Survived Pclass ## 1 1 0 3 ## 2 2 1 1 ## 3 3 1 3 ## 4 4 1 1 ## 5 5 0 3 ## 6 6 0 3 ## Name Sex Age SibSp Pa rch ## 1 Braund, Mr. Owen Harris male 22 1 0 ## 2 Cumings, Mrs. John Bradley (Florence Briggs Thayer) female 38 1 0 ## 3 Heikkinen, Miss. Laina female 26 0 0 ## 4 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35 1 0 ## 5 Allen, Mr. William Henry male 35 0 0 ## 6 Moran, Mr. James male NA 0 0 ## Ticket Fare Cabin Embarked ## 1 A/5 21171 7.2500 S ## 2 PC 17599 71.2833 C85 C ## 3 STON/O2. 3101282 7.9250 S ## 4 113803 53.1000 C123 S ## 5 373450 8.0500 S ## 6 330877 8.4583 Q dim(titanic) ## [1] 891 12 table(mdf==rmdf) ## ## TRUE ## 19308 cumprod(dim(mdf)) #rows ^ columns the number of elements all of them match a s they should Chapter-02-R-Tutorial file:///C:/Users/rk215/Documents/R-Tutorial.html#Learning_R_to_do_D... 14 of 31 10/18/2020, 4:30 AM
  • 15. ## [1] 6436 19308 nrow(rmdf)* ncol(rmdf) ## [1] 19308 quantmod::getSymbols(c("IBM","SPY"),from='2020-01-01') ## Registered S3 method overwritten by 'quantmod': ## method from ## as.zoo.data.frame zoo ## 'getSymbols' currently uses auto.assign=TRUE by default, but will ## use auto.assign=FALSE in 0.5-0. You will still be able to use ## 'loadSymbols' to automatically load data. getOption("getSymbols.env") ## and getOption("getSymbols.auto.assign") will still be checked for ## alternate defaults. ## ## This message is shown once per session and may be disabled by setting ## options("getSymbols.warning4.0"=FALSE). See ?getSymbols for details. ## [1] "IBM" "SPY" dim(IBM) ## [1] 201 6 #dim(JNJ) #quantmod::getSymbols(c("SPY"),from='2020-01-01') head(IBM) Chapter-02-R-Tutorial file:///C:/Users/rk215/Documents/R-Tutorial.html#Learning_R_to_do_D... 15 of 31 10/18/2020, 4:30 AM
  • 16. ## IBM.Open IBM.High IBM.Low IBM.Close IBM.Volume IBM.Adjusted ## 2020-01-02 135.00 135.92 134.77 135.42 3148600 130.5377 ## 2020-01-03 133.57 134.86 133.56 134.34 2373700 129.4967 ## 2020-01-06 133.42 134.24 133.20 134.10 2425500 129.2654 ## 2020-01-07 133.69 134.96 133.40 134.19 3090800 129.3521 ## 2020-01-08 134.51 135.86 133.92 135.31 4346000 130.4317 ## 2020-01-09 135.74 136.79 135.31 136.74 3730600 131.8102 head(IBM$IBM.Adjusted) ## IBM.Adjusted ## 2020-01-02 130.5377 ## 2020-01-03 129.4967 ## 2020-01-06 129.2654 ## 2020-01-07 129.3521 ## 2020-01-08 130.4317 ## 2020-01-09 131.8102 DIBM<-c(head(IBM$IBM.Adjusted,1),head(IBM$IBM.Adjusted,200)) ## fixed the error head((dailyIBMReturns<-(((as.numeric(IBM$IBM.Adjusted)/DIBM) -1)*100))) ## fixed the error now results are full precision ## IBM.Adjusted ## 2020-01-02 0.00000000 ## 2020-01-02 -0.79751719 ## 2020-01-03 -0.17863237 ## 2020-01-06 0.06709531 ## 2020-01-07 0.83464824 ## 2020-01-08 1.05682335 dailyReturnIBM<-dailyIBMReturns#(IBM[[6]]/DIBM)-1 head(dailyReturnIBM) ## IBM.Adjusted ## 2020-01-02 0.00000000 ## 2020-01-02 -0.79751719 ## 2020-01-03 -0.17863237 ## 2020-01-06 0.06709531 ## 2020-01-07 0.83464824 ## 2020-01-08 1.05682335 DSPY<-c(head(SPY$SPY.Adjusted,1),head(SPY$SPY.Adjusted,200)) ## fixed the error head((dailySPYReturns<-(((as.numeric(SPY$SPY.Adjusted)/DSPY) -1)*100))) ## fixed the error now results are full precision Chapter-02-R-Tutorial file:///C:/Users/rk215/Documents/R-Tutorial.html#Learning_R_to_do_D... 16 of 31 10/18/2020, 4:30 AM
  • 17. ## SPY.Adjusted ## 2020-01-02 0.0000000 ## 2020-01-02 -0.7572182 ## 2020-01-03 0.3815075 ## 2020-01-06 -0.2811862 ## 2020-01-07 0.5329669 ## 2020-01-08 0.6780544 dailyReturnSPY<-dailySPYReturns#(SPY[[6]]/DSPY)-1 lmModel<-lm(dailyReturnIBM~dailyReturnSPY) summary(lmModel) ## ## Call: ## lm(formula = dailyReturnIBM ~ dailyReturnSPY) ## ## Residuals: ## Min 1Q Median 3Q Max ## -3.7497 -0.7827 -0.1046 0.6906 7.0070 ## ## Coefficients: ## Estimate Std. Error t value Pr(>|t|) ## (Intercept) -0.04934 0.09647 -0.511 0.61 ## dailyReturnSPY 1.02923 0.04193 24.547 <2e-16 *** ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 ## ## Residual standard error: 1.367 on 199 degrees of freedom ## Multiple R-squared: 0.7517, Adjusted R-squared: 0.7505 ## F-statistic: 602.6 on 1 and 199 DF, p-value: < 2.2e-16 oldPar<-par(mfrow=c(2,1)) plot(dailyReturnIBM,color='black') plot(dailyReturnSPY,color='blue') Chapter-02-R-Tutorial file:///C:/Users/rk215/Documents/R-Tutorial.html#Learning_R_to_do_D... 17 of 31 10/18/2020, 4:30 AM
  • 18. #abline(dailyReturnIBM,dailyReturnSPY) cor(dailyReturnIBM,dailyReturnSPY) ## SPY.Adjusted ## IBM.Adjusted 0.8670258 #plot(dailyReturnIBM,pch=3,color='black') #points(dailyReturnSPY,pch=3,color='blue') library(sqldf) Chapter-02-R-Tutorial file:///C:/Users/rk215/Documents/R-Tutorial.html#Learning_R_to_do_D... 18 of 31 10/18/2020, 4:30 AM
  • 19. ## Loading required package: gsubfn ## Loading required package: proto ## Loading required package: RSQLite purchases<-mdf rpt01<-sqldf('select DID,count(distinct(CID)) from mdf group by DID') head(rpt01) ## DID count(distinct(CID)) ## 1 1 14 ## 2 2 19 ## 3 3 6 ## 4 4 16 ## 5 5 16 ## 6 6 9 tail(rpt01) ## DID count(distinct(CID)) ## 45 45 22 ## 46 46 15 ## 47 47 15 ## 48 48 19 ## 49 49 10 ## 50 50 10 sqldf('select distinct CID from mdf where DID=50') ## CID ## 1 C06 ## 2 C21 ## 3 C03 ## 4 C11 ## 5 C04 ## 6 C17 ## 7 C08 ## 8 C14 ## 9 C18 ## 10 C15 Chapter-02-R-Tutorial file:///C:/Users/rk215/Documents/R-Tutorial.html#Learning_R_to_do_D... 19 of 31 10/18/2020, 4:30 AM
  • 21. SELECT A.p1, A.p2, A.p1p2c / B.p1c AS condProb FROM (SELECT apid P1, bpid P2, Count(*) P1P2C FROM (SELECT A.did AS ADID, A.cid AS ACID, A.pid AS APID, B.did AS BDID, B.cid AS BCID, B.pid AS BPID FROM purchases A JOIN purchases B ON A.cid = B.cid AND A.did = B.did AND A.pid < B.pid) X GROUP BY apid, bpid) A JOIN (SELECT pid AS P1, Count(*) P1C FROM purchases GROUP BY pid) B ON A.p1 = B.p1 ORDER BY condprob DESC; sqlstr<-"select A.P1,A.P2, (A.P1P2C*100)/B.P1C as condProb from ( Select APID P1,BPID P2,count(*) P1P2C from ( select A.DID as ADID, A.CID as ACID , A.PID as APID , B.DID as BDID, B.CID as BCID , B.PID as BPID from purchases A join purchases B on A.CID=B.CID AND A.DID=B.DID AND A.PID < B.PID ) X group by APID,BPID ) A join (select PID as P1, count(*) P1C from purchases group by PID) B on A.P1=B.P1 order by condProb desc" sqlstr ## [1] "select A.P1,A.P2, (A.P1P2C*100)/B.P1C as condProb from ( Select APID P1,BPID P2,count(*) P1P2C from ( select A.DID as\nADID, A.CID as ACID , A.PID as APID , B.DID as BDID, B.CID as BCID , B.PID as BPID from purchases A join purchases B on\nA.CID=B.CID AND A.DID=B.DID AND A.PID < B.PID ) X group by APID,BPID ) A join (select PID as P1, count(*) P1C from purchases group by PID) B on A.P1=B.P1 order by condProb desc" Chapter-02-R-Tutorial file:///C:/Users/rk215/Documents/R-Tutorial.html#Learning_R_to_do_D... 21 of 31 10/18/2020, 4:30 AM
  • 22. condprob<-sqldf(sqlstr) #condprob head(condprob) ## P1 P2 condProb ## 1 P27 P61 31 ## 2 P66 P97 31 ## 3 P04 P92 29 ## 4 P27 P53 29 ## 5 P27 P95 29 ## 6 P52 P79 29 require(rpart) ## Loading required package: rpart require(rpart.plot) ## Loading required package: rpart.plot require(klaR) ## Loading required package: klaR ## Loading required package: MASS Chapter-02-R-Tutorial file:///C:/Users/rk215/Documents/R-Tutorial.html#Learning_R_to_do_D... 22 of 31 10/18/2020, 4:30 AM
  • 23. set.seed(43) tridx<-sample(1:30,20,replace=F) trdata<-cdset[tridx,] tstdata<-cdset[-tridx,] trmodel.rpart<-rpart(species~.,data=trdata,minsplit=2) rpart.plot(trmodel.rpart) #compare this to table(trdata$species)/nrow(trdata) ## ## chicken ostrich parrot sparrow vulture ## 0.20 0.25 0.15 0.20 0.20 predicted.trmodel.rpart<-predict(trmodel.rpart,trdata[,-5],type='class') table(trdata[,5],predicted.trmodel.rpart) Chapter-02-R-Tutorial file:///C:/Users/rk215/Documents/R-Tutorial.html#Learning_R_to_do_D... 23 of 31 10/18/2020, 4:30 AM
  • 24. ## predicted.trmodel.rpart ## chicken ostrich parrot sparrow vulture ## chicken 4 0 0 0 0 ## ostrich 0 5 0 0 0 ## parrot 0 0 3 0 0 ## sparrow 0 0 0 4 0 ## vulture 0 0 0 0 4 # removing colors that are present in test but not in train -- in small data set # tree cannot process that tstdatnw<-tstdata[tstdata$color %in% trdata$color,] tstdatnw ## nlegs can_fly height color species ## 10 2 0 25.91122 mixed chicken ## 11 2 1 39.01616 white vulture ## 15 2 1 38.47542 white vulture ## 18 2 1 22.84045 mixed parrot ## 20 2 1 21.19591 blue parrot ## 25 2 0 135.24908 grey ostrich ## 29 2 1 10.16909 brown sparrow ## 30 2 1 11.06010 brown sparrow predicted.tstdatnw.rpart<-predict(trmodel.rpart,tstdatnw[,-5],type='class') table(tstdatnw[,5],predicted.tstdatnw.rpart) ## predicted.tstdatnw.rpart ## chicken ostrich parrot sparrow vulture ## chicken 1 0 0 0 0 ## ostrich 0 1 0 0 0 ## parrot 0 0 2 0 0 ## sparrow 0 0 0 2 0 ## vulture 0 0 0 0 2 caret::confusionMatrix( table(tstdatnw[,5],predicted.tstdatnw.rpart)) Chapter-02-R-Tutorial file:///C:/Users/rk215/Documents/R-Tutorial.html#Learning_R_to_do_D... 24 of 31 10/18/2020, 4:30 AM
  • 25. ## Confusion Matrix and Statistics ## ## predicted.tstdatnw.rpart ## chicken ostrich parrot sparrow vulture ## chicken 1 0 0 0 0 ## ostrich 0 1 0 0 0 ## parrot 0 0 2 0 0 ## sparrow 0 0 0 2 0 ## vulture 0 0 0 0 2 ## ## Overall Statistics ## ## Accuracy : 1 ## 95% CI : (0.6306, 1) ## No Information Rate : 0.25 ## P-Value [Acc > NIR] : 1.526e-05 ## ## Kappa : 1 ## ## Mcnemar's Test P-Value : NA ## ## Statistics by Class: ## ## Class: chicken Class: ostrich Class: parrot Class: sparrow ## Sensitivity 1.000 1.000 1.00 1.00 ## Specificity 1.000 1.000 1.00 1.00 ## Pos Pred Value 1.000 1.000 1.00 1.00 ## Neg Pred Value 1.000 1.000 1.00 1.00 ## Prevalence 0.125 0.125 0.25 0.25 ## Detection Rate 0.125 0.125 0.25 0.25 ## Detection Prevalence 0.125 0.125 0.25 0.25 ## Balanced Accuracy 1.000 1.000 1.00 1.00 ## Class: vulture ## Sensitivity 1.00 ## Specificity 1.00 ## Pos Pred Value 1.00 ## Neg Pred Value 1.00 ## Prevalence 0.25 ## Detection Rate 0.25 ## Detection Prevalence 0.25 ## Balanced Accuracy 1.00 Chapter-02-R-Tutorial file:///C:/Users/rk215/Documents/R-Tutorial.html#Learning_R_to_do_D... 25 of 31 10/18/2020, 4:30 AM
  • 26. tstdatnw[,5] ## [1] "chicken" "vulture" "vulture" "parrot" "parrot" "ostrich" "sparrow" ## [8] "sparrow" predicted.tstdatnw.rpart ## 10 11 15 18 20 25 29 30 ## chicken vulture vulture parrot parrot ostrich sparrow sparrow ## Levels: chicken ostrich parrot sparrow vulture table(as.character(tstdatnw[,5]),as.character(predicted.tstdatnw.rpart)) ## ## chicken ostrich parrot sparrow vulture ## chicken 1 0 0 0 0 ## ostrich 0 1 0 0 0 ## parrot 0 0 2 0 0 ## sparrow 0 0 0 2 0 ## vulture 0 0 0 0 2 caret::confusionMatrix( table(tstdatnw[,5],predicted.tstdatnw.rpart)) Chapter-02-R-Tutorial file:///C:/Users/rk215/Documents/R-Tutorial.html#Learning_R_to_do_D... 26 of 31 10/18/2020, 4:30 AM
  • 27. ## Confusion Matrix and Statistics ## ## predicted.tstdatnw.rpart ## chicken ostrich parrot sparrow vulture ## chicken 1 0 0 0 0 ## ostrich 0 1 0 0 0 ## parrot 0 0 2 0 0 ## sparrow 0 0 0 2 0 ## vulture 0 0 0 0 2 ## ## Overall Statistics ## ## Accuracy : 1 ## 95% CI : (0.6306, 1) ## No Information Rate : 0.25 ## P-Value [Acc > NIR] : 1.526e-05 ## ## Kappa : 1 ## ## Mcnemar's Test P-Value : NA ## ## Statistics by Class: ## ## Class: chicken Class: ostrich Class: parrot Class: sparrow ## Sensitivity 1.000 1.000 1.00 1.00 ## Specificity 1.000 1.000 1.00 1.00 ## Pos Pred Value 1.000 1.000 1.00 1.00 ## Neg Pred Value 1.000 1.000 1.00 1.00 ## Prevalence 0.125 0.125 0.25 0.25 ## Detection Rate 0.125 0.125 0.25 0.25 ## Detection Prevalence 0.125 0.125 0.25 0.25 ## Balanced Accuracy 1.000 1.000 1.00 1.00 ## Class: vulture ## Sensitivity 1.00 ## Specificity 1.00 ## Pos Pred Value 1.00 ## Neg Pred Value 1.00 ## Prevalence 0.25 ## Detection Rate 0.25 ## Detection Prevalence 0.25 ## Balanced Accuracy 1.00 Chapter-02-R-Tutorial file:///C:/Users/rk215/Documents/R-Tutorial.html#Learning_R_to_do_D... 27 of 31 10/18/2020, 4:30 AM
  • 28. … cdset ## nlegs can_fly height color species ## 1 2 0 25.000000 black chicken ## 2 2 1 40.000000 black vulture ## 3 2 1 20.000000 blue parrot ## 4 2 0 150.000000 black ostrich ## 5 2 1 10.000000 brown sparrow ## 6 2 0 21.795787 red chicken ## 7 2 0 39.459162 mixed chicken ## 8 2 0 22.981968 black chicken ## 9 2 0 17.744720 black chicken ## 10 2 0 25.911222 mixed chicken ## 11 2 1 39.016163 white vulture ## 12 2 1 40.037789 white vulture ## 13 2 1 42.251693 grey vulture ## 14 2 1 39.014589 grey vulture ## 15 2 1 38.475420 white vulture ## 16 2 1 20.316044 mixed parrot ## 17 2 1 22.712721 teal parrot ## 18 2 1 22.840455 mixed parrot ## 19 2 1 14.934359 blue parrot ## 20 2 1 21.195914 blue parrot ## 21 2 0 160.085412 black ostrich ## 22 2 0 140.594205 black ostrich ## 23 2 0 174.088029 grey ostrich ## 24 2 0 157.684178 grey ostrich ## 25 2 0 135.249085 grey ostrich ## 26 2 1 9.295639 dark cement sparrow ## 27 2 1 11.266186 dark cement sparrow ## 28 2 1 9.336063 brown sparrow ## 29 2 1 10.169087 brown sparrow ## 30 2 1 11.060101 brown sparrow klaR::partimat(Species~.,data=iris,method="lda") Chapter-02-R-Tutorial file:///C:/Users/rk215/Documents/R-Tutorial.html#Learning_R_to_do_D... 28 of 31 10/18/2020, 4:30 AM
  • 29. cat ("the sample proportions are") ## the sample proportions are table(cdset$species) ## ## chicken ostrich parrot sparrow vulture ## 6 6 6 6 6 table(cdset$species)/sum(table(cdset$species)) Chapter-02-R-Tutorial file:///C:/Users/rk215/Documents/R-Tutorial.html#Learning_R_to_do_D... 29 of 31 10/18/2020, 4:30 AM
  • 30. ## ## chicken ostrich parrot sparrow vulture ## 0.2 0.2 0.2 0.2 0.2 cat ("the training set proportions are") ## the training set proportions are table(trdata$species) ## ## chicken ostrich parrot sparrow vulture ## 4 5 3 4 4 table(trdata$species)/sum(table(trdata$species)) ## ## chicken ostrich parrot sparrow vulture ## 0.20 0.25 0.15 0.20 0.20 cat ("the test set proportions are") ## the test set proportions are table(tstdata$species) ## ## chicken ostrich parrot sparrow vulture ## 2 1 3 2 2 table(tstdata$species)/sum(table(tstdata$species)) ## ## chicken ostrich parrot sparrow vulture ## 0.2 0.1 0.3 0.2 0.2 Chapter-02-R-Tutorial file:///C:/Users/rk215/Documents/R-Tutorial.html#Learning_R_to_do_D... 30 of 31 10/18/2020, 4:30 AM