M09-Cross validating-naive-bayes

Cross-Validation-naiveBayes file:///E:/users/rkannan/cuny/fall2020/fall2020/ML-Handbook/m09-cv-r...
1 of 7 11/23/2020, 5:27 PM

2 of 7 11/23/2020, 5:27 PM

library('e1071')
# Location of the comma-separated input file.
file <- 'c://Users/rk215/Data/heart.csv'

# Read the file, taking column names from its first row and keeping any
# text columns as character vectors rather than factors.
heart <- read.csv(
  file,
  header = TRUE,
  sep = ',',
  stringsAsFactors = FALSE
)

# Show the first rows as a quick sanity check.
head(heart)
##   age sex cp trestbps chol fbs restecg thalach exang oldpeak slope ca thal
## 1  63   1  3      145  233   1       0     150     0     2.3     0  0    1
## 2  37   1  2      130  250   0       1     187     0     3.5     0  0    2
## 3  41   0  1      130  204   0       0     172     0     1.4     2  0    2
## 4  56   1  1      120  236   0       1     178     0     0.8     2  0    2
## 5  57   0  0      120  354   0       1     163     1     0.6     2  0    2
## 6  57   1  0      140  192   0       1     148     0     0.4     1  0    1
##   target
## 1      1
## 2      1
## 3      1
## 4      1
## 5      1
## 6      1
# Keep nine columns of `heart` (sex, cp, fbs, restecg, exang, slope, ca, thal,
# target); after subsetting, `target` is column 9.
# NOTE(review): these columns are integer-coded, so naiveBayes() will model
# them as numeric predictors; convert to factors if a categorical model is
# intended -- confirm with the author.
catheart <- heart[, c(2, 3, 6, 7, 9, 11, 12, 13, 14)]

# Reproducible 70/30 train/holdout split.
set.seed(43)
n_rows <- nrow(catheart)
trdidx <- sample(seq_len(n_rows), floor(0.7 * n_rows), replace = FALSE)
trcatheart <- catheart[trdidx, ]
tstcatheart <- catheart[-trdidx, ]

# Fit naive Bayes on the training rows.
nb.model <- naiveBayes(target ~ ., data = trcatheart)
# str(nb.model)
object.size(nb.model) # 11096
## 11096 bytes

# Posterior probabilities for the holdout rows (target column removed).
nb.pred <- predict(nb.model, tstcatheart[, -9], type = 'raw')

# Column k of the posterior matrix corresponds to class k - 1 (classes 0/1).
# Take the arg-max directly: rounding first is unnecessary and, with more
# than two classes, could zero out every column.
nb.class <- apply(nb.pred, 1, which.max) - 1

# caret::confusionMatrix() reads a table as rows = predictions,
# columns = reference. The previous table(actual, predicted) swapped
# Sensitivity with Pos Pred Value and Specificity with Neg Pred Value (the
# recorded output that follows was produced with that transposed table).
# Fixing the factor levels keeps the table 2 x 2 even if only one class is
# predicted.
class_levels <- c(0, 1)
nb.tbl <- table(
  Prediction = factor(nb.class, levels = class_levels),
  Reference = factor(tstcatheart[[9]], levels = class_levels)
)
nb.cfm <- caret::confusionMatrix(nb.tbl)
nb.cfm
3 of 7 11/23/2020, 5:27 PM

## Confusion Matrix and Statistics
##
## nb.class
## 0 1
## 0 28 12
## 1 3 48
##
## Accuracy : 0.8352
## 95% CI : (0.7427, 0.9047)
## No Information Rate : 0.6593
## P-Value [Acc > NIR] : 0.0001482
##
## Kappa : 0.6571
##
## Mcnemar's Test P-Value : 0.0388671
##
## Sensitivity : 0.9032
## Specificity : 0.8000
## Pos Pred Value : 0.7000
## Neg Pred Value : 0.9412
## Prevalence : 0.3407
## Detection Rate : 0.3077
## Detection Prevalence : 0.4396
## Balanced Accuracy : 0.8516
##
## 'Positive' Class : 0
##
# Start a timer for the cross-validation section.
start_tm <- proc.time()

# Number of training rows and number of folds.
N <- nrow(trcatheart)
NF <- 10

# Partition the row positions 1..N into NF contiguous folds using quantile
# break points. include.lowest = TRUE is required: cut() builds left-open
# intervals, so without it position 1 falls outside the first interval,
# becomes NA, and is silently dropped by split() -- that row would then never
# be used for validation.
folds <- split(
  seq_len(N),
  cut(
    seq_len(N),
    breaks = quantile(seq_len(N), probs = seq(0, 1, length.out = NF + 1)),
    include.lowest = TRUE
  )
)

# Every row position must belong to exactly one fold.
stopifnot(sum(lengths(folds)) == N)

length(folds)
## [1] 10
lapply(folds, length)
4 of 7 11/23/2020, 5:27 PM

## $`(1,22.1]`
## [1] 21
##
## $`(22.1,43.2]`
## [1] 21
##
## $`(43.2,64.3]`
## [1] 21
##
## $`(64.3,85.4]`
## [1] 21
##
## $`(85.4,106]`
## [1] 21
##
## $`(106,128]`
## [1] 21
##
## $`(128,149]`
## [1] 21
##
## $`(149,170]`
## [1] 21
##
## $`(170,191]`
## [1] 21
##
## $`(191,212]`
## [1] 22
# Random permutation of the training rows, so the contiguous folds are
# applied to shuffled data.
ridx <- sample(seq_len(nrow(trcatheart)), nrow(trcatheart), replace = FALSE)

# For each fold: fit on the other folds, predict the held-out fold, and keep
# the fold indices, the fitted model and the confusion matrix. The per-fold
# lists are row-bound into a list-matrix with columns fold / m / cfm.
cv_df <- do.call('rbind', lapply(
  folds,
  FUN = function(idx, data = trcatheart[ridx, ]) {
    class_levels <- c(0, 1)

    # Train on everything except the held-out fold.
    m <- naiveBayes(target ~ ., data = data[-idx, ])

    # Posterior probabilities for the held-out fold (column 9 is the target).
    p <- predict(m, data[idx, -9], type = 'raw')

    # Column k of the posterior matrix corresponds to class k - 1.
    pc <- apply(p, 1, which.max) - 1

    # caret::confusionMatrix() expects rows = predictions, columns =
    # reference; table(actual, predicted) would swap sensitivity/PPV and
    # specificity/NPV. Fixed levels keep the table square even when a small
    # fold yields only one predicted (or observed) class.
    pred_tbl <- table(
      Prediction = factor(pc, levels = class_levels),
      Reference = factor(data[idx, 9], levels = class_levels)
    )
    pred_cfm <- caret::confusionMatrix(pred_tbl)

    list(fold = idx, m = m, cfm = pred_cfm) # store the fold, model, cfm
  }
)) # lapply repeats over all folds
5 of 7 11/23/2020, 5:27 PM

# Turn the fold results into a data frame with list columns fold / m / cfm.
cv_df <- as.data.frame(cv_df)

# One row per fold: the overall and by-class statistics of that fold's
# confusion matrix.
tstcv.perf <- as.data.frame(do.call('rbind', lapply(
  cv_df$cfm,
  FUN = function(cfm) c(cfm$overall, cfm$byClass)
)))

# Mean of every statistic across folds, printed via the outer parentheses.
# Columns 6:7 are dropped (AccuracyPValue is among them; neither appears in
# the printed summary).
# NOTE(review): only folds with AccuracyPValue < 0.01 are averaged, which
# discards weak folds and makes the estimate optimistic -- confirm this
# filter is intended.
(cv.tst.perf <- apply(
  tstcv.perf[tstcv.perf$AccuracyPValue < 0.01, -c(6:7)],
  2,
  mean
))
## Accuracy Kappa AccuracyLower
## 0.8683983 0.7318000 0.6545460
## AccuracyUpper AccuracyNull Sensitivity
## 0.9700452 0.5666667 0.8373377
## Specificity Pos Pred Value Neg Pred Value
## 0.8900699 0.8924825 0.8629060
## Precision Recall F1
## 0.8924825 0.8373377 0.8583395
## Prevalence Detection Rate Detection Prevalence
## 0.4523810 0.3766234 0.4324675
## Balanced Accuracy
## 0.8637038
# Spread of each statistic across the retained folds (AccuracyPValue < 0.01),
# with columns 6:7 removed. Despite the ".var" suffix this holds standard
# deviations. The outer parentheses print the result.
(cv.tst.perf.var <- vapply(
  tstcv.perf[tstcv.perf$AccuracyPValue < 0.01, -(6:7)],
  sd,
  numeric(1)
))
## Accuracy Kappa AccuracyLower
## 0.06018717 0.11323819 0.07464027
## AccuracyUpper AccuracyNull Sensitivity
## 0.02365967 0.07221786 0.06971453
## Specificity Pos Pred Value Neg Pred Value
## 0.12234605 0.10504433 0.07414154
## Precision Recall F1
## 0.10504433 0.06971453 0.04174328
## Prevalence Detection Rate Detection Prevalence
## 0.08908708 0.06878895 0.12277136
## Balanced Accuracy
## 0.05126200
6 of 7 11/23/2020, 5:27 PM

# Score the holdout set with each fold's model: one posterior matrix per
# model (column 9, the target, is removed from the inputs).
tstcv_preds <- lapply(
  cv_df$m,
  FUN = function(M, D = tstcatheart[, -9]) predict(M, D, type = 'raw')
)

# Confusion matrix of each model's holdout predictions against the actual
# holdout labels.
tstcv_cfm <- lapply(
  tstcv_preds,
  FUN = function(P, A = tstcatheart[[9]]) {
    class_levels <- c(0, 1)

    # Column k of the posterior matrix corresponds to class k - 1.
    pred_class <- apply(P, 1, which.max) - 1

    # Rows = predictions, columns = reference, as caret::confusionMatrix()
    # expects. Fixed levels keep the table 2 x 2 even if a model predicts a
    # single class for every holdout row.
    pred_tbl <- table(
      Prediction = factor(pred_class, levels = class_levels),
      Reference = factor(A, levels = class_levels)
    )
    caret::confusionMatrix(pred_tbl)
  }
)

# One row per fold model: overall and by-class holdout statistics.
tstcv.perf <- as.data.frame(do.call('rbind', lapply(
  tstcv_cfm,
  FUN = function(cfm) c(cfm$overall, cfm$byClass)
)))

# Mean and standard deviation of each statistic across the fold models,
# dropping columns 6:7.
# NOTE(review): only models with AccuracyPValue < 0.01 are summarised, which
# biases the summary upward -- confirm this filter is intended.
cv.tst.perf <- apply(
  tstcv.perf[tstcv.perf$AccuracyPValue < 0.01, -c(6:7)],
  2,
  mean
)
cv.tst.perf.var <- apply(
  tstcv.perf[tstcv.perf$AccuracyPValue < 0.01, -c(6:7)],
  2,
  sd
)
###################
7 of 7 11/23/2020, 5:27 PM

M09-Cross validating-naive-bayes

Recommended

Recommended

More Related Content

What's hot

What's hot (20)

Similar to M09-Cross validating-naive-bayes

Similar to M09-Cross validating-naive-bayes (20)

More from Raman Kannan

More from Raman Kannan (20)

Recently uploaded

Recently uploaded (20)

M09-Cross validating-naive-bayes