More Related Content
Similar to Credit Card Data Statistical Analysis (20)
Credit Card Data Statistical Analysis
- 18. ## (Intercept) cardyes reports age income
## ‐173.2165091 0.1299925 ‐1.5594275 0.5741566 51.2707456
## share owneryes selfempyes dependents months
## 2465.6948184 ‐4.6081681 ‐5.0140531 4.3901807 ‐0.1346725
## majorcards active
## ‐3.0864627 0.8222948
# Reference line through the origin with slope equal to the income coefficient
# of g1.
abline(a = 0, b = coef(g1)["income"])
# The plot indicates that income should be entered linearly into the model.
# ROBUST REGRESSION
# Fit expenditure on all remaining columns of CreditCard with several
# estimators; the fitted objects are compared with compareCoefs() further down.

# OLS baseline
g.ols <- lm(expenditure ~ ., data = CreditCard)

library(MASS)
# M-estimators via rlm(); the Hampel and bisquare fits are initialised from an
# LTS fit (init = "lts").
g.huber <- rlm(expenditure ~ ., psi = psi.huber, data = CreditCard) # Huber
g.hampel <- rlm(expenditure ~ ., psi = psi.hampel, init = "lts",
                data = CreditCard) # Hampel
# NOTE(review): the transcript shows "'rlm' failed to converge in 20 steps"
# right after this chunk; consider raising maxit -- confirm which fit it is.
g.bisquare <- rlm(expenditure ~ ., psi = psi.bisquare, init = "lts",
                  data = CreditCard) # bisquare
- 19. ## Warning in rlm.default(x, y, weights, method = method, wt.method =
## wt.method, : 'rlm' failed to converge in 20 steps
library(robustbase)
##
## Attaching package: 'robustbase'
## The following object is masked from 'package:survival':
##
## heart
## The following object is masked from 'package:faraway':
##
## epilepsy
# Least trimmed squares (LTS) fit of expenditure on all other columns.
g.lts <- ltsReg(expenditure ~ ., data = CreditCard) # LTS
## Warning in covMcd(X, alpha = alpha, use.correction = use.correction): The 665-th order statistic of the absolute deviation of variable 1
## is zero.
## There are 1023 observations (in the entire dataset of 1319 obs.)
## lying on the hyperplane with equation a_1*(x_i1 ‐ m_1) + ... +
## a_p*(x_ip ‐ m_p) = 0 with (m_1, ..., m_p) the mean of these
## observations and coefficients a_i from the vector a <‐ c(1, 0, 0,
## 0, 0, 0, 0, 0, 0, 0, 0)
library(quantreg)
## Warning: package 'quantreg' was built under R version 3.2.5
## Loading required package: SparseM
##
## Attaching package: 'SparseM'
## The following object is masked from 'package:base':
##
## backsolve
##
## Attaching package: 'quantreg'
- 20. ## The following object is masked from 'package:survival':
##
## untangle.specials
# Quantile regression with rq()'s default settings, used here as the LAD fit.
g.lad <- rq(expenditure ~ ., data = CreditCard) # LAD
library(car)
# Collect the coefficient estimates of all six fits side by side; se = FALSE
# leaves the standard errors out of the table.
coefs <- compareCoefs(
  g.ols, g.huber, g.hampel, g.bisquare, g.lts, g.lad,
  se = FALSE
)
## Warning in compareCoefs(g.ols, g.huber, g.hampel, g.bisquare, g.lts,
## g.lad, : models to be compared are of different classes
##
## Call:
## 1: lm(formula = expenditure ~ ., data = CreditCard)
## 2: rlm(formula = expenditure ~ ., data = CreditCard, psi = psi.huber)
## 3: rlm(formula = expenditure ~ ., data = CreditCard, psi = psi.hampel,
## init = "lts")
## 4: rlm(formula = expenditure ~ ., data = CreditCard, psi =
## psi.bisquare, init = "lts")
## 5: ltsReg.formula(formula = expenditure ~ ., data = CreditCard)
## 6: rq(formula = expenditure ~ ., data = CreditCard)
## Est. 1 Est. 2 Est. 3 Est. 4 Est. 5 Est. 6
## (Intercept) ‐1.73e+02 ‐9.84e+01 ‐7.68e+01 ‐5.69e+01 ‐8.11e+01
## cardyes 1.30e‐01 1.66e+00 9.70e+00 4.75e+00 6.00e+00 ‐4.53e+00
## reports ‐1.56e+00 ‐7.13e‐01 ‐4.49e‐01 ‐6.43e‐01 ‐1.40e‐01 ‐7.52e‐01
## age 5.74e‐01 1.69e‐01 2.24e‐01 1.21e‐01 7.18e‐02 9.04e‐02
## income 5.13e+01 3.33e+01 2.29e+01 1.80e+01 4.01e+00 3.07e+01
## share 2.47e+03 2.26e+03 2.19e+03 2.17e+03 2.07e+03 2.25e+03
## owneryes ‐4.61e+00 1.37e‐01 4.48e‐01 ‐3.70e‐01 ‐7.00e‐01 ‐2.15e‐01
## selfempyes ‐5.01e+00 ‐4.06e+00 ‐6.27e+00 ‐3.08e+00 9.24e‐01 ‐1.08e+00
## dependents 4.39e+00 2.50e+00 4.04e+00 1.67e+00 5.42e‐01 6.63e‐01
## months ‐1.35e‐01 ‐2.22e‐02 ‐2.73e‐03 1.36e‐02 ‐6.80e‐03 ‐1.10e‐02
## majorcards ‐3.09e+00 2.92e‐01 1.33e+00 1.95e+00 ‐1.15e‐01 3.60e‐01
## active 8.22e‐01 ‐1.85e‐01 ‐3.52e‐01 ‐1.58e‐01 1.80e‐02 ‐1.04e‐01
## Intercept ‐1.56e+01
# Label the columns by estimator, in the same order the models were passed to
# compareCoefs(), then print the table.
colnames(coefs) <- c("OLS", "Huber", "Hampel", "Bisquare", "LTS", "LAD")
coefs
- 27.
par(oldpar)
# Compare the two models using the MSEs from the cross-validations
# with number of folds equal to 3.
# Which model gives the better MSE?

# Draw one seed value and keep it, so that the g.step run below can be given
# the same seed as this g1 run.
seed <- round(runif(1, min = 0, max = 100))
# Two plot panels side by side; the previous settings are kept in oldpar1.
oldpar1 <- par(mfrow = c(1, 2))
mse.g1 <- CVlm(
  data = CreditCard,
  form.lm = g1,
  m = 3,
  seed = seed,
  printit = FALSE,
  main = "g1"
)
## Warning in CVlm(data = CreditCard, form.lm = g1, m = 3, seed = seed, printit = F, :
##
## As there is >1 explanatory variable, cross‐validation
## predicted values for a fold are not a linear function
## of corresponding overall predicted values. Lines that
## are shown for the different folds are approximate
# 3-fold cross-validation of the g.step model, called with the same data, fold
# count and seed arguments as the g1 run.
mse.g.step <- CVlm(
  data = CreditCard,
  form.lm = g.step,
  m = 3,
  seed = seed,
  printit = FALSE,
  main = "g.step"
)
## Warning in CVlm(data = CreditCard, form.lm = g.step, m = 3, seed = seed, :
##
## As there is >1 explanatory variable, cross‐validation
## predicted values for a fold are not a linear function
## of corresponding overall predicted values. Lines that
## are shown for the different folds are approximate
- 30. ##
## Attaching package: 'ellipse'
## The following object is masked from 'package:car':
##
## ellipse
# Joint confidence region for the share and active coefficients of g1, with the
# origin, the point estimate and the individual confidence limits overlaid.
# Wrapped in local() so the helper variables do not leak into the workspace.
local({
  estimates <- coef(g1)
  limits <- confint(g1)

  region <- ellipse(g1, c("share", "active"))
  plot(region, type = "l", main = "Joint Confidence Region")

  # Origin, i.e. both coefficients equal to zero
  points(x = 0, y = 0)
  # Fitted (share, active) pair
  points(estimates["share"], estimates["active"], pch = 18)

  # Dashed lines at the one-at-a-time confidence limits
  abline(v = limits["share", ], lty = 2)
  abline(h = limits["active", ], lty = 2)
})
## Since zero does not fall within either set of confidence limits, we reject the null hypothesis in favour of the alternative hypothesis.
Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that
generated the plot.