Performing Extreme Value Analysis (EVA) using R
Performing Extreme Value Analysis (EVA)
using R
Whitney Huang
October 23, 2017
Data: Fort Collins daily precipitation
Read the data
Plot the data
Summarize the data
EVA: Block Maxima Approach
Step I: Determine the block size and compute maxima for blocks
Step II: Fit a GEV to the maxima and assess the fit
Step III: Perform inference for return levels
EVA: Peak Over Threshold Approach
Step I: Pick a threshold and extract the threshold exceedances
Step II: Fit a GPD to threshold excesses and assess the fit
Step III: Perform inference for return levels
Data: Fort Collins daily precipitation
We will analyze the daily precipitation amounts (inches) in Fort Collins, Colorado
from 1900 to 1999. (Source: Colorado Climate Center, Colorado State University)
Read the data
# Install the packages for this demos
#install.packages(c("extRemes", "scales", "dplyr", "ismev"))
library(extRemes) # Load the extRemes package for performing EVA
library(dplyr) # group_by() / summarise() are used to extract the block maxima
library(ismev) # gev.dens(), gevq(), gpd.dens(), gpdq() are used in the diagnostics
# The precipitation record is the data set "Fort" that ships with extRemes
data("Fort", package = "extRemes")
head(Fort, n = 6L) # Show the first six observations
## obs tobs month day year Prec
## 1 1 1 1 1 1900 0
## 2 2 2 1 2 1900 0
## 3 3 3 1 3 1900 0
## 4 4 4 1 4 1900 0
Performing Extreme Value Analysis (EVA) using R
## 5 5 5 1 5 1900 0
## 6 6 6 1 6 1900 0
tail(Fort) # Look at the last few observations
## obs tobs month day year Prec
## 36519 36519 360 12 26 1999 0
## 36520 36520 361 12 27 1999 0
## 36521 36521 362 12 28 1999 0
## 36522 36522 363 12 29 1999 0
## 36523 36523 364 12 30 1999 0
## 36524 36524 365 12 31 1999 0
str(Fort) # Look at the structure of the data set
## 'data.frame': 36524 obs. of 6 variables:
## $ obs : num 1 2 3 4 5 6 7 8 9 10 ...
## $ tobs : num 1 2 3 4 5 6 7 8 9 10 ...
## $ month: num 1 1 1 1 1 1 1 1 1 1 ...
## $ day : num 1 2 3 4 5 6 7 8 9 10 ...
## $ year : num 1900 1900 1900 1900 1900 1900 1900 1900 1900 1900 ...
## $ Prec : num 0 0 0 0 0 0 0 0 0 0 ...
Plot the data
# Set up the time format: a decimal-year value for every daily record.
# days_year counts the records in each calendar year; repeating each count
# once per record of that year lines it up with Fort$tobs, so the ratio is
# the fraction of the year elapsed.
# NOTE(review): assumes the rows of Fort are in chronological order, as the
# head()/tail() output suggests.
days_year <- table(Fort$year)
time_year <- Fort$tobs / rep(as.vector(days_year), times = as.vector(days_year))
time <- Fort$year + time_year
# Two stacked panels: the location of Fort Collins on a US state map (top)
# and the complete daily precipitation series (bottom)
library(maps) # Load the package for map drawing
par(mfrow = c(2, 1))
map("state", col = "gray", mar = rep(0, 4))
# Fort Collins, Lat Long Coordinates: 40.5853° N, 105.0844° W
# (x is longitude, negative for west; y is latitude)
points(x = -105.0844, y = 40.5853, pch = "*", col = "red", cex = 1.5)
# Horizontal tick labels and a smaller top margin for the series panel
par(las = 1, mar = c(5.1, 4.1, 1.1, 2.1))
plot(x = time, y = Fort$Prec, type = "h",
     xlab = "Time (Year)",
     ylab = "Daily precipitation (in)")
Performing Extreme Value Analysis (EVA) using R
# Zoom in to look at seasonal variation: the left panel shows a single year
# (1999), the right panel overlays every year by day of the year.
par(mfrow = c(1, 2), mar = c(5.1, 4.1, 4.1, 0.6))
id <- which(Fort$year %in% 1999) # Look at year 1999
par(las = 1)
# Plot the 1999 daily precip time series
plot(seq_along(id), Fort$Prec[id], type = "h",
     xlab = " ",
     ylab = "Daily precipitation (in)",
     xaxt = "n", ylim = c(0, max(Fort$Prec)),
     main = "1999")
par(las = 3)
# Twelve ticks, one per month, placed at roughly mid-month
axis(1, at = seq(15, 345, 30), labels = month.abb)
# Rows holding a Feb 29. They are located from the calendar columns rather
# than by stepping a fixed number of rows: a four-year cycle that contains a
# leap day is 1461 rows long, so a step of 1460 drifts one day earlier with
# every cycle.
# NOTE(review): the name is kept as-is (sic) in case later code refers to it.
Leaf_Day <- which(Fort$month == 2 & Fort$day == 29)
par(las = 1)
library(scales) # Load the scales package to modify color transparency
# Plot the daily precip as a function of the day of the year.
# With the leap days dropped every year contributes exactly 365 values, which
# is what rep(1:365, 100) on the x axis requires.
plot(rep(1:365, 100), Fort$Prec[-Leaf_Day], pch = 1,
     col = alpha("blue", 0.25), cex = 0.5,
     xaxt = "n", xlab = " ", ylab = " ", main = "1900 ~ 1999",
     ylim = c(0, max(Fort$Prec)))
par(las = 3)
axis(1, at = seq(15, 345, 30), labels = month.abb)
Performing Extreme Value Analysis (EVA) using R
# Define "summer" (months 6-8) and "winter" (months 12, 1, 2) daily precip
summer <- which(Fort$month %in% 6:8)
winter <- which(Fort$month %in% c(12, 1, 2))
prec_summer <- Fort$Prec[summer]
prec_winter <- Fort$Prec[winter]
# Plot the histograms for summer and winter non-zero daily precip.
# Both panels share the same breaks and axis limits so that they can be
# compared directly.
par(mfrow = c(1, 2), mar = c(5.1, 4.1, 4.1, 1.1))
max_precip <- max(prec_summer, prec_winter)
brk <- seq(0, max_precip, length.out = 60)
hist(prec_summer[prec_summer > 0], breaks = brk, col = alpha("red", 0.5),
     probability = TRUE, xlim = c(0, max_precip),
     xlab = "Summer (JJA) prec (in)",
     ylab = "Density", main = "", ylim = c(0, 8))
# The rug marks every summer day, dry (zero) days included
rug(prec_summer, col = "red", lwd = 0.5)
hist(prec_winter[prec_winter > 0], breaks = brk, col = alpha("blue", 0.5),
     probability = TRUE, xlim = c(0, max_precip),
     xlab = "Winter (DJF) prec (in)",
     ylab = " ", main = "", ylim = c(0, 8))
rug(prec_winter, col = "blue", lwd = 0.5)
Performing Extreme Value Analysis (EVA) using R
Summarize the data
# NOTE(review): the statements in this section were missing from the text and
# are reconstructed from the printed output that follows each of them --
# confirm against the original document.
# Six number summary for summer and winter daily precip
summary(prec_summer)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00000 0.00000 0.00000 0.05289 0.01000 4.63000
summary(prec_winter)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00000 0.00000 0.00000 0.01477 0.00000 1.32000
# Variation
# presumably the standard deviation (the magnitudes fit sd() better than
# var()) -- TODO confirm
sd(prec_summer)
## [1] 0.2004784
sd(prec_winter)
## [1] 0.0641466
# Interquartile range: matches 3rd Qu. - 1st Qu. in the summaries above
IQR(prec_summer)
## [1] 0.01
IQR(prec_winter)
Performing Extreme Value Analysis (EVA) using R
## [1] 0
# Chance of rain: the share of days with a strictly positive amount
sum(prec_summer > 0) / length(prec_summer)
## [1] 0.2827174
sum(prec_winter > 0) / length(prec_winter)
## [1] 0.1476064
EVA: Block Maxima Approach
We are going to conduct an extreme value analysis using the extRemes package
developed and maintained by Eric Gilleland. In the previous lecture we
learned the block maxima method and the threshold exceedances method for
doing EVA. Let’s see how they work in R.
Step I: Determine the block size and compute maxima for blocks
# Use one year as the block: group the summer records by year
# (group_by() and summarise() come from dplyr, which must be loaded)
grouped_summer <- group_by(Fort[summer, ], year)
# Extracting summer maxima and the timings when the maxima occur:
# prec is the largest daily amount of the year's summer, t is the position
# of that day within the year's summer records
summer_max <- summarise(grouped_summer, prec = max(Prec), t = which.max(Prec))
# Print the block maxima (this produces the tibble shown below)
summer_max
## # A tibble: 100 × 3
## year prec t
## <dbl> <dbl> <int>
## 1 1900 0.51 13
## 2 1901 1.46 15
## 3 1902 1.02 28
## 4 1903 0.53 47
## 5 1904 1.10 35
## 6 1905 1.11 66
## 7 1906 0.64 23
## 8 1907 1.21 56
## 9 1908 1.93 60
## 10 1909 1.11 28
Performing Extreme Value Analysis (EVA) using R
## # ... with 90 more rows
Let’s plot the summer maxima
# Figure: the summer daily series with each year's maximum highlighted
# (right panel) next to a sideways histogram of the maxima with the fitted
# GEV density (narrow left panel).

# Setting up the figure configuration
old.par <- par(no.readonly = TRUE) # saved so the layout can be restored below
mar.default <- par("mar")
mar.left <- mar.default
mar.right <- mar.default
mar.left[2] <- 0  # series panel: no left margin, the histogram sits there
mar.right[4] <- 0 # histogram panel: no right margin
# Time series plot
par(fig = c(0.2, 1, 0, 1), mar = mar.left)
plot(1900 + 1:9200 / 92, prec_summer,
     xlab = "Year", ylab = "",
     main = "Summer Maximum in Fort Collins",
     type = "h", pch = 19, cex = 0.5, col = "lightblue",
     ylim = c(0, max_precip), yaxt = "n")
par(las = 2)
axis(4, at = 0:5)
par(las = 0)
mtext("Precipitation (in)", side = 2, line = 4)
abline(v = 1900:2000, col = "gray", lty = 2)
# summer_max$t is a position within a year's summer records; the series above
# spreads each year over 92 records (1:9200 / 92), so t has to be scaled by
# 92 as well for the maxima to sit on their own spikes
points(1900:1999 + summer_max$t / 92, summer_max$prec,
       pch = 16, col = "blue", cex = 0.5)
# Histogram (computed only; it is drawn by hand, rotated, below)
hs <- hist(summer_max$prec,
           breaks = seq(0, max_precip, length.out = 40),
           plot = FALSE)
bin_lo <- hs$breaks[-length(hs$breaks)] # lower edge of every bin
bin_hi <- hs$breaks[-1]                 # upper edge of every bin
par(fig = c(0, 0.2, 0, 1.0), mar = mar.right, new = TRUE)
# Empty canvas; density grows to the left, hence the negative x range
plot(NA, type = "n", axes = FALSE, yaxt = "n",
     col = rgb(0, 0, 0.5, alpha = 0.5),
     xlab = "Density", ylab = NA, main = NA,
     xlim = c(-max(hs$density), 0),
     ylim = c(0, max_precip))
axis(1, at = c(-0.8, -0.4, 0), c(0.8, 0.4, 0), las = 2)
# Each bar is outlined with three headless arrows: bottom, top and outer edge
arrows(0, bin_lo, -hs$density, bin_lo, col = "blue",
       length = 0, angle = 0, lwd = 1)
arrows(0, bin_hi, -hs$density, bin_hi, col = "blue",
       length = 0, angle = 0, lwd = 1)
arrows(-hs$density, bin_lo, -hs$density, bin_hi, col = "blue",
       angle = 0, length = 0)
# Overlay the fitted GEV density (gev.dens() is provided by ismev)
mle <- fevd(summer_max$prec)$results$par
xg <- seq(0, max_precip, length.out = 100)
lines(-gev.dens(mle, xg), xg, col = "red")
# Restore the graphical parameters so later plots use the full device again
par(old.par)
Performing Extreme Value Analysis (EVA) using R
Step II: Fit a GEV to the maxima and assess the fit
# Fit a GEV to summer maximum daily precip using MLE
# (both are what fevd() uses when type/method are not given, as the printed
# summary below confirms)
gevfit <- fevd(summer_max$prec)
# Print the results
gevfit
## fevd(x = summer_max$prec)
## [1] "Estimation Method used: MLE"
## Negative Log-Likelihood Value: 100.5622
## Estimated parameters:
## location scale shape
## 0.8262256 0.4974066 0.2158052
Performing Extreme Value Analysis (EVA) using R
## Standard Error Estimates:
## location scale shape
## 0.05627189 0.04516721 0.08241033
## Estimated parameter covariance matrix.
## location scale shape
## location 0.003166526 0.0014583680 -0.0013849537
## scale 0.001458368 0.0020400766 -0.0001826602
## shape -0.001384954 -0.0001826602 0.0067914617
## AIC = 207.1244
## BIC = 214.9399
# QQ plot: fitted GEV quantiles against the ordered summer maxima
n_max <- length(summer_max$prec)
# Plotting positions i / (n + 1), derived from the sample size rather than
# hard-coded so the two plotted vectors always have equal length
p <- seq_len(n_max) / (n_max + 1)
# gevq() (ismev) is called with 1 - p so that qm comes out in increasing
# order, matching the ascending sort() of the data
qm <- gevq(mle, 1 - p)
plot(qm, sort(summer_max$prec), xlim = c(0, 5), ylim = c(0, 5),
     pch = 16, cex = 0.5, col = alpha("blue", 0.5),
     xlab = "Model", ylab = "Empirical", main = "Quantile Plot")
# Reference line y = x: points on it mean model and data quantiles agree
abline(0, 1, lwd = 1.5)
Performing Extreme Value Analysis (EVA) using R
Step III: Perform inference for return levels
Suppose we are interested in estimating the 100-year return level
RL100 <- return.level(gevfit, return.period = 100) # Estimate of the 100-year event
RL100 # Print the estimate (this produces the output shown below)
## fevd(x = summer_max$prec)
## return.level.fevd.mle(x = gevfit, return.period = 100)
## GEV model fitted to summer_max$prec
## Data are assumed to be stationary
## [1] "Return Levels for period units in years"
Performing Extreme Value Analysis (EVA) using R
## 100-year level
## 4.741325
# Quantify the estimate uncertainty
## Delta method
# verbose = TRUE makes ci() report which method it uses (first lines below)
CI_delta <- ci(gevfit, return.period = 100, verbose = TRUE)
CI_delta # Print the interval
## Preparing to calculate 95 % CI for 100-year return level
## Model is fixed
## Using Normal Approximation Method.
## fevd(x = summer_max$prec)
## [1] "Normal Approx."
## [1] "100-year return level: 4.741"
## [1] "95% Confidence Interval: (2.9471, 6.5356)"
## Profile likelihood method
# xrange is passed to ci() as the range of return-level values used for the
# profile; it brackets the interval printed below
CI_prof <- ci(gevfit, method = "proflik", xrange = c(2.5, 8),
              return.period = 100, verbose = FALSE)
CI_prof # Print the interval
## fevd(x = summer_max$prec)
## [1] "Profile Likelihood"
## [1] "100-year return level: 4.741"
## [1] "95% Confidence Interval: (3.5081, 7.5811)"
# Histogram of the summer maxima with the fitted GEV density, the 100-year
# return level (black dashed) and its profile-likelihood CI (red dashed)
hist(summer_max$prec, breaks = seq(0, max_precip, length.out = 35),
     col = alpha("lightblue", 0.2), border = "gray",
     xlim = c(0, 8), probability = TRUE, ylim = c(0, 1.2),
     xlab = "summer max (in)",
     main = "95% CI for 100-yr RL")
xg <- seq(0, 8, length.out = 1000)
mle <- gevfit$results$par
lines(xg, gev.dens(mle, xg), lwd = 1.5)
#for (i in 1:3) abline(v = CI_delta[i], lty = 2, col = "blue")
# Elements 1 and 3 of CI_prof are drawn as the interval bounds; element 2 is
# skipped because the point estimate is drawn from RL100 below
for (i in c(1, 3)) {
  abline(v = CI_prof[i], lty = 2, col = "red")
}
abline(v = RL100, lwd = 1.5, lty = 2)
Performing Extreme Value Analysis (EVA) using R
#legend("topleft", legend = c("Delta CI", "Prof CI"),
#col = c("blue", "red"), lty = c(2, 3))
EVA: Peak Over Threshold Approach
Step I: Pick a threshold and extract the threshold exceedances
# Remember the current graphical parameters, then start both panel margins
# from the current default margin vector
old.par <- par(no.readonly = TRUE)
mar.default <- par("mar")
mar.right <- mar.default
mar.left <- mar.default
Performing Extreme Value Analysis (EVA) using R
mar.left[2] <- 0  # series panel: no left margin, the histogram sits there
mar.right[4] <- 0 # histogram panel: no right margin
# Time series plot
par(fig = c(0.2, 1, 0, 1), mar = mar.left)
plot(1900 + 1:9200 / 92, prec_summer, type = "h", col = "lightblue",
     xlab = "Year", ylab = "Daily Precip (inches)", yaxt = "n")
#Threshold exceedances
thres <- 0.4
ex <- prec_summer[prec_summer >= thres]
length(ex) # Number of exceedances
## [1] 344
#Extract the timing of POT
ex_t <- which(prec_summer >= thres)
abline(h = thres, col = "blue", lty = 2)
# Same / 92 scaling as the series above, so the dots sit on their spikes
points(1900 + ex_t / 92, ex, col = alpha("blue", 0.5), pch = 16,
       cex = 0.75)
par(las = 2)
axis(4, at = 0:5)
par(las = 0)
mtext("Precipitation (in)", side = 2, line = 5)
# Histogram of the exceedances (computed only; drawn by hand, rotated, below)
hs <- hist(ex, breaks = seq(thres, max_precip, length.out = 50),
           plot = FALSE)
bin_lo <- hs$breaks[-length(hs$breaks)] # lower edge of every bin
bin_hi <- hs$breaks[-1]                 # upper edge of every bin
par(fig = c(0, 0.2, 0, 1.0), mar = mar.right, new = TRUE)
# Empty canvas; density grows to the left, hence the negative x range
plot(NA, type = "n", axes = FALSE, yaxt = "n",
     col = rgb(0, 0, 0.5, alpha = 0.5),
     xlab = "Density", ylab = NA, main = NA,
     xlim = c(-max(hs$density), 0),
     ylim = c(0, max_precip))
axis(1, at = c(-3, -2, -1, 0), c(3, 2, 1, 0), las = 2)
#abline(h = 21, col = "red", lty = 5)
# Each bar is outlined with three headless arrows: bottom, top and outer edge
arrows(0, bin_lo, -hs$density, bin_lo, col = "blue",
       length = 0, angle = 0, lwd = 1)
arrows(0, bin_hi, -hs$density, bin_hi, col = "blue",
       length = 0, angle = 0, lwd = 1)
arrows(-hs$density, bin_lo, -hs$density, bin_hi, col = "blue",
       angle = 0, length = 0)
# Overlay the fitted GPD density (gpd.dens() is provided by ismev)
mle <- fevd(prec_summer, threshold = thres, type = "GP")$results$par
xg <- seq(thres, max_precip, length.out = 100)
lines(-gpd.dens(mle, thres, xg), xg, col = "red")
# Restore the graphical parameters so later plots use the full device again
par(old.par)
Performing Extreme Value Analysis (EVA) using R
How to choose the “right” threshold?
# Mean residual life plot, drawn by mrlplot() as a threshold-selection aid
mrlplot(x = prec_summer, xlab = "Threshold (in)", main = "Mean Residual Life")
# 0.4 is the threshold chosen here; note that the "straightness" of the
# curve is difficult to assess
abline(v = 0.4, lty = 2, col = "blue")
Performing Extreme Value Analysis (EVA) using R
Step II: Fit a GPD to threshold excesses and assess the fit
# Fit a GPD for threshold exceedances using MLE
# (the whole summer series is passed in together with the threshold)
gpdfit1 <- fevd(prec_summer, threshold = thres, type = "GP")
# Print the results
gpdfit1
## fevd(x = prec_summer, threshold = thres, type = "GP")
## [1] "Estimation Method used: MLE"
## Negative Log-Likelihood Value: 46.14484
Performing Extreme Value Analysis (EVA) using R
## Estimated parameters:
## scale shape
## 0.3053321 0.3236911
## Standard Error Estimates:
## scale shape
## 0.02784980 0.07520322
## Estimated parameter covariance matrix.
## scale shape
## scale 0.0007756112 -0.001337429
## shape -0.0013374289 0.005655524
## AIC = 96.28967
## BIC = 103.9239
# QQ plot: fitted GPD quantiles against the ordered threshold exceedances
n_ex <- length(ex)
# Plotting positions i / (n + 1), derived from the number of exceedances
# rather than hard-coded so they stay correct if the threshold changes
p <- seq_len(n_ex) / (n_ex + 1)
# Use thres, i.e. the threshold the model was fitted with. gpdq() (ismev) is
# called with 1 - p so that qm comes out in increasing order, matching the
# ascending sort() of the data
qm <- gpdq(mle, thres, 1 - p)
plot(qm, sort(ex), xlim = c(0, 6), ylim = c(0, 6),
     pch = 16, cex = 0.5, col = alpha("blue", 0.5),
     xlab = "Model", ylab = "Empirical", main = "Quantile Plot")
# Reference line y = x: points on it mean model and data quantiles agree
abline(0, 1, lwd = 1.5)
Performing Extreme Value Analysis (EVA) using R
Step III: Perform inference for return levels
Again we are interested in estimating the 100-year return level
# Here we need to adjust the return period as here we are
# estimating the return level for summer precip: the fit only sees 92 days
# per year, so 100 years of summers are rescaled by 92 / 365.25.
# NOTE(review): presumably this compensates for fevd() treating the series as
# 365.25 observations per year by default -- confirm against the extRemes
# documentation.
RL100 <- return.level(gpdfit1, return.period = 100 * 92 / 365.25)
RL100 # Print the estimate (this produces the output shown below)
## fevd(x = prec_summer, threshold = thres, type = "GP")
## return.level.fevd.mle(x = gpdfit1, return.period = 100 * 92/365.25)
## GP model fitted to prec_summer
Performing Extreme Value Analysis (EVA) using R
## Data are assumed to be stationary
## [1] "Return Levels for period units in years"
## 25.1882272416153-year level
## 5.656769
# Normal-approximation CI for the adjusted return level
CI_delta <- ci(gpdfit1, return.period = 100 * 92 / 365.25,
               verbose = FALSE)
CI_delta # Print the interval
## fevd(x = prec_summer, threshold = thres, type = "GP")
## [1] "Normal Approx."
## [1] "25.1882272416153-year return level: 5.657"
## [1] "95% Confidence Interval: (3.2246, 8.089)"
# Profile-likelihood CI for the adjusted return level; xrange is passed to
# ci() as the range of return-level values used for the profile
CI_prof <- ci(gpdfit1, method = "proflik", xrange = c(3, 10),
              return.period = 100 * 92 / 365.25, verbose = FALSE)
CI_prof # Print the interval
## fevd(x = prec_summer, threshold = thres, type = "GP")
## [1] "Profile Likelihood"
## [1] "25.1882272416153-year return level: 5.657"
## [1] "95% Confidence Interval: (3.9572, 9.4964)"
# Histogram of the threshold exceedances with the fitted GPD density, the
# 100-year return level (black dashed) and its profile-likelihood CI (red
# dashed)
hist(ex, breaks = 40, col = alpha("lightblue", 0.2), border = "gray",
     xlim = c(thres, 10), probability = TRUE, ylim = c(0, 4),
     xlab = "Threshold excess (in)",
     main = "95% CI for 100-yr RL")
xg <- seq(thres, 10, length.out = 1000)
mle <- gpdfit1$results$par
lines(xg, gpd.dens(mle, thres, xg), lwd = 1.5)
#for (i in c(1, 3)) abline(v = CI_delta[i], lty = 2, col = "blue")
# Elements 1 and 3 of CI_prof are drawn as the interval bounds; element 2 is
# skipped because the point estimate is drawn from RL100 below
for (i in c(1, 3)) {
  abline(v = CI_prof[i], lty = 2, col = "red")
}
abline(v = RL100, lwd = 1.5, lty = 2)
Performing Extreme Value Analysis (EVA) using R
#legend("topleft", legend = c("Delta CI", "Prof CI"),
#col = c("blue", "red"), lty = c(2, 3))

