This program uses advanced SAS programming techniques, PROC SQL, and SAS macros to meet the following requirements:
- Build a filter for potentially bad data of variable Date and flag a warning for that observation in the log
- Calculate the annualized volatility for each random variable and correlation matrix for all the random variables for a length of time period (one month, for example), call it a time window.
- Output the volatility and the correlation coefficient matrix into a flat ASCII file.
- Relax the above specifications to allow (1) the weekly basis to vary to any number of days (2) the length of a time period to vary to any number of days.
- Output the lower triangle of correlation coefficient matrix (including the diagonal elements) for each time window of all the above random variables into a flat file.
Tool used:
SAS 9.3_M1
Techniques used:
SAS Macros
SQL Procedures
Arrays
Tool to calculate Annualized Volatility over a period of time: SAS Macros, SQL Procedures and Arrays
/*
Submitted
by:
Akanksha
Jain
*/
/*
Project
Requirements:
1)
Read
the
ASCII
data
file
into
SAS.
The
variable
names
are
date
(mmddyy8.)
usdlr3m
usdlr2y
usdlr3y
usdlr5y
usdlr10y
gbplr3m
gbplr2y
gbplr3y
gbplr5y
gbplr10y
demlr3m
demlr2y
demlr3y
demlr5y
demlr10y
(in
this
order).
2)
The
definitions
of
above
variables
are
that
they
are
log
of
daily
quote
ratio
of
some
currencies.
3)
In
your
program,
build
a
filter
for
potentially
bad
data
of
variable
Date
and
flag
a
warning
for
that
observation
in
the
log.
For
example,
if
the
date
has
a
value
of
10/32/89,
then
this
record
should
be
identified,
and
a
warning
flag
should
be
issued.
4)
Calculate
the
volatility
for
each
random
variable
and
correlation
matrix
for
all
the
random
variables
for
a
length
of
time
period
(one
month,
for
example),
let's
call
it
a
time
window.
The
volatility
and
correlation
coefficient
matrix
need
to
be
output
into
a
flat
ASCII
file.
The
time
window
that
we
want
to
calculate
volatility
from
moves
over
time
on
a
weekly
basis.
The
volatility
is
defined
as
standard
deviation
of
the
above
variables
(except
date,
a
nonrandom
variable).
Note:
the
volatility
calculation
here
is
based
on
daily
data;
the
volatility
you
need
to
get
is
annualized,
which
can
be
done
by
multiplying
a
factor
of
square
root
of
250
to
the
daily
volatility.
5)
Relax
the
above
specifications
to
allow
(1)
the
weekly
basis
to
vary
to
any
number
of
days
(2)
the
length
of
a
time
period
to
vary
to
any
number
of
days.
6)
Output
the
lower
triangle
of
correlation
coefficient
matrix
(including
the
diagonal
elements)
for
each
time
window
of
all
the
above
random
variables
into
a
flat
file.
*/
/* Session options: MLOGIC, MPRINT and SYMBOLGEN trace macro execution in the log;
   NONUMBER and NODATE drop the page number and date stamp from procedure output. */
options mlogic mprint symbolgen nonumber nodate;

/* Aggregate fileref for the project location; files are addressed as mylib('name').
   NOTE(review): the path has no separators after the drive letter - confirm that it
   matches the real directory before running. */
filename mylib 'Z:BerkeleyAdvanced_SAS_Project';

/* Read the raw ASCII file into WORK.CURRENCY.
   DATE is read with the MMDDYY8. informat, followed by fifteen numeric rate series
   (USD, GBP and DEM at 3m, 2y, 3y, 5y and 10y). The calendar parts of DATE are
   stored in YEAR_, MONTH_ and DAY_ for later checks. */
data currency;
    infile mylib('output.txt');

    input date mmddyy8.
          usdlr3m usdlr2y usdlr3y usdlr5y usdlr10y
          gbplr3m gbplr2y gbplr3y gbplr5y gbplr10y
          demlr3m demlr2y demlr3y demlr5y demlr10y;

    format date mmddyy8.;

    /* Calendar components derived from the SAS date value. */
    year_  = year(date);
    month_ = month(date);
    day_   = day(date);
run;
/* List every observation of WORK.CURRENCY as a visual check of the import.
   The title uses a plain ASCII hyphen; the previous text carried a mis-encoded
   hyphen character that does not render in an ASCII listing. */
proc print data = currency;
    title 'data set Currency - reading an ASCII file into a SAS data set';
run;
/* Show the variable names, types and attributes of WORK.CURRENCY.
   A stray page-number token in front of PROC made the statement invalid and
   has been removed. */
proc contents data = currency;
run;
/* Order WORK.CURRENCY by DATE in place, so the first and last observations
   carry the earliest and latest dates. */
proc sort data = currency;
    by date;
run;

/* Fileref for the annualized volatility output file.
   NOTE(review): the path has no separators after the drive letter - confirm that it
   matches the real file location before running. */
filename ann_vol 'Z:BerkeleyAdvanced_SAS_Projectann_vol.txt';
/*
Macro
VOL_CORR
does
the
following:
1)
Uses
the
data
set
CURRENCY
(which
is
created
from
an
ASCII
file
and
has
variables:
Date
(mmddyy8.)
usdlr3m
usdlr2y
usdlr3y
usdlr5y
usdlr10y
gbplr3m
gbplr2y
gbplr3y
gbplr5y
gbplr10y
demlr3m
demlr2y
demlr3y
demlr5y
demlr10y).
These
variables
are
the
log
of
daily
quote
ratio
of
some
currencies.
2)
Flags
a
warning
in
the
log
if
the
date
has
a
bad
value
such
as
10/32/89,
and
creates
a
variable
flag_bad
which
will
have
a
value
'Y'
if
the
Date
value
is
bad
or
'N'
if
not.
3)
Calculates
the
annualized
volatility
for
the
given
variables
for
a
given
time
window
and
leap -
uses
macro
VOLATILITY.
4)
Calculates
the
correlation
matrix
for
the
given
variables
for
a
given
time
window
and
leap -
uses
macro
CORRELATION.
5)
Captures
the
lower
triangle
of
the
correlation
matrix
for
the
given
variables
for
a
given
time
window
and
leap -
uses
macro
CORR_LOWTRI.
6)
Prints
appropriate
titles
-
uses
macro
TITLE_VOL
TITLE_CORR
TITLE_CORR_LOWTRI.
*/
%macro vol_corr(win_len,leap);
    /*
       Builds WORK.CFINAL from WORK.CURRENCY and queues the per-window reports.

       Parameters:
         win_len - number of days added to a window start date to obtain its end date
         leap    - number of days between consecutive window start dates (must not be 0)

       Processing:
         1. Every observation gets FLAG_BAD = 'Y' or 'N' and bad ones are reported in the log.
         2. The first valid date and the last date of the data set are captured.
         3. On the last observation, one set of macro calls per time window is queued
            with CALL EXECUTE: title_vol, volatility, title_corr, correlation,
            title_corr_lowtri and corr_lowtri. The queued code runs after this
            DATA step has finished, so it can read WORK.CFINAL.
    */
    data cfinal;
        set currency end=last;

        /* MONTH() and DAY() of a valid SAS date never exceed 12 or 31, and a date
           that the informat cannot read is stored as missing. A missing DATE is
           therefore the reliable sign of a bad record. The range tests are kept
           in case MONTH_ and DAY_ are supplied from another source. */
        if missing(date) or month_ gt 12 or day_ gt 31 then do;
            put 'Warning: Observation number: ' _n_ 'has bad data';
            flag_bad = 'Y';
        end;
        else do;
            flag_bad = 'N';
        end;

        format date_first date_last date9.;

        /* Missing dates sort ahead of valid ones, so the first observation is not
           necessarily usable. Take the first non-missing date instead. */
        if missing(date_first) and not missing(date) then do;
            date_first = date;
            put 'First date of data set is:' date_first;
        end;
        retain date_first;

        if last then do;
            date_last = date;
            put 'Last date of data set is:' date_last;
            date_diff = (date_last - date_first);
            put 'Date difference in days is:' date_diff;

            if &leap eq 0 then do;
                put 'The Leap cannot be zero';
            end;
            else if missing(date_diff) then do;
                /* No valid date at all: a window would produce invalid date literals. */
                put 'Warning: no valid dates found, no time windows generated';
            end;
            else do;
                n_time_windows = CEIL(date_diff/&leap);
                put 'Number of Time windows are:' n_time_windows;
                c_date_last = put(date_last, date9.);

                i = 0;
                do until(i ge n_time_windows);
                    put 'value of I is:' i;

                    /* Window runs from DATE_FIRST to DATE_FIRST + WIN_LEN days. */
                    next_date = intnx('day', date_first, &win_len);
                    c_date_first = put(date_first, date9.);
                    c_next_date = put(next_date, date9.);
                    put 'char start date is c_date_first:' c_date_first;
                    put 'char end date is c_next_date:' c_next_date;

                    /* Single quotes keep the macro calls unresolved until CALL EXECUTE runs. */
                    call execute('%title_vol('|| c_date_first ||','||c_next_date||')');
                    call execute('%volatility('|| c_date_first ||','||c_next_date||')');
                    call execute('%title_corr('|| c_date_first ||','||c_next_date||')');
                    call execute('%correlation('|| c_date_first ||','||c_next_date||')');
                    call execute('%title_corr_lowtri('|| c_date_first ||','||c_next_date||')');
                    call execute('%corr_lowtri('|| c_date_first ||','||c_next_date||')');

                    i = i + 1;

                    /* Advance the window start by LEAP days for the next iteration. */
                    date_first = intnx('day', date_first, &leap);
                    put 'start date at the end of loop for next iteration is date_first:' date_first;
                end;
            end;
        end;
    run;
%mend vol_corr;
/*
MACRO
FOR
CREATING
THE
ANNUALIZED
VOLATILITY
FOR
A
GIVEN
RANGE
OF
DATES
*/
%macro volatility(start_dt, end_dt);
    /*
       Prints the annualized volatility of each rate series for one time window.

       Parameters:
         start_dt - window start, a DATE9. style value such as 02JAN1989
         end_dt   - window end, same form, inclusive

       Annualized volatility is the standard deviation of the daily values in
       WORK.CFINAL multiplied by the square root of 250.

       A stray page-number token in front of PROC SQL made the statement invalid
       and has been removed.
    */
    proc sql;
        select std(usdlr3m)*sqrt(250)  as STD_usdlr3m,
               std(usdlr2y)*sqrt(250)  as STD_usdlr2y,
               std(usdlr3y)*sqrt(250)  as STD_usdlr3y,
               std(usdlr5y)*sqrt(250)  as STD_usdlr5y,
               std(usdlr10y)*sqrt(250) as STD_usdlr10y,
               std(gbplr3m)*sqrt(250)  as STD_gbplr3m,
               std(gbplr2y)*sqrt(250)  as STD_gbplr2y,
               std(gbplr3y)*sqrt(250)  as STD_gbplr3y,
               std(gbplr5y)*sqrt(250)  as STD_gbplr5y,
               std(gbplr10y)*sqrt(250) as STD_gbplr10y,
               std(demlr3m)*sqrt(250)  as STD_demlr3m,
               std(demlr2y)*sqrt(250)  as STD_demlr2y,
               std(demlr3y)*sqrt(250)  as STD_demlr3y,
               std(demlr5y)*sqrt(250)  as STD_demlr5y,
               std(demlr10y)*sqrt(250) as STD_demlr10y
        from cfinal
        where date between "&start_dt"d and "&end_dt"d;
    quit;
%mend volatility;
/*
MACRO
FOR
CREATING
THE
CORRELATION
MATRIX
FOR
A
GIVEN
RANGE
OF
DATES
*/
%macro correlation(start_dt, end_dt);
    /*
       Prints the Pearson correlation matrix of the fifteen rate series in
       WORK.CFINAL for observations dated from start_dt to end_dt inclusive.
       Both parameters are DATE9. style values used as SAS date literals.
       NOPROB and NOSIMPLE suppress the p-values and the simple statistics table.
    */
    %local rate_vars;
    %let rate_vars = usdlr3m usdlr2y usdlr3y usdlr5y usdlr10y
                     gbplr3m gbplr2y gbplr3y gbplr5y gbplr10y
                     demlr3m demlr2y demlr3y demlr5y demlr10y;

    proc corr data = cfinal pearson noprob nosimple;
        var &rate_vars;
        where date between "&start_dt"d and "&end_dt"d;
    run;
%mend correlation;
/*
MACRO
FOR
PRINTING
THE
TITLE
"ANNUALIZED
VOLATILITY"
FOR
A
GIVEN
RANGE
OF
DATES
*/
%macro title_vol(start_dt, end_dt);
    /* Sets the title for the annualized volatility report of one time window.
       start_dt and end_dt are inserted into the title text as given.
       The date separator is a plain ASCII hyphen; the previous text carried a
       mis-encoded hyphen character. */
    title "Annualized Volatility for: &start_dt - &end_dt";
%mend title_vol;
/*
MACRO
FOR
PRINTING
THE
TITLE
"CORRELATION
MATRIX"
FOR
A
GIVEN
RANGE
OF
DATES
*/
%macro title_corr(start_dt, end_dt);
    /* Sets the title for the correlation matrix report of one time window.
       start_dt and end_dt are inserted into the title text as given.
       The date separator is a plain ASCII hyphen; the previous text carried a
       mis-encoded hyphen character. */
    title "Correlation Matrix for: &start_dt - &end_dt";
%mend title_corr;
/*
MACRO
FOR
PRINTING
THE
TITLE
"LOWER
TRIANGLE
OF
THE
CORRELATION
MATRIX"
FOR
A
GIVEN
RANGE
OF
DATES
*/
%macro title_corr_lowtri(start_dt, end_dt);
    /* Sets the title for the lower-triangle correlation report of one time window.
       start_dt and end_dt are inserted into the title text as given.
       The date separator is a plain ASCII hyphen; the previous text carried a
       mis-encoded hyphen character. */
    title "Lower Triangle of Correlation Matrix for: &start_dt - &end_dt";
%mend title_corr_lowtri;
/*
MACRO
FOR
CREATING
ONLY
THE
LOWER
TRIANGLE
OF
THE
CORRELATION
MATRIX
FOR
A
GIVEN
RANGE
OF
DATES
*/
%macro corr_lowtri(start_dt, end_dt);
    /*
       Prints the lower triangle (diagonal included) of the Pearson correlation
       matrix of the fifteen rate series for one time window.

       Parameters:
         start_dt - window start, a DATE9. style value used as a SAS date literal
         end_dt   - window end, same form, inclusive

       Data sets written: WORK.CFINAL_CORR, WORK.CORR_MATRIX_FULL and
       WORK.CORR_ARRAY_LOWER.

       Fixes relative to the earlier version:
         - NOOBS was listed on the DATA statement, where it is read as a second
           output data set name and created an unwanted WORK.NOOBS. It belongs
           to PROC PRINT only.
         - The title had a duplicated word and a mis-encoded hyphen.
    */

    /* Step 1: correlations for the window, written to a data set without printing. */
    proc corr data = cfinal pearson noprob nosimple noprint outp = cfinal_corr;
        var usdlr3m usdlr2y usdlr3y usdlr5y usdlr10y
            gbplr3m gbplr2y gbplr3y gbplr5y gbplr10y
            demlr3m demlr2y demlr3y demlr5y demlr10y;
        where date between "&start_dt"d and "&end_dt"d;
    run;

    /* Step 2: keep only the correlation rows of the OUTP= data set. */
    data corr_matrix_full;
        set cfinal_corr;
        where _TYPE_ = "CORR";
    run;

    /* Step 3: blank out the cells above the diagonal.
       Row number _N_ corresponds to the _N_-th variable of the array, so every
       column index greater than _N_ lies in the upper triangle. */
    data corr_array_lower;
        set corr_matrix_full;
        array full_tri {*} usdlr3m usdlr2y usdlr3y usdlr5y usdlr10y
                           gbplr3m gbplr2y gbplr3y gbplr5y gbplr10y
                           demlr3m demlr2y demlr3y demlr5y demlr10y;
        do i = 1 to dim(full_tri);
            if i gt _n_ then full_tri{i} = .;
        end;
        drop i _type_;
    run;

    /* Step 4: print the triangle without the observation number column. */
    proc print data = corr_array_lower noobs;
        title "Lower Triangle for:&start_dt - &end_dt";
    run;
%mend corr_lowtri;