In [11]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# NOTE(review): statsmodels.tsa.arima_model.ARIMA is deprecated and was removed
# in statsmodels >= 0.13; migrate to statsmodels.tsa.arima.model.ARIMA (whose
# .forecast() returns the forecast directly instead of a tuple) — confirm the
# installed statsmodels version before changing.
from statsmodels.tsa.arima_model import ARIMA
import statsmodels.api as sm
from statsmodels.tsa.seasonal import seasonal_decompose  # imported but unused in the visible cells
import warnings
warnings.filterwarnings('ignore')  # silences the many ARIMA convergence warnings raised during the grid search below
from sklearn.metrics import accuracy_score  # unused here; MAPE is computed manually in the ARIMA cell
import seaborn as sns  # imported but unused in the visible cells
In [2]:
######################################## Import Data From Quandl API
##########################################################
#import quandl
#import pandas as pd
#import xlsxwriter
##mydata = quandl.get_table('ZACKS/FC', ticker='AAPL')
#quandl.ApiConfig.api_key = "XXXXXXXXXXXXXX" ########## Register and generate a key ##############
#mydata = quandl.get("FRED/GDP")
#excelfile= r'dailyd2.xlsx'
#workbook = xlsxwriter.Workbook(excelfile)
#worksheet1 = workbook.add_worksheet('daily')
#bold = workbook.add_format({'bold': True})
#row = 0
#worksheet1.write(row,0,'Date')
#mydata = quandl.get('WIKI/AAPL', start_date="2017-01-01", end_date="2017-08-25")
#daterange = mydata.index ############# Save the date range ##############
#
#for i in daterange:
# row = row+1
# worksheet1.write(row,0,i)
#data = pd.read_csv(r'0825_quandl_ticks2.csv') ########### Load the list of tickers #############
#tlist = data['Ticker']
#col = 0
#for tl in tlist:
# try:
#        mydata = quandl.get('WIKI/'+tl, start_date="2017-01-01", end_date="2017-08-25")
# if len(mydata)-1 == len(daterange):
# row = 0
# col = col+1
# worksheet1.write(row,col,tl,bold)
# for dr in daterange:
# row = row+1
# for i,j in mydata['Adj. Close'].iteritems():
#                    if dr==i:
# worksheet1.write(row,col,j)
# print(tl,j)
# except Exception as e:
# print(e)
#workbook.close()
In [3]:
######## Load Data #########
# NOTE(review): the path separators were lost in the notebook export
# ('C:ModelDatadailyd2.xlsx'); reconstructed as C:\ModelData\dailyd2.xlsx —
# confirm against the original notebook. Prefer a configurable relative path
# (e.g. pathlib.Path with a DATA_DIR constant) over a hardcoded absolute one.
data = pd.read_excel(r'C:\ModelData\dailyd2.xlsx', index_col='Date', parse_dates=True)
In [4]:
################ Calculate and plot volatility #####################
# Per-ticker coefficient of variation (std / mean); the histogram guides the
# cut-off used by the volatility filter in the next cell.
df_stat = pd.DataFrame(columns=['std', 'mean', 'normalized_std'])
df_stat[['std', 'mean', 'normalized_std']] = pd.DataFrame(
    [data.std(), data.mean(), data.std() / data.mean()]).T
# Fix: the original called sort_values without assigning the result (a no-op);
# keep the ranked order so the frame reads lowest-to-highest volatility.
df_stat = df_stat.sort_values('normalized_std')
plt.title('Normalized Standard Deviation')
plt.hist(df_stat['normalized_std'])
fig = plt.gcf()
fig.set_size_inches(14.5, 5.5)
plt.show()
In [5]:
######### Based on the graph above, filter out stocks with high variance and low returns #########
# Keep tickers with normalized std-dev below 0.15, total return of at least
# 25% over the window, and a positive OLS trend slope.
df_fltr = df_stat[df_stat['normalized_std'] < .15]
df = data[df_fltr.index]
# .iloc replaces deprecated positional [] lookups on a date-indexed Series.
tkl = [x for x in df if (df[x].iloc[-1] - df[x].iloc[0]) / df[x].iloc[0] >= .25]
df = df[tkl]
fltr_tk = []
for tk in df:
    # .to_numpy() replaces .as_matrix(), which was removed in pandas 1.0.
    Y = df[tk].to_numpy()
    X = range(len(df.index))
    X = sm.add_constant(X)
    model = sm.OLS(Y, X)
    results = model.fit()
    if results.params[1] > .01:  ########## Filter on slope coefficient from regression model #########
        # NOTE(review): the body of this `if` was lost in the notebook export;
        # appending to fltr_tk is the only use consistent with the ARIMA cell
        # below — confirm against the original notebook.
        fltr_tk.append(tk)
In [7]:
############### Run ARIMA Model ###################
# Grid-search ARIMA(p, d, q) orders per ticker, keep the forecast with the
# lowest MAPE on a 20-observation hold-out, then plot actual vs. predicted.
store = {}
for tk in fltr_tk:
    train = df[tk].iloc[:-20]   # all but the last 20 observations
    test = df[tk].iloc[-20:]    # 20-observation hold-out set
    best_order = (99, 99, 99)   # sentinel order until a model succeeds
    # Fix: the original initialized the best MAPE to 99, silently rejecting
    # every ticker whose best achievable MAPE exceeded 99.
    best_mape = float('inf')
    best_fcst = None
    for p in range(10):
        for q in range(10):
            for d in range(2):
                try:
                    model = ARIMA(train, order=(p, d, q)).fit()
                    # Old statsmodels API: forecast() returns a tuple whose
                    # first element is the point-forecast array.
                    predict = model.forecast(len(test))
                    fcst = np.asarray(predict[0])
                    # Vectorized MAPE; .to_numpy() avoids deprecated integer
                    # lookups (test[i]) on a date-indexed Series.
                    actual = test.to_numpy()
                    mape = round(np.mean(np.absolute(actual - fcst) / actual) * 100, 2)
                except Exception:
                    # Many (p, d, q) combinations fail to converge; skip them
                    # instead of aborting the whole search (was a bare except
                    # that also hid the undefined-fcst case).
                    continue
                if mape < best_mape:
                    best_mape = mape
                    best_order = (p, d, q)
                    best_fcst = fcst
    if best_fcst is None:
        # Fix: the original stored an empty forecast and then crashed in
        # plt.plot(test.index, []) when no order converged for a ticker.
        print(str(tk) + ': no ARIMA order converged, skipping')
        continue
    store[tk] = best_fcst
    plt.plot(train)
    plt.plot(test, label='Actual')
    plt.plot(test.index, best_fcst, label='Predicted')
    fig = plt.gcf()
    fig.set_size_inches(16.5, 4.5)
    ap, ad, aq = best_order
    plt.title(str(tk) + "_" + "MAPE" + "_" + str(best_mape) + "_" + "Order" + "_" + "(" + str(ap) + str(ad) + str(aq) + ")")
    plt.legend(loc='best')
    plt.show()