The document demonstrates various ways to visualize and plot stock market data using the matplotlib library in Python. It shows how to:
1) Import necessary libraries and read in stock price data; plot the closing prices and opening vs closing prices on single and multiple axes.
2) Plot daily price ranges both in dollar amounts and as a percentage of closing price.
3) Use subplots to show multiple data series simultaneously and loop through variables to simplify the code.
4) Save plots to both individual image files and a multipage PDF. Add annotations directly to points of interest.
5) Create dual-axis plots and control the vertical axis scaling manually.
Scaling API-first – The story of a global engineering organization
Matplotlib demo code
# matplotlib demo from San Diego Python Data Analysis Workshop 20APR2013
# Drew Arnett
# a.arnett@ieee.org
# code from this file was copied and pasted in chunks to run
# import libraries that will be used
import matplotlib.mlab
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
# read in the data set
# the rest of the script reads the columns date, open, high, low, close and
# volume from x, both as attributes (x.close) and by name (x[item])
# NOTE(review): csv2rec appears to have been removed from newer Matplotlib
# releases (3.1 and later) -- confirm the installed version still provides it
x = matplotlib.mlab.csv2rec("s_p_historical_closes.csv")
# closing prices on their own
plt.plot(x.date, x.close, ".")
plt.show()
# opening and closing prices together on one plot, one labelled series each
for column in ("open", "close"):
    plt.plot(x.date, x[column], ".", label=column)
plt.legend()
plt.show()
# that wasn't very interesting, so...
# daily range: high minus low
spread = x.high - x.low
plt.plot(x.date, spread, ".")
plt.show()
# that isn't very fair, so...
# the same range scaled against the close and expressed in %
dailyrange = 100. * spread / x.close
plt.plot(x.date, dailyrange, ".")
plt.show()
# use subplots to show more than one set of data at a time
# can also say subplot(6,1,1)
# subplot(number of subplot rows, number of subplot columns, specific subplot to use)
# six stacked panels, one per series, each with its own legend;
# written out one panel at a time on purpose
plt.subplot(611)
plt.plot(x.date, x.open, ".", label="open")
plt.legend()
plt.subplot(612)
plt.plot(x.date, x.high, ".", label="high")
plt.legend()
plt.subplot(613)
plt.plot(x.date, x.low, ".", label="low")
plt.legend()
plt.subplot(614)
plt.plot(x.date, x.close, ".", label="close")
plt.legend()
plt.subplot(615)
plt.plot(x.date, x.volume, ".", label="volume")
plt.legend()
# last panel: daily range as a percentage of the close
plt.subplot(616)
plt.plot(x.date, 100.*(x.high-x.low)/x.close, ".", label="range")
plt.legend()
plt.show()
# the same thing, but more concise and maintainable code, perhaps a bit more pythonic
# one stacked panel per column name; enumerate() counts from 0 while subplot
# positions count from 1, hence sub+1
for sub, item in enumerate("open,high,low,close,volume".split(",")):
    plt.subplot(5,1,sub+1)
    plt.plot(x.date, x[item], ".", label = item)
    # legend is drawn per panel, so it belongs inside the loop
    plt.legend(loc="best")
# show the finished figure once, after every panel has been drawn
plt.show()
# all of that was not interactive, plot shown only on show()
# would like to see what happens with each plotting command
# so turn on interactive mode. this might be more useful for either
# interactive data analysis or refinement of a plot's formatting
# query the current mode; the result is discarded here, so it is only visible
# when this line is pasted into an interactive prompt
plt.isinteractive()
# switch interactive mode on before issuing the plotting commands
plt.ion()
# first of two stacked panels: closing price
plt.subplot(211)
plt.plot(x.date, x.close, ".", label="close")
# second panel: daily range as a percentage of the close
plt.subplot(212)
plt.plot(x.date, 100.*(x.high-x.low)/x.close, ".", label="range")
# no show() or legend() call in this section; close the figure and switch
# interactive mode back off before the next section
plt.close()
plt.ioff()
# plot daily range to a file instead of interactive
# the range is (high - low) as a percentage of the close
plt.plot(x.date, 100.*(x.high-x.low)/x.close, ".")
plt.title("S&P Daily range (% of close)")
plt.xlabel("date")
plt.ylabel("%")
# write the image file first, then also display the same figure
plt.savefig("snp range.png")
plt.show()
# plot numerous plots to a multipage PDF file
# obvious pros and cons to raster versus vector image file formats
pp = PdfPages("example.pdf")
try:
    # one page per column: draw it, add it to the PDF, then close the figure
    # so the next column starts from a fresh one
    for item in "open,high,low,close,volume".split(","):
        plt.plot(x.date, x[item], ".", label = item)
        plt.title(item)
        plt.legend(loc="best")
        pp.savefig()
        plt.close()
finally:
    # always close the PDF, even if one of the pages fails
    pp.close()
# annotation is usually something I add afterwards in an image editor,
# but matplotlib can do a lot of it directly, which can be very useful
# here the daily range (% of close) is plotted and its largest value is marked
dailyrange = 100. * (x.high - x.low) / x.close
peak_index = dailyrange.argmax()
peak = (x.date[peak_index], dailyrange[peak_index])
peak_date, peak_value = peak
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.plot(x.date, dailyrange, ".")
# the text sits 3 units above the peak, with a black arrow pointing at it
ax.annotate(
    "WOW!",
    xy=peak,
    xytext=(peak_date, peak_value + 3),
    arrowprops={"facecolor": "black"},
)
plt.show()
# now two examples not using the S&P data set
# two data series on one plot, each with its own vertical axis
data1 = [1,2,3,4,5,6,5,4,3,2,1]
data2 = [1,2,1,2,3,1,2,1,3,1,0]
fig = plt.figure()
ax1 = fig.add_subplot(1, 1, 1)
# second y axis that shares the x axis of ax1
ax2 = ax1.twinx()
# each axis is labelled with the colour of the series drawn on it
for axis, series, colour in ((ax1, data1, "red"), (ax2, data2, "blue")):
    axis.plot(series, color=colour)
    axis.set_ylabel(colour)
plt.show()
# autoscaling is often not what I want
# an assert could be used to catch data that exceeds a fixed scale
# note that with identical limits the two vertical scales become redundant
# same two series as before, now with the same fixed limits on both axes
data1 = [1,2,3,4,5,6,5,4,3,2,1]
data2 = [1,2,1,2,3,1,2,1,3,1,0]
fig = plt.figure()
ax1 = fig.add_subplot(1, 1, 1)
ax2 = ax1.twinx()
y_limits = (0, 10)
for axis, series, colour in ((ax1, data1, "red"), (ax2, data2, "blue")):
    axis.plot(series, color=colour)
    axis.set_ylabel(colour)
    # pin the vertical range instead of letting matplotlib pick it
    axis.set_ylim(*y_limits)
plt.show()