1. B E G I N N E R S G U I D E T O
W E AT H E R A N D C L I M AT E D ATA
D R M A R G R I E T G R O E N E N D I J K
D E V E L O P E R A D V O C AT E - I B M WAT S O N D ATA P L AT F O R M
@ M A R G R I E T G R
2 0 M AY 2 0 1 7 - P Y D ATA - B A R C E L O N A
2. B E G I N N E R S G U I D E T O
W E AT H E R A N D C L I M AT E D ATA
S L I D E S
H T T P S : / / W W W. S L I D E S H A R E . N E T /
M A R G R I E T G R O E N E N D I J K / P R E S E N TAT I O N S
4. W E AT H E R
F O R E C A S T I N A
N O T E B O O K
5. J U P Y T E R
N O T E B O O K
https://jupyter.org
6. T H E W E AT H E R
C O M PA N Y A P I
D ATA
Get access here:
https://console.ng.bluemix.net/
7. W E AT H E R
F O R E C A S T I N A
N O T E B O O K
Try it out here:
https://datascience.ibm.com
Notebook:
https://github.com/ibm-cds-labs/python-notebooks
12. import pandas as pd
from datetime import datetime
df =
pd.DataFrame.from_dict(weather['forecasts'][0],orient='index').transpose()
for forecast in weather['forecasts'][1:]:
df =
pd.concat([df,pd.DataFrame.from_dict(forecast,orient='index').transpose()])
df['date'] = df['fcst_valid_local'].apply(lambda x: datetime.strptime(x,
'%Y-%m-%dT%H:%M:%S+0200'))
13. L A M B D A A N D PA N D A S D ATA F R A M E S
A N O N Y M O U S F U N C T I O N
df['date'] = df['fcst_valid_local'].apply(lambda x: datetime.strptime(x,
'%Y-%m-%dT%H:%M:%S+0200'))
14. S O M E M O R E C L E A N I N G U P
df = df.drop([‘expire_time_gmt’],1)
df['temp']=df['temp'].apply(pd.to_numeric)
df.head()
15. P L O T W I T H M AT P L O T L I B
import matplotlib.pyplot as plt
import matplotlib
%matplotlib inline
fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(14, 8))
df['rain'].plot(ax=axes[0], kind='bar', color='#C93D79',sharex=True)
axes[0].set_title('Chance of rain',loc='left',fontsize=20)
df['temp'].plot(ax=axes[1], color='#6EEDD8',lw=4.0,sharex=True)
axes[1].set_title('Temperature',loc='left',fontsize=20)
17. cities = [
('Bristol',51.44999778,-2.583315472),
...
('Portsmouth',50.80034751,-1.080022218)]
icons=[]
temps=[]
for city in cities:
lat = city[1]
lon = city[2]
line='https://'+username+':'+password+'@twcservice.mybluemix.net/api/
weather/v1/geocode/'+str(lat)+'/'+str(lon)+'/observations.json?&units=m'
r=requests.get(line)
weather = json.loads(r.text)
icons=np.append(icons,weather['observation']['wx_icon'])
temps=np.append(temps,weather['observation']['temp'])
18. from mpl_toolkits.basemap import Basemap
from matplotlib.offsetbox import AnnotationBbox, OffsetImage
from matplotlib._png import read_png
from itertools import izip
import urllib
matplotlib.style.use('bmh')
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(10, 12))
# background maps
m1 = Basemap(projection='mill',resolution=None,llcrnrlon=-7.5,llcrnrlat=49.84,urcrnrlon=2.5,urcrnrlat=59,ax=axes[0])
m1.drawlsmask(land_color='dimgrey',ocean_color='dodgerBlue',lakes=True)
m2 = Basemap(projection='mill',resolution=None,llcrnrlon=-7.5,llcrnrlat=49.84,urcrnrlon=2.5,urcrnrlat=59,ax=axes[1])
m2.drawlsmask(land_color='dimgrey',ocean_color='dodgerBlue',lakes=True)
# weather icons map
for [icon,city] in izip(icons,cities):
lat = city[1]
lon = city[2]
try:
pngfile=urllib.urlopen('https://github.com/ibm-cds-labs/python-notebooks/blob/master/weathericons/icon'+str(int(icon))+'.png?raw=true')
icon_hand = read_png(pngfile)
imagebox = OffsetImage(icon_hand, zoom=.15)
ab = AnnotationBbox(imagebox,m1(lon,lat),frameon=False)
axes[0].add_artist(ab)
except:
pass
# temperature map
for [temp,city] in izip(temps,cities):
lat = city[1]
lon = city[2]
if temp>16:
col='indigo'
elif temp>14:
col='darkmagenta'
elif temp>12:
col='red'
elif temp>10:
col='tomato'
elif temp>0:
col='turquoise'
x1, y1 = m2(lon,lat)
bbox_props = dict(boxstyle="round,pad=0.3", fc=col, ec=col, lw=2)
axes[1].text(x1, y1, temp, ha="center", va="center",
size=11,bbox=bbox_props)
20. P I X I E D U S T
O P E N S O U R C E
https://ibm-cds-labs.github.io/pixiedust/
!pip install --upgrade pixiedust
22. P I X I E D U S T A N D M A P B O X
dfmap = pd.DataFrame(cities,
columns=['city','lat','lon'])
dfmap['temp']=temps
dfmap[‘icon']=icons
display(dfmap)
26. O B S E R VAT I O N S
• Temperature
• Humidity
• Windspeed and direction
• Air pressure
• Rainfall
• Radiation
http://www.metoffice.gov.uk/public/
weather/climate-network/#?
tab=climateNetwork
27. H I S T O R I C W E AT H E R
• http://
www.metoffice.gov.uk/
datapoint/
• https://
business.weather.com/
products/the-weather-
company-data-packages
• https://climexp.knmi.nl
• http://www.ecmwf.int/en/
forecasts/datasets
34. from netCDF4 import Dataset, num2date
import numpy as np
cfile = 'assets/HadCRUT.4.5.0.0.median.nc'
dataset = Dataset(cfile)
print dataset.data_model
print dataset.variables
NETCDF4
OrderedDict([(u'latitude', <type 'netCDF4._netCDF4.Variable'>
float32 latitude(latitude)
standard_name: latitude
long_name: latitude
point_spacing: even
units: degrees_north
axis: Y
unlimited dimensions:
ts: days since 1850-1-1 00:00:00
calendar: gregorian
start_year: 1850
...
Data is here:
https://crudata.uea.ac.uk/cru/data/temperature/
35. import scipy
import matplotlib
from pylab import *
from mpl_toolkits.basemap import Basemap, addcyclic, shiftgrid, maskoceans
# define the area to plot and projection to use
m =
Basemap(llcrnrlon=-180,llcrnrlat=-60,urcrnrlon=180,urcrnrlat=80,projection=
'mill')
# covert the latitude, longitude and temperatures to raster coordinates to
be plotted
t1=temperature[0,:,:]
t1,lon=addcyclic(t1,lons)
january,longitude=shiftgrid(180.,t1,lon,start=False)
x,y=np.meshgrid(longitude,lats)
px,py=m(x,y)
46. W E AT H E R A N D
T R A F F I C
C O L L I S I O N S
E X A M P L E
https://www.pexels.com/photo/blur-cars-dew-drops-125510/
47. N Y P D T R A F F I C
C O L L I S I O N S
E X A M P L E
https://data.cityofnewyork.us/
Public-Safety/NYPD-Motor-
Vehicle-Collisions/h9gi-nx95
48. 8 1 2 , 5 2 6
T R A F F I C
C O L L I S I O N S
S I N C E A P R I L 2 0 1 4
49. N Y P D T R A F F I C C O L L I S I O N S
https://apsportal.ibm.com/exchange/public/entry/view/5a7051906b8fe9cc1ba126b53edd948e
50. T E M P E R AT U R E F O R T H E 5 B O R O U G H S
51. H O W T O C O M B I N E T H E D ATA
manhattan_merged = pd.merge_asof(manhattan.sort_values(by='Date'),
weather.sort_values(by=‘date’),
left_on='Date',right_on='date',
tolerance=pd.Timedelta('6h'))
52. H O W T O C O M B I N E T H E D ATA
def perdelta(start, end, delta):
curr = start
while curr < end:
yield curr
curr += delta
for result in perdelta(datetime(2017,4,1,0), datetime(2017,4,15,23),
timedelta(hours=1)):
colhour = manhattan.loc[manhattan['Date'] == result]
hour = pd.DataFrame([[result,borough,len(colhour.index),
colhour['Persons Injured'].sum(),
colhour['Persons Killed'].sum(),
if result == datetime(2017,4,1,0):
newhour = hour.copy()
else:
newhour = newhour.append(hour)
53. H O W T O C O M B I N E T H E D ATA
Find out how weather impacts traffic collisions in New York:
https://medium.com/ibm-watson-data-lab
55. R E F E R E N C E S
• IBM Bluemix - https://console.ng.bluemix.net/
• IBM Data Science Experience - https://datascience.ibm.com
• PixieDust - https://ibm-cds-labs.github.io/pixiedust/
• Slides - https://www.slideshare.net/MargrietGroenendijk/
presentations
• Notebooks - https://github.com/ibm-cds-labs/python-notebooks
• Me - mgroenen@uk.ibm.com - @MargrietGr