Successfully reported this slideshow.
We use your LinkedIn profile and activity data to personalize ads and to show you more relevant ads. You can change your ad preferences anytime.

PyData Barcelona - weather and climate data


Published on

Talk at PyData Barcelona about weather and climate data

Published in: Data & Analytics
  • Be the first to comment

PyData Barcelona - weather and climate data

  1. 1. BEGINNERS GUIDE TO WEATHER AND CLIMATE DATA DR MARGRIET GROENENDIJK DEVELOPER ADVOCATE - IBM WATSON DATA PLATFORM @MARGRIETGR 20 MAY 2017 - PYDATA - BARCELONA
  2. 2. BEGINNERS GUIDE TO WEATHER AND CLIMATE DATA SLIDES https://www.slideshare.net/margrietgroenendijk/presentations
  3. 3. WEATHER FORECAST
  4. 4. WEATHER FORECAST IN A NOTEBOOK
  5. 5. JUPYTER NOTEBOOK
  6. 6. THE WEATHER COMPANY API DATA Get access here:
  7. 7. WEATHER FORECAST IN A NOTEBOOK Try it out here: Notebook:
  8. 8. SCREENSHOT OF NOTEBOOK
  9. 9. SCREENSHOT OF NOTEBOOK
  10. 10. %%javascript navigator.geolocation.getCurrentPosition(function(position) { console.log(position.coords.latitude, position.coords.longitude); setTimeout(function() { IPython.notebook.kernel.execute('lat="' + position.coords.latitude + '";') IPython.notebook.kernel.execute('lon="' + position.coords.longitude + '";') },5000) }); import requests import json line='https://'+username+':'+password+ ''+ lat+'/'+lon+'/forecast/intraday/10day.json?&units=m' r=requests.get(line) weather = json.loads(r.text) Get access to the API here:
  11. 11. print json.dumps(weather, indent=4, sort_keys=True) { "forecasts": [ { "class": "fod_long_range_intraday", "clds": 42, "dow": "Tuesday", ... "temp": 13, "wdir": 261, }, ], "metadata": { "expire_time_gmt": 1491904587, "latitude": 51.45, "longitude": -2.58, ... } }
  12. 12. import pandas as pd from datetime import datetime df = pd.DataFrame.from_dict(weather['forecasts'][0],orient='index').transpose() for forecast in weather['forecasts'][1:]: df = pd.concat([df,pd.DataFrame.from_dict(forecast,orient='index').transpose()]) df['date'] = df['fcst_valid_local'].apply(lambda x: datetime.strptime(x, '%Y-%m-%dT%H:%M:%S+0200'))
  13. 13. LAMBDA AND PANDAS DATAFRAMES ANONYMOUS FUNCTION df['date'] = df['fcst_valid_local'].apply(lambda x: datetime.strptime(x, '%Y-%m-%dT%H:%M:%S+0200'))
  14. 14. SOME MORE CLEANING UP df = df.drop(['expire_time_gmt'],1) df['temp']=df['temp'].apply(pd.to_numeric) df.head()
  15. 15. P L O T W I T H M AT P L O T L I B import matplotlib.pyplot as plt import matplotlib %matplotlib inline fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(14, 8)) df['rain'].plot(ax=axes[0], kind='bar', color='#C93D79',sharex=True) axes[0].set_title('Chance of rain',loc='left',fontsize=20) df['temp'].plot(ax=axes[1], color='#6EEDD8',lw=4.0,sharex=True) axes[1].set_title('Temperature',loc='left',fontsize=20)
  16. 16. cities = [ ('Bristol',51.44999778,-2.583315472), ... ('Portsmouth',50.80034751,-1.080022218)] icons=[] temps=[] for city in cities: lat = city[1] lon = city[2] line='https://'+username+':'+password+' weather/v1/geocode/'+str(lat)+'/'+str(lon)+'/observations.json?&units=m' r=requests.get(line) weather = json.loads(r.text) icons=np.append(icons,weather['observation']['wx_icon']) temps=np.append(temps,weather['observation']['temp'])
  17. 17. from mpl_toolkits.basemap import Basemap from matplotlib.offsetbox import AnnotationBbox, OffsetImage from matplotlib._png import read_png from itertools import izip import urllib'bmh') fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(10, 12)) # background maps m1 = Basemap(projection='mill',resolution=None,llcrnrlon=-7.5,llcrnrlat=49.84,urcrnrlon=2.5,urcrnrlat=59,ax=axes[0]) m1.drawlsmask(land_color='dimgrey',ocean_color='dodgerBlue',lakes=True) m2 = Basemap(projection='mill',resolution=None,llcrnrlon=-7.5,llcrnrlat=49.84,urcrnrlon=2.5,urcrnrlat=59,ax=axes[1]) m2.drawlsmask(land_color='dimgrey',ocean_color='dodgerBlue',lakes=True) # weather icons map for [icon,city] in izip(icons,cities): lat = city[1] lon = city[2] try: pngfile=urllib.urlopen(''+str(int(icon))+'.png?raw=true') icon_hand = read_png(pngfile) imagebox = OffsetImage(icon_hand, zoom=.15) ab = AnnotationBbox(imagebox,m1(lon,lat),frameon=False) axes[0].add_artist(ab) except: pass # temperature map for [temp,city] in izip(temps,cities): lat = city[1] lon = city[2] if temp>16: col='indigo' elif temp>14: col='darkmagenta' elif temp>12: col='red' elif temp>10: col='tomato' elif temp>0: col='turquoise' x1, y1 = m2(lon,lat) bbox_props = dict(boxstyle="round,pad=0.3", fc=col, ec=col, lw=2) axes[1].text(x1, y1, temp, ha="center", va="center", size=11,bbox=bbox_props)
  18. 18. PIXIEDUST OPEN SOURCE !pip install --upgrade pixiedust
  19. 19. PIXIEDUST AND MAPBOX dfmap = pd.DataFrame(cities, columns=['city','lat','lon']) dfmap['temp']=temps dfmap['icon']=icons display(dfmap)
  20. 20. PIXIE APPS
  21. 21. WHERE DOES THE DATA COME FROM? BUT
  22. 22. OBSERVATIONS + MODELS
  23. 23. O B S E R VAT I O N S • Temperature • Humidity • Windspeed and direction • Air pressure • Rainfall • Radiation weather/climate-network/#? tab=climateNetwork
  24. 24. H I S T O R I C W E AT H E R • http:// datapoint/ • https:// products/the-weather- company-data-packages • • forecasts/datasets
  25. 25. I WANT A MAP… BUT
  26. 26. POINTS TO GRID THE PROBLEM
  27. 27. from scipy.interpolate import griddata # grid of latitude and longitude values x = np.linspace(49.0,59.0,100) y = np.linspace(-6,2,100) X, Y = np.meshgrid(x,y) px = points['lat'].as_matrix() py = points['lon'].as_matrix() pz = points['temp'].as_matrix() fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(18, 8)) for i, method in enumerate(('nearest', 'linear', 'cubic')): Ti = griddata((px, py), pz, (X, Y), method=method) ax[i].contourf(X, Y, Ti) ax[i].set_title('method = {}'.format(method)) ax[i].scatter(px, py, c='k', marker='o')
  28. 28. HOW CAN I WORK WITH THIS DATA? COOL, BUT…
  29. 29. NETCDF BINARY FILES
  30. 30. from netCDF4 import Dataset, num2date import numpy as np cfile = 'assets/' dataset = Dataset(cfile) print dataset.data_model print dataset.variables NETCDF4 OrderedDict([(u'latitude', <type 'netCDF4._netCDF4.Variable'> float32 latitude(latitude) standard_name: latitude long_name: latitude point_spacing: even units: degrees_north axis: Y unlimited dimensions: ts: days since 1850-1-1 00:00:00 calendar: gregorian start_year: 1850 ... Data is here:
  31. 31. import scipy import matplotlib from pylab import * from mpl_toolkits.basemap import Basemap, addcyclic, shiftgrid, maskoceans # define the area to plot and projection to use m = Basemap(llcrnrlon=-180,llcrnrlat=-60,urcrnrlon=180,urcrnrlat=80,projection= 'mill') # convert the latitude, longitude and temperatures to raster coordinates to be plotted t1=temperature[0,:,:] t1,lon=addcyclic(t1,lons) january,longitude=shiftgrid(180.,t1,lon,start=False) x,y=np.meshgrid(longitude,lats) px,py=m(x,y)
  32. 32. rcParams['font.size']=12 rcParams['figure.figsize']=[8.0, 6.0] figure() palette=cm.RdYlBu_r rmin=-30.; rmax=30. ncont=20 dc=(rmax-rmin)/ncont vc=arange(rmin,rmax+dc,dc) pal_norm=matplotlib.colors.Normalize(vmin = rmin, vmax = rmax, clip = False) m.drawcoastlines(linewidth=0.5) m.drawmapboundary(fill_color=(1.0,1.0,1.0)) cf=m.pcolormesh(px, py, january, cmap = palette) cbar=colorbar(cf,orientation='horizontal', shrink=0.95) cbar.set_label('Mean Temperature in January') tight_layout()
  33. 33. WHAT ABOUT FORECASTS AND PREDICTIONS? THIS DATA IS ALL BASED ON MEASUREMENTS…
  34. 34. CLIMATE MODELS
  35. 35. CLIMATE WITH DIFFERENT SCENARIOS MODEL EXPERIMENTS
  36. 36. GLOBAL TEMPERATURE EXPLAINED
  37. 37. WHAT CAN I USE WEATHER DATA FOR?
  38. 38. in vehicle hail damage claims every year increase in temperature means $24M more in electricity spending per day drop in sales for areas with more than a 10% drop in temperature I N S U R A N C E E N E R G Y R E TA I L A P P L I C AT I O N S
  39. 39. WEATHER AND TRAFFIC COLLISIONS EXAMPLE
  40. 40. NYPD TRAFFIC COLLISIONS EXAMPLE Public-Safety/NYPD-Motor-Vehicle-Collisions/h9gi-nx95
  41. 41. 812,526 TRAFFIC COLLISIONS SINCE APRIL 2014
  42. 42. NYPD TRAFFIC COLLISIONS
  43. 43. TEMPERATURE FOR THE 5 BOROUGHS
  44. 44. HOW TO COMBINE THE DATA manhattan_merged = pd.merge_asof(manhattan.sort_values(by='Date'), weather.sort_values(by='date'), left_on='Date',right_on='date', tolerance=pd.Timedelta('6h'))
  45. 45. H O W T O C O M B I N E T H E D ATA def perdelta(start, end, delta): curr = start while curr < end: yield curr curr += delta for result in perdelta(datetime(2017,4,1,0), datetime(2017,4,15,23), timedelta(hours=1)): colhour = manhattan.loc[manhattan['Date'] == result] hour = pd.DataFrame([[result,borough,len(colhour.index), colhour['Persons Injured'].sum(), colhour['Persons Killed'].sum(), if result == datetime(2017,4,1,0): newhour = hour.copy() else: newhour = newhour.append(hour)
  46. 46. H O W T O C O M B I N E T H E D ATA Find out how weather impacts traffic collisions in New York:
  47. 47. Hackathon June 10-11, 2017 Galvanize, San Francisco Code Challenge June/July 2017 The SETI Institute Register Now!
  48. 48. R E F E R E N C E S • IBM Bluemix - • IBM Data Science Experience - • PixieDust - • Slides - presentations • Notebooks - • Me - - @MargrietGr