Successfully reported this slideshow.
We use your LinkedIn profile and activity data to personalize ads and to show you more relevant ads. You can change your ad preferences anytime.

PyData Barcelona - weather and climate data

279 views

Published on

Talk at PyData Barcelona about weather and climate data

Published in: Data & Analytics
  • Be the first to comment

PyData Barcelona - weather and climate data

  1. 1. BEGINNERS GUIDE TO WEATHER AND CLIMATE DATA DR MARGRIET GROENENDIJK DEVELOPER ADVOCATE - IBM WATSON DATA PLATFORM @MARGRIETGR 20 MAY 2017 - PYDATA - BARCELONA
  2. 2. BEGINNERS GUIDE TO WEATHER AND CLIMATE DATA SLIDES https://www.slideshare.net/MargrietGroenendijk/presentations
  3. 3. WEATHER FORECAST
  4. 4. WEATHER FORECAST IN A NOTEBOOK
  5. 5. JUPYTER NOTEBOOK https://jupyter.org
  6. 6. THE WEATHER COMPANY API DATA Get access here: https://console.ng.bluemix.net/
  7. 7. WEATHER FORECAST IN A NOTEBOOK Try it out here: https://datascience.ibm.com Notebook: https://github.com/ibm-cds-labs/python-notebooks
  8. 8. SCREENSHOT OF NOTEBOOK
  9. 9. SCREENSHOT OF NOTEBOOK
  10. 10. %%javascript navigator.geolocation.getCurrentPosition(function(position) { console.log(position.coords.latitude, position.coords.longitude); setTimeout(function() { IPython.notebook.kernel.execute('lat="' + position.coords.latitude + '";') IPython.notebook.kernel.execute('lon="' + position.coords.longitude + '";') },5000) }); import requests import json line='https://'+username+':'+password+ '@twcservice.mybluemix.net/api/weather/v1/geocode/'+ lat+'/'+lon+'/forecast/intraday/10day.json?&units=m' r=requests.get(line) weather = json.loads(r.text) Get access to the API here: https://console.ng.bluemix.net/
  11. 11. print json.dumps(weather, indent=4, sort_keys=True) { "forecasts": [ { "class": "fod_long_range_intraday", "clds": 42, "dow": "Tuesday", ... "temp": 13, "wdir": 261, }, ], "metadata": { "expire_time_gmt": 1491904587, "latitude": 51.45, "longitude": -2.58, ... } }
  12. 12. import pandas as pd from datetime import datetime df = pd.DataFrame.from_dict(weather['forecasts'][0],orient='index').transpose() for forecast in weather['forecasts'][1:]: df = pd.concat([df,pd.DataFrame.from_dict(forecast,orient='index').transpose()]) df['date'] = df['fcst_valid_local'].apply(lambda x: datetime.strptime(x, '%Y-%m-%dT%H:%M:%S+0200'))
  13. 13. LAMBDA AND PANDAS DATAFRAMES ANONYMOUS FUNCTION df['date'] = df['fcst_valid_local'].apply(lambda x: datetime.strptime(x, '%Y-%m-%dT%H:%M:%S+0200'))
  14. 14. SOME MORE CLEANING UP df = df.drop(['expire_time_gmt'],1) df['temp']=df['temp'].apply(pd.to_numeric) df.head()
  15. 15. PLOT WITH MATPLOTLIB import matplotlib.pyplot as plt import matplotlib %matplotlib inline fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(14, 8)) df['rain'].plot(ax=axes[0], kind='bar', color='#C93D79',sharex=True) axes[0].set_title('Chance of rain',loc='left',fontsize=20) df['temp'].plot(ax=axes[1], color='#6EEDD8',lw=4.0,sharex=True) axes[1].set_title('Temperature',loc='left',fontsize=20)
  16. 16. cities = [ ('Bristol',51.44999778,-2.583315472), ... ('Portsmouth',50.80034751,-1.080022218)] icons=[] temps=[] for city in cities: lat = city[1] lon = city[2] line='https://'+username+':'+password+'@twcservice.mybluemix.net/api/weather/v1/geocode/'+str(lat)+'/'+str(lon)+'/observations.json?&units=m' r=requests.get(line) weather = json.loads(r.text) icons=np.append(icons,weather['observation']['wx_icon']) temps=np.append(temps,weather['observation']['temp'])
  17. 17. from mpl_toolkits.basemap import Basemap from matplotlib.offsetbox import AnnotationBbox, OffsetImage from matplotlib._png import read_png from itertools import izip import urllib matplotlib.style.use('bmh') fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(10, 12)) # background maps m1 = Basemap(projection='mill',resolution=None,llcrnrlon=-7.5,llcrnrlat=49.84,urcrnrlon=2.5,urcrnrlat=59,ax=axes[0]) m1.drawlsmask(land_color='dimgrey',ocean_color='dodgerBlue',lakes=True) m2 = Basemap(projection='mill',resolution=None,llcrnrlon=-7.5,llcrnrlat=49.84,urcrnrlon=2.5,urcrnrlat=59,ax=axes[1]) m2.drawlsmask(land_color='dimgrey',ocean_color='dodgerBlue',lakes=True) # weather icons map for [icon,city] in izip(icons,cities): lat = city[1] lon = city[2] try: pngfile=urllib.urlopen('https://github.com/ibm-cds-labs/python-notebooks/blob/master/weathericons/icon'+str(int(icon))+'.png?raw=true') icon_hand = read_png(pngfile) imagebox = OffsetImage(icon_hand, zoom=.15) ab = AnnotationBbox(imagebox,m1(lon,lat),frameon=False) axes[0].add_artist(ab) except: pass # temperature map for [temp,city] in izip(temps,cities): lat = city[1] lon = city[2] if temp>16: col='indigo' elif temp>14: col='darkmagenta' elif temp>12: col='red' elif temp>10: col='tomato' elif temp>0: col='turquoise' x1, y1 = m2(lon,lat) bbox_props = dict(boxstyle="round,pad=0.3", fc=col, ec=col, lw=2) axes[1].text(x1, y1, temp, ha="center", va="center", size=11,bbox=bbox_props)
  18. 18. PIXIEDUST OPEN SOURCE https://ibm-cds-labs.github.io/pixiedust/ !pip install --upgrade pixiedust
  19. 19. PIXIEDUST AND MAPBOX dfmap = pd.DataFrame(cities, columns=['city','lat','lon']) dfmap['temp']=temps dfmap['icon']=icons display(dfmap)
  20. 20. PIXIE APPS
  21. 21. WHERE DOES THE DATA COME FROM? BUT
  22. 22. OBSERVATIONS + MODELS
  23. 23. OBSERVATIONS • Temperature • Humidity • Windspeed and direction • Air pressure • Rainfall • Radiation http://www.metoffice.gov.uk/public/weather/climate-network/#?tab=climateNetwork
  24. 24. HISTORIC WEATHER • http://www.metoffice.gov.uk/datapoint/ • https://business.weather.com/products/the-weather-company-data-packages • https://climexp.knmi.nl • http://www.ecmwf.int/en/forecasts/datasets
  25. 25. I WANT A MAP… BUT
  26. 26. POINTS TO GRID THE PROBLEM
  27. 27. from scipy.interpolate import griddata # grid of latitude and longitude values x = np.linspace(49.0,59.0,100) y = np.linspace(-6,2,100) X, Y = np.meshgrid(x,y) px = points['lat'].as_matrix() py = points['lon'].as_matrix() pz = points['temp'].as_matrix() fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(18, 8)) for i, method in enumerate(('nearest', 'linear', 'cubic')): Ti = griddata((px, py), pz, (X, Y), method=method) ax[i].contourf(X, Y, Ti) ax[i].set_title('method = {}'.format(method)) ax[i].scatter(px, py, c='k', marker='o')
  28. 28. HOW CAN I WORK WITH THIS DATA? COOL, BUT …
  29. 29. NETCDF BINARY FILES
  30. 30. from netCDF4 import Dataset, num2date import numpy as np cfile = 'assets/HadCRUT.4.5.0.0.median.nc' dataset = Dataset(cfile) print dataset.data_model print dataset.variables NETCDF4 OrderedDict([(u'latitude', <type 'netCDF4._netCDF4.Variable'> float32 latitude(latitude) standard_name: latitude long_name: latitude point_spacing: even units: degrees_north axis: Y unlimited dimensions: ts: days since 1850-1-1 00:00:00 calendar: gregorian start_year: 1850 ... Data is here: https://crudata.uea.ac.uk/cru/data/temperature/
  31. 31. import scipy import matplotlib from pylab import * from mpl_toolkits.basemap import Basemap, addcyclic, shiftgrid, maskoceans # define the area to plot and projection to use m = Basemap(llcrnrlon=-180,llcrnrlat=-60,urcrnrlon=180,urcrnrlat=80,projection= 'mill') # convert the latitude, longitude and temperatures to raster coordinates to be plotted t1=temperature[0,:,:] t1,lon=addcyclic(t1,lons) january,longitude=shiftgrid(180.,t1,lon,start=False) x,y=np.meshgrid(longitude,lats) px,py=m(x,y)
  32. 32. rcParams['font.size']=12 rcParams['figure.figsize']=[8.0, 6.0] figure() palette=cm.RdYlBu_r rmin=-30.; rmax=30. ncont=20 dc=(rmax-rmin)/ncont vc=arange(rmin,rmax+dc,dc) pal_norm=matplotlib.colors.Normalize(vmin = rmin, vmax = rmax, clip = False) m.drawcoastlines(linewidth=0.5) m.drawmapboundary(fill_color=(1.0,1.0,1.0)) cf=m.pcolormesh(px, py, january, cmap = palette) cbar=colorbar(cf,orientation='horizontal', shrink=0.95) cbar.set_label('Mean Temperature in January') tight_layout()
  33. 33. WHAT ABOUT FORECASTS AND PREDICTIONS? THIS DATA IS ALL BASED ON MEASUREMENTS …
  34. 34. CLIMATE MODELS
  35. 35. CLIMATE WITH DIFFERENT SCENARIOS MODEL EXPERIMENTS
  36. 36. GLOBAL TEMPERATURE EXPLAINED https://www.bloomberg.com/graphics/2015-whats-warming-the-world/
  37. 37. WHAT CAN I USE WEATHER DATA FOR?
  38. 38. in vehicle hail damage claims every year increase in temperature means $24M more in electricity spending per day drop in sales for areas with more than a 10% drop in temperature INSURANCE ENERGY RETAIL APPLICATIONS
  39. 39. WEATHER AND TRAFFIC COLLISIONS EXAMPLE https://www.pexels.com/photo/blur-cars-dew-drops-125510/
  40. 40. NYPD TRAFFIC COLLISIONS EXAMPLE https://data.cityofnewyork.us/Public-Safety/NYPD-Motor-Vehicle-Collisions/h9gi-nx95
  41. 41. 812,526 TRAFFIC COLLISIONS SINCE APRIL 2014
  42. 42. NYPD TRAFFIC COLLISIONS https://apsportal.ibm.com/exchange/public/entry/view/5a7051906b8fe9cc1ba126b53edd948e
  43. 43. TEMPERATURE FOR THE 5 BOROUGHS
  44. 44. HOW TO COMBINE THE DATA manhattan_merged = pd.merge_asof(manhattan.sort_values(by='Date'), weather.sort_values(by='date'), left_on='Date',right_on='date', tolerance=pd.Timedelta('6h'))
  45. 45. HOW TO COMBINE THE DATA def perdelta(start, end, delta): curr = start while curr < end: yield curr curr += delta for result in perdelta(datetime(2017,4,1,0), datetime(2017,4,15,23), timedelta(hours=1)): colhour = manhattan.loc[manhattan['Date'] == result] hour = pd.DataFrame([[result,borough,len(colhour.index), colhour['Persons Injured'].sum(), colhour['Persons Killed'].sum(), if result == datetime(2017,4,1,0): newhour = hour.copy() else: newhour = newhour.append(hour)
  46. 46. HOW TO COMBINE THE DATA Find out how weather impacts traffic collisions in New York: https://medium.com/ibm-watson-data-lab
  47. 47. Hackathon June 10-11, 2017 Galvanize, San Francisco Code Challenge June/July 2017 The SETI Institute Register Now! seti.org/ML4SETI
  48. 48. REFERENCES • IBM Bluemix - https://console.ng.bluemix.net/ • IBM Data Science Experience - https://datascience.ibm.com • PixieDust - https://ibm-cds-labs.github.io/pixiedust/ • Slides - https://www.slideshare.net/MargrietGroenendijk/presentations • Notebooks - https://github.com/ibm-cds-labs/python-notebooks • Me - mgroenen@uk.ibm.com - @MargrietGr

×