SlideShare a Scribd company logo
1 of 19
MACHINE LEARNING FOR EARTH SYSTEM SCIENCE
ADWAY MITRA
CENTRE OF EXCELLENCE IN AI, IIT KHARAGPUR
Live Session 1: Handling Geophysical Datasets
CONCEPTS COVERED
⮚ Raster Data formats
⮚ Vector Data Formats
⮚ Loading and visualizing data using Python interface
⮚ Extreme value statistics using Python
RASTER DATA FORMAT
Formats:
NetCDF (.nc) – netCDF4 package (Python)
HDF (hierarchical data format) – also the netCDF4 package
- Raster data where each “pixel” stores a geophysical
variable for a specific location
GeoTIFF (tagged image file format) – Geospatial Data Abstraction Library (GDAL)
- Georeferenced data including satellite imagery, aerial
photography, topography, digital elevation maps, etc.
LOADING DATA FROM .NC
from netCDF4 import Dataset

# Path of the NetCDF file to inspect.
nc_f = 'tpw_v07r01_200910.nc4.nc'  # filename

# Open read-only. The context manager closes the file even if printing fails.
with Dataset(nc_f, 'r') as nc_fid:
    # Printing a Dataset shows its summary (dimensions, variables, attributes).
    print(nc_fid)
import netCDF4
import numpy as np

# Read the coordinate and orography variables from 'orography.nc' and print
# their values. The context manager closes the file even if a variable is
# missing or a read fails.
with netCDF4.Dataset('orography.nc', 'r') as f:
    lats = f.variables['lat']
    lons = f.variables['lon']
    orography = f.variables['orog']
    # Slicing with [:] reads the variable's values from the file.
    print(lats[:])
    print(lons[:])
    print(orography[:])
STORING DATA IN NC FILE
import netCDF4
import numpy as np

# Write a small example orography field to 'orography.nc'. Leaving the `with`
# block closes the file (which is what writes it to disk), even if one of the
# steps below raises.
with netCDF4.Dataset('orography.nc', 'w') as f:
    # A size of None makes 'time' an unlimited dimension. 'time' and 'z' are
    # declared here but not used by any variable in this example.
    f.createDimension('time', None)
    f.createDimension('z', 3)
    f.createDimension('y', 4)
    f.createDimension('x', 5)

    lats = f.createVariable('lat', float, ('y',), zlib=True)
    lons = f.createVariable('lon', float, ('x',), zlib=True)
    # NOTE(review): fill_value=0 coincides with the first value written below
    # (data_out[0, 0] == 0) - confirm that this is intended.
    orography = f.createVariable('orog', float, ('y', 'x'), zlib=True,
                                 least_significant_digit=1, fill_value=0)

    # create latitude and longitude 1D arrays
    lat_out = [60, 65, 70, 75]
    lon_out = [30, 60, 90, 120, 150]

    # Create field values for orography
    data_out = np.arange(4 * 5)  # 1d array but with dimension x*y
    data_out.shape = (4, 5)  # reshape to 2d array

    orography[:] = data_out
    lats[:] = lat_out
    lons[:] = lon_out
DATA VISUALIZATION USING HDF
import matplotlib.pyplot as plt
from netCDF4 import Dataset

# Inspect the HDF file and read the 'NDVI average' field. The file is closed
# exactly once, when the `with` block ends.
with Dataset('MISR_AM1_CGLS_MAY_2007_F04_0031.hdf', 'r') as f:
    print("Metadata for the dataset:")
    print(f)
    print("List of available variables (or key): ")
    # The attribute is `variables`; the list is printed so it shows up when
    # the snippet is run as a script.
    print(list(f.variables.keys()))
    print("Metadata for 'NDVI average' variable: ")
    print(f.variables['NDVI average'])
    # [:] reads the values, so `data` can be plotted after the file is closed.
    data = f.variables['NDVI average'][:]

plt.imshow(data)
plt.show()
from osgeo import gdal

# Open the GeoTIFF and print its driver, size, band count, coordinate system,
# geotransform and metadata.
datafile = gdal.Open('metos_python/data/Southern_Norway_and_Sweden.2017229.terra.1km.tif')

driver = datafile.GetDriver()
print("Driver: ", driver.ShortName, driver.LongName)
print("Size is ", datafile.RasterXSize, datafile.RasterYSize)
print("Bands = ", datafile.RasterCount)
print("Coordinate System is:", datafile.GetProjectionRef())
print("GetGeoTransform() = ", datafile.GetGeoTransform())
print("GetMetadata() = ", datafile.GetMetadata())
EXPLORING GEOTIFF DATA
Driver: GTiff GeoTIFF
Size is 910 796
Bands = 3
Coordinate System is: GEOGCS["WGS 84",DATUM
["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,
AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],
PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433],
AUTHORITY["EPSG","4326"]]
GetGeoTransform() = (4.0, 0.017582417582417617,
0.0, 62.0, 0.0, -0.008793969849246248)
GetMetadata() = {'AREA_OR_POINT': 'Area',
'TIFFTAG_SOFTWARE': 'ppm2geotiff v0.0.9'}
# Read raster band 1 of the opened GeoTIFF and display it as an image.
first_band = datafile.GetRasterBand(1)
bnd1 = first_band.ReadAsArray()
plt.imshow(bnd1)
plt.show()
VECTOR DATA FORMATS
Composed of points, lines and polygons
Used for roads, boundaries, etc.
Types:
- Shapefiles
- GeoJSON files
Shapefile operations:
from osgeo import ogr

# Open the 'Norway_places' vector data source and get a layer from it.
shapedata = ogr.Open('Norway_places')
layer = shapedata.GetLayer()

# One row per feature: [index, NAME field, geometry type name, centroid WKT].
places_norway = []
for i in range(layer.GetFeatureCount()):
    feature = layer.GetFeature(i)
    name = feature.GetField("NAME")
    geometry = feature.GetGeometryRef()
    places_norway.append([i, name, geometry.GetGeometryName(),
                          geometry.Centroid().ExportToWkt()])

# Show the first ten entries.
print(places_norway[0:10])
[[0, 'Gol', 'POINT', 'POINT (8.9436636 60.7016106)'], [1, 'Halhjem', 'POINT', 'POINT
(5.4263602 60.1455207)'], [2, 'Tromsø', 'POINT', 'POINT (18.9517967 69.6669861)'], [3,
'Oslo', 'POINT', 'POINT (10.7391223 59.913263)'], [4, 'Narvik', 'POINT', 'POINT
(17.426652 68.4396792)'], [5, 'Bergen', 'POINT', 'POINT (5.3289029 60.3934769)'], [6,
'Hamna', 'POINT', 'POINT (18.9827839 69.7031209)'], [7, 'Stakkevollan', 'POINT',
'POINT (19.0031056 69.6937324)'], [8, 'Storslett', 'POINT', 'POINT (21.0301562
69.7694272)'], [9, 'Kvaløysletta', 'POINT', 'POINT (18.8708572 69.6953085)']]
PLOT SHAPEFILE POINT DATA ON A MAP
from osgeo import ogr
from mpl_toolkits.basemap import Basemap
import matplotlib.pyplot as plt

# Plot every feature of type 'city' from the 'Norway_places' data source on a
# Lambert conformal map and label it with its name.
fig = plt.figure(figsize=[12, 15])  # a new figure window
ax = fig.add_subplot(1, 1, 1)  # specify (nrows, ncols, axnum)
ax.set_title('Cities in Norway', fontsize=14)

# NOTE: `map` shadows the Python builtin of the same name; the name is kept
# unchanged from the original snippet.
map = Basemap(llcrnrlon=-1.0, urcrnrlon=40., llcrnrlat=55., urcrnrlat=75.,
              resolution='i', projection='lcc', lat_1=65., lon_0=5.)
map.drawmapboundary(fill_color='aqua')
map.fillcontinents(color='#ffe2ab', lake_color='aqua')
map.drawcoastlines()

shapedata = ogr.Open('Norway_places')
layer = shapedata.GetLayer()
for i in range(layer.GetFeatureCount()):
    feature = layer.GetFeature(i)
    name = feature.GetField("NAME")
    place_type = feature.GetField("TYPE")
    if place_type != 'city':
        continue
    geometry = feature.GetGeometryRef()
    lon, lat = geometry.GetPoint()[:2]
    # Calling the Basemap instance converts lon/lat to map x/y coordinates.
    x, y = map(lon, lat)
    # The original passed an undefined name `marker`; a circle marker is used.
    map.plot(x, y, marker='o', color='red', markersize=8, markeredgewidth=2)
    ax.annotate(name, (x, y), color='blue', fontsize=14)

plt.show()
PyExtremes: a Python library for Extreme Value Analysis
Guidelines: https://pypi.org/project/pyextremes/
REFERENCES
 https://annefou.github.io/metos_python/02-formats/
 Pyextremes: https://github.com/georgebv/pyextremes
 https://geopandas.org/en/stable/
 https://pysal.org/esda/notebooks/spatialautocorrelation.html
LS1.pptx

More Related Content

Similar to LS1.pptx

MongoDB for Time Series Data: Analyzing Time Series Data Using the Aggregatio...
MongoDB for Time Series Data: Analyzing Time Series Data Using the Aggregatio...MongoDB for Time Series Data: Analyzing Time Series Data Using the Aggregatio...
MongoDB for Time Series Data: Analyzing Time Series Data Using the Aggregatio...
MongoDB
 
Location Analytics - Real Time Geofencing using Apache Kafka
Location Analytics - Real Time Geofencing using Apache KafkaLocation Analytics - Real Time Geofencing using Apache Kafka
Location Analytics - Real Time Geofencing using Apache Kafka
Guido Schmutz
 
Полнотекстовый поиск в PostgreSQL за миллисекунды (Олег Бартунов, Александр К...
Полнотекстовый поиск в PostgreSQL за миллисекунды (Олег Бартунов, Александр К...Полнотекстовый поиск в PostgreSQL за миллисекунды (Олег Бартунов, Александр К...
Полнотекстовый поиск в PostgreSQL за миллисекунды (Олег Бартунов, Александр К...
Ontico
 
Incanter Data Sorcery
Incanter Data SorceryIncanter Data Sorcery
Incanter Data Sorcery
elliando dias
 

Similar to LS1.pptx (20)

20090701 Climate Data Staging
20090701 Climate Data Staging20090701 Climate Data Staging
20090701 Climate Data Staging
 
SF Big Analytics 20191112: How to performance-tune Spark applications in larg...
SF Big Analytics 20191112: How to performance-tune Spark applications in larg...SF Big Analytics 20191112: How to performance-tune Spark applications in larg...
SF Big Analytics 20191112: How to performance-tune Spark applications in larg...
 
ClusterAnalysis
ClusterAnalysisClusterAnalysis
ClusterAnalysis
 
MongoDB for Time Series Data: Analyzing Time Series Data Using the Aggregatio...
MongoDB for Time Series Data: Analyzing Time Series Data Using the Aggregatio...MongoDB for Time Series Data: Analyzing Time Series Data Using the Aggregatio...
MongoDB for Time Series Data: Analyzing Time Series Data Using the Aggregatio...
 
Multi-core GPU – Fast parallel SAR image generation
Multi-core GPU – Fast parallel SAR image generationMulti-core GPU – Fast parallel SAR image generation
Multi-core GPU – Fast parallel SAR image generation
 
Location Analytics - Real Time Geofencing using Apache Kafka
Location Analytics - Real Time Geofencing using Apache KafkaLocation Analytics - Real Time Geofencing using Apache Kafka
Location Analytics - Real Time Geofencing using Apache Kafka
 
R getting spatial
R getting spatialR getting spatial
R getting spatial
 
Geographical information system unit 2
Geographical information  system unit 2Geographical information  system unit 2
Geographical information system unit 2
 
Metadata/Time-Date Tools (Toolkit_MTD)
Metadata/Time-Date Tools (Toolkit_MTD)Metadata/Time-Date Tools (Toolkit_MTD)
Metadata/Time-Date Tools (Toolkit_MTD)
 
design_doc
design_docdesign_doc
design_doc
 
Metadata syncronisation with GeoNetwork - a users perspective
Metadata syncronisation with GeoNetwork - a users perspectiveMetadata syncronisation with GeoNetwork - a users perspective
Metadata syncronisation with GeoNetwork - a users perspective
 
Opensource gis development - part 3
Opensource gis development - part 3Opensource gis development - part 3
Opensource gis development - part 3
 
10. R getting spatial
10.  R getting spatial10.  R getting spatial
10. R getting spatial
 
LocationTech Projects
LocationTech ProjectsLocationTech Projects
LocationTech Projects
 
EECSCon Poster
EECSCon PosterEECSCon Poster
EECSCon Poster
 
Introduction to R
Introduction to RIntroduction to R
Introduction to R
 
An Overview of HDF-EOS (Part 1)
An Overview of HDF-EOS (Part 1)An Overview of HDF-EOS (Part 1)
An Overview of HDF-EOS (Part 1)
 
Полнотекстовый поиск в PostgreSQL за миллисекунды (Олег Бартунов, Александр К...
Полнотекстовый поиск в PostgreSQL за миллисекунды (Олег Бартунов, Александр К...Полнотекстовый поиск в PostgreSQL за миллисекунды (Олег Бартунов, Александр К...
Полнотекстовый поиск в PostgreSQL за миллисекунды (Олег Бартунов, Александр К...
 
Incanter Data Sorcery
Incanter Data SorceryIncanter Data Sorcery
Incanter Data Sorcery
 
10. Getting Spatial
10. Getting Spatial10. Getting Spatial
10. Getting Spatial
 

Recently uploaded

Kisan Call Centre - To harness potential of ICT in Agriculture by answer farm...
Kisan Call Centre - To harness potential of ICT in Agriculture by answer farm...Kisan Call Centre - To harness potential of ICT in Agriculture by answer farm...
Kisan Call Centre - To harness potential of ICT in Agriculture by answer farm...
Krashi Coaching
 
1029-Danh muc Sach Giao Khoa khoi 6.pdf
1029-Danh muc Sach Giao Khoa khoi  6.pdf1029-Danh muc Sach Giao Khoa khoi  6.pdf
1029-Danh muc Sach Giao Khoa khoi 6.pdf
QucHHunhnh
 

Recently uploaded (20)

Web & Social Media Analytics Previous Year Question Paper.pdf
Web & Social Media Analytics Previous Year Question Paper.pdfWeb & Social Media Analytics Previous Year Question Paper.pdf
Web & Social Media Analytics Previous Year Question Paper.pdf
 
fourth grading exam for kindergarten in writing
fourth grading exam for kindergarten in writingfourth grading exam for kindergarten in writing
fourth grading exam for kindergarten in writing
 
microwave assisted reaction. General introduction
microwave assisted reaction. General introductionmicrowave assisted reaction. General introduction
microwave assisted reaction. General introduction
 
Nutritional Needs Presentation - HLTH 104
Nutritional Needs Presentation - HLTH 104Nutritional Needs Presentation - HLTH 104
Nutritional Needs Presentation - HLTH 104
 
Explore beautiful and ugly buildings. Mathematics helps us create beautiful d...
Explore beautiful and ugly buildings. Mathematics helps us create beautiful d...Explore beautiful and ugly buildings. Mathematics helps us create beautiful d...
Explore beautiful and ugly buildings. Mathematics helps us create beautiful d...
 
Key note speaker Neum_Admir Softic_ENG.pdf
Key note speaker Neum_Admir Softic_ENG.pdfKey note speaker Neum_Admir Softic_ENG.pdf
Key note speaker Neum_Admir Softic_ENG.pdf
 
Class 11th Physics NEET formula sheet pdf
Class 11th Physics NEET formula sheet pdfClass 11th Physics NEET formula sheet pdf
Class 11th Physics NEET formula sheet pdf
 
Sanyam Choudhary Chemistry practical.pdf
Sanyam Choudhary Chemistry practical.pdfSanyam Choudhary Chemistry practical.pdf
Sanyam Choudhary Chemistry practical.pdf
 
Grant Readiness 101 TechSoup and Remy Consulting
Grant Readiness 101 TechSoup and Remy ConsultingGrant Readiness 101 TechSoup and Remy Consulting
Grant Readiness 101 TechSoup and Remy Consulting
 
Holdier Curriculum Vitae (April 2024).pdf
Holdier Curriculum Vitae (April 2024).pdfHoldier Curriculum Vitae (April 2024).pdf
Holdier Curriculum Vitae (April 2024).pdf
 
Interactive Powerpoint_How to Master effective communication
Interactive Powerpoint_How to Master effective communicationInteractive Powerpoint_How to Master effective communication
Interactive Powerpoint_How to Master effective communication
 
Call Girls in Dwarka Mor Delhi Contact Us 9654467111
Call Girls in Dwarka Mor Delhi Contact Us 9654467111Call Girls in Dwarka Mor Delhi Contact Us 9654467111
Call Girls in Dwarka Mor Delhi Contact Us 9654467111
 
Student login on Anyboli platform.helpin
Student login on Anyboli platform.helpinStudent login on Anyboli platform.helpin
Student login on Anyboli platform.helpin
 
9548086042 for call girls in Indira Nagar with room service
9548086042  for call girls in Indira Nagar  with room service9548086042  for call girls in Indira Nagar  with room service
9548086042 for call girls in Indira Nagar with room service
 
Kisan Call Centre - To harness potential of ICT in Agriculture by answer farm...
Kisan Call Centre - To harness potential of ICT in Agriculture by answer farm...Kisan Call Centre - To harness potential of ICT in Agriculture by answer farm...
Kisan Call Centre - To harness potential of ICT in Agriculture by answer farm...
 
Accessible design: Minimum effort, maximum impact
Accessible design: Minimum effort, maximum impactAccessible design: Minimum effort, maximum impact
Accessible design: Minimum effort, maximum impact
 
BAG TECHNIQUE Bag technique-a tool making use of public health bag through wh...
BAG TECHNIQUE Bag technique-a tool making use of public health bag through wh...BAG TECHNIQUE Bag technique-a tool making use of public health bag through wh...
BAG TECHNIQUE Bag technique-a tool making use of public health bag through wh...
 
Disha NEET Physics Guide for classes 11 and 12.pdf
Disha NEET Physics Guide for classes 11 and 12.pdfDisha NEET Physics Guide for classes 11 and 12.pdf
Disha NEET Physics Guide for classes 11 and 12.pdf
 
SOCIAL AND HISTORICAL CONTEXT - LFTVD.pptx
SOCIAL AND HISTORICAL CONTEXT - LFTVD.pptxSOCIAL AND HISTORICAL CONTEXT - LFTVD.pptx
SOCIAL AND HISTORICAL CONTEXT - LFTVD.pptx
 
1029-Danh muc Sach Giao Khoa khoi 6.pdf
1029-Danh muc Sach Giao Khoa khoi  6.pdf1029-Danh muc Sach Giao Khoa khoi  6.pdf
1029-Danh muc Sach Giao Khoa khoi 6.pdf
 

LS1.pptx

  • 1. MACHINE LEARNING FOR EARTH SYSTEM SCIENCE ADWAY MITRA CENTRE OF EXCELLENCE IN AI, IIT KHARAGPUR Live Session 1: Handling Geophysical Datasets
  • 2. CONCEPTS COVERED ⮚ Raster Data formats ⮚ Vector Data Formats ⮚ Loading and visualizing data using Python interface ⮚ Extreme values statistics using Python
  • 3. RASTER DATA FORMAT Formats: NetCDF (.nc) – netCDF4 package (Python) HDF (hierarchical data format) – also netCDF4 package - Raster data where each “pixel” stores geophysical variable for a specific location GeoTIFF- Geospatial Data Abstraction Library (GDAL) (tagged image file format) - Georeferenced data including satellite imagery, aerial photography, topography or digital elevation maps etc
  • 4. LOADING DATA FROM .NC from netCDF4 import Dataset nc_f = 'tpw_v07r01_200910.nc4.nc' # filename nc_fid = Dataset(nc_f, 'r') print(nc_fid) nc_fid.close() import netCDF4 import numpy as np f = netCDF4.Dataset('orography.nc', 'r') lats = f.variables['lat'] lons = f.variables['lon'] orography = f.variables['orog'] print(lats[:]) print(lons[:]) print(orography[:]) f.close() STORING DATA IN NC FILE import netCDF4 import numpy as np f = netCDF4.Dataset('orography.nc', 'w') f.createDimension('time', None) f.createDimension('z', 3) f.createDimension('y', 4) f.createDimension('x', 5) lats = f.createVariable('lat', float, ('y', ), zlib=True) lons = f.createVariable('lon', float, ('x', ), zlib=True) orography = f.createVariable('orog', float, ('y', 'x'), zlib=True, least_significant_digit=1, fill_value=0) # create latitude and longitude 1D arrays lat_out = [60, 65, 70, 75] lon_out = [ 30, 60, 90, 120, 150] # Create field values for orography data_out = np.arange(4*5) # 1d array but with dimension x*y data_out.shape = (4,5) # reshape to 2d array orography[:] = data_out lats[:] = lat_out lons[:] = lon_out # close file to write on disk f.close()
  • 5. DATA VISUALIZATION USING HDF from netCDF4 import Dataset f=Dataset('MISR_AM1_CGLS_MAY_2007_F04_0031.hdf','r') print("Metadata for the dataset:") print(f) print("List of available variables (or key): ") f.variable.keys() print("Metadata for 'NDVI average' variable: ") data=f.variables['NDVI average'][:] plt.imshow(data) plt.show() f.close() f.close()
  • 6. from osgeo import gdal datafile = gdal.Open('metos_python/data/Southern_Norway_and_Sweden.2017229.terra.1km.tif') print( "Driver: ",datafile.GetDriver().ShortName, datafile.GetDriver().LongName) print( "Size is ", datafile.RasterXSize, datafile.RasterYSize) print( "Bands = ", datafile.RasterCount) print( "Coordinate System is:", datafile.GetProjectionRef ()) print( "GetGeoTransform() = ", datafile.GetGeoTransform ()) print( "GetMetadata() = ", datafile.GetMetadata ()) EXPLORING GEOTIFF DATA Driver: GTiff GeoTIFF Size is 910 796 Bands = 3 Coordinate System is: GEOGCS["WGS 84",DATUM ["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563, AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]], PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433], AUTHORITY["EPSG","4326"]] GetGeoTransform() = (4.0, 0.017582417582417617, 0.0, 62.0, 0.0, -0.008793969849246248) GetMetadata() = {'AREA_OR_POINT': 'Area’, 'TIFFTAG_SOFTWARE': 'ppm2geotiff v0.0.9'} bnd1 = datafile.GetRasterBand(1).ReadAsArray() plt.imshow(bnd1) plt.show()
  • 7. VECTOR DATA FORMATS Composed of points, lines and polygons Types: Used for roads, boundaries etc Shapefiles GeoJSON files Shapefile operations: from osgeo import ogr shapedata = ogr.Open('Norway_places’) layer = shapedata.GetLayer() places_norway = [] for i in range(layer.GetFeatureCount()): feature = layer.GetFeature(i) name = feature.GetField("NAME") geometry = feature.GetGeometryRef() places_norway.append([i,name,geometry.GetGeometryName(), geometry.Centroid().ExportToWkt()]) print(places_norway[0:10]) [[0, 'Gol', 'POINT', 'POINT (8.9436636 60.7016106)'], [1, 'Halhjem', 'POINT', 'POINT (5.4263602 60.1455207)'], [2, 'Tromsø', 'POINT', 'POINT (18.9517967 69.6669861)'], [3, 'Oslo', 'POINT', 'POINT (10.7391223 59.913263)'], [4, 'Narvik', 'POINT', 'POINT (17.426652 68.4396792)'], [5, 'Bergen', 'POINT', 'POINT (5.3289029 60.3934769)'], [6, 'Hamna', 'POINT', 'POINT (18.9827839 69.7031209)'], [7, 'Stakkevollan', 'POINT', 'POINT (19.0031056 69.6937324)'], [8, 'Storslett', 'POINT', 'POINT (21.0301562 69.7694272)'], [9, 'Kvaløysletta', 'POINT', 'POINT (18.8708572 69.6953085)']]
  • 8. PLOT SHAPEFILE POINT DATA ON A MAP from osgeo import ogr from mpl_toolkits.basemap import Basemap import matplotlib.pyplot as plt fig = plt.figure(figsize=[12,15]) # a new figure window ax = fig.add_subplot(1, 1, 1) # specify (nrows, ncols, axnum) ax.set_title('Cities in Norway', fontsize=14) map = Basemap(llcrnrlon=-1.0,urcrnrlon=40.,llcrnrlat=55.,urcrnrlat=75., resolution='i', projection='lcc', lat_1=65., lon_0=5.) map.drawmapboundary(fill_color='aqua') map.fillcontinents(color='#ffe2ab',lake_color='aqua') map.drawcoastlines() shapedata = ogr.Open('Norway_places') layer = shapedata.GetLayer() for i in range(layer.GetFeatureCount()): feature = layer.GetFeature(i) name = feature.GetField("NAME") type = feature.GetField("TYPE") if type == 'city': geometry = feature.GetGeometryRef() lon = geometry.GetPoint()[0] lat = geometry.GetPoint()[1] x,y = map(lon,lat) map.plot(x, y, marker=marker, color='red', markersize=8, markeredgewidth=2) ax.annotate(name, (x, y), color='blue', fontsize=14) plt.show()
  • 9.
  • 10.
  • 11.
  • 12. PyExtremes: a Python library for Extreme Value Analysis Guidelines: https://pypi.org/project/pyextremes/
  • 13.
  • 14.
  • 15.
  • 16.
  • 17.
  • 18. REFERENCES  https://annefou.github.io/metos_python/02-formats/  Pyextremes: https://github.com/georgebv/pyextremes  https://geopandas.org/en/stable/  https://pysal.org/esda/notebooks/spatialautocorrelation.html