HDF5
                            Hierarchical Data Format




Thursday, January 5, 2012
/the/object/tree
                     • Datasets, Leaf
                      • Tables, records with fixed-length fields
                      • Arrays: Matrices of same type
                        • VLArray, EArray, Array
                     • Groups
                      • May contain groups and datasets
Thursday, January 5, 2012
from tables import *

                            # Define a user record to characterize some kind of particles.
                            # Each class attribute declares one column of the table.
                            class Particle(IsDescription):
                                name     = StringCol(16)    # 16-character String
                                idnumber = Int64Col()       # Signed 64-bit integer
                                ADCcount = UInt16Col()      # Unsigned short integer
                                TDCcount = UInt8Col()       # unsigned byte
                                grid_i   = Int32Col()       # integer
                                grid_j   = Int32Col()       # integer
                                pressure = Float32Col()     # float (single-precision)
                                energy   = FloatCol()       # double (double-precision)

                            filename = "test.h5"
                            # Open a file in "w"rite mode. The ``with`` block closes (and
                            # flushes) the file on exit, including when filling the table
                            # raises, so the handle is never leaked.
                            with openFile(filename, mode="w", title="Test file") as h5file:
                                # Create a new group under "/" (root)
                                group = h5file.createGroup("/", 'detector', 'Detector information')
                                # Create one table on it
                                table = h5file.createTable(group, 'readout', Particle, "Readout example")
                                # Fill the table with 10 particles
                                particle = table.row
                                for i in xrange(10):
                                    particle['name'] = 'Particle: %6d' % (i)
                                    particle['TDCcount'] = i % 256
                                    particle['ADCcount'] = (i * 256) % (1 << 16)
                                    particle['grid_i'] = i
                                    particle['grid_j'] = 10 - i
                                    particle['pressure'] = float(i*i)
                                    particle['energy'] = float(particle['pressure'] ** 4)
                                    particle['idnumber'] = i * (2 ** 34)
                                    # Insert a new particle record
                                    particle.append()




Thursday, January 5, 2012
Thursday, January 5, 2012
Filling a table
                            >>> class Particle(IsDescription):
                            ...     name      = StringCol(16)     #   16-character String
                            ...     idnumber = Int64Col()         #   Signed 64-bit integer
                            ...     ADCcount = UInt16Col()        #   Unsigned short integer
                            ...     TDCcount = UInt8Col()         #   unsigned byte
                            ...     grid_i    = Int32Col()        #   32-bit integer
                            ...     grid_j    = Int32Col()        #   32-bit integer
                            ...     pressure = Float32Col()       #   float (single-precision)
                            ...     energy    = Float64Col()      #   double (double-precision)




                            >>>   table = h5file.root.detector.readout
                            >>>   particle = table.row
                            >>>   for i in xrange(10, 15):
                            ...       particle['name'] = 'Particle: %6d' % (i)
                            ...       particle['TDCcount'] = i % 256
                            ...       particle['ADCcount'] = (i * 256) % (1 << 16)
                            ...       particle['grid_i'] = i
                            ...       particle['grid_j'] = 10 - i
                            ...       particle['pressure'] = float(i*i)
                            ...       particle['energy'] = float(particle['pressure'] ** 4)
                            ...       particle['idnumber'] = i * (2 ** 34)
                            ...       particle.append()
                            >>>   table.flush()




Thursday, January 5, 2012
Accessing a table:
                                 Slicing

                             >>> table.cols.TDCcount[0] = 1
                             >>> table.cols.energy[1:9:3] = [2,3,4]




Thursday, January 5, 2012
Search in Tables
                            >>> class Particle(IsDescription):
                            ...     name      = StringCol(16)    #   16-character String
                            ...     idnumber = Int64Col()        #   Signed 64-bit integer
                            ...     ADCcount = UInt16Col()       #   Unsigned short integer
                            ...     TDCcount = UInt8Col()        #   unsigned byte
                            ...     grid_i    = Int32Col()       #   32-bit integer
                            ...     grid_j    = Int32Col()       #   32-bit integer
                            ...     pressure = Float32Col()      #   float (single-precision)
                            ...     energy    = Float64Col()     #   double (double-precision)




>>> table = h5file.root.detector.readout
>>> pressure = [x['pressure'] for x in table.iterrows()
...             if x['TDCcount'] > 3 and 20 <= x['pressure'] < 50]
>>> pressure
[25.0, 36.0, 49.0]

                                             “In-Kernel” Version
>>> names = [ x['name'] for x in table.where("""(TDCcount > 3) & (20 <= pressure) & (pressure < 50)""") ]
>>> names
['Particle:      5', 'Particle:      6', 'Particle:      7']

Thursday, January 5, 2012
Attributes

                            >>>   table = h5file.root.detector.readout
                            >>>   table.attrs.gath_date = "Wed, 06/12/2003 18:33"
                            >>>   table.attrs.temperature = 18.4
                            >>>   table.attrs.temp_scale = "Celsius"




Thursday, January 5, 2012
(C)Arrays
                    import numpy
                    import tables

                    fileName = 'carray1.h5'
                    shape = (200, 300)
                    atom = tables.UInt8Atom()
                    filters = tables.Filters(complevel=5, complib='zlib')

                    # Create the array with the zlib filter configured above. The
                    # ``with`` block closes the file even if the write fails.
                    with tables.openFile(fileName, 'w') as h5f:
                        ca = h5f.createCArray(h5f.root, 'carray', atom, shape, filters=filters)

                        # Fill a hyperslab in ``ca``.
                        ca[10:60, 20:70] = numpy.ones((50, 50))

                    # Re-open and read another hyperslab
                    with tables.openFile(fileName) as h5f:
                        # Single-argument print(...) prints the same text under
                        # Python 2 and also parses under Python 3.
                        print(h5f)
                        print(h5f.root.carray[8:12, 18:22])




Thursday, January 5, 2012
(E)Arrays
                   import tables
                   import numpy

                   # The ``with`` block closes the file on exit, even if creating or
                   # appending to the array raises.
                   with tables.openFile('earray1.h5', mode='w') as fileh:
                       a = tables.StringAtom(itemsize=8)

                       # Use ''a'' as the object type for the enlargeable array.
                       array_c = fileh.createEArray(fileh.root, 'array_c', a, (0,), "Chars")
                       array_c.append(numpy.array(['a'*2, 'b'*4], dtype='S8'))
                       array_c.append(numpy.array(['a'*6, 'b'*8, 'c'*10], dtype='S8'))

                       # Read the string ''EArray'' we have created on disk.
                       for s in array_c:
                           # Single-argument print(...) prints the same text under
                           # Python 2 and also parses under Python 3.
                           print('array_c[%s] => %r' % (array_c.nrow, s))




Thursday, January 5, 2012
Pytables likes Numpy
>>> gcolumns = h5file.createGroup(h5file.root, "columns", "Pressure and Name")

>>> h5file.createArray(gcolumns, 'pressure', array(pressure),
...                    "Pressure column selection")
/columns/pressure (Array(3,)) 'Pressure column selection'
  atom := Float64Atom(shape=(), dflt=0.0)
  maindim := 0
  flavor := 'numpy'
  byteorder := 'little'
  chunkshape := None

>>> h5file.createArray(gcolumns, 'name', names, "Name column selection")
/columns/name (Array(3,)) 'Name column selection'
  atom := StringAtom(itemsize=16, shape=(), dflt='')
  maindim := 0
  flavor := 'python'
  byteorder := 'irrelevant'
  chunkshape := None




Thursday, January 5, 2012
def _get_pgroup(self, file, p, proj=None):
    """
    Get group node of tables.File corresponding to property p.

    Creates group node, if it does not exist yet. A newly created group
    gets two enlargeable arrays: ``data`` (Float64) and ``step`` (Int32).

    :param tables.File file: Handle to HDF5 file to which records are saved.
    :param string p: To be recorded property.
    :param Projection proj: Projection from which property p is recorded.
        When omitted (or falsy), the name of ``self.sheet`` is used instead.

    :return: Group node corresponding to property p.
    """
    SDict = self.sim.config.ShapeDispatch

    # The node is named "<property>_<owner>", where the owner is the
    # projection if one was given and the sheet otherwise.
    name = proj.name if proj else self.sheet.name
    gname = '%s_%s' % (p, name)

    try:
        pgroup = file.getNode('/' + gname)
    except NoSuchNodeError:
        # First record for this property: create the group and its arrays.
        # Both shapes start with 0 along the first axis.
        pgroup = file.createGroup('/', gname)
        file.createEArray(pgroup, 'data', Float64Atom(),
                          flatten((0, SDict[p])))
        file.createEArray(pgroup, 'step', Int32Atom(), (0, 1))

    return pgroup

     def _write_attr(self, pgroup, data):
         """
         Helper fn writing provided data and step count to group node (of
         tables.File)

         :param tables.group.Group pgroup: Group node to which data is saved.
         :param numpy.Array data: Data matrix to be recorded.
         """
         # Each call appends one entry to both arrays: the data matrix wrapped
         # in a list, and the current ``self.count`` as a 1x1 nested list.
         pgroup.data.append([data])
         pgroup.step.append([[self.count]])
Thursday, January 5, 2012
def function(self):
    """
    Stores activity submatrices from recordings file per node to 3D array
    and returns reshaped 2D version of it.

    Reads ``self.path`` and, for every node name in ``self.nnames``, takes
    the ``size`` x ``size`` window starting at (``self.x``, ``self.y``)
    from the record read at ``self.cnt``.

    :return: Array of shape ``(size, size * len(self.nnames))``.
    """
    x = self.x
    y = self.y
    size = self.size
    nnames = self.nnames

    stack = np.zeros((len(nnames), size, size))

    with openFile(self.path, 'r') as h5file:
        for i, nname in enumerate(nnames):
            node = h5file.getNode(nname)
            # Parenthesised so the assignment can legally span two lines.
            stack[i, :, :] = (
                node.data.read(self.cnt)[0, x : x + size, y : y + size])

    # NOTE(review): reshape only reinterprets the (n, size, size) buffer in
    # row-major order; it does not place the submatrices side by side.
    # Confirm this layout is what callers expect.
    return stack.reshape(size, size * len(nnames))




Thursday, January 5, 2012
Useful Programs


                     • HDFView or ViTables
                     • h5dump
                     • hdf5read, hdf5info (MATLAB)


Thursday, January 5, 2012

Pytables

  • 1.
    HDF5 Hierarchical Data Format Thursday, January 5, 2012
  • 2.
    /the/object/tree • Datasets, Leaf • Tables, records with fixed-length fields • Arrays: Matrices of same type • VLArray, EArray, Array • Groups • May contain groups and datasets Thursday, January 5, 2012
  • 3.
    from tables import* # Define a user record to characterize some kind of particles class Particle(IsDescription): name = StringCol(16) # 16-character String idnumber = Int64Col() # Signed 64-bit integer ADCcount = UInt16Col() # Unsigned short integer TDCcount = UInt8Col() # unsigned byte grid_i = Int32Col() # integer grid_j = Int32Col() # integer pressure = Float32Col() # float (single-precision) energy = FloatCol() # double (double-precision) filename = "test.h5" # Open a file in "w"rite mode h5file = openFile(filename, mode = "w", title = "Test file") # Create a new group under "/" (root) group = h5file.createGroup("/", 'detector', 'Detector information') # Create one table on it table = h5file.createTable(group, 'readout', Particle, "Readout example") # Fill the table with 10 particles particle = table.row for i in xrange(10): particle['name'] = 'Particle: %6d' % (i) particle['TDCcount'] = i % 256 particle['ADCcount'] = (i * 256) % (1 << 16) particle['grid_i'] = i particle['grid_j'] = 10 - i particle['pressure'] = float(i*i) particle['energy'] = float(particle['pressure'] ** 4) particle['idnumber'] = i * (2 ** 34) # Insert a new particle record particle.append() # Close (and flush) the file h5file.close() Thursday, January 5, 2012
  • 4.
  • 5.
    Filling a table >>> class Particle(IsDescription): ... name = StringCol(16) # 16-character String ... idnumber = Int64Col() # Signed 64-bit integer ... ADCcount = UInt16Col() # Unsigned short integer ... TDCcount = UInt8Col() # unsigned byte ... grid_i = Int32Col() # 32-bit integer ... grid_j = Int32Col() # 32-bit integer ... pressure = Float32Col() # float (single-precision) ... energy = Float64Col() # double (double-precision) >>> table = h5file.root.detector.readout >>> particle = table.row >>> for i in xrange(10, 15): ... particle['name'] = 'Particle: %6d' % (i) ... particle['TDCcount'] = i % 256 ... particle['ADCcount'] = (i * 256) % (1 << 16) ... particle['grid_i'] = i ... particle['grid_j'] = 10 - i ... particle['pressure'] = float(i*i) ... particle['energy'] = float(particle['pressure'] ** 4) ... particle['idnumber'] = i * (2 ** 34) ... particle.append() >>> table.flush() Thursday, January 5, 2012
  • 6.
    Accessing a table: Slicing >>> table.cols.TDCcount[0] = 1 >>> table.cols.energy[1:9:3] = [2,3,4] Thursday, January 5, 2012
  • 7.
    Search in Tables >>> class Particle(IsDescription): ... name = StringCol(16) # 16-character String ... idnumber = Int64Col() # Signed 64-bit integer ... ADCcount = UInt16Col() # Unsigned short integer ... TDCcount = UInt8Col() # unsigned byte ... grid_i = Int32Col() # 32-bit integer ... grid_j = Int32Col() # 32-bit integer ... pressure = Float32Col() # float (single-precision) ... energy = Float64Col() # double (double-precision) >>> table = h5file.root.detector.readout >>> pressure = [x['pressure'] for x in table.iterrows() if x['TDCcount'] > 3 and 20 <= x ['pressure'] < 50] >>> pressure [25.0, 36.0, 49.0] “In-Kernel” Version >>> names = [ x['name'] for x in table.where("""(TDCcount > 3) & (20 <= pressure) & (pressure < 50)" >>> names ['Particle: 5', 'Particle: 6', 'Particle: 7'] Thursday, January 5, 2012
  • 8.
    Attributes >>> table = h5file.root.detector.readout >>> table.attrs.gath_date = "Wed, 06/12/2003 18:33" >>> table.attrs.temperature = 18.4 >>> table.attrs.temp_scale = "Celsius" Thursday, January 5, 2012
  • 9.
    (C)Arrays import numpy import tables fileName = 'carray1.h5' shape = (200, 300) atom = tables.UInt8Atom() filters = tables.Filters(complevel=5, complib='zlib') h5f = tables.openFile(fileName, 'w') ca = h5f.createCArray(h5f.root, 'carray', atom, shape, filters=filters) # Fill a hyperslab in ``ca``. ca[10:60, 20:70] = numpy.ones((50, 50)) h5f.close() # Re-open and read another hyperslab h5f = tables.openFile(fileName) print h5f print h5f.root.carray[8:12, 18:22] h5f.close() Thursday, January 5, 2012
  • 10.
    (E)Arrays import tables import numpy fileh = tables.openFile('earray1.h5', mode='w') a = tables.StringAtom(itemsize=8) # Use ''a'' as the object type for the enlargeable array. array_c = fileh.createEArray(fileh.root, 'array_c', a, (0,), "Chars") array_c.append(numpy.array(['a'*2, 'b'*4], dtype='S8')) array_c.append(numpy.array(['a'*6, 'b'*8, 'c'*10], dtype='S8')) # Read the string ''EArray'' we have created on disk. for s in array_c: print 'array_c[%s] => %r' % (array_c.nrow, s) # Close the file. fileh.close() Thursday, January 5, 2012
  • 11.
    Pytables likes Numpy >>>gcolumns = h5file.createGroup(h5file.root, "columns", "Pressure and Name") >>> h5file.createArray(gcolumns, 'pressure', array(pressure)) "Pressure column selection") /columns/pressure (Array(3,)) 'Pressure column selection' atom := Float64Atom(shape=(), dflt=0.0) maindim := 0 flavor := 'numpy' byteorder := 'little' chunkshape := None >>> h5file.createArray(gcolumns, 'name', names, "Name column selection") /columns/name (Array(3,)) 'Name column selection' atom := StringAtom(itemsize=16, shape=(), dflt='') maindim := 0 flavor := 'python' byteorder := 'irrelevant' chunkshape := None Thursday, January 5, 2012
  • 12.
    def _get_pgroup(self, file,p, proj = None): """ Get group node of tables.File corresponding to property p. Creates group node, if it does not exist yet. :param tables.File file: Handle to HDF5 file to which records are saved. :param string p: To be recorded property. :param Projection proj: Projection from which property p is recorded. :return: Group node corresponding to property p. """ SDict = self.sim.config.ShapeDispatch if not proj: name = self.sheet.name else: name = proj.name try: pgroup = file.getNode('/%s_%s' % (p, name,)) except NoSuchNodeError: pgroup = file.createGroup('/', '%s_%s' % (p, name,)) file.createEArray(pgroup, 'data', Float64Atom(), flatten((0, SDict[p]))) file.createEArray(pgroup, 'step', Int32Atom(), (0, 1)) return pgroup def _write_attr(self, pgroup, data): """ Helper fn writing provided data and step count to group node (of tables.File) :param tables.group.Group pgroup: Group node to which data is saved. :param numpy.Array data: Data matrix to be recorded. """ pgroup.data.append([data]) pgroup.step.append([[self.count]]) Thursday, January 5, 2012
  • 13.
    def function(self): """ Stores activity submatrices from recordings file per node to 3D array and returns reshaped 2D version of it. """ x = self.x y = self.y size = self.size nnames = self.nnames array = np.zeros((len(nnames), size, size)) with openFile(self.path, 'r') as file: for i, nname in enumerate(nnames): node = file.getNode(nname) array[i, :, :] = node.data.read(self.cnt)[0, x : x + size, y : y + size] return array.reshape(size, size * len(nnames)) Thursday, January 5, 2012
  • 14.
    Useful Programs • HDFView or ViTables • h5dump • hdf5read, hdf5info (MATLAB) Thursday, January 5, 2012