• Save
Pytables
Upcoming SlideShare
Loading in...5
×

Like this? Share it with your network

Share
  • Full Name — Comment goes here.
    Are you sure you want to
    Your message goes here
    Be the first to comment
    Be the first to like this
No Downloads

Views

Total Views
1,855
On Slideshare
1,730
From Embeds
125
Number of Embeds
2

Actions

Shares
Downloads
0
Comments
0
Likes
0

Embeds 125

http://rocketcirc.us 124
http://a0.twimg.com 1

Report content

Flag as inappropriate

Select your reason for flagging this presentation as inappropriate.

Cancel
    No notes for slide

Transcript

  • 1. HDF5 Hierarchical Data Format — Thursday, January 5, 2012
  • 2. /the/object/tree • Datasets, Leaf • Tables, records with fixed-length fields • Arrays: Matrices of same type • VLArray, EArray, Array • Groups • May contain groups and datasetsThursday, January 5, 2012
  • 3. from tables import * # Define a user record to characterize some kind of particles class Particle(IsDescription): name = StringCol(16) # 16-character String idnumber = Int64Col() # Signed 64-bit integer ADCcount = UInt16Col() # Unsigned short integer TDCcount = UInt8Col() # unsigned byte grid_i = Int32Col() # integer grid_j = Int32Col() # integer pressure = Float32Col() # float (single-precision) energy = FloatCol() # double (double-precision) filename = "test.h5" # Open a file in "w"rite mode h5file = openFile(filename, mode = "w", title = "Test file") # Create a new group under "/" (root) group = h5file.createGroup("/", detector, Detector information) # Create one table on it table = h5file.createTable(group, readout, Particle, "Readout example") # Fill the table with 10 particles particle = table.row for i in xrange(10): particle[name] = Particle: %6d % (i) particle[TDCcount] = i % 256 particle[ADCcount] = (i * 256) % (1 << 16) particle[grid_i] = i particle[grid_j] = 10 - i particle[pressure] = float(i*i) particle[energy] = float(particle[pressure] ** 4) particle[idnumber] = i * (2 ** 34) # Insert a new particle record particle.append() # Close (and flush) the file h5file.close()Thursday, January 5, 2012
  • 4. Thursday, January 5, 2012
  • 5. Filling a table >>> class Particle(IsDescription): ... name = StringCol(16) # 16-character String ... idnumber = Int64Col() # Signed 64-bit integer ... ADCcount = UInt16Col() # Unsigned short integer ... TDCcount = UInt8Col() # unsigned byte ... grid_i = Int32Col() # 32-bit integer ... grid_j = Int32Col() # 32-bit integer ... pressure = Float32Col() # float (single-precision) ... energy = Float64Col() # double (double-precision) >>> table = h5file.root.detector.readout >>> particle = table.row >>> for i in xrange(10, 15): ... particle[name] = Particle: %6d % (i) ... particle[TDCcount] = i % 256 ... particle[ADCcount] = (i * 256) % (1 << 16) ... particle[grid_i] = i ... particle[grid_j] = 10 - i ... particle[pressure] = float(i*i) ... particle[energy] = float(particle[pressure] ** 4) ... particle[idnumber] = i * (2 ** 34) ... particle.append() >>> table.flush()Thursday, January 5, 2012
  • 6. Accessing a table: Slicing >>> table.cols.TDCcount[0] = 1 >>> table.cols.energy[1:9:3] = [2,3,4]Thursday, January 5, 2012
  • 7. Search in Tables >>> class Particle(IsDescription): ... name = StringCol(16) # 16-character String ... idnumber = Int64Col() # Signed 64-bit integer ... ADCcount = UInt16Col() # Unsigned short integer ... TDCcount = UInt8Col() # unsigned byte ... grid_i = Int32Col() # 32-bit integer ... grid_j = Int32Col() # 32-bit integer ... pressure = Float32Col() # float (single-precision) ... energy = Float64Col() # double (double-precision)>>> table = h5file.root.detector.readout>>> pressure = [x[pressure] for x in table.iterrows() if x[TDCcount] > 3 and 20 <= x[pressure] < 50]>>> pressure[25.0, 36.0, 49.0] “In-Kernel” Version>>> names = [ x[name] for x in table.where("""(TDCcount > 3) & (20 <= pressure) & (pressure < 50)">>> names[Particle: 5, Particle: 6, Particle: 7]Thursday, January 5, 2012
  • 8. Attributes >>> table = h5file.root.detector.readout >>> table.attrs.gath_date = "Wed, 06/12/2003 18:33" >>> table.attrs.temperature = 18.4 >>> table.attrs.temp_scale = "Celsius"Thursday, January 5, 2012
  • 9. (C)Arrays import numpy import tables fileName = carray1.h5 shape = (200, 300) atom = tables.UInt8Atom() filters = tables.Filters(complevel=5, complib=zlib) h5f = tables.openFile(fileName, w) ca = h5f.createCArray(h5f.root, carray, atom, shape, filters=filters) # Fill a hyperslab in ``ca``. ca[10:60, 20:70] = numpy.ones((50, 50)) h5f.close() # Re-open and read another hyperslab h5f = tables.openFile(fileName) print h5f print h5f.root.carray[8:12, 18:22] h5f.close()Thursday, January 5, 2012
  • 10. (E)Arrays import tables import numpy fileh = tables.openFile(earray1.h5, mode=w) a = tables.StringAtom(itemsize=8) # Use a as the object type for the enlargeable array. array_c = fileh.createEArray(fileh.root, array_c, a, (0,), "Chars") array_c.append(numpy.array([a*2, b*4], dtype=S8)) array_c.append(numpy.array([a*6, b*8, c*10], dtype=S8)) # Read the string EArray we have created on disk. for s in array_c: print array_c[%s] => %r % (array_c.nrow, s) # Close the file. fileh.close()Thursday, January 5, 2012
  • 11. Pytables likes Numpy>>> gcolumns = h5file.createGroup(h5file.root, "columns", "Pressure and Name")>>> h5file.createArray(gcolumns, pressure, array(pressure))"Pressure column selection")/columns/pressure (Array(3,)) Pressure column selection atom := Float64Atom(shape=(), dflt=0.0) maindim := 0 flavor := numpy byteorder := little chunkshape := None>>> h5file.createArray(gcolumns, name, names, "Name column selection")/columns/name (Array(3,)) Name column selection atom := StringAtom(itemsize=16, shape=(), dflt=) maindim := 0 flavor := python byteorder := irrelevant chunkshape := NoneThursday, January 5, 2012
  • 12. def _get_pgroup(self, file, p, proj = None): """ Get group node of tables.File corresponding to property p. Creates group node, if it does not exist yet. :param tables.File file: Handle to HDF5 file to which records are saved. :param string p: To be recorded property. :param Projection proj: Projection from which property p is recorded. :return: Group node corresponding to property p. """ SDict = self.sim.config.ShapeDispatch if not proj: name = self.sheet.name else: name = proj.name try: pgroup = file.getNode(/%s_%s % (p, name,)) except NoSuchNodeError: pgroup = file.createGroup(/, %s_%s % (p, name,)) file.createEArray(pgroup, data, Float64Atom(), flatten((0, SDict[p]))) file.createEArray(pgroup, step, Int32Atom(), (0, 1)) return pgroup def _write_attr(self, pgroup, data): """ Helper fn writing provided data and step count to group node (of tables.File) :param tables.group.Group pgroup: Group node to which data is saved. :param numpy.Array data: Data matrix to be recorded. """ pgroup.data.append([data]) pgroup.step.append([[self.count]])Thursday, January 5, 2012
  • 13. def function(self): """ Stores activity submatrices from recordings file per node to 3D array and returns reshaped 2D version of it. """ x = self.x y = self.y size = self.size nnames = self.nnames array = np.zeros((len(nnames), size, size)) with openFile(self.path, r) as file: for i, nname in enumerate(nnames): node = file.getNode(nname) array[i, :, :] = node.data.read(self.cnt)[0, x : x + size, y : y + size] return array.reshape(size, size * len(nnames))Thursday, January 5, 2012
  • 14. Useful Programs • HDFView or ViTables • h5dump • hdf5read, hdf5info (MATLAB) — Thursday, January 5, 2012