SlideShare a Scribd company logo
1 of 43
Benchy
Lightweight performance benchmarking framework for
Python scripts
Marcel Caraciolo
@marcelcaraciolo
Developer, scientist, contributor to the Crab recsys project,
has worked with Python for 6 years, interested in mobile,
education, machine learning and dataaaaa!
Recife, Brazil - http://aimotion.blogspot.com
About me
Co-founder of Crab - Python recsys library
Chief Scientist at Atepassar, an e-learning social network
Co-Founder and Instructor of PyCursos, teaching Python on-line
Co-Founder of Pingmind, on-line infrastructure for MOOCs
Interested in Python, mobile, e-learning and machine learning!
Why do we test ?
Freedom from fear
Testing for
performance
What made my
code slower ?
me
Solutions ?
In	
  [1]:	
  def	
  f(x):
	
  	
  	
  ...:	
  	
  	
  	
  	
  return	
  x*x
	
  	
  	
  ...:	
  
In	
  [2]:	
  %timeit	
  for	
  x	
  in	
  range
(100):	
  f(x)
100000	
  loops,	
  best	
  of	
  3:	
  20.3	
  us	
  
per	
  loop
Stop. Help is near
Performance benchmarks to compare several alternative Python
implementations
Generates graphs using matplotlib
Memory consumption and performance timing available
https://github.com/python-recsys/benchy
Performance
benchmarks
Writing benchmarks
$ easy_install -U benchy
# pip install -U benchy
Writing benchmarks
from	
  benchy.api	
  import	
  Benchmark
common_setup	
  =	
  ""
statement	
  =	
  "lst	
  =	
  ['i'	
  for	
  x	
  in	
  range(100000)]"
benchmark1	
  =	
  Benchmark(statement,	
  common_setup,	
  name=	
  "range")
statement	
  =	
  "lst	
  =	
  ['i'	
  for	
  x	
  in	
  xrange(100000)]"
benchmark2	
  =	
  Benchmark(statement,	
  common_setup,	
  name=	
  "xrange")
statement	
  =	
  "lst	
  =	
  ['i']	
  *	
  100000"
benchmark3	
  =	
  Benchmark(statement,	
  common_setup,	
  name=	
  "range")
Use them in your
workflow
[1]:	
  print	
  benchmark1.run()
{'memory':	
  {'repeat':	
  3,
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  'success':	
  True,
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  'units':	
  'MB',
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  'usage':	
  2.97265625},
	
  'runtime':	
  {'loops':	
  100,
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  'repeat':	
  3,
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  'success':	
  True,
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  'timing':	
  7.5653696060180664,
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  'units':	
  'ms'}}
Same code as %timeit
and %memit
Beautiful reports
rst_text	
  =	
  benchmark1.to_rst(results)
Benchmark suite
from	
  benchy.api	
  import	
  BenchmarkSuite
suite	
  =	
  BenchmarkSuite()
suite.append(benchmark1)
suite.append(benchmark2)
suite.append(benchmark3)
Run the benchmarks
from	
  benchy.api	
  import	
  BenchmarkRunner
runner	
  =	
  BenchmarkRunner(benchmarks=suite,	
  tmp_dir='.',	
  
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  name=	
  'List	
  Allocation	
  Benchmark')
n_benchs,	
  results	
  =	
  runner.run()
Which one is the fastest?
{Benchmark('list	
  with	
  "*"'):
	
  	
  	
  	
  {'runtime':	
  {'timing':	
  0.47582697868347168,	
  'repeat':	
  3,	
  'success':	
  True,	
  'loops':	
  1000,	
  
'timeBaselines':	
  1.0,	
  'units':	
  'ms'},
	
  	
  	
  	
  'memory':	
  {'usage':	
  0.3828125,	
  'units':	
  'MB',	
  'repeat':	
  3,	
  'success':	
  True}},
Benchmark('list	
  with	
  xrange'):
	
  	
  	
  	
  {'runtime':	
  {'timing':	
  5.623779296875,	
  'repeat':	
  3,	
  'success':	
  True,	
  'loops':	
  100,	
  
'timeBaselines':	
  11.818958463504936,	
  'units':	
  'ms'},
	
  	
  	
  	
  'memory':	
  {'usage':	
  0.71484375,	
  'units':	
  'MB',	
  'repeat':	
  3,	
  'success':	
  True}},
Benchmark('list	
  with	
  range'):	
  {
	
  	
  	
  	
  'runtime':	
  {'timing':	
  6.5933513641357422,	
  'repeat':	
  3,	
  'success':	
  True,	
  'loops':	
  100,	
  
'timeBaselines':	
  13.856615239384636,	
  'units':	
  'ms'},
	
  	
  	
  	
  'memory':	
  {'usage':	
  2.2109375,	
  'units':	
  'MB',	
  'repeat':	
  3,	
  'success':	
  True}}}
Plot relative
fig	
  =	
  runner.plot_relative(results,	
  horizontal=True)
plt.savefig('%s_r.png'	
  %	
  runner.name,	
  bbox_inches='tight')
Plot absolute
runner.plot_absolute(results,	
  horizontal=False)
plt.savefig('%s.png'	
  %	
  runner.name)	
  #	
  bbox_inches='tight')
Full report
rst_text	
  =	
  runner.to_rst(results,	
  runner.name	
  +	
  'png',
	
  	
  	
  	
  	
  	
  	
  	
  runner.name	
  +	
  '_r.png')
with	
  open('teste.rst',	
  'w')	
  as	
  f:
	
  	
  	
  	
  	
  	
  	
  	
  f.write(rst_text)
Full report
Full report
Why ?
Benchmark pairwise functions in the Crab recsys library
http://aimotion.blogspot.com.br/2013/03/performing-runtime-benchmarks-with.html
Get involved
Create the benchmarks as TestCases
Automatically discover benchmark files and run them like %nose.test()
More setup and teardown control
Group benchmarks on the same graph
Improvements
Added Database Handler
Added Git Support
Added New Runner
Run benchmarks
db.py
import	
  sqlite3
	
  
	
  
class	
  BenchmarkDb(object):
	
  	
  	
  	
  """
	
  	
  	
  	
  Persistence	
  handler	
  for	
  bechmark	
  results
	
  	
  	
  	
  """
	
  	
  	
  	
  def	
  _create_tables(self):
	
  	
  	
  	
  	
  	
  	
  	
  self._cursor.execute("drop	
  table	
  if	
  exists	
  benchmarksuites")
	
  	
  	
  	
  	
  	
  	
  	
  self._cursor.execute("drop	
  table	
  if	
  exists	
  benchmarks")
	
  	
  	
  	
  	
  	
  	
  	
  self._cursor.execute("drop	
  table	
  if	
  exists	
  results")
	
  	
  	
  	
  	
  	
  	
  	
  ...	
  	
  
	
  	
  	
  	
  	
  	
  	
  	
  self._cursor.execute('CREATE	
  TABLE	
  	
  
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  benchmarks(checksum	
  text	
  PRIMARY	
  KEY,	
  
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  name	
  text,	
  description	
  text,	
  suite_id	
  integer,	
  	
  
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  FOREIGN	
  KEY(suite_id)	
  REFERENCES	
  benchmarksuites(id))')
	
  
	
  	
  	
  	
  	
  	
  	
  	
  self._cursor.execute('CREATE	
  TABLE	
  results(id	
  integer	
  
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  PRIMARY	
  KEY	
  AUTOINCREMENT,	
  checksum	
  text,	
  
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  timestamp	
  timestamp,	
  ncalls	
  text,	
  timing	
  float,	
  traceback	
  text,
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  FOREIGN	
  KEY(checksum)	
  REFERENCES	
  benchmarks(checksum))')
	
  
	
  	
  	
  	
  	
  	
  	
  	
  self._con.commit()
	
  
	
  	
  	
  	
  def	
  write_benchmark(self,	
  bm,	
  suite=None):
	
  	
  	
  	
  	
  	
  	
  	
  if	
  suite	
  is	
  not	
  None:
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  self._cursor.execute('SELECT	
  id	
  FROM	
  benchmarksuites	
  
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  where	
  name	
  =	
  "%s"'	
  %	
  suite.name)
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  row	
  =	
  self._cursor.fetchone()
	
  	
  	
  	
  	
  	
  	
  	
  else:
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  row	
  =	
  None
	
  
	
  	
  	
  	
  	
  	
  	
  	
  if	
  row	
  ==	
  None:
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  self._cursor.execute('INSERT	
  INTO	
  benchmarks	
  VALUES	
  (?,	
  ?,	
  ?,	
  ?)',
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  (bm.checksum,	
  bm.name,	
  bm.description,	
  None))
	
  	
  	
  	
  	
  	
  	
  	
  else:
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  self._cursor.execute('INSERT	
  INTO	
  benchmarks	
  VALUES	
  (?,	
  ?,	
  ?,	
  ?)',
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  (bm.checksum,	
  bm.name,	
  bm.description,	
  row[0]))
Improvements
Added Database Handler
Added Git Support
Added New Runner
Run benchmarks
Git Repo
class	
  GitRepository(Repository):
	
  	
  	
  	
  """
	
  	
  	
  	
  Read	
  some	
  basic	
  statistics	
  about	
  a	
  git	
  repository
	
  	
  	
  	
  """
	
  
	
  	
  	
  	
  def	
  __init__(self,	
  repo_path):
	
  	
  	
  	
  	
  	
  	
  	
  self.repo_path	
  =	
  repo_path
	
  	
  	
  	
  	
  	
  	
  	
  self.git	
  =	
  _git_command(self.repo_path)
	
  	
  	
  	
  	
  	
  	
  	
  (self.shas,	
  self.messages,
	
  	
  	
  	
  	
  	
  	
  	
  	
  self.timestamps,	
  self.authors)	
  =	
  self._parse_commit_log()
[('d87fdf2', datetime.datetime(2013, 3, 22, 16, 55, 38)), ('a90a449', datetime.datetime(2013, 3, 22, 16, 54, 36)),
('fe66a86', datetime.datetime(2013, 3, 22, 16, 51, 2)), ('bea6b21', datetime.datetime(2013, 3, 22, 13, 14, 22)),
('bde5e63', datetime.datetime(2013, 3, 22, 5, 2, 56)), ('89634f6', datetime.datetime(2013, 3, 20, 4, 16, 19))]
Git Repo
class	
  BenchmarkRepository(object):
	
  	
  	
  	
  """
	
  	
  	
  	
  Manage	
  an	
  isolated	
  copy	
  of	
  a	
  repository	
  for	
  benchmarking
	
  	
  	
  	
  """
	
  	
  	
  	
  ...
	
  
	
  	
  	
  	
  def	
  _copy_repo(self):
	
  	
  	
  	
  	
  	
  	
  	
  if	
  os.path.exists(self.target_dir):
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  print	
  'Deleting	
  %s	
  first'	
  %	
  self.target_dir
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  #	
  response	
  =	
  raw_input('%s	
  exists,	
  delete?	
  y/n'	
  %	
  self.target_dir)
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  #	
  if	
  response	
  ==	
  'n':
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  #	
  	
  	
  	
  	
  raise	
  Exception('foo')
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  cmd	
  =	
  'rm	
  -­‐rf	
  %s'	
  %	
  self.target_dir
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  print	
  cmd
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  os.system(cmd)
	
  
	
  	
  	
  	
  	
  	
  	
  	
  self._clone(self.target_dir_tmp,	
  self.target_dir)
	
  	
  	
  	
  	
  	
  	
  	
  self._prep()
	
  	
  	
  	
  	
  	
  	
  	
  self._copy_benchmark_scripts_and_deps()
	
  
	
  	
  	
  	
  def	
  _clone(self,	
  source,	
  target):
	
  	
  	
  	
  	
  	
  	
  	
  cmd	
  =	
  'git	
  clone	
  %s	
  %s'	
  %	
  (source,	
  target)
	
  	
  	
  	
  	
  	
  	
  	
  print	
  cmd
	
  	
  	
  	
  	
  	
  	
  	
  os.system(cmd)
	
  
	
  	
  	
  	
  def	
  _copy_benchmark_scripts_and_deps(self):
	
  	
  	
  	
  	
  	
  	
  	
  pth,	
  _	
  =	
  os.path.split(os.path.abspath(__file__))
	
  	
  	
  	
  	
  	
  	
  	
  deps	
  =	
  [os.path.join(pth,	
  'run_benchmarks.py')]
	
  	
  	
  	
  	
  	
  	
  	
  if	
  self.dependencies	
  is	
  not	
  None:
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  deps.extend(self.dependencies)
	
  
	
  	
  	
  	
  	
  	
  	
  	
  for	
  dep	
  in	
  deps:
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  cmd	
  =	
  'cp	
  %s	
  %s'	
  %	
  (dep,	
  self.target_dir)
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  print	
  cmd
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  proc	
  =	
  subprocess.Popen(cmd,	
  shell=True)
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  proc.wait()
Improvements
Added Database Handler
Added Git Support
Added New Runner
Run benchmarks
New Runner
	
  
class	
  BenchmarkGitRunner(BenchmarkRunner):
	
  	
  ...
	
  	
  	
  
	
  	
  	
  	
  def	
  _register_benchmarks(self):
	
  	
  	
  	
  	
  	
  	
  	
  ex_benchmarks	
  =	
  self.db.get_benchmarks()
	
  	
  	
  	
  	
  	
  	
  	
  db_checksums	
  =	
  set(ex_benchmarks.index)
	
  	
  	
  	
  	
  	
  	
  	
  for	
  bm	
  in	
  self.benchmarks:
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  if	
  bm.checksum	
  in	
  db_checksums:
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  self.db.update_name(bm)
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  else:
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  print	
  'Writing	
  new	
  benchmark	
  %s,	
  %s'	
  %	
  (bm.name,	
  
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  bm.checksum)
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  self.db.write_benchmark(bm)
	
  
New runner
	
  
class	
  BenchmarkGitRunner(BenchmarkRunner):
	
  	
  ...
	
  	
  	
  
	
  	
  	
  	
  def	
  _run_revision(self,	
  rev):
	
  	
  	
  	
  	
  	
  	
  	
  need_to_run	
  =	
  self._get_benchmarks_for_rev(rev)
	
  
	
  	
  	
  	
  	
  	
  	
  	
  if	
  not	
  need_to_run:
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  print	
  'No	
  benchmarks	
  need	
  running	
  at	
  %s'	
  %	
  rev
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  return	
  0,	
  {}
	
  
	
  	
  	
  	
  	
  	
  	
  	
  print	
  'Running	
  %d	
  benchmarks	
  for	
  revision	
  %s'	
  %	
  (len(need_to_run),	
  rev)
	
  	
  	
  	
  	
  	
  	
  	
  for	
  bm	
  in	
  need_to_run:
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  print	
  bm.name
	
  
	
  	
  	
  	
  	
  	
  	
  	
  self.bench_repo.switch_to_revision(rev)
	
  
	
  	
  	
  	
  	
  	
  	
  	
  pickle_path	
  =	
  os.path.join(self.tmp_dir,	
  'benchmarks.pickle')
	
  	
  	
  	
  	
  	
  	
  	
  results_path	
  =	
  os.path.join(self.tmp_dir,	
  'results.pickle')
	
  	
  	
  	
  	
  	
  	
  	
  if	
  os.path.exists(results_path):
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  os.remove(results_path)
	
  	
  	
  	
  	
  	
  	
  	
  pickle.dump(need_to_run,	
  open(pickle_path,	
  'w'))
	
  
	
  	
  	
  	
  	
  	
  	
  	
  #	
  run	
  the	
  process
	
  	
  	
  	
  	
  	
  	
  	
  cmd	
  =	
  'python	
  %s/run_benchmarks.py	
  %s	
  %s'	
  %	
  (pickle_path,	
  results_path)
	
  	
  	
  	
  	
  	
  	
  	
  print	
  cmd
	
  	
  	
  	
  	
  	
  	
  	
  proc	
  =	
  subprocess.Popen(cmd,	
  stdout=subprocess.PIPE,
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  stderr=subprocess.PIPE,
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  shell=True,
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  cwd=self.tmp_dir)
	
  	
  	
  	
  	
  	
  	
  	
  stdout,	
  stderr	
  =	
  proc.communicate()
	
  
New runner	
  
class	
  BenchmarkGitRunner(BenchmarkRunner):
	
  	
  ...
	
  	
  	
  
	
  	
  	
  	
  def	
  _run_revision(self,	
  rev):
	
  	
  	
  	
  	
  	
  	
  	
  need_to_run	
  =	
  self._get_benchmarks_for_rev(rev)
	
  
	
  	
  	
  	
  	
  	
  	
  	
  if	
  not	
  need_to_run:
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  print	
  'No	
  benchmarks	
  need	
  running	
  at	
  %s'	
  %	
  rev
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  return	
  0,	
  {}
	
  
	
  	
  	
  	
  	
  	
  	
  	
  print	
  'Running	
  %d	
  benchmarks	
  for	
  revision	
  %s'	
  %	
  (len(need_to_run),	
  rev)
	
  	
  	
  	
  	
  	
  	
  	
  for	
  bm	
  in	
  need_to_run:
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  print	
  bm.name
	
  
	
  	
  	
  	
  	
  	
  	
  	
  self.bench_repo.switch_to_revision(rev)
	
  	
  	
  	
  	
  	
  	
  	
  #	
  run	
  the	
  process
	
  	
  	
  	
  	
  	
  	
  	
  cmd	
  =	
  'python	
  %s/run_benchmarks.py	
  %s	
  %s'	
  %	
  (pickle_path,	
  results_path)
	
  	
  	
  	
  	
  	
  	
  	
  print	
  cmd
	
  	
  	
  	
  	
  	
  	
  	
  proc	
  =	
  subprocess.Popen(cmd,	
  stdout=subprocess.PIPE,
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  stderr=subprocess.PIPE,
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  shell=True,
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  cwd=self.tmp_dir)
	
  	
  	
  	
  	
  	
  	
  	
  stdout,	
  stderr	
  =	
  proc.communicate()
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  if	
  stderr:
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  if	
  ("object	
  has	
  no	
  attribute"	
  in	
  stderr	
  or
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  'ImportError'	
  in	
  stderr):
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  print	
  stderr
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  print	
  'HARD	
  CLEANING!'
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  self.bench_repo.hard_clean()
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  print	
  stderr
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  if	
  not	
  os.path.exists(results_path):
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  print	
  'Failed	
  for	
  revision	
  %s'	
  %	
  rev
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  return	
  len(need_to_run),	
  {}
	
  	
  	
  	
  	
  	
  	
  	
  results	
  =	
  pickle.load(open(results_path,	
  'r'))
Improvements
Added Database Handler
Added Git Support
Added New Runner
Run benchmarks
Running
from	
  benchmark	
  import	
  Benchmark,	
  BenchmarkRepository,	
  BenchmarkGitRunner
try:
	
  	
  	
  	
  REPO_PATH	
  =	
  config.get('setup',	
  'repo_path')
	
  	
  	
  	
  REPO_URL	
  =	
  config.get('setup',	
  'repo_url')
	
  	
  	
  	
  DB_PATH	
  =	
  config.get('setup',	
  'db_path')
	
  	
  	
  	
  TMP_DIR	
  =	
  config.get('setup',	
  'tmp_dir')
except:
	
  	
  	
  	
  REPO_PATH	
  =	
  os.path.abspath(os.path.join(os.path.dirname(__file__),	
  
"../"))
	
  	
  	
  	
  REPO_URL	
  =	
  'git@github.com:python-­‐recsys/crab.git'
	
  	
  	
  	
  DB_PATH	
  =	
  os.path.join(REPO_PATH,	
  'suite/benchmarks.db')
	
  	
  	
  	
  TMP_DIR	
  =	
  os.path.join(HOME,	
  'tmp/base_benchy/')
PREPARE	
  =	
  """
python	
  setup.py	
  clean
"""
BUILD	
  =	
  """
python	
  setup.py	
  build_ext	
  -­‐-­‐inplace
"""
repo	
  =	
  BenchmarkRepository(REPO_PATH,	
  REPO_URL,	
  DB_PATH,	
  TMP_DIR)
Running
	
  	
  	
  	
  common_setup	
  =	
  """
	
  	
  	
  	
  	
  import	
  numpy
	
  	
  	
  	
  	
  from	
  crab.metrics	
  import	
  cosine_distances
	
  	
  	
  	
  	
  X	
  =	
  numpy.random.uniform(1,5,(1000,))
	
  	
  	
  	
  """
	
  
	
  	
  	
  	
  bench	
  =	
  Benchmark(statement,	
  setup_bk1,	
  name="Crab	
  
Cosine")
	
  
	
  	
  	
  	
  suite	
  =	
  BenchmarkSuite()
	
  	
  	
  	
  suite.append(bench)	
  	
  	
  	
  	
  
	
  	
  	
  	
  statement	
  =	
  "cosine_distances(X,	
  X)"
	
  
	
  	
  	
  	
  runner	
  =	
  BenchmarkGitRunner(suite,	
  '.',	
  'Absolute	
  
timing	
  in	
  ms')
	
  	
  	
  	
  n_benchs,	
  results	
  =	
  runner.run()
	
  
	
  	
  	
  	
  runner.plot_history(results)
	
  	
  	
  	
  plt.show()
Improvements
Historical commits from version control are now
benchmarked
Working now:
Module detection
by_module	
  =	
  {}
benchmarks	
  =	
  []
modules	
  =	
  ['metrics',
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  'recommenders',
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  'similarities']
for	
  modname	
  in	
  modules:
	
  	
  	
  	
  ref	
  =	
  __import__(modname)
	
  	
  	
  	
  by_module[modname]	
  =	
  [v	
  for	
  v	
  in	
  ref.__dict__.values()
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  if	
  isinstance(v,	
  Benchmark)]
	
  	
  	
  	
  benchmarks.extend(by_module[modname])
for	
  bm	
  in	
  benchmarks:
	
  	
  	
  	
  assert(bm.name	
  is	
  not	
  None)
https://github.com/python-recsys/benchy
Forks and pull requests are welcome!
Benchy
Lightweight performance benchmarking framework for
Python scripts
Marcel Caraciolo
@marcelcaraciolo
Developer, Scientist, contributor to the Crab recsys project,
has worked with Python for 6 years, interested in mobile,
education, machine learning and dataaaaa!
Recife, Brazil - http://aimotion.blogspot.com

More Related Content

What's hot

Persisting Data on SQLite using Room
Persisting Data on SQLite using RoomPersisting Data on SQLite using Room
Persisting Data on SQLite using RoomNelson Glauber Leal
 
Building Real Time Systems on MongoDB Using the Oplog at Stripe
Building Real Time Systems on MongoDB Using the Oplog at StripeBuilding Real Time Systems on MongoDB Using the Oplog at Stripe
Building Real Time Systems on MongoDB Using the Oplog at StripeMongoDB
 
Building Real Time Systems on MongoDB Using the Oplog at Stripe
Building Real Time Systems on MongoDB Using the Oplog at StripeBuilding Real Time Systems on MongoDB Using the Oplog at Stripe
Building Real Time Systems on MongoDB Using the Oplog at StripeStripe
 
CONFidence 2015: DTrace + OSX = Fun - Andrzej Dyjak
CONFidence 2015: DTrace + OSX = Fun - Andrzej Dyjak   CONFidence 2015: DTrace + OSX = Fun - Andrzej Dyjak
CONFidence 2015: DTrace + OSX = Fun - Andrzej Dyjak PROIDEA
 
Python 내장 함수
Python 내장 함수Python 내장 함수
Python 내장 함수용 최
 
Presentation of the new OpenERP API. Raphael Collet, OpenERP
Presentation of the new OpenERP API. Raphael Collet, OpenERPPresentation of the new OpenERP API. Raphael Collet, OpenERP
Presentation of the new OpenERP API. Raphael Collet, OpenERPOdoo
 
Event Sourcing and Functional Programming
Event Sourcing and Functional ProgrammingEvent Sourcing and Functional Programming
Event Sourcing and Functional ProgrammingGlobalLogic Ukraine
 
Node 관계형 데이터베이스_바인딩
Node 관계형 데이터베이스_바인딩Node 관계형 데이터베이스_바인딩
Node 관계형 데이터베이스_바인딩HyeonSeok Choi
 
How to Bring Common UI Patterns to ADF
How to Bring Common UI Patterns to ADF How to Bring Common UI Patterns to ADF
How to Bring Common UI Patterns to ADF Luc Bors
 
Backbone.js: Run your Application Inside The Browser
Backbone.js: Run your Application Inside The BrowserBackbone.js: Run your Application Inside The Browser
Backbone.js: Run your Application Inside The BrowserHoward Lewis Ship
 
Formacion en movilidad: Conceptos de desarrollo en iOS (IV)
Formacion en movilidad: Conceptos de desarrollo en iOS (IV) Formacion en movilidad: Conceptos de desarrollo en iOS (IV)
Formacion en movilidad: Conceptos de desarrollo en iOS (IV) Mobivery
 
Dm adapter RubyConf.TW
Dm adapter RubyConf.TWDm adapter RubyConf.TW
Dm adapter RubyConf.TWcodingforrent
 
Map/reduce, geospatial indexing, and other cool features (Kristina Chodorow)
Map/reduce, geospatial indexing, and other cool features (Kristina Chodorow)Map/reduce, geospatial indexing, and other cool features (Kristina Chodorow)
Map/reduce, geospatial indexing, and other cool features (Kristina Chodorow)MongoSF
 
Node js mongodriver
Node js mongodriverNode js mongodriver
Node js mongodriverchristkv
 
R (Shiny Package) - Server Side Code for Decision Support System
R (Shiny Package) - Server Side Code for Decision Support SystemR (Shiny Package) - Server Side Code for Decision Support System
R (Shiny Package) - Server Side Code for Decision Support SystemMaithreya Chakravarthula
 
R (Shiny Package) - UI Side Script for Decision Support System
R (Shiny Package) - UI Side Script for Decision Support SystemR (Shiny Package) - UI Side Script for Decision Support System
R (Shiny Package) - UI Side Script for Decision Support SystemMaithreya Chakravarthula
 

What's hot (20)

Persisting Data on SQLite using Room
Persisting Data on SQLite using RoomPersisting Data on SQLite using Room
Persisting Data on SQLite using Room
 
Building Real Time Systems on MongoDB Using the Oplog at Stripe
Building Real Time Systems on MongoDB Using the Oplog at StripeBuilding Real Time Systems on MongoDB Using the Oplog at Stripe
Building Real Time Systems on MongoDB Using the Oplog at Stripe
 
Building Real Time Systems on MongoDB Using the Oplog at Stripe
Building Real Time Systems on MongoDB Using the Oplog at StripeBuilding Real Time Systems on MongoDB Using the Oplog at Stripe
Building Real Time Systems on MongoDB Using the Oplog at Stripe
 
CONFidence 2015: DTrace + OSX = Fun - Andrzej Dyjak
CONFidence 2015: DTrace + OSX = Fun - Andrzej Dyjak   CONFidence 2015: DTrace + OSX = Fun - Andrzej Dyjak
CONFidence 2015: DTrace + OSX = Fun - Andrzej Dyjak
 
Python 내장 함수
Python 내장 함수Python 내장 함수
Python 내장 함수
 
Presentation of the new OpenERP API. Raphael Collet, OpenERP
Presentation of the new OpenERP API. Raphael Collet, OpenERPPresentation of the new OpenERP API. Raphael Collet, OpenERP
Presentation of the new OpenERP API. Raphael Collet, OpenERP
 
Event Sourcing and Functional Programming
Event Sourcing and Functional ProgrammingEvent Sourcing and Functional Programming
Event Sourcing and Functional Programming
 
Node 관계형 데이터베이스_바인딩
Node 관계형 데이터베이스_바인딩Node 관계형 데이터베이스_바인딩
Node 관계형 데이터베이스_바인딩
 
How to Bring Common UI Patterns to ADF
How to Bring Common UI Patterns to ADF How to Bring Common UI Patterns to ADF
How to Bring Common UI Patterns to ADF
 
Backbone.js: Run your Application Inside The Browser
Backbone.js: Run your Application Inside The BrowserBackbone.js: Run your Application Inside The Browser
Backbone.js: Run your Application Inside The Browser
 
Formacion en movilidad: Conceptos de desarrollo en iOS (IV)
Formacion en movilidad: Conceptos de desarrollo en iOS (IV) Formacion en movilidad: Conceptos de desarrollo en iOS (IV)
Formacion en movilidad: Conceptos de desarrollo en iOS (IV)
 
Javascript
JavascriptJavascript
Javascript
 
Dm adapter RubyConf.TW
Dm adapter RubyConf.TWDm adapter RubyConf.TW
Dm adapter RubyConf.TW
 
Map/reduce, geospatial indexing, and other cool features (Kristina Chodorow)
Map/reduce, geospatial indexing, and other cool features (Kristina Chodorow)Map/reduce, geospatial indexing, and other cool features (Kristina Chodorow)
Map/reduce, geospatial indexing, and other cool features (Kristina Chodorow)
 
Node js mongodriver
Node js mongodriverNode js mongodriver
Node js mongodriver
 
I os 04
I os 04I os 04
I os 04
 
Dm adapter
Dm adapterDm adapter
Dm adapter
 
R (Shiny Package) - Server Side Code for Decision Support System
R (Shiny Package) - Server Side Code for Decision Support SystemR (Shiny Package) - Server Side Code for Decision Support System
R (Shiny Package) - Server Side Code for Decision Support System
 
R (Shiny Package) - UI Side Script for Decision Support System
R (Shiny Package) - UI Side Script for Decision Support SystemR (Shiny Package) - UI Side Script for Decision Support System
R (Shiny Package) - UI Side Script for Decision Support System
 
Zend framework service
Zend framework serviceZend framework service
Zend framework service
 

Viewers also liked

Learning Pool Social Care Seminar
Learning Pool Social Care SeminarLearning Pool Social Care Seminar
Learning Pool Social Care SeminarPaul McElvaney
 
Migration Intro
Migration IntroMigration Intro
Migration Introdazza50
 
Continuous Deployment at Disqus (Pylons Minicon)
Continuous Deployment at Disqus (Pylons Minicon)Continuous Deployment at Disqus (Pylons Minicon)
Continuous Deployment at Disqus (Pylons Minicon)zeeg
 
Managing multigenerations in the Barbadian workspace[1]
Managing multigenerations in the Barbadian workspace[1]Managing multigenerations in the Barbadian workspace[1]
Managing multigenerations in the Barbadian workspace[1]Ian Walcott-Skinner
 
Learning Pool's Mark Lynch and Kevin Gallagher on 'Next Generation DLE'.
Learning Pool's Mark Lynch and Kevin Gallagher on 'Next Generation DLE'.Learning Pool's Mark Lynch and Kevin Gallagher on 'Next Generation DLE'.
Learning Pool's Mark Lynch and Kevin Gallagher on 'Next Generation DLE'.Paul McElvaney
 
Learning Pool: Yorkshire & Humber Learning Platform Adult Social Care: E-lear...
Learning Pool: Yorkshire & Humber Learning Platform Adult Social Care: E-lear...Learning Pool: Yorkshire & Humber Learning Platform Adult Social Care: E-lear...
Learning Pool: Yorkshire & Humber Learning Platform Adult Social Care: E-lear...Paul McElvaney
 
Describing exercise
Describing exerciseDescribing exercise
Describing exerciseSussan Roo
 
Ued案例
Ued案例Ued案例
Ued案例yamingd
 
Learning Pool and Carers
Learning Pool and Carers Learning Pool and Carers
Learning Pool and Carers Paul McElvaney
 
נוכחות אונליין - המכללה לעסקים קטנים, המכללה למנהל
נוכחות אונליין - המכללה לעסקים קטנים, המכללה למנהל    נוכחות אונליין - המכללה לעסקים קטנים, המכללה למנהל
נוכחות אונליין - המכללה לעסקים קטנים, המכללה למנהל Udi Salant
 
Hari
HariHari
Harislomb
 
Developing Policy for Emerging Technologies
Developing Policy for Emerging TechnologiesDeveloping Policy for Emerging Technologies
Developing Policy for Emerging TechnologiesLovisa Williams
 
Paper Based Student Enrollment - Disgrace to Education Technology
Paper Based Student Enrollment - Disgrace to Education TechnologyPaper Based Student Enrollment - Disgrace to Education Technology
Paper Based Student Enrollment - Disgrace to Education TechnologyJinal Jhaveri
 
Workshop Colin 2 Feb 2009
Workshop Colin 2 Feb 2009Workshop Colin 2 Feb 2009
Workshop Colin 2 Feb 2009Sjef Kerkhofs
 

Viewers also liked (20)

Learning Pool Social Care Seminar
Learning Pool Social Care SeminarLearning Pool Social Care Seminar
Learning Pool Social Care Seminar
 
Migration Intro
Migration IntroMigration Intro
Migration Intro
 
Continuous Deployment at Disqus (Pylons Minicon)
Continuous Deployment at Disqus (Pylons Minicon)Continuous Deployment at Disqus (Pylons Minicon)
Continuous Deployment at Disqus (Pylons Minicon)
 
Lecture 22
Lecture 22Lecture 22
Lecture 22
 
Managing multigenerations in the Barbadian workspace[1]
Managing multigenerations in the Barbadian workspace[1]Managing multigenerations in the Barbadian workspace[1]
Managing multigenerations in the Barbadian workspace[1]
 
Learning Pool's Mark Lynch and Kevin Gallagher on 'Next Generation DLE'.
Learning Pool's Mark Lynch and Kevin Gallagher on 'Next Generation DLE'.Learning Pool's Mark Lynch and Kevin Gallagher on 'Next Generation DLE'.
Learning Pool's Mark Lynch and Kevin Gallagher on 'Next Generation DLE'.
 
Learning Pool: Yorkshire & Humber Learning Platform Adult Social Care: E-lear...
Learning Pool: Yorkshire & Humber Learning Platform Adult Social Care: E-lear...Learning Pool: Yorkshire & Humber Learning Platform Adult Social Care: E-lear...
Learning Pool: Yorkshire & Humber Learning Platform Adult Social Care: E-lear...
 
Describing exercise
Describing exerciseDescribing exercise
Describing exercise
 
Ued案例
Ued案例Ued案例
Ued案例
 
Lecture 10
Lecture 10Lecture 10
Lecture 10
 
I gala premios peridotita
I gala premios peridotitaI gala premios peridotita
I gala premios peridotita
 
Learning Pool and Carers
Learning Pool and Carers Learning Pool and Carers
Learning Pool and Carers
 
נוכחות אונליין - המכללה לעסקים קטנים, המכללה למנהל
נוכחות אונליין - המכללה לעסקים קטנים, המכללה למנהל    נוכחות אונליין - המכללה לעסקים קטנים, המכללה למנהל
נוכחות אונליין - המכללה לעסקים קטנים, המכללה למנהל
 
Hari
HariHari
Hari
 
Developing Policy for Emerging Technologies
Developing Policy for Emerging TechnologiesDeveloping Policy for Emerging Technologies
Developing Policy for Emerging Technologies
 
Lecture 23
Lecture 23Lecture 23
Lecture 23
 
Divosa v1.3
Divosa v1.3Divosa v1.3
Divosa v1.3
 
Paper Based Student Enrollment - Disgrace to Education Technology
Paper Based Student Enrollment - Disgrace to Education TechnologyPaper Based Student Enrollment - Disgrace to Education Technology
Paper Based Student Enrollment - Disgrace to Education Technology
 
Workshop Colin 2 Feb 2009
Workshop Colin 2 Feb 2009Workshop Colin 2 Feb 2009
Workshop Colin 2 Feb 2009
 
Tema ii
Tema iiTema ii
Tema ii
 

Similar to Benchy, python framework for performance benchmarking of Python Scripts

Benchy: Lightweight framework for Performance Benchmarks
Benchy: Lightweight framework for Performance Benchmarks Benchy: Lightweight framework for Performance Benchmarks
Benchy: Lightweight framework for Performance Benchmarks Marcel Caraciolo
 
Viktor Tsykunov: Azure Machine Learning Service
Viktor Tsykunov: Azure Machine Learning ServiceViktor Tsykunov: Azure Machine Learning Service
Viktor Tsykunov: Azure Machine Learning ServiceLviv Startup Club
 
Designing REST API automation tests in Kotlin
Designing REST API automation tests in KotlinDesigning REST API automation tests in Kotlin
Designing REST API automation tests in KotlinDmitriy Sobko
 
Fun Teaching MongoDB New Tricks
Fun Teaching MongoDB New TricksFun Teaching MongoDB New Tricks
Fun Teaching MongoDB New TricksMongoDB
 
Protractor framework – how to make stable e2e tests for Angular applications
Protractor framework – how to make stable e2e tests for Angular applicationsProtractor framework – how to make stable e2e tests for Angular applications
Protractor framework – how to make stable e2e tests for Angular applicationsLudmila Nesvitiy
 
Programming with Python and PostgreSQL
Programming with Python and PostgreSQLProgramming with Python and PostgreSQL
Programming with Python and PostgreSQLPeter Eisentraut
 
Data visualization in python/Django
Data visualization in python/DjangoData visualization in python/Django
Data visualization in python/Djangokenluck2001
 
Inside PyMongo - MongoNYC
Inside PyMongo - MongoNYCInside PyMongo - MongoNYC
Inside PyMongo - MongoNYCMike Dirolf
 
Design Summit - Rails 4 Migration - Aaron Patterson
Design Summit - Rails 4 Migration - Aaron PattersonDesign Summit - Rails 4 Migration - Aaron Patterson
Design Summit - Rails 4 Migration - Aaron PattersonManageIQ
 
Azure machine learning service
Azure machine learning serviceAzure machine learning service
Azure machine learning serviceRuth Yakubu
 
MongoDB hearts Django? (Django NYC)
MongoDB hearts Django? (Django NYC)MongoDB hearts Django? (Django NYC)
MongoDB hearts Django? (Django NYC)Mike Dirolf
 
Art & music vs Google App Engine
Art & music vs Google App EngineArt & music vs Google App Engine
Art & music vs Google App Enginethomas alisi
 
Unsupervised Aspect Based Sentiment Analysis at Scale
Unsupervised Aspect Based Sentiment Analysis at ScaleUnsupervised Aspect Based Sentiment Analysis at Scale
Unsupervised Aspect Based Sentiment Analysis at ScaleAaron (Ari) Bornstein
 
Python Development (MongoSF)
Python Development (MongoSF)Python Development (MongoSF)
Python Development (MongoSF)Mike Dirolf
 
The Ring programming language version 1.6 book - Part 46 of 189
The Ring programming language version 1.6 book - Part 46 of 189The Ring programming language version 1.6 book - Part 46 of 189
The Ring programming language version 1.6 book - Part 46 of 189Mahmoud Samir Fayed
 
Nyc open-data-2015-andvanced-sklearn-expanded
Nyc open-data-2015-andvanced-sklearn-expandedNyc open-data-2015-andvanced-sklearn-expanded
Nyc open-data-2015-andvanced-sklearn-expandedVivian S. Zhang
 
pytest로 파이썬 코드 테스트하기
pytest로 파이썬 코드 테스트하기pytest로 파이썬 코드 테스트하기
pytest로 파이썬 코드 테스트하기Yeongseon Choe
 

Similar to Benchy, python framework for performance benchmarking of Python Scripts (20)

Benchy: Lightweight framework for Performance Benchmarks
Benchy: Lightweight framework for Performance Benchmarks Benchy: Lightweight framework for Performance Benchmarks
Benchy: Lightweight framework for Performance Benchmarks
 
Viktor Tsykunov: Azure Machine Learning Service
Viktor Tsykunov: Azure Machine Learning ServiceViktor Tsykunov: Azure Machine Learning Service
Viktor Tsykunov: Azure Machine Learning Service
 
Designing REST API automation tests in Kotlin
Designing REST API automation tests in KotlinDesigning REST API automation tests in Kotlin
Designing REST API automation tests in Kotlin
 
Fun Teaching MongoDB New Tricks
Fun Teaching MongoDB New TricksFun Teaching MongoDB New Tricks
Fun Teaching MongoDB New Tricks
 
Protractor framework – how to make stable e2e tests for Angular applications
Protractor framework – how to make stable e2e tests for Angular applicationsProtractor framework – how to make stable e2e tests for Angular applications
Protractor framework – how to make stable e2e tests for Angular applications
 
Programming with Python and PostgreSQL
Programming with Python and PostgreSQLProgramming with Python and PostgreSQL
Programming with Python and PostgreSQL
 
Data visualization in python/Django
Data visualization in python/DjangoData visualization in python/Django
Data visualization in python/Django
 
Inside PyMongo - MongoNYC
Inside PyMongo - MongoNYCInside PyMongo - MongoNYC
Inside PyMongo - MongoNYC
 
Design Summit - Rails 4 Migration - Aaron Patterson
Design Summit - Rails 4 Migration - Aaron PattersonDesign Summit - Rails 4 Migration - Aaron Patterson
Design Summit - Rails 4 Migration - Aaron Patterson
 
What's new in Django 1.2?
What's new in Django 1.2?What's new in Django 1.2?
What's new in Django 1.2?
 
Azure machine learning service
Azure machine learning serviceAzure machine learning service
Azure machine learning service
 
Deep Learning for Computer Vision: Software Frameworks (UPC 2016)
Deep Learning for Computer Vision: Software Frameworks (UPC 2016)Deep Learning for Computer Vision: Software Frameworks (UPC 2016)
Deep Learning for Computer Vision: Software Frameworks (UPC 2016)
 
MongoDB hearts Django? (Django NYC)
MongoDB hearts Django? (Django NYC)MongoDB hearts Django? (Django NYC)
MongoDB hearts Django? (Django NYC)
 
Art & music vs Google App Engine
Art & music vs Google App EngineArt & music vs Google App Engine
Art & music vs Google App Engine
 
Unsupervised Aspect Based Sentiment Analysis at Scale
Unsupervised Aspect Based Sentiment Analysis at ScaleUnsupervised Aspect Based Sentiment Analysis at Scale
Unsupervised Aspect Based Sentiment Analysis at Scale
 
Python Development (MongoSF)
Python Development (MongoSF)Python Development (MongoSF)
Python Development (MongoSF)
 
The Ring programming language version 1.6 book - Part 46 of 189
The Ring programming language version 1.6 book - Part 46 of 189The Ring programming language version 1.6 book - Part 46 of 189
The Ring programming language version 1.6 book - Part 46 of 189
 
Nyc open-data-2015-andvanced-sklearn-expanded
Nyc open-data-2015-andvanced-sklearn-expandedNyc open-data-2015-andvanced-sklearn-expanded
Nyc open-data-2015-andvanced-sklearn-expanded
 
Django Pro ORM
Django Pro ORMDjango Pro ORM
Django Pro ORM
 
pytest로 파이썬 코드 테스트하기
pytest로 파이썬 코드 테스트하기pytest로 파이썬 코드 테스트하기
pytest로 파이썬 코드 테스트하기
 

More from Marcel Caraciolo

Como interpretar seu próprio genoma com Python
Como interpretar seu próprio genoma com PythonComo interpretar seu próprio genoma com Python
Como interpretar seu próprio genoma com PythonMarcel Caraciolo
 
Joblib: Lightweight pipelining for parallel jobs (v2)
Joblib:  Lightweight pipelining for parallel jobs (v2)Joblib:  Lightweight pipelining for parallel jobs (v2)
Joblib: Lightweight pipelining for parallel jobs (v2)Marcel Caraciolo
 
Construindo softwares de bioinformática para análises clínicas : Desafios e...
Construindo softwares  de bioinformática  para análises clínicas : Desafios e...Construindo softwares  de bioinformática  para análises clínicas : Desafios e...
Construindo softwares de bioinformática para análises clínicas : Desafios e...Marcel Caraciolo
 
Como Python ajudou a automatizar o nosso laboratório v.2
Como Python ajudou a automatizar o nosso laboratório v.2Como Python ajudou a automatizar o nosso laboratório v.2
Como Python ajudou a automatizar o nosso laboratório v.2Marcel Caraciolo
 
Como Python pode ajudar na automação do seu laboratório
Como Python pode ajudar na automação do  seu laboratórioComo Python pode ajudar na automação do  seu laboratório
Como Python pode ajudar na automação do seu laboratórioMarcel Caraciolo
 
Python on Science ? Yes, We can.
Python on Science ?   Yes, We can.Python on Science ?   Yes, We can.
Python on Science ? Yes, We can.Marcel Caraciolo
 
Oficina Python: Hackeando a Web com Python 3
Oficina Python: Hackeando a Web com Python 3Oficina Python: Hackeando a Web com Python 3
Oficina Python: Hackeando a Web com Python 3Marcel Caraciolo
 
Recommender Systems with Ruby (adding machine learning, statistics, etc)
Recommender Systems with Ruby (adding machine learning, statistics, etc)Recommender Systems with Ruby (adding machine learning, statistics, etc)
Recommender Systems with Ruby (adding machine learning, statistics, etc)Marcel Caraciolo
 
Opensource - Como começar e dá dinheiro ?
Opensource - Como começar e dá dinheiro ?Opensource - Como começar e dá dinheiro ?
Opensource - Como começar e dá dinheiro ?Marcel Caraciolo
 
Python e 10 motivos por que devo conhece-la ?
Python e 10 motivos por que devo conhece-la ?Python e 10 motivos por que devo conhece-la ?
Python e 10 motivos por que devo conhece-la ?Marcel Caraciolo
 
GeoMapper, Python Script for Visualizing Data on Social Networks with Geo-loc...
GeoMapper, Python Script for Visualizing Data on Social Networks with Geo-loc...GeoMapper, Python Script for Visualizing Data on Social Networks with Geo-loc...
GeoMapper, Python Script for Visualizing Data on Social Networks with Geo-loc...Marcel Caraciolo
 
Construindo Sistemas de Recomendação com Python
Construindo Sistemas de Recomendação com PythonConstruindo Sistemas de Recomendação com Python
Construindo Sistemas de Recomendação com PythonMarcel Caraciolo
 
Python, A pílula Azul da programação
Python, A pílula Azul da programaçãoPython, A pílula Azul da programação
Python, A pílula Azul da programaçãoMarcel Caraciolo
 
Construindo Soluções Científicas com Big Data & MapReduce
Construindo Soluções Científicas com Big Data & MapReduceConstruindo Soluções Científicas com Big Data & MapReduce
Construindo Soluções Científicas com Big Data & MapReduceMarcel Caraciolo
 
Como Python está mudando a forma de aprendizagem à distância no Brasil
Como Python está mudando a forma de aprendizagem à distância no BrasilComo Python está mudando a forma de aprendizagem à distância no Brasil
Como Python está mudando a forma de aprendizagem à distância no BrasilMarcel Caraciolo
 
Novas Tendências para a Educação a Distância: Como reinventar a educação ?
Novas Tendências para a Educação a Distância: Como reinventar a educação ?Novas Tendências para a Educação a Distância: Como reinventar a educação ?
Novas Tendências para a Educação a Distância: Como reinventar a educação ?Marcel Caraciolo
 
Aula WebCrawlers com Regex - PyCursos
Aula WebCrawlers com Regex - PyCursosAula WebCrawlers com Regex - PyCursos
Aula WebCrawlers com Regex - PyCursosMarcel Caraciolo
 
Arquivos Zip com Python - Aula PyCursos
Arquivos Zip com Python - Aula PyCursosArquivos Zip com Python - Aula PyCursos
Arquivos Zip com Python - Aula PyCursosMarcel Caraciolo
 
PyFoursquare: Python Library for Foursquare
PyFoursquare: Python Library for FoursquarePyFoursquare: Python Library for Foursquare
PyFoursquare: Python Library for FoursquareMarcel Caraciolo
 

More from Marcel Caraciolo (20)

Como interpretar seu próprio genoma com Python
Como interpretar seu próprio genoma com PythonComo interpretar seu próprio genoma com Python
Como interpretar seu próprio genoma com Python
 
Joblib: Lightweight pipelining for parallel jobs (v2)
Joblib:  Lightweight pipelining for parallel jobs (v2)Joblib:  Lightweight pipelining for parallel jobs (v2)
Joblib: Lightweight pipelining for parallel jobs (v2)
 
Construindo softwares de bioinformática para análises clínicas : Desafios e...
Construindo softwares  de bioinformática  para análises clínicas : Desafios e...Construindo softwares  de bioinformática  para análises clínicas : Desafios e...
Construindo softwares de bioinformática para análises clínicas : Desafios e...
 
Como Python ajudou a automatizar o nosso laboratório v.2
Como Python ajudou a automatizar o nosso laboratório v.2Como Python ajudou a automatizar o nosso laboratório v.2
Como Python ajudou a automatizar o nosso laboratório v.2
 
Como Python pode ajudar na automação do seu laboratório
Como Python pode ajudar na automação do  seu laboratórioComo Python pode ajudar na automação do  seu laboratório
Como Python pode ajudar na automação do seu laboratório
 
Python on Science ? Yes, We can.
Python on Science ?   Yes, We can.Python on Science ?   Yes, We can.
Python on Science ? Yes, We can.
 
Oficina Python: Hackeando a Web com Python 3
Oficina Python: Hackeando a Web com Python 3Oficina Python: Hackeando a Web com Python 3
Oficina Python: Hackeando a Web com Python 3
 
Recommender Systems with Ruby (adding machine learning, statistics, etc)
Recommender Systems with Ruby (adding machine learning, statistics, etc)Recommender Systems with Ruby (adding machine learning, statistics, etc)
Recommender Systems with Ruby (adding machine learning, statistics, etc)
 
Opensource - Como começar e dá dinheiro ?
Opensource - Como começar e dá dinheiro ?Opensource - Como começar e dá dinheiro ?
Opensource - Como começar e dá dinheiro ?
 
Big Data com Python
Big Data com PythonBig Data com Python
Big Data com Python
 
Python e 10 motivos por que devo conhece-la ?
Python e 10 motivos por que devo conhece-la ?Python e 10 motivos por que devo conhece-la ?
Python e 10 motivos por que devo conhece-la ?
 
GeoMapper, Python Script for Visualizing Data on Social Networks with Geo-loc...
GeoMapper, Python Script for Visualizing Data on Social Networks with Geo-loc...GeoMapper, Python Script for Visualizing Data on Social Networks with Geo-loc...
GeoMapper, Python Script for Visualizing Data on Social Networks with Geo-loc...
 
Construindo Sistemas de Recomendação com Python
Construindo Sistemas de Recomendação com PythonConstruindo Sistemas de Recomendação com Python
Construindo Sistemas de Recomendação com Python
 
Python, A pílula Azul da programação
Python, A pílula Azul da programaçãoPython, A pílula Azul da programação
Python, A pílula Azul da programação
 
Construindo Soluções Científicas com Big Data & MapReduce
Construindo Soluções Científicas com Big Data & MapReduceConstruindo Soluções Científicas com Big Data & MapReduce
Construindo Soluções Científicas com Big Data & MapReduce
 
Como Python está mudando a forma de aprendizagem à distância no Brasil
Como Python está mudando a forma de aprendizagem à distância no BrasilComo Python está mudando a forma de aprendizagem à distância no Brasil
Como Python está mudando a forma de aprendizagem à distância no Brasil
 
Novas Tendências para a Educação a Distância: Como reinventar a educação ?
Novas Tendências para a Educação a Distância: Como reinventar a educação ?Novas Tendências para a Educação a Distância: Como reinventar a educação ?
Novas Tendências para a Educação a Distância: Como reinventar a educação ?
 
Aula WebCrawlers com Regex - PyCursos
Aula WebCrawlers com Regex - PyCursosAula WebCrawlers com Regex - PyCursos
Aula WebCrawlers com Regex - PyCursos
 
Arquivos Zip com Python - Aula PyCursos
Arquivos Zip com Python - Aula PyCursosArquivos Zip com Python - Aula PyCursos
Arquivos Zip com Python - Aula PyCursos
 
PyFoursquare: Python Library for Foursquare
PyFoursquare: Python Library for FoursquarePyFoursquare: Python Library for Foursquare
PyFoursquare: Python Library for Foursquare
 

Recently uploaded

Nell’iperspazio con Rocket: il Framework Web di Rust!
Nell’iperspazio con Rocket: il Framework Web di Rust!Nell’iperspazio con Rocket: il Framework Web di Rust!
Nell’iperspazio con Rocket: il Framework Web di Rust!Commit University
 
AI as an Interface for Commercial Buildings
AI as an Interface for Commercial BuildingsAI as an Interface for Commercial Buildings
AI as an Interface for Commercial BuildingsMemoori
 
Advanced Test Driven-Development @ php[tek] 2024
Advanced Test Driven-Development @ php[tek] 2024Advanced Test Driven-Development @ php[tek] 2024
Advanced Test Driven-Development @ php[tek] 2024Scott Keck-Warren
 
Powerpoint exploring the locations used in television show Time Clash
Powerpoint exploring the locations used in television show Time ClashPowerpoint exploring the locations used in television show Time Clash
Powerpoint exploring the locations used in television show Time Clashcharlottematthew16
 
Story boards and shot lists for my a level piece
Story boards and shot lists for my a level pieceStory boards and shot lists for my a level piece
Story boards and shot lists for my a level piececharlottematthew16
 
Beyond Boundaries: Leveraging No-Code Solutions for Industry Innovation
Beyond Boundaries: Leveraging No-Code Solutions for Industry InnovationBeyond Boundaries: Leveraging No-Code Solutions for Industry Innovation
Beyond Boundaries: Leveraging No-Code Solutions for Industry InnovationSafe Software
 
Anypoint Exchange: It’s Not Just a Repo!
Anypoint Exchange: It’s Not Just a Repo!Anypoint Exchange: It’s Not Just a Repo!
Anypoint Exchange: It’s Not Just a Repo!Manik S Magar
 
My Hashitalk Indonesia April 2024 Presentation
My Hashitalk Indonesia April 2024 PresentationMy Hashitalk Indonesia April 2024 Presentation
My Hashitalk Indonesia April 2024 PresentationRidwan Fadjar
 
Tampa BSides - Chef's Tour of Microsoft Security Adoption Framework (SAF)
Tampa BSides - Chef's Tour of Microsoft Security Adoption Framework (SAF)Tampa BSides - Chef's Tour of Microsoft Security Adoption Framework (SAF)
Tampa BSides - Chef's Tour of Microsoft Security Adoption Framework (SAF)Mark Simos
 
"Subclassing and Composition – A Pythonic Tour of Trade-Offs", Hynek Schlawack
"Subclassing and Composition – A Pythonic Tour of Trade-Offs", Hynek Schlawack"Subclassing and Composition – A Pythonic Tour of Trade-Offs", Hynek Schlawack
"Subclassing and Composition – A Pythonic Tour of Trade-Offs", Hynek SchlawackFwdays
 
Streamlining Python Development: A Guide to a Modern Project Setup
Streamlining Python Development: A Guide to a Modern Project SetupStreamlining Python Development: A Guide to a Modern Project Setup
Streamlining Python Development: A Guide to a Modern Project SetupFlorian Wilhelm
 
Vertex AI Gemini Prompt Engineering Tips
Vertex AI Gemini Prompt Engineering TipsVertex AI Gemini Prompt Engineering Tips
Vertex AI Gemini Prompt Engineering TipsMiki Katsuragi
 
Commit 2024 - Secret Management made easy
Commit 2024 - Secret Management made easyCommit 2024 - Secret Management made easy
Commit 2024 - Secret Management made easyAlfredo García Lavilla
 
Search Engine Optimization SEO PDF for 2024.pdf
Search Engine Optimization SEO PDF for 2024.pdfSearch Engine Optimization SEO PDF for 2024.pdf
Search Engine Optimization SEO PDF for 2024.pdfRankYa
 
Developer Data Modeling Mistakes: From Postgres to NoSQL
Developer Data Modeling Mistakes: From Postgres to NoSQLDeveloper Data Modeling Mistakes: From Postgres to NoSQL
Developer Data Modeling Mistakes: From Postgres to NoSQLScyllaDB
 
Leverage Zilliz Serverless - Up to 50X Saving for Your Vector Storage Cost
Leverage Zilliz Serverless - Up to 50X Saving for Your Vector Storage CostLeverage Zilliz Serverless - Up to 50X Saving for Your Vector Storage Cost
Leverage Zilliz Serverless - Up to 50X Saving for Your Vector Storage CostZilliz
 
Transcript: New from BookNet Canada for 2024: BNC CataList - Tech Forum 2024
Transcript: New from BookNet Canada for 2024: BNC CataList - Tech Forum 2024Transcript: New from BookNet Canada for 2024: BNC CataList - Tech Forum 2024
Transcript: New from BookNet Canada for 2024: BNC CataList - Tech Forum 2024BookNet Canada
 
Unraveling Multimodality with Large Language Models.pdf
Unraveling Multimodality with Large Language Models.pdfUnraveling Multimodality with Large Language Models.pdf
Unraveling Multimodality with Large Language Models.pdfAlex Barbosa Coqueiro
 
Gen AI in Business - Global Trends Report 2024.pdf
Gen AI in Business - Global Trends Report 2024.pdfGen AI in Business - Global Trends Report 2024.pdf
Gen AI in Business - Global Trends Report 2024.pdfAddepto
 

Recently uploaded (20)

E-Vehicle_Hacking_by_Parul Sharma_null_owasp.pptx
E-Vehicle_Hacking_by_Parul Sharma_null_owasp.pptxE-Vehicle_Hacking_by_Parul Sharma_null_owasp.pptx
E-Vehicle_Hacking_by_Parul Sharma_null_owasp.pptx
 
Nell’iperspazio con Rocket: il Framework Web di Rust!
Nell’iperspazio con Rocket: il Framework Web di Rust!Nell’iperspazio con Rocket: il Framework Web di Rust!
Nell’iperspazio con Rocket: il Framework Web di Rust!
 
AI as an Interface for Commercial Buildings
AI as an Interface for Commercial BuildingsAI as an Interface for Commercial Buildings
AI as an Interface for Commercial Buildings
 
Advanced Test Driven-Development @ php[tek] 2024
Advanced Test Driven-Development @ php[tek] 2024Advanced Test Driven-Development @ php[tek] 2024
Advanced Test Driven-Development @ php[tek] 2024
 
Powerpoint exploring the locations used in television show Time Clash
Powerpoint exploring the locations used in television show Time ClashPowerpoint exploring the locations used in television show Time Clash
Powerpoint exploring the locations used in television show Time Clash
 
Story boards and shot lists for my a level piece
Story boards and shot lists for my a level pieceStory boards and shot lists for my a level piece
Story boards and shot lists for my a level piece
 
Beyond Boundaries: Leveraging No-Code Solutions for Industry Innovation
Beyond Boundaries: Leveraging No-Code Solutions for Industry InnovationBeyond Boundaries: Leveraging No-Code Solutions for Industry Innovation
Beyond Boundaries: Leveraging No-Code Solutions for Industry Innovation
 
Anypoint Exchange: It’s Not Just a Repo!
Anypoint Exchange: It’s Not Just a Repo!Anypoint Exchange: It’s Not Just a Repo!
Anypoint Exchange: It’s Not Just a Repo!
 
My Hashitalk Indonesia April 2024 Presentation
My Hashitalk Indonesia April 2024 PresentationMy Hashitalk Indonesia April 2024 Presentation
My Hashitalk Indonesia April 2024 Presentation
 
Tampa BSides - Chef's Tour of Microsoft Security Adoption Framework (SAF)
Tampa BSides - Chef's Tour of Microsoft Security Adoption Framework (SAF)Tampa BSides - Chef's Tour of Microsoft Security Adoption Framework (SAF)
Tampa BSides - Chef's Tour of Microsoft Security Adoption Framework (SAF)
 
"Subclassing and Composition – A Pythonic Tour of Trade-Offs", Hynek Schlawack
"Subclassing and Composition – A Pythonic Tour of Trade-Offs", Hynek Schlawack"Subclassing and Composition – A Pythonic Tour of Trade-Offs", Hynek Schlawack
"Subclassing and Composition – A Pythonic Tour of Trade-Offs", Hynek Schlawack
 
Streamlining Python Development: A Guide to a Modern Project Setup
Streamlining Python Development: A Guide to a Modern Project SetupStreamlining Python Development: A Guide to a Modern Project Setup
Streamlining Python Development: A Guide to a Modern Project Setup
 
Vertex AI Gemini Prompt Engineering Tips
Vertex AI Gemini Prompt Engineering TipsVertex AI Gemini Prompt Engineering Tips
Vertex AI Gemini Prompt Engineering Tips
 
Commit 2024 - Secret Management made easy
Commit 2024 - Secret Management made easyCommit 2024 - Secret Management made easy
Commit 2024 - Secret Management made easy
 
Search Engine Optimization SEO PDF for 2024.pdf
Search Engine Optimization SEO PDF for 2024.pdfSearch Engine Optimization SEO PDF for 2024.pdf
Search Engine Optimization SEO PDF for 2024.pdf
 
Developer Data Modeling Mistakes: From Postgres to NoSQL
Developer Data Modeling Mistakes: From Postgres to NoSQLDeveloper Data Modeling Mistakes: From Postgres to NoSQL
Developer Data Modeling Mistakes: From Postgres to NoSQL
 
Leverage Zilliz Serverless - Up to 50X Saving for Your Vector Storage Cost
Leverage Zilliz Serverless - Up to 50X Saving for Your Vector Storage CostLeverage Zilliz Serverless - Up to 50X Saving for Your Vector Storage Cost
Leverage Zilliz Serverless - Up to 50X Saving for Your Vector Storage Cost
 
Transcript: New from BookNet Canada for 2024: BNC CataList - Tech Forum 2024
Transcript: New from BookNet Canada for 2024: BNC CataList - Tech Forum 2024Transcript: New from BookNet Canada for 2024: BNC CataList - Tech Forum 2024
Transcript: New from BookNet Canada for 2024: BNC CataList - Tech Forum 2024
 
Unraveling Multimodality with Large Language Models.pdf
Unraveling Multimodality with Large Language Models.pdfUnraveling Multimodality with Large Language Models.pdf
Unraveling Multimodality with Large Language Models.pdf
 
Gen AI in Business - Global Trends Report 2024.pdf
Gen AI in Business - Global Trends Report 2024.pdfGen AI in Business - Global Trends Report 2024.pdf
Gen AI in Business - Global Trends Report 2024.pdf
 

Benchy, python framework for performance benchmarking of Python Scripts

  • 1. Benchy Lightweight performing benchmark framework for Python scripts Marcel Caraciolo @marcelcaraciolo Developer, Scientist, contributor to the Crab recsys project, works with Python for 6 years, interested in mobile, education, machine learning and dataaaaa! Recife, Brazil - http://aimotion.blogspot.com
  • 2. About me Co-founder of Crab - Python recsys library Chief Scientist at Atepassar, e-learning social network Co-Founder and Instructor of PyCursos, teaching Python on-line Co-Founder of Pingmind, on-line infrastructure for MOOCs Interested in Python, mobile, e-learning and machine learning!
  • 3. Why do we test ?
  • 6. What made my code slower ?
  • 7.
  • 8. me
  • 9. Solutions ? In  [1]:  def  f(x):      ...:          return  x*x      ...:   In  [2]:  %timeit  for  x  in  range (100):  f(x) 100000  loops,  best  of  3:  20.3  us   per  loop
  • 10. Stop. Help is near Performance benchmarks to compare several python code alternatives Generates graphs using matplotlib Memory consumption, Performance timing available https://github.com/python-recsys/benchy
  • 12. Writing benchmarks $  easy_install  -­‐U  benchy   #  pip  install  -­‐U  benchy
  • 13. Writing benchmarks from  benchy.api  import  Benchmark common_setup  =  "" statement  =  "lst  =  ['i'  for  x  in  range(100000)]" benchmark1  =  Benchmark(statement,  common_setup,  name=  "range") statement  =  "lst  =  ['i'  for  x  in  xrange(100000)]" benchmark2  =  Benchmark(statement,  common_setup,  name=  "xrange") statement  =  "lst  =  ['i']  *  100000" benchmark3  =  Benchmark(statement,  common_setup,  name=  "range")
  • 14. Use them in your workflow [1]:  print  benchmark1.run() {'memory':  {'repeat':  3,                        'success':  True,                        'units':  'MB',                        'usage':  2.97265625},  'runtime':  {'loops':  100,                          'repeat':  3,                          'success':  True,                          'timing':  7.5653696060180664,                          'units':  'ms'}} Same code as %timeit and %memit
  • 15. Beautiful reports rst_text  =  benchmark1.to_rst(results)
  • 16. Benchmark suite from  benchy.api  import  BenchmarkSuite suite  =  BenchmarkSuite() suite.append(benchmark1) suite.append(benchmark2) suite.append(benchmark3)
  • 17. Run the benchmarks from  benchy.api  import  BenchmarkRunner runner  =  BenchmarkRunner(benchmarks=suite,  tmp_dir='.',                                                            name=  'List  Allocation  Benchmark') n_benchs,  results  =  runner.run()
  • 18. Who is the faster ? {Benchmark('list  with  "*"'):        {'runtime':  {'timing':  0.47582697868347168,  'repeat':  3,  'success':  True,  'loops':  1000,   'timeBaselines':  1.0,  'units':  'ms'},        'memory':  {'usage':  0.3828125,  'units':  'MB',  'repeat':  3,  'success':  True}}, Benchmark('list  with  xrange'):        {'runtime':  {'timing':  5.623779296875,  'repeat':  3,  'success':  True,  'loops':  100,   'timeBaselines':  11.818958463504936,  'units':  'ms'},        'memory':  {'usage':  0.71484375,  'units':  'MB',  'repeat':  3,  'success':  True}}, Benchmark('list  with  range'):  {        'runtime':  {'timing':  6.5933513641357422,  'repeat':  3,  'success':  True,  'loops':  100,   'timeBaselines':  13.856615239384636,  'units':  'ms'},        'memory':  {'usage':  2.2109375,  'units':  'MB',  'repeat':  3,  'success':  True}}}
  • 19. Plot relative fig  =  runner.plot_relative(results,  horizontal=True) plt.savefig('%s_r.png'  %  runner.name,  bbox_inches='tight')
  • 21. Full report rst_text  =  runner.to_rst(results,  runner.name  +  'png',                runner.name  +  '_r.png') with  open('teste.rst',  'w')  as  f:                f.write(rst_text)
  • 24. Why ? Benchmark pairwise functions in the Crab recsys library http://aimotion.blogspot.com.br/2013/03/performing-runtime-benchmarks-with.html
  • 25. Get involved Create the benchmarks as TestCases Check automatically for benchmark files and run like %nose.test() More setup and teardown control Group benchmarks in the same graph
  • 26. Improvements Added Database Handler Added Git Support Added New Runner Run benchmarks
  • 27. db.py import  sqlite3     class  BenchmarkDb(object):        """        Persistence  handler  for  bechmark  results        """        def  _create_tables(self):                self._cursor.execute("drop  table  if  exists  benchmarksuites")                self._cursor.execute("drop  table  if  exists  benchmarks")                self._cursor.execute("drop  table  if  exists  results")                ...                    self._cursor.execute('CREATE  TABLE                              benchmarks(checksum  text  PRIMARY  KEY,                          name  text,  description  text,  suite_id  integer,                            FOREIGN  KEY(suite_id)  REFERENCES  benchmarksuites(id))')                  self._cursor.execute('CREATE  TABLE  results(id  integer                          PRIMARY  KEY  AUTOINCREMENT,  checksum  text,                          timestamp  timestamp,  ncalls  text,  timing  float,  traceback  text,                          FOREIGN  KEY(checksum)  REFERENCES  benchmarks(checksum))')                  self._con.commit()          def  write_benchmark(self,  bm,  suite=None):                if  suite  is  not  None:                        self._cursor.execute('SELECT  id  FROM  benchmarksuites                                  where  name  =  "%s"'  %  suite.name)                        row  =  self._cursor.fetchone()                else:                        row  =  None                  if  row  ==  None:                        self._cursor.execute('INSERT  INTO  benchmarks  VALUES  (?,  ?,  ?,  ?)',                                (bm.checksum,  bm.name,  bm.description,  None))                else:                        self._cursor.execute('INSERT  INTO  benchmarks  VALUES  (?,  ?,  ?,  ?)',                                (bm.checksum,  bm.name,  bm.description,  row[0]))
  • 28. Improvements Added Database Handler Added Git Support Added New Runner Run benchmarks
  • 29. Git Repo class  GitRepository(Repository):        """        Read  some  basic  statistics  about  a  git  repository        """          def  __init__(self,  repo_path):                self.repo_path  =  repo_path                self.git  =  _git_command(self.repo_path)                (self.shas,  self.messages,                  self.timestamps,  self.authors)  =  self._parse_commit_log() [('d87fdf2', datetime.datetime(2013, 3, 22, 16, 55, 38)), ('a90a449', datetime.datetime(2013, 3, 22, 16, 54, 36)), ('fe66a86', datetime.datetime(2013, 3, 22, 16, 51, 2)), ('bea6b21', datetime.datetime(2013, 3, 22, 13, 14, 22)), ('bde5e63', datetime.datetime(2013, 3, 22, 5, 2, 56)), ('89634f6', datetime.datetime(2013, 3, 20, 4, 16, 19))]
  • 30. Git Repo class  BenchmarkRepository(object):        """        Manage  an  isolated  copy  of  a  repository  for  benchmarking        """        ...          def  _copy_repo(self):                if  os.path.exists(self.target_dir):                        print  'Deleting  %s  first'  %  self.target_dir                        #  response  =  raw_input('%s  exists,  delete?  y/n'  %  self.target_dir)                        #  if  response  ==  'n':                        #          raise  Exception('foo')                        cmd  =  'rm  -­‐rf  %s'  %  self.target_dir                        print  cmd                        os.system(cmd)                  self._clone(self.target_dir_tmp,  self.target_dir)                self._prep()                self._copy_benchmark_scripts_and_deps()          def  _clone(self,  source,  target):                cmd  =  'git  clone  %s  %s'  %  (source,  target)                print  cmd                os.system(cmd)          def  _copy_benchmark_scripts_and_deps(self):                pth,  _  =  os.path.split(os.path.abspath(__file__))                deps  =  [os.path.join(pth,  'run_benchmarks.py')]                if  self.dependencies  is  not  None:                        deps.extend(self.dependencies)                  for  dep  in  deps:                        cmd  =  'cp  %s  %s'  %  (dep,  self.target_dir)                        print  cmd                        proc  =  subprocess.Popen(cmd,  shell=True)                        proc.wait()
  • 31. Improvements Added Database Handler Added Git Support Added New Runner Run benchmarks
  • 32. New Runner   class  BenchmarkGitRunner(BenchmarkRunner):    ...              def  _register_benchmarks(self):                ex_benchmarks  =  self.db.get_benchmarks()                db_checksums  =  set(ex_benchmarks.index)                for  bm  in  self.benchmarks:                        if  bm.checksum  in  db_checksums:                                self.db.update_name(bm)                        else:                                print  'Writing  new  benchmark  %s,  %s'  %  (bm.name,                                                                                              bm.checksum)                                self.db.write_benchmark(bm)  
  • 33. New runner   class  BenchmarkGitRunner(BenchmarkRunner):    ...              def  _run_revision(self,  rev):                need_to_run  =  self._get_benchmarks_for_rev(rev)                  if  not  need_to_run:                        print  'No  benchmarks  need  running  at  %s'  %  rev                        return  0,  {}                  print  'Running  %d  benchmarks  for  revision  %s'  %  (len(need_to_run),  rev)                for  bm  in  need_to_run:                        print  bm.name                  self.bench_repo.switch_to_revision(rev)                  pickle_path  =  os.path.join(self.tmp_dir,  'benchmarks.pickle')                results_path  =  os.path.join(self.tmp_dir,  'results.pickle')                if  os.path.exists(results_path):                        os.remove(results_path)                pickle.dump(need_to_run,  open(pickle_path,  'w'))                  #  run  the  process                cmd  =  'python  %s/run_benchmarks.py  %s  %s'  %  (pickle_path,  results_path)                print  cmd                proc  =  subprocess.Popen(cmd,  stdout=subprocess.PIPE,                                                                stderr=subprocess.PIPE,                                                                shell=True,                                                                cwd=self.tmp_dir)                stdout,  stderr  =  proc.communicate()  
  • 34. New runner   class  BenchmarkGitRunner(BenchmarkRunner):    ...              def  _run_revision(self,  rev):                need_to_run  =  self._get_benchmarks_for_rev(rev)                  if  not  need_to_run:                        print  'No  benchmarks  need  running  at  %s'  %  rev                        return  0,  {}                  print  'Running  %d  benchmarks  for  revision  %s'  %  (len(need_to_run),  rev)                for  bm  in  need_to_run:                        print  bm.name                  self.bench_repo.switch_to_revision(rev)                #  run  the  process                cmd  =  'python  %s/run_benchmarks.py  %s  %s'  %  (pickle_path,  results_path)                print  cmd                proc  =  subprocess.Popen(cmd,  stdout=subprocess.PIPE,                                                                stderr=subprocess.PIPE,                                                                shell=True,                                                                cwd=self.tmp_dir)                stdout,  stderr  =  proc.communicate()                      if  stderr:                        if  ("object  has  no  attribute"  in  stderr  or                                'ImportError'  in  stderr):                                print  stderr                                print  'HARD  CLEANING!'                                self.bench_repo.hard_clean()                        print  stderr                    if  not  os.path.exists(results_path):                        print  'Failed  for  revision  %s'  %  rev                        return  len(need_to_run),  {}                results  =  pickle.load(open(results_path,  'r'))
  • 35. Improvements Added Database Handler Added Git Support Added New Runner Run benchmarks
  • 36. Running from  benchmark  import  Benchmark,  BenchmarkRepository,  BenchmarkGitRunner try:        REPO_PATH  =  config.get('setup',  'repo_path')        REPO_URL  =  config.get('setup',  'repo_url')        DB_PATH  =  config.get('setup',  'db_path')        TMP_DIR  =  config.get('setup',  'tmp_dir') except:        REPO_PATH  =  os.path.abspath(os.path.join(os.path.dirname(__file__),   "../"))        REPO_URL  =  'git@github.com:python-­‐recsys/crab.git'        DB_PATH  =  os.path.join(REPO_PATH,  'suite/benchmarks.db')        TMP_DIR  =  os.path.join(HOME,  'tmp/base_benchy/') PREPARE  =  """ python  setup.py  clean """ BUILD  =  """ python  setup.py  build_ext  -­‐-­‐inplace """ repo  =  BenchmarkRepository(REPO_PATH,  REPO_URL,  DB_PATH,  TMP_DIR)
  • 37. Running        common_setup  =  """          import  numpy          from  crab.metrics  import  cosine_distances          X  =  numpy.random.uniform(1,5,(1000,))        """          bench  =  Benchmark(statement,  setup_bk1,  name="Crab   Cosine")          suite  =  BenchmarkSuite()        suite.append(bench)                  statement  =  "cosine_distances(X,  X)"          runner  =  BenchmarkGitRunner(suite,  '.',  'Absolute   timing  in  ms')        n_benchs,  results  =  runner.run()          runner.plot_history(results)        plt.show()
  • 38. Improvements Historical commits from version control now benchmarked
  • 39. Working now: Module detection by_module  =  {} benchmarks  =  [] modules  =  ['metrics',                      'recommenders',                      'similarities'] for  modname  in  modules:        ref  =  __import__(modname)        by_module[modname]  =  [v  for  v  in  ref.__dict__.values()                                                    if  isinstance(v,  Benchmark)]        benchmarks.extend(by_module[modname]) for  bm  in  benchmarks:        assert(bm.name  is  not  None)
  • 41.
  • 42.
  • 43. Benchy Lightweight performing benchmark framework for Python scripts Marcel Caraciolo @marcelcaraciolo Developer, Scientist, contributor to the Crab recsys project, works with Python for 6 years, interested in mobile, education, machine learning and dataaaaa! Recife, Brazil - http://aimotion.blogspot.com