Benchy, python framework for performance benchmarking of Python Scripts

Benchy
Lightweight performance benchmarking framework for
Python scripts
Marcel Caraciolo
@marcelcaraciolo
Developer, scientist, and contributor to the Crab recsys project;
has worked with Python for 6 years; interested in mobile,
education, machine learning and dataaaaa!
Recife, Brazil - http://aimotion.blogspot.com

About me
Co-founder of Crab - Python recsys library
Chief Scientist at Atepassar, an e-learning social network
Co-Founder and Instructor of PyCursos, teaching Python on-line
Co-Founder of Pingmind, on-line infrastructure for MOOCs
Interested in Python, mobile, e-learning and machine learning!

Solutions ?
In
[1]:
def
f(x):

...:

return
x*x

...:

In
[2]:
%timeit
for
x
in
range
(100):
f(x)
100000
loops,
best
of
3:
20.3
us

per
loop

Stop. Help is near
Performance benchmarks for comparing several Python code
alternatives
Generates graphs using matplotlib
Memory consumption and performance timing measurements available
https://github.com/python-recsys/benchy

Writing benchmarks
$
easy_install
-U
benchy

#
pip
install
-U
benchy

Writing benchmarks
from
benchy.api
import
Benchmark
common_setup
=
""
statement
=
"lst
=
['i'
for
x
in
range(100000)]"
benchmark1
=
Benchmark(statement,
common_setup,
name=
"range")
statement
=
"lst
=
['i'
for
x
in
xrange(100000)]"
benchmark2
=
Benchmark(statement,
common_setup,
name=
"xrange")
statement
=
"lst
=
['i']
*
100000"
benchmark3
=
Benchmark(statement,
common_setup,
name=
"range")

Use them in your
workflow
[1]:
print
benchmark1.run()
{'memory':
{'repeat':
3,

'success':
True,

'units':
'MB',

'usage':
2.97265625},

'runtime':
{'loops':
100,

'repeat':
3,

'success':
True,

'timing':
7.5653696060180664,

'units':
'ms'}}
Same code as %timeit
and %memit

Beautiful reports
rst_text
=
benchmark1.to_rst(results)

Benchmark suite
from
benchy.api
import
BenchmarkSuite
suite
=
BenchmarkSuite()
suite.append(benchmark1)

Run the benchmarks
from
benchy.api
import
BenchmarkRunner
runner
=
BenchmarkRunner(benchmarks=suite,
tmp_dir='.',

name=
'List
Allocation
Benchmark')
n_benchs,
results
=
runner.run()

Which is the fastest?
{Benchmark('list
with
"*"'):

{'runtime':
{'timing':
0.47582697868347168,
'repeat':
3,
'success':
True,
'loops':
1000,

'timeBaselines':
1.0,
'units':
'ms'},

'memory':
{'usage':
0.3828125,
'units':
'MB',
'repeat':
3,
'success':
True}},
Benchmark('list
with
xrange'):

{'runtime':
{'timing':
5.623779296875,
'repeat':
3,
'success':
True,
'loops':
100,

'timeBaselines':
11.818958463504936,
'units':
'ms'},

'memory':
{'usage':
0.71484375,
'units':
'MB',
'repeat':
3,
'success':
True}},
Benchmark('list
with
range'):
{

'runtime':
{'timing':
6.5933513641357422,
'repeat':
3,
'success':
True,
'loops':
100,

'timeBaselines':
13.856615239384636,
'units':
'ms'},

'memory':
{'usage':
2.2109375,
'units':
'MB',
'repeat':
3,
'success':
True}}}

Plot relative
fig
=
runner.plot_relative(results,
horizontal=True)
plt.savefig('%s_r.png'
%
runner.name,
bbox_inches='tight')

Plot absolute
runner.plot_absolute(results,
horizontal=False)
plt.savefig('%s.png'
%
runner.name)
#
bbox_inches='tight')

Full report
rst_text
=
runner.to_rst(results,
runner.name
+
'png',

runner.name
+
'_r.png')
with
open('teste.rst',
'w')
as
f:

f.write(rst_text)

Why ?
Benchmarking pairwise functions in the Crab recsys library
http://aimotion.blogspot.com.br/2013/03/performing-runtime-benchmarks-with.html

Get involved
Create the benchmarks as TestCases
Automatically discover benchmark files and run them like %nose.test()
More setup and teardown control
Group benchmarks on the same graph

Improvements
Added Database Handler
Added Git Support
Added New Runner
Run benchmarks

db.py
import
sqlite3

class
BenchmarkDb(object):

"""

Persistence
handler
for
benchmark
results

"""

def
_create_tables(self):

self._cursor.execute("drop
table
if
exists
benchmarksuites")

self._cursor.execute("drop
table
if
exists
benchmarks")

self._cursor.execute("drop
table
if
exists
results")

...

self._cursor.execute('CREATE
TABLE

benchmarks(checksum
text
PRIMARY
KEY,

name
text,
description
text,
suite_id
integer,

FOREIGN
KEY(suite_id)
REFERENCES
benchmarksuites(id))')

self._cursor.execute('CREATE
TABLE
results(id
integer

PRIMARY
KEY
AUTOINCREMENT,
checksum
text,

timestamp
timestamp,
ncalls
text,
timing
float,
traceback
text,

FOREIGN
KEY(checksum)
REFERENCES
benchmarks(checksum))')

self._con.commit()

def
write_benchmark(self,
bm,
suite=None):

if
suite
is
not
None:

self._cursor.execute('SELECT
id
FROM
benchmarksuites

where
name
=
"%s"'
%
suite.name)

row
=
self._cursor.fetchone()

else:

row
=
None

if
row
==
None:

self._cursor.execute('INSERT
INTO
benchmarks
VALUES
(?,
?,
?,
?)',

(bm.checksum,
bm.name,
bm.description,
None))

else:

self._cursor.execute('INSERT
INTO
benchmarks
VALUES
(?,
?,
?,
?)',

(bm.checksum,
bm.name,
bm.description,
row[0]))

Git Repo
class
GitRepository(Repository):

"""

Read
some
basic
statistics
about
a
git
repository

"""

def
__init__(self,
repo_path):

self.repo_path
=
repo_path

self.git
=
_git_command(self.repo_path)

(self.shas,
self.messages,

self.timestamps,
self.authors)
=
self._parse_commit_log()
[('d87fdf2', datetime.datetime(2013, 3, 22, 16, 55, 38)), ('a90a449', datetime.datetime(2013, 3, 22, 16, 54, 36)),
('fe66a86', datetime.datetime(2013, 3, 22, 16, 51, 2)), ('bea6b21', datetime.datetime(2013, 3, 22, 13, 14, 22)),
('bde5e63', datetime.datetime(2013, 3, 22, 5, 2, 56)), ('89634f6', datetime.datetime(2013, 3, 20, 4, 16, 19))]

Git Repo
class
BenchmarkRepository(object):

"""

Manage
an
isolated
copy
of
a
repository
for
benchmarking

"""

...

def
_copy_repo(self):

if
os.path.exists(self.target_dir):

print
'Deleting
%s
first'
%
self.target_dir

#
response
=
raw_input('%s
exists,
delete?
y/n'
%
self.target_dir)

#
if
response
==
'n':

#

raise
Exception('foo')

cmd
=
'rm
-rf
%s'
%
self.target_dir

print
cmd

os.system(cmd)

self._clone(self.target_dir_tmp,
self.target_dir)

self._prep()

self._copy_benchmark_scripts_and_deps()

def
_clone(self,
source,
target):

cmd
=
'git
clone
%s
%s'
%
(source,
target)

print
cmd

os.system(cmd)

def
_copy_benchmark_scripts_and_deps(self):

pth,
_
=
os.path.split(os.path.abspath(__file__))

deps
=
[os.path.join(pth,
'run_benchmarks.py')]

if
self.dependencies
is
not
None:

deps.extend(self.dependencies)

for
dep
in
deps:

cmd
=
'cp
%s
%s'
%
(dep,
self.target_dir)

print
cmd

proc
=
subprocess.Popen(cmd,
shell=True)

proc.wait()

New Runner

class
BenchmarkGitRunner(BenchmarkRunner):

...

def
_register_benchmarks(self):

ex_benchmarks
=
self.db.get_benchmarks()

db_checksums
=
set(ex_benchmarks.index)

for
bm
in
self.benchmarks:

if
bm.checksum
in
db_checksums:

self.db.update_name(bm)

else:

print
'Writing
new
benchmark
%s,
%s'
%
(bm.name,

bm.checksum)

self.db.write_benchmark(bm)

New runner

class

...

def
_run_revision(self,
rev):

need_to_run
=
self._get_benchmarks_for_rev(rev)

if
not
need_to_run:

print
'No
benchmarks
need
running
at
%s'
%
rev

return
0,
{}

print
'Running
%d
benchmarks
for
revision
%s'
%
(len(need_to_run),
rev)

for
bm
in
need_to_run:

print
bm.name

self.bench_repo.switch_to_revision(rev)

pickle_path
=
os.path.join(self.tmp_dir,
'benchmarks.pickle')

results_path
=
os.path.join(self.tmp_dir,
'results.pickle')

if
os.path.exists(results_path):

os.remove(results_path)

pickle.dump(need_to_run,
open(pickle_path,
'w'))

#
run
the
process

cmd
=
'python
%s/run_benchmarks.py
%s
%s'
%
(pickle_path,
results_path)

print
cmd

proc
=
subprocess.Popen(cmd,
stdout=subprocess.PIPE,

stderr=subprocess.PIPE,

shell=True,

cwd=self.tmp_dir)

stdout,
stderr
=
proc.communicate()

New runner

class

...

def
_run_revision(self,
rev):

need_to_run
=
self._get_benchmarks_for_rev(rev)

if
not
need_to_run:

print
'No
benchmarks
need
running
at
%s'
%
rev

return
0,
{}

print
'Running
%d
benchmarks
for
revision
%s'
%
(len(need_to_run),
rev)

for
bm
in
need_to_run:

print
bm.name

self.bench_repo.switch_to_revision(rev)

#
run
the
process

cmd
=
'python
%s/run_benchmarks.py
%s
%s'
%
(pickle_path,
results_path)

print
cmd

proc
=
subprocess.Popen(cmd,
stdout=subprocess.PIPE,

stderr=subprocess.PIPE,

shell=True,

cwd=self.tmp_dir)

stdout,
stderr
=
proc.communicate()

if
stderr:

if
("object
has
no
attribute"
in
stderr
or

'ImportError'
in
stderr):

print
stderr

print
'HARD
CLEANING!'

self.bench_repo.hard_clean()

print
stderr

if
not
os.path.exists(results_path):

print
'Failed
for
revision
%s'
%
rev

return
len(need_to_run),
{}

results
=
pickle.load(open(results_path,
'r'))

Running
from
benchmark
import
Benchmark,
BenchmarkRepository,
BenchmarkGitRunner
try:

REPO_PATH
=
config.get('setup',
'repo_path')

REPO_URL
=
config.get('setup',
'repo_url')

DB_PATH
=
config.get('setup',
'db_path')

TMP_DIR
=
config.get('setup',
'tmp_dir')
except:

REPO_PATH
=
os.path.abspath(os.path.join(os.path.dirname(__file__),

"../"))

REPO_URL
=
'git@github.com:python-recsys/crab.git'

DB_PATH
=
os.path.join(REPO_PATH,
'suite/benchmarks.db')

TMP_DIR
=
os.path.join(HOME,
'tmp/base_benchy/')
PREPARE
=
"""
python
setup.py
clean
"""
BUILD
=
"""
python
setup.py
build_ext
--inplace
"""
repo
=
BenchmarkRepository(REPO_PATH,
REPO_URL,
DB_PATH,
TMP_DIR)

Running

common_setup
=
"""

import
numpy

from
crab.metrics
import
cosine_distances

X
=
numpy.random.uniform(1,5,(1000,))

"""

bench
=
setup_bk1,
name="Crab

Cosine")

suite
=
BenchmarkSuite()

suite.append(bench)

statement
=
"cosine_distances(X,
X)"

runner
=
BenchmarkGitRunner(suite,
'.',
'Absolute

timing
in
ms')

n_benchs,
results
=
runner.run()

runner.plot_history(results)

plt.show()

Improvements
Historical commits from version control can now be
benchmarked

Working now:
Module detection
by_module
=
{}
benchmarks
=
[]
modules
=
['metrics',

'recommenders',

'similarities']
for
modname
in
modules:

ref
=
__import__(modname)

by_module[modname]
=
[v
for
v
in
ref.__dict__.values()

if
isinstance(v,
Benchmark)]

benchmarks.extend(by_module[modname])
for
bm
in
benchmarks:

assert(bm.name
is
not
None)

https://github.com/python-recsys/benchy
Forks and pull requests are welcome!

Benchy, python framework for performance benchmarking of Python Scripts

More Related Content

What's hot

Viewers also liked

Similar to Benchy, python framework for performance benchmarking of Python Scripts

More from Marcel Caraciolo

Recently uploaded

Benchy, python framework for performance benchmarking of Python Scripts