Skip to content

Commit

Permalink
Merge pull request #282 from vidartf/optimize
Browse files Browse the repository at this point in the history
Add caching for mimedata comparison
  • Loading branch information
minrk authored Apr 21, 2017
2 parents c8f5544 + a5f5423 commit 73f5f70
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 15 deletions.
32 changes: 22 additions & 10 deletions nbdime/diffing/notebooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,21 +3,25 @@
# Copyright (c) Jupyter Development Team.
# Distributed under the terms of the Modified BSD License.

from __future__ import unicode_literals

"""Tools for diffing notebooks.
All diff tools here currently assume the notebooks have already been
converted to the same format version, currently v4 at time of writing.
Up- and down-conversion is handled by nbformat.
"""

from __future__ import unicode_literals

import operator
import re
import copy
from collections import defaultdict
from six import string_types
from six.moves import zip
try:
from functools import lru_cache
except ImportError:
from backports.functools_lru_cache import lru_cache

from ..diff_format import MappingDiffBuilder, DiffOp

Expand Down Expand Up @@ -49,6 +53,7 @@
# an argument instead of separate functions.


@lru_cache(maxsize=1024, typed=False)
def compare_text_approximate(x, y):
# Fast cutoff when one is empty
if bool(x) != bool(y):
Expand Down Expand Up @@ -90,6 +95,15 @@ def compare_base64_strict(x, y):
return x == y


@lru_cache(maxsize=128, typed=False)
def _compare_mimedata_strings(x, y, comp_text, comp_base64):
    """Compare two string mimedata values with a content-appropriate comparator.

    Results are memoized by ``lru_cache``, so every argument (including the
    two comparator callables) has to be hashable.

    Args:
        x: First string value; it alone decides which comparator is used.
        y: Second string value.
        comp_text: Callable ``(x, y)`` used when ``x`` does not match the
            module-level ``_base64`` pattern.
        comp_base64: Callable ``(x, y)`` used when ``x`` matches ``_base64``.

    Returns:
        Whatever the selected comparator returns for ``(x, y)``.
    """
    # A match against _base64 means the payload is most likely base64
    # encoded data; anything else is compared as text.
    comparator = comp_base64 if _base64.match(x) else comp_text
    return comparator(x, y)


def _compare_mimedata(mimetype, x, y, comp_text, comp_base64):
mimetype = mimetype.lower()

Expand All @@ -108,12 +122,7 @@ def _compare_mimedata(mimetype, x, y, comp_text, comp_base64):
# TODO: Compare binary images?
#if mimetype.startswith("image/"):
if isinstance(x, string_types) and isinstance(y, string_types):
# Most likely base64 encoded data
if _base64.match(x):
return comp_base64(x, y)
else:
return comp_text(x, y)

_compare_mimedata_strings(x, y, comp_text, comp_base64)
# Fallback to exactly equal
return x == y

Expand Down Expand Up @@ -357,17 +366,20 @@ def diff_single_outputs(a, b, path="/cells/*/outputs/*",
if a.output_type in ("execute_result", "display_data"):
di = MappingDiffBuilder()

# Separate data from output during diffing:
tmp_data = a.pop('data')
a_conj = copy.deepcopy(a)
a.data = tmp_data
a_conj = copy.deepcopy(a) # Output without data
a.data = tmp_data # Restore output
tmp_data = b.pop('data')
b_conj = copy.deepcopy(b)
b.data = tmp_data
# Only diff outputs without data:
dd_conj = diff(a_conj, b_conj)
if dd_conj:
for e in dd_conj:
di.append(e)

# Only diff data:
dd = diff_mime_bundle(a.data, b.data, path=path+"/data")
if dd:
di.patch("data", dd)
Expand Down
25 changes: 20 additions & 5 deletions nbdime/profiling.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
import time
import contextlib
from tabulate import tabulate
from functools import wraps


def _sort_time(value):
Expand All @@ -55,18 +56,31 @@ def __init__(self, verbose=False, enabled=True):

@contextlib.contextmanager
def time(self, key):
    """Context manager accumulating wall-clock time spent under ``key``.

    If the timer is disabled on entry, the managed block runs without any
    bookkeeping. Otherwise the elapsed seconds are added to
    ``self.map[key]['time']`` and ``self.map[key]['calls']`` is incremented
    (the entry is created on first use).

    Nothing is recorded when the managed block raises, because the
    exception propagates out of the ``yield`` before the bookkeeping runs.

    Args:
        key: Name under which the timing is accumulated.
    """
    if not self.enabled:
        # Disabled: run the body untouched and skip the clock reads entirely.
        yield
        return
    start = time.time()
    yield
    secs = time.time() - start
    # The enabled state was decided on entry; a sample that was measured is
    # always recorded (no second check after the block has run).
    if key in self.map:
        self.map[key]['time'] += secs
        self.map[key]['calls'] += 1
    else:
        self.map[key] = dict(time=secs, calls=1)

def profile(self, key=None):
    """Return a decorator that times every call of the decorated function.

    Args:
        key: Name the timings are recorded under via ``self.time``. When
            ``None``, the decorated function's ``__name__`` is used, falling
            back to ``'unknown'`` if it has no usable name.

    Returns:
        A decorator producing a wrapper (metadata preserved through
        ``functools.wraps``) that runs the function inside
        ``self.time(<key>)`` and returns the function's result.
    """
    def decorator(function):
        # Resolve the key into a fresh local rather than rebinding the outer
        # ``key`` with ``nonlocal``: that statement is Python 3-only syntax,
        # and rebinding would make a reused decorator keep recording under
        # the first decorated function's name.
        timing_key = key
        if timing_key is None:
            timing_key = getattr(function, '__name__', None) or 'unknown'

        @wraps(function)
        def inner(*args, **kwargs):
            with self.time(timing_key):
                return function(*args, **kwargs)
        return inner
    return decorator

@contextlib.contextmanager
def enable(self):
old = self.enabled
Expand All @@ -88,8 +102,8 @@ def __str__(self):
for key, data in items:
time = data['time']
calls = data['calls']
lines.append((key, calls, time))
return tabulate(lines, headers=['Key', 'Calls', 'Time'])
lines.append((key, calls, time, time / calls))
return tabulate(lines, headers=['Key', 'Calls', 'Time', 'Time/Call'])


timer = TimePaths(enabled=False)
Expand All @@ -98,7 +112,8 @@ def __str__(self):
def profile_diff_paths(args=None):
    """Run the nbdiff application once with path timing enabled and print the timings.

    Args:
        args: Argument list forwarded unchanged to ``nbdime.nbdiffapp.main``.
    """
    # Imported inside the function so they are only loaded when profiling runs.
    import nbdime.nbdiffapp
    import nbdime.profiling
    # The module-level timer is created disabled, so enable it for the
    # duration of the run. The diff is executed exactly once, inside the
    # enabled timer; an extra untimed run would double the work and the output.
    with nbdime.profiling.timer.enable():
        nbdime.nbdiffapp.main(args)
    data = str(nbdime.profiling.timer)
    print(data)

Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,7 @@ def run(self):

':python_version == "2.7"': [
'backports.shutil_which',
'backports.functools_lru_cache',
],
}

Expand Down

0 comments on commit 73f5f70

Please sign in to comment.