Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add caching for mimedata comparison #282

Merged
merged 4 commits into from
Apr 21, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 22 additions & 10 deletions nbdime/diffing/notebooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,21 +3,25 @@
# Copyright (c) Jupyter Development Team.
# Distributed under the terms of the Modified BSD License.

from __future__ import unicode_literals

"""Tools for diffing notebooks.

All diff tools here currently assume the notebooks have already been
converted to the same format version, currently v4 at time of writing.
Up- and down-conversion is handled by nbformat.
"""

from __future__ import unicode_literals

import operator
import re
import copy
from collections import defaultdict
from six import string_types
from six.moves import zip
try:
from functools import lru_cache
except ImportError:
from backports.functools_lru_cache import lru_cache

from ..diff_format import MappingDiffBuilder, DiffOp

Expand Down Expand Up @@ -49,6 +53,7 @@
# an argument instead of separate functions.


@lru_cache(maxsize=1024, typed=False)
def compare_text_approximate(x, y):
# Fast cutoff when one is empty
if bool(x) != bool(y):
Expand Down Expand Up @@ -90,6 +95,15 @@ def compare_base64_strict(x, y):
return x == y


@lru_cache(maxsize=128, typed=False)
def _compare_mimedata_strings(x, y, comp_text, comp_base64):
    """Compare two mimedata strings, picking the comparator from *x*.

    If *x* matches the module's ``_base64`` pattern it is most likely
    base64 encoded data and ``comp_base64`` is used; otherwise the pair is
    compared with ``comp_text``. Results are memoized in a bounded LRU
    cache, so every argument (including the comparator callables) must be
    hashable.
    """
    looks_encoded = _base64.match(x)
    comparator = comp_base64 if looks_encoded else comp_text
    return comparator(x, y)


def _compare_mimedata(mimetype, x, y, comp_text, comp_base64):
mimetype = mimetype.lower()

Expand All @@ -108,12 +122,7 @@ def _compare_mimedata(mimetype, x, y, comp_text, comp_base64):
# TODO: Compare binary images?
#if mimetype.startswith("image/"):
if isinstance(x, string_types) and isinstance(y, string_types):
# Most likely base64 encoded data
if _base64.match(x):
return comp_base64(x, y)
else:
return comp_text(x, y)

_compare_mimedata_strings(x, y, comp_text, comp_base64)
# Fallback to exactly equal
return x == y

Expand Down Expand Up @@ -357,17 +366,20 @@ def diff_single_outputs(a, b, path="/cells/*/outputs/*",
if a.output_type in ("execute_result", "display_data"):
di = MappingDiffBuilder()

# Separate data from output during diffing:
tmp_data = a.pop('data')
a_conj = copy.deepcopy(a)
a.data = tmp_data
a_conj = copy.deepcopy(a) # Output without data
a.data = tmp_data # Restore output
tmp_data = b.pop('data')
b_conj = copy.deepcopy(b)
b.data = tmp_data
# Only diff outputs without data:
dd_conj = diff(a_conj, b_conj)
if dd_conj:
for e in dd_conj:
di.append(e)

# Only diff data:
dd = diff_mime_bundle(a.data, b.data, path=path+"/data")
if dd:
di.patch("data", dd)
Expand Down
25 changes: 20 additions & 5 deletions nbdime/profiling.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
import time
import contextlib
from tabulate import tabulate
from functools import wraps


def _sort_time(value):
Expand All @@ -55,18 +56,31 @@ def __init__(self, verbose=False, enabled=True):

@contextlib.contextmanager
def time(self, key):
    """Time the enclosed ``with`` body and accumulate the result under *key*.

    When the timer is disabled the body runs untimed and nothing is
    recorded. Otherwise the elapsed seconds are added to
    ``self.map[key]['time']`` and ``self.map[key]['calls']`` is incremented,
    creating the entry on first use.
    """
    if not self.enabled:
        yield
        return
    start = time.time()
    try:
        yield
    finally:
        # Record the sample even if the timed body raised, so failing
        # calls are not silently missing from the profile.
        secs = time.time() - start
        entry = self.map.get(key)
        if entry is None:
            self.map[key] = dict(time=secs, calls=1)
        else:
            entry['time'] += secs
            entry['calls'] += 1

def profile(self, key=None):
    """Return a decorator that times every call of the wrapped function.

    Each call is executed inside ``self.time(label)``. The label is *key*
    when given; otherwise it is the decorated function's ``__name__``, or
    ``'unknown'`` when that is missing or empty.
    """
    def decorator(function):
        # Resolve the label in a local instead of rebinding ``key`` with
        # ``nonlocal``: that keyword is a syntax error on Python 2.7, and
        # rebinding would make the first function's name stick if the same
        # decorator object is applied to several functions.
        label = key
        if label is None:
            label = getattr(function, '__name__', None) or 'unknown'

        @wraps(function)
        def inner(*args, **kwargs):
            with self.time(label):
                return function(*args, **kwargs)
        return inner
    return decorator

@contextlib.contextmanager
def enable(self):
old = self.enabled
Expand All @@ -88,8 +102,8 @@ def __str__(self):
for key, data in items:
time = data['time']
calls = data['calls']
lines.append((key, calls, time))
return tabulate(lines, headers=['Key', 'Calls', 'Time'])
lines.append((key, calls, time, time / calls))
return tabulate(lines, headers=['Key', 'Calls', 'Time', 'Time/Call'])


timer = TimePaths(enabled=False)
Expand All @@ -98,7 +112,8 @@ def __str__(self):
def profile_diff_paths(args=None):
    """Run the nbdiff application once with timing enabled and print the table.

    *args* is passed through unchanged to ``nbdime.nbdiffapp.main``.
    """
    import nbdime.nbdiffapp
    import nbdime.profiling
    # Run the application exactly once, inside the enabled timer, so that
    # every timed section contributes to the report.
    with nbdime.profiling.timer.enable():
        nbdime.nbdiffapp.main(args)
    data = str(nbdime.profiling.timer)
    print(data)

Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,7 @@ def run(self):

':python_version == "2.7"': [
'backports.shutil_which',
'backports.functools_lru_cache',
],
}

Expand Down