diff --git a/nbdime-web/src/common/dragpanel.ts b/nbdime-web/src/common/dragpanel.ts index f4e879a7..89fc5658 100644 --- a/nbdime-web/src/common/dragpanel.ts +++ b/nbdime-web/src/common/dragpanel.ts @@ -31,25 +31,6 @@ import { } from 'phosphor/lib/dom/dragdrop'; -//TODO: Remove when fixed: -/** - * Workaround for phosphor bug #165. - */ -class WorkaroundDrag extends Drag { -} - -function _attachDragImage(clientX: number, clientY: number): void { - (Drag as any).prototype._attachDragImage.call(this, clientX + window.pageXOffset, clientY + window.pageYOffset); -} -function _moveDragImage(clientX: number, clientY: number): void { - (Drag as any).prototype._moveDragImage.call(this, clientX + window.pageXOffset, clientY + window.pageYOffset); -} - -// monkeypatch: -(WorkaroundDrag.prototype as any)._attachDragImage = _attachDragImage; -(WorkaroundDrag.prototype as any)._moveDragImage = _moveDragImage; - - /** * The class name added to the DropPanel */ @@ -464,7 +445,7 @@ abstract class DragDropPanelBase extends DropPanel { let dragImage = this.getDragImage(handle); // Set up the drag event. - this.drag = new WorkaroundDrag({ + this.drag = new Drag({ dragImage: dragImage || undefined, mimeData: new MimeData(), supportedActions: 'all', @@ -475,6 +456,12 @@ abstract class DragDropPanelBase extends DropPanel { // Start the drag and remove the mousemove listener. this.drag.start(clientX, clientY).then(this.onDragComplete.bind(this)); + //TODO: Remove when updating to phosphor 1.0: + // Workaround for phosphor bug #165: + if (dragImage) { + dragImage.style.position = 'fixed'; + } + // End workaround document.removeEventListener('mousemove', this, true); document.removeEventListener('mouseup', this, true); } diff --git a/nbdime/diff_format.py b/nbdime/diff_format.py index 4c6a2ba3..759d1d19 100644 --- a/nbdime/diff_format.py +++ b/nbdime/diff_format.py @@ -94,6 +94,7 @@ def op_removerange(key, length): def op_patch(key, diff): "Create a diff entry to patch value at key with diff." + assert diff is not None return DiffEntry(op=DiffOp.PATCH, key=key, diff=diff) diff --git a/nbdime/diffing/generic.py b/nbdime/diffing/generic.py index a4bd5c2f..1686cef3 100644 --- a/nbdime/diffing/generic.py +++ b/nbdime/diffing/generic.py @@ -38,8 +38,13 @@ def compare_strings_approximate(x, y, threshold=0.7): # TODO: Add configuration framework # TODO: Tune threshold with realistic sources - # Cutoff on equality (Python has fast hash functions for strings) - if x == y: + # Fast cutoff when one is empty + if bool(x) != bool(y): + return False + + # Cutoff on equality: Python has fast hash functions for strings, + # and lists of strings also works fine + if len(x) == len(y) and x == y: return True # TODO: Investigate performance and quality of this difflib ratio approach, @@ -55,9 +60,12 @@ def compare_strings_approximate(x, y, threshold=0.7): # equal items, at least in the Myers diff algorithm. # Most other comparisons will likely not be very similar, # and the (real_)quick_ratio cutoffs will speed up those. + # So the heavy ratio function is only used for close calls. # s = difflib.SequenceMatcher(lambda c: c in (" ", "\t"), x, y, autojunk=False) s = difflib.SequenceMatcher(None, x, y, autojunk=False) + + # Use only the fast ratio approximations first if s.real_quick_ratio() < threshold: return False if s.quick_ratio() < threshold: diff --git a/nbdime/diffing/notebooks.py b/nbdime/diffing/notebooks.py index dd4f79f5..829ba42a 100644 --- a/nbdime/diffing/notebooks.py +++ b/nbdime/diffing/notebooks.py @@ -27,13 +27,13 @@ __all__ = ["diff_notebooks"] # A regexp matching base64 encoded data -_base64 = re.compile(r'^(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?$', re.MULTILINE | re.UNICODE) +_base64 = re.compile(r'^(?:[a-z0-9+/]{4})*(?:[a-z0-9+/]{2}==|[a-z0-9+/]{3}=)?$', re.MULTILINE | re.UNICODE | re.IGNORECASE) # A regexp matching common python repr-style output like # -re_repr = re.compile(r"<[a-zA-Z0-9._]+ at 0x[a-zA-Z0-9]+>") +re_repr = re.compile(r"<[a-z0-9._]+ at 0x[a-f0-9]{8,16}>", re.IGNORECASE) -re_pointer = re.compile(r"0[xX][a-zA-Z0-9]{8,16}") +re_pointer = re.compile(r"0x[a-f0-9]{8,16}", re.IGNORECASE) # List of mimes we can diff recursively @@ -357,10 +357,12 @@ def diff_single_outputs(a, b, path="/cells/*/outputs/*", if a.output_type in ("execute_result", "display_data"): di = MappingDiffBuilder() + tmp_data = a.pop('data') a_conj = copy.deepcopy(a) - del a_conj['data'] + a.data = tmp_data + tmp_data = b.pop('data') b_conj = copy.deepcopy(b) - del b_conj['data'] + b.data = tmp_data dd_conj = diff(a_conj, b_conj) if dd_conj: for e in dd_conj: diff --git a/nbdime/diffing/snakes.py b/nbdime/diffing/snakes.py index 133dd2e7..3c421532 100644 --- a/nbdime/diffing/snakes.py +++ b/nbdime/diffing/snakes.py @@ -9,7 +9,6 @@ Utilities for computing 'snakes', or contiguous sequences of equal elements of two sequences. """ -import operator from ..diff_format import SequenceDiffBuilder from .seq_bruteforce import bruteforce_compute_snakes diff --git a/nbdime/merging/decisions.py b/nbdime/merging/decisions.py index 1050de69..0c25cf81 100644 --- a/nbdime/merging/decisions.py +++ b/nbdime/merging/decisions.py @@ -413,10 +413,10 @@ def push_patch_decision(decision, prefix): assert dec.common_path[-1] == key, "Key %s not at end of %s" % ( key, dec.common_path) dec.common_path = dec.common_path[:-1] # pop key - dec.local_diff = [op_patch(key, dec.local_diff)] - dec.remote_diff = [op_patch(key, dec.remote_diff)] + dec.local_diff = [op_patch(key, dec.local_diff)] if dec.local_diff else [] + dec.remote_diff = [op_patch(key, dec.remote_diff)] if dec.remote_diff else [] if dec.action == "custom": - dec.custom_diff = [op_patch(key, dec.custom_diff)] + dec.custom_diff = [op_patch(key, dec.custom_diff)] if dec.custom_diff else [] return dec diff --git a/nbdime/merging/generic.py b/nbdime/merging/generic.py index ca1cc817..c65584d5 100644 --- a/nbdime/merging/generic.py +++ b/nbdime/merging/generic.py @@ -25,16 +25,6 @@ from nbdime.merging.decisions import pop_all_patch_decisions, push_patch_decision -# FIXME XXX: Main todos left: -# - Rewrite resolve_* to work directly on decisions object, -# trying to resolve any conflicted decisions found -# - _merge_strings is probably broken, test and fix -# - extend test_merge_notebooks_inline with more tests -# - make existing test suite pass, either by modifying tests or fixing remaining bugs -# - test that this still works with mergetool (it shouldn't need any changes in principle) -# - remove autoresolve.py after checking for anything more we need from there - - # ============================================================================= # # Utility code follows @@ -167,6 +157,8 @@ def __unused__wrap_subconflicts(key, subconflicts): for conf in subconflicts: (path, ld, rd, strategy) = conf assert path[-1] == key + assert ld is not None + assert rd is not None path = path[:-1] ld = [op_patch(key, ld)] rd = [op_patch(key, rd)] @@ -202,6 +194,7 @@ def adjust_patch_level(target_path, common_path, diff): newdiff = [] for d in diff: nd = d + assert nd is not None for key in remainder_path: nd = op_patch(key, nd) newdiff.append(nd) @@ -216,6 +209,8 @@ def collect_diffs(path, decisions): rd = adjust_patch_level(path, d.common_path, d.remote_diff) local_diff.extend(ld) remote_diff.extend(rd) + local_diff = combine_patches(local_diff) + remote_diff = combine_patches(remote_diff) return local_diff, remote_diff diff --git a/nbdime/profiling.py b/nbdime/profiling.py new file mode 100644 index 00000000..87340b32 --- /dev/null +++ b/nbdime/profiling.py @@ -0,0 +1,107 @@ + +"""Tools for profiling diff/merge performance. + +Sometimes diffing times can become unacceptably long. +Then the tools in this module can help you profile and figure out +where the time is spent. For a more rigorous profiling, consider using +cPython, but for initial considerations these should be helpful. + +Typical profiling usage: +Add some statements like +from nbdime.profiling import timer +with timer.time('Key to identify this segment'): + + +Then, launch `python -m nbdime.profiling notebookA.ipynb notebookB.ipynb`. + +Example case: +Output diffing is slow. By cPython/timer, the time spent has been narrowed +to comparison of MIME types (`_compare_mimedata()`). To figure out which +MIME type is taking all the time, the following is added to that function: + + with timer.time(mimetype): + + +The output becomes: + + Key Calls Time + ---------- ------- ------------ + text/html 6 74.9686 + text/plain 12 0.000500202 + image/png 5 0 + +From this we can see that the HTML comparison is really blowing up! +Further inspection reveals that there are two html outputs in each +notebook that contain an embedded base64 image. These caused the text +diff to blow up. + +""" + +import time +import contextlib +from tabulate import tabulate + + +def _sort_time(value): + time = value[1]['time'] + return -time + + +class TimePaths(object): + def __init__(self, verbose=False, enabled=True): + self.verbose = verbose + self.map = {} + self.enabled = enabled + + @contextlib.contextmanager + def time(self, key): + start = time.time() + yield + end = time.time() + secs = end - start + if not self.enabled: + return + if key in self.map: + self.map[key]['time'] += secs + self.map[key]['calls'] += 1 + else: + self.map[key] = dict(time=secs, calls=1) + + @contextlib.contextmanager + def enable(self): + old = self.enabled + self.enabled = True + yield + self.enabled = old + + @contextlib.contextmanager + def disable(self): + old = self.enabled + self.enabled = False + yield + self.enabled = old + + def __str__(self): + # First, sort by path + items = sorted(self.map.items(), key=_sort_time) + lines = [] + for key, data in items: + time = data['time'] + calls = data['calls'] + lines.append((key, calls, time)) + return tabulate(lines, headers=['Key', 'Calls', 'Time']) + + +timer = TimePaths(enabled=False) + + +def profile_diff_paths(args=None): + import nbdime.nbdiffapp + import nbdime.profiling + nbdime.nbdiffapp.main(args) + data = str(nbdime.profiling.timer) + print(data) + + +if __name__ == "__main__": + profile_diff_paths() diff --git a/setup.py b/setup.py index 7f1c5e37..d4b7637e 100644 --- a/setup.py +++ b/setup.py @@ -245,6 +245,7 @@ def run(self): 'jsonschema', 'mock', 'requests', + 'tabulate', # For profiling ], 'docs': [ 'sphinx',