Skip to content

Commit

Permalink
Merge pull request #257 from vidartf/fixes
Browse files Browse the repository at this point in the history
Various fixes and minor adjustments
  • Loading branch information
vidartf authored Feb 16, 2017
2 parents 9e6f400 + 4880e52 commit 352e8a5
Show file tree
Hide file tree
Showing 9 changed files with 141 additions and 41 deletions.
27 changes: 7 additions & 20 deletions nbdime-web/src/common/dragpanel.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,25 +31,6 @@ import {
} from 'phosphor/lib/dom/dragdrop';


//TODO: Remove when fixed:
/**
* Workaround for phosphor bug #165.
*/
class WorkaroundDrag extends Drag {
}

function _attachDragImage(clientX: number, clientY: number): void {
(Drag as any).prototype._attachDragImage.call(this, clientX + window.pageXOffset, clientY + window.pageYOffset);
}
function _moveDragImage(clientX: number, clientY: number): void {
(Drag as any).prototype._moveDragImage.call(this, clientX + window.pageXOffset, clientY + window.pageYOffset);
}

// monkeypatch:
(WorkaroundDrag.prototype as any)._attachDragImage = _attachDragImage;
(WorkaroundDrag.prototype as any)._moveDragImage = _moveDragImage;


/**
* The class name added to the DropPanel
*/
Expand Down Expand Up @@ -464,7 +445,7 @@ abstract class DragDropPanelBase extends DropPanel {
let dragImage = this.getDragImage(handle);

// Set up the drag event.
this.drag = new WorkaroundDrag({
this.drag = new Drag({
dragImage: dragImage || undefined,
mimeData: new MimeData(),
supportedActions: 'all',
Expand All @@ -475,6 +456,12 @@ abstract class DragDropPanelBase extends DropPanel {

// Start the drag and remove the mousemove listener.
this.drag.start(clientX, clientY).then(this.onDragComplete.bind(this));
//TODO: Remove when updating to phosphor 1.0:
// Workaround for phosphor bug #165:
if (dragImage) {
dragImage.style.position = 'fixed';
}
// End workaround
document.removeEventListener('mousemove', this, true);
document.removeEventListener('mouseup', this, true);
}
Expand Down
1 change: 1 addition & 0 deletions nbdime/diff_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ def op_removerange(key, length):

def op_patch(key, diff):
    """Create a diff entry to patch value at key with diff."""
    # A patch op without a sub-diff is meaningless, so reject None early
    # rather than producing a malformed DiffEntry.
    assert diff is not None
    return DiffEntry(op=DiffOp.PATCH, key=key, diff=diff)


Expand Down
12 changes: 10 additions & 2 deletions nbdime/diffing/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,13 @@ def compare_strings_approximate(x, y, threshold=0.7):
# TODO: Add configuration framework
# TODO: Tune threshold with realistic sources

# Cutoff on equality (Python has fast hash functions for strings)
if x == y:
# Fast cutoff when one is empty
if bool(x) != bool(y):
return False

# Cutoff on equality: Python has fast hash functions for strings,
# and lists of strings also work fine
if len(x) == len(y) and x == y:
return True

# TODO: Investigate performance and quality of this difflib ratio approach,
Expand All @@ -55,9 +60,12 @@ def compare_strings_approximate(x, y, threshold=0.7):
# equal items, at least in the Myers diff algorithm.
# Most other comparisons will likely not be very similar,
# and the (real_)quick_ratio cutoffs will speed up those.

# So the heavy ratio function is only used for close calls.
# s = difflib.SequenceMatcher(lambda c: c in (" ", "\t"), x, y, autojunk=False)
s = difflib.SequenceMatcher(None, x, y, autojunk=False)

# Use only the fast ratio approximations first
if s.real_quick_ratio() < threshold:
return False
if s.quick_ratio() < threshold:
Expand Down
12 changes: 7 additions & 5 deletions nbdime/diffing/notebooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,13 @@
__all__ = ["diff_notebooks"]

# A regexp matching base64 encoded data
_base64 = re.compile(r'^(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?$', re.MULTILINE | re.UNICODE)
_base64 = re.compile(r'^(?:[a-z0-9+/]{4})*(?:[a-z0-9+/]{2}==|[a-z0-9+/]{3}=)?$', re.MULTILINE | re.UNICODE | re.IGNORECASE)

# A regexp matching common python repr-style output like
# <module.type at 0xmemoryaddress>
re_repr = re.compile(r"<[a-zA-Z0-9._]+ at 0x[a-zA-Z0-9]+>")
re_repr = re.compile(r"<[a-z0-9._]+ at 0x[a-f0-9]{8,16}>", re.IGNORECASE)

re_pointer = re.compile(r"0[xX][a-zA-Z0-9]{8,16}")
re_pointer = re.compile(r"0x[a-f0-9]{8,16}", re.IGNORECASE)


# List of mimes we can diff recursively
Expand Down Expand Up @@ -357,10 +357,12 @@ def diff_single_outputs(a, b, path="/cells/*/outputs/*",
if a.output_type in ("execute_result", "display_data"):
di = MappingDiffBuilder()

tmp_data = a.pop('data')
a_conj = copy.deepcopy(a)
del a_conj['data']
a.data = tmp_data
tmp_data = b.pop('data')
b_conj = copy.deepcopy(b)
del b_conj['data']
b.data = tmp_data
dd_conj = diff(a_conj, b_conj)
if dd_conj:
for e in dd_conj:
Expand Down
1 change: 0 additions & 1 deletion nbdime/diffing/snakes.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
Utilities for computing 'snakes', or contiguous sequences of equal elements of two sequences.
"""

import operator
from ..diff_format import SequenceDiffBuilder
from .seq_bruteforce import bruteforce_compute_snakes

Expand Down
6 changes: 3 additions & 3 deletions nbdime/merging/decisions.py
Original file line number Diff line number Diff line change
Expand Up @@ -413,10 +413,10 @@ def push_patch_decision(decision, prefix):
assert dec.common_path[-1] == key, "Key %s not at end of %s" % (
key, dec.common_path)
dec.common_path = dec.common_path[:-1] # pop key
dec.local_diff = [op_patch(key, dec.local_diff)]
dec.remote_diff = [op_patch(key, dec.remote_diff)]
dec.local_diff = [op_patch(key, dec.local_diff)] if dec.local_diff else []
dec.remote_diff = [op_patch(key, dec.remote_diff)] if dec.remote_diff else []
if dec.action == "custom":
dec.custom_diff = [op_patch(key, dec.custom_diff)]
dec.custom_diff = [op_patch(key, dec.custom_diff)] if dec.custom_diff else []
return dec


Expand Down
15 changes: 5 additions & 10 deletions nbdime/merging/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,16 +25,6 @@
from nbdime.merging.decisions import pop_all_patch_decisions, push_patch_decision


# FIXME XXX: Main todos left:
# - Rewrite resolve_* to work directly on decisions object,
# trying to resolve any conflicted decisions found
# - _merge_strings is probably broken, test and fix
# - extend test_merge_notebooks_inline with more tests
# - make existing test suite pass, either by modifying tests or fixing remaining bugs
# - test that this still works with mergetool (it shouldn't need any changes in principle)
# - remove autoresolve.py after checking for anything more we need from there


# =============================================================================
#
# Utility code follows
Expand Down Expand Up @@ -167,6 +157,8 @@ def __unused__wrap_subconflicts(key, subconflicts):
for conf in subconflicts:
(path, ld, rd, strategy) = conf
assert path[-1] == key
assert ld is not None
assert rd is not None
path = path[:-1]
ld = [op_patch(key, ld)]
rd = [op_patch(key, rd)]
Expand Down Expand Up @@ -202,6 +194,7 @@ def adjust_patch_level(target_path, common_path, diff):
newdiff = []
for d in diff:
nd = d
assert nd is not None
for key in remainder_path:
nd = op_patch(key, nd)
newdiff.append(nd)
Expand All @@ -216,6 +209,8 @@ def collect_diffs(path, decisions):
rd = adjust_patch_level(path, d.common_path, d.remote_diff)
local_diff.extend(ld)
remote_diff.extend(rd)
local_diff = combine_patches(local_diff)
remote_diff = combine_patches(remote_diff)
return local_diff, remote_diff


Expand Down
107 changes: 107 additions & 0 deletions nbdime/profiling.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@

"""Tools for profiling diff/merge performance.
Sometimes diffing times can become unacceptably long.
Then the tools in this module can help you profile and figure out
where the time is spent. For a more rigorous profiling, consider using
cProfile, but for initial considerations these should be helpful.
Typical profiling usage:
Add some statements like
from nbdime.profiling import timer
with timer.time('Key to identify this segment'):
<code to time>
Then, launch `python -m nbdime.profiling notebookA.ipynb notebookB.ipynb`.
Example case:
Output diffing is slow. By cProfile/timer, the time spent has been narrowed
to comparison of MIME types (`_compare_mimedata()`). To figure out which
MIME type is taking all the time, the following is added to that function:
with timer.time(mimetype):
<existing function body>
The output becomes:
Key Calls Time
---------- ------- ------------
text/html 6 74.9686
text/plain 12 0.000500202
image/png 5 0
From this we can see that the HTML comparison is really blowing up!
Further inspection reveals that there are two html outputs in each
notebook that contain an embedded base64 image. These caused the text
diff to blow up.
"""

import time
import contextlib
from tabulate import tabulate


def _sort_time(value):
time = value[1]['time']
return -time


class TimePaths(object):
    """Accumulate wall-clock timings per user-supplied key.

    Wrap code in the `time(key)` context manager; repeated entries under
    the same key have their durations summed and their calls counted.
    Timing can be toggled globally (`enabled`) or temporarily via the
    `enable()`/`disable()` context managers.
    """

    def __init__(self, verbose=False, enabled=True):
        self.verbose = verbose
        # Maps key -> dict(time=<total seconds>, calls=<number of entries>)
        self.map = {}
        self.enabled = enabled

    @contextlib.contextmanager
    def time(self, key):
        """Time the enclosed block and accumulate the result under `key`."""
        start = time.time()
        try:
            yield
        finally:
            # Record in a finally so a raising block still contributes its
            # partial duration (the original silently dropped it).
            secs = time.time() - start
            if self.enabled:
                entry = self.map.setdefault(key, dict(time=0.0, calls=0))
                entry['time'] += secs
                entry['calls'] += 1

    @contextlib.contextmanager
    def enable(self):
        """Temporarily enable timing for the duration of a with-block."""
        old = self.enabled
        self.enabled = True
        try:
            yield
        finally:
            # Restore previous state even if the block raises; the original
            # leaked `enabled=True` on exceptions.
            self.enabled = old

    @contextlib.contextmanager
    def disable(self):
        """Temporarily disable timing for the duration of a with-block."""
        old = self.enabled
        self.enabled = False
        try:
            yield
        finally:
            self.enabled = old

    def __str__(self):
        # Sort entries by accumulated time, largest first
        # (the original comment claimed "sort by path", which was wrong).
        items = sorted(self.map.items(), key=_sort_time)
        lines = []
        for key, data in items:
            lines.append((key, data['calls'], data['time']))
        return tabulate(lines, headers=['Key', 'Calls', 'Time'])


timer = TimePaths(enabled=False)


def profile_diff_paths(args=None):
    """Run the nbdiff CLI entry point, then print the accumulated timings.

    `args` is forwarded to `nbdime.nbdiffapp.main`; None means use sys.argv.
    """
    # Imported locally so merely importing this module stays side-effect free.
    import nbdime.nbdiffapp
    import nbdime.profiling

    nbdime.nbdiffapp.main(args)
    print(str(nbdime.profiling.timer))


if __name__ == "__main__":
    # Entry point for `python -m nbdime.profiling notebookA.ipynb notebookB.ipynb`.
    profile_diff_paths()
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,7 @@ def run(self):
'jsonschema',
'mock',
'requests',
'tabulate', # For profiling
],
'docs': [
'sphinx',
Expand Down

0 comments on commit 352e8a5

Please sign in to comment.