Skip to content

Commit

Permalink
newer version
Browse files Browse the repository at this point in the history
  • Loading branch information
jdfekete committed Feb 12, 2015
1 parent f2be93b commit 829d77c
Show file tree
Hide file tree
Showing 47 changed files with 4,391 additions and 110 deletions.
26 changes: 23 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
NODE_PATH = ./node_modules
JS_COMPILER = $(NODE_PATH)/uglify-js/bin/uglifyjs
JS_TESTER = $(NODE_PATH)/vows/bin/vows
JS_TESTER = $(NODE_PATH)/vows/bin/vows --nocolor -v

all: \
reorder.v1.js \
Expand All @@ -9,10 +9,30 @@ all: \

reorder.v1.js: \
src/core.js \
src/aliases.js \
src/debug.js \
src/mean.js \
src/sum.js \
src/distance.js \
src/range.js \
src/transpose.js \
src/correlation.js \
src/heap.js \
src/permutation.js \
src/graph.js \
src/dijkstra.js \
src/dist.js \
src/random.js \
src/permute.js \
src/leaforder.js
src/stablepermute.js \
src/hcluster.js \
src/leaforder.js \
src/order.js \
src/covariance.js \
src/poweriteration.js \
src/sortorder.js \
src/pca1d.js \
src/ca.js

test: all
@$(JS_TESTER)
Expand All @@ -38,4 +58,4 @@ package.json: src/package.js
@chmod a-w $@

clean:
rm -f reorder*.js package.json
rm -rf reorder*.js package.json node_modules
18 changes: 18 additions & 0 deletions TODO
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
Interaction:
drag row/columns
zoom in/out
MatLink

Reordering:
Dijkstra distance matrix
Koren multiscale ACE: http://dx.doi.org/10.1109/INFVIS.2002.1173159
Inverse Cuthill-McKee
Tables/bipartite networks
Biclustering
Contingency tables methods:
PCA ordering
Correspondence analysis
Chun-Hou Chen methods: http://gap.stat.sinica.edu.tw/Papers/GAP_2002.pdf
Elliptical
Barycenter method: http://www.informatica.si/PDF/29-3/13_Makinen-The%20Barycenter%20Heuristic....pdf

94 changes: 94 additions & 0 deletions orig/barjoseph.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
# -*- coding: utf-8 -*-

"""optimal dendrogram ordering
implementation of binary tree ordering described in [Bar-Joseph et al., 2003]
by Renaud Blanch.
[Bar-Joseph et al., 2003]
K-ary Clustering with Optimal Leaf Ordering for Gene Expression Data.
Ziv Bar-Joseph, Erik D. Demaine, David K. Gifford, Angèle M. Hamel,
Tommy S. Jaakkola and Nathan Srebro
Bioinformatics, 19(9), pp 1070-8, 2003
http://www.cs.cmu.edu/~zivbj/compBio/k-aryBio.pdf
"""


def optimal(v, S, left, right, is_leaf, is_empty):
    """Return the leaves of dendrogram v in optimal order.

    The tree shape is kept; only the left/right orientation of each
    internal node is chosen.  The returned order is the one for which the
    sum of S(a, b) over consecutive leaves a, b is the smallest, so S is
    treated as a cost: pass a distance (dissimilarity), or negate a
    similarity.

    v               is the root node of a dendrogram
    S               is the pairwise matrix i.e.
                    S(i, j) should return the cost of putting the
                    leaves i & j next to each other
    left(node)      is the root of the left subtree of node
    right(node)     is the root of the right subtree of node
    is_leaf(node)   is True iff node is a leaf
    is_empty(node)  is True iff node is a leaf subtree i.e.:
                    if is_leaf(node):
                        assert is_empty(left(node))
                        assert is_empty(right(node))

    Nodes and leaves must be hashable: they are used as cache keys.

    Returns a list of leaves.  An empty root gives [], a leaf root gives
    [v].  If no pairing has a finite cost (e.g. S only returns inf or
    nan), the leaves are returned in their natural left-to-right order.
    """

    from utils import memoise

    S = memoise(S)

    @memoise
    def T(node):
        """leaves of the subtree rooted at node, from left to right"""
        if is_empty(node):
            return []
        if is_leaf(node):
            return [node]
        return T(left(node)) + T(right(node))

    @memoise
    def leaf_set(node):
        """same leaves as T(node), as a frozenset for fast membership"""
        return frozenset(T(node))

    @memoise
    def M(node, i, j):
        """(cost, order) of the best ordering of node that has
        leftmost leaf i and rightmost leaf j"""

        # halting
        if is_leaf(node):
            return 0., [node]

        # orient the two sub-trees so that w holds i and x holds j
        lhs, rhs = left(node), right(node)
        if i in leaf_set(lhs) and j in leaf_set(rhs):
            w, x = lhs, rhs
        elif i in leaf_set(rhs) and j in leaf_set(lhs):
            w, x = rhs, lhs
        else:
            # raised explicitly so that the check survives "python -O"
            raise AssertionError(
                "%s not least common ancestor of %s & %s" % (node, i, j))

        # The rightmost leaf k of w must come from the half of w that does
        # not hold i.  When w is a leaf both halves are empty and k is i.
        w_lhs, w_rhs = left(w), right(w)
        Ks = T(w_lhs) if i in leaf_set(w_rhs) else T(w_rhs)
        if not Ks:
            Ks = [i]

        # Likewise the leftmost leaf m of x comes from the half without j.
        x_lhs, x_rhs = left(x), right(x)
        Ls = T(x_lhs) if j in leaf_set(x_rhs) else T(x_rhs)
        if not Ls:
            Ls = [j]

        # minimise the total cost; the first best candidate wins ties
        best_cost, best_order = float("inf"), None
        for k in Ks:
            w_cost, w_order = M(w, i, k)
            for m in Ls:
                x_cost, x_order = M(x, m, j)
                cost = w_cost + S(k, m) + x_cost
                if cost < best_cost:
                    best_cost, best_order = cost, w_order + x_order

        return best_cost, best_order

    # degenerate roots have nothing to reorder
    if is_empty(v):
        return []
    if is_leaf(v):
        return [v]

    # and now the external loop; the natural order is the fallback when
    # no candidate has a finite cost
    best_cost, best_order = float("inf"), list(T(v))
    for i in T(left(v)):
        for j in T(right(v)):
            cost, order = M(v, i, j)
            if cost < best_cost:
                best_cost, best_order = cost, order

    return best_order
44 changes: 44 additions & 0 deletions orig/test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
from hcluster import pdist, linkage, leaves_list, squareform, dendrogram
import numpy as np
import matplotlib as mp

# Demo script: cluster a small binary matrix with hcluster, then reorder the
# dendrogram leaves with the Bar-Joseph optimal leaf ordering and print them.

metric = 'euclidean'
method = 'single'

# Six rows of nine 0/1 values; the rows are the items being clustered.
data = np.matrix([
    [1, 1, 1, 0, 0, 0, 0, 0, 0],
    [0, 0, 0, 0, 0, 0, 1, 1, 1],
    [0, 0, 0, 0, 0, 0, 1, 1, 0],
    [0, 0, 0, 1, 1, 1, 1, 1, 0],
    [0, 0, 0, 1, 1, 1, 0, 0, 0],
    [0, 0, 0, 1, 1, 1, 0, 0, 0]])

y = pdist(data, metric=metric)
Z = linkage(y, method=method, metric=metric)
dendrogram(Z)
# Each row of Z is unpacked as (left child, right child, s, n); ids become
# ints and negative s values are clamped to 0.
Z = [(int(l), int(r), max(0., s), int(n)) for (l, r, s, n) in Z]  # cleaning

leaves = list(leaves_list(Z))
count = len(leaves)
# Node ids below count are leaves; id count + k is row k of Z (see the
# left/right lambdas below), so the last row is the root.
root = len(Z)+count-1

X = squareform(y)
assert len(X) == count


from utils import memoise


# bar-joseph optimal ordering ################################################

from barjoseph import optimal

leaves = optimal(
    root,
    S=lambda i, j: X[i][j],
    left=lambda i: None if i < count else Z[i-count][0],
    right=lambda i: None if i < count else Z[i-count][1],
    is_leaf=lambda i: i < count,
    is_empty=lambda v: v is None,
)

# Parenthesised form prints the same on Python 2 and Python 3.
print(leaves)
15 changes: 15 additions & 0 deletions orig/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
def memoise(func):
    """Memoization decorator: trade space for speed by caching results.

    Returns a wrapper around func that stores the result of every call in
    a dict private to that wrapper and returns the stored result when the
    same arguments are seen again.  The cache is never cleared.

    All positional and keyword argument values must be hashable; an
    unhashable one raises TypeError.  Keyword arguments are part of the
    cache key, so f(1, b=2) and f(1, 2) are cached separately.

    The wrapper carries over the name, docstring and other metadata of
    func.
    """
    from functools import wraps

    results = {}
    # Unique separator between positional and keyword parts of a key, so a
    # keyword call can never be mistaken for a purely positional one.
    kw_mark = object()

    @wraps(func)
    def f(*args, **kwargs):
        key = args
        if kwargs:
            key += (kw_mark,) + tuple(sorted(kwargs.items()))
        try:
            return results[key]
        except KeyError:
            pass
        # func is called outside the try so that a KeyError raised by func
        # itself is not mistaken for a cache miss.
        result = results[key] = func(*args, **kwargs)
        return result

    return f
Loading

0 comments on commit 829d77c

Please sign in to comment.