-
Notifications
You must be signed in to change notification settings - Fork 51
/
shorten.el
223 lines (186 loc) · 8.57 KB
/
shorten.el
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
;;; shorten.el --- component-wise string shortener
;; Copyright (C) 2013 John J Foerch <jjfoerch@earthlink.net>
;; Keywords: extensions
;; Author: John J Foerch <jjfoerch@earthlink.net>
;; URL: https://github.com/emacs-circe/circe/blob/master/shorten.el
;; This program is free software: you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.
;; This program is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.
;; You should have received a copy of the GNU General Public License
;; along with this program. If not, see <http://www.gnu.org/licenses/>.
;;; Commentary:
;; This is a component-wise string shortener, meaning that, given a list
;; of strings, it breaks each string into parts, then computes shortest
;; prefix of each part with respect to others of the same 'depth', such
;; that when joined back together, the shortened form of the whole string
;; remains unique within the resulting list. Many styles of shortening
;; are made possible via three functions that the caller may provide: the
;; split function, the join function, and the validate-component function.
;;
;; Strings are broken with the value of `shorten-split-function' (a
;; procedure string->list), and shortened components are rejoined with the
;; value of `shorten-join-function' (a procedure list->string[*]). The
;; default split and join functions break the string on word boundaries,
;; and rejoin on the empty string. Potential shortened forms of
;; components are tested with `shorten-validate-component-function'; its
;; default value passes only if its argument contains at least one
;; word-constituent character (regexp \w), meaning that by default,
;; components consisting entirely of non-word characters will not be
;; shortened, and components that start with non-word characters will only
;; be shortened so much that they have at least one word-constituent
;; character in them.
;;
;; The main entry point is `shorten-strings', which takes a list of strings
;; as its argument and returns an alist ((STRING . SHORTENED-STRING) ...).
;;
;; [*] Also takes a second argument; see docstring of
;; `shorten-join-function'.
;;; History:
;; - Version 0.1 (March 7, 2013): initial release
;;; Code:
;; Tree utils
;;
(defsubst shorten-make-tree-root ()
(cons nil nil))
(defsubst shorten-tree-make-entry (token short full)
(list token short full nil))
(defsubst shorten-tree-token (entry)
(car entry))
(defsubst shorten-tree-fullname (entry)
(nth 2 entry))
(defsubst shorten-tree-descendants (entry)
(nthcdr 3 entry))
(defsubst shorten-tree-set-shortened (entry short)
(setcar (cdr entry) short))
(defsubst shorten-tree-set-fullname (entry full)
(setcar (nthcdr 2 entry) full))
(defsubst shorten-tree-insert (node item)
(when (car node)
(setcdr node (cons (car node) (cdr node))))
(setcar node item))
;; Caller configuration
;;
(defun shorten-split (s)
(split-string s "\\b" t))
(defun shorten-join (lst &optional tail-count)
(mapconcat #'identity lst ""))
(defun shorten-join-sans-tail (lst tail-count)
"A shorten-join that drops unnecessary tail components."
(shorten-join (butlast lst tail-count)))
(defun shorten-validate-component (str)
(string-match-p "\\w" str))
(defvar shorten-split-function #'shorten-split
"Value should be a function of string->list that breaks a
string into components. The default breaks on word-boundaries.
To get simple prefix shortening, bind this to `list'.
Users should not generally change the global value of this
variable; instead, bind it dynamically around calls to
`shorten-strings'.")
(defvar shorten-join-function #'shorten-join
"A function that takes a list of components and a tail-count,
and returns a joined string. Tail-count is the number of
components on the end of the list that are not needed to uniquify
the result, and so may be safely dropped if aggressive shortening
is desired. The default preserves tail components, and joins the
list on the empty string.
Users should not generally change the global value of this
variable; instead, bind it dynamically around calls to
`shorten-strings'.")
(defvar shorten-validate-component-function #'shorten-validate-component
"Predicate that returns t if a proposed shortened form of a
single component is acceptable, nil if a longer one should be
tried. The default validates only when the candidate contains at
least one word-constituent character, thus strings consisting of
punctuation will not be shortened. For aggressive shortening,
bind to a procedure that always returns t.
Users should not generally change the global value of this
variable; instead, bind it dynamically around calls to
`shorten-strings'.")
;; Main procedures
;;
(defun shorten-one (str others)
"Return shortest unique prefix of STR among OTHERS, or STR if
it cannot be shortened. If STR is a member of OTHERS (tested
with `eq') that entry is ignored. The value of
`shorten-validate-component-function' will be used to validate
any prefix."
(let ((max (length str))
(len 1))
(or (catch 'return
(while (< len max)
(let ((prefix (substring str 0 len)))
(when (funcall shorten-validate-component-function prefix)
(when (catch 'return
(dolist (other others t)
(when (and (>= (length other) len)
(string= (substring other 0 len) prefix)
(not (eq other str)))
(throw 'return nil))))
(throw 'return prefix)))
(setq len (1+ len)))))
str)))
(defun shorten-walk-internal (node path tail-count result-out)
(let ((others (mapcar #'car node)))
(setq tail-count (if (cdr node) 0 (1+ tail-count)))
(dolist (entry node)
(let* ((token (shorten-tree-token entry))
(shortened (shorten-one token others))
(path (cons shortened path))
(fullname (shorten-tree-fullname entry))
(descendants (shorten-tree-descendants entry))
(have-descendants (not (equal '(nil) descendants))))
(shorten-tree-set-shortened entry shortened)
;; if this entry has a fullname, add to result-out
(when fullname
(let ((joined (funcall shorten-join-function
(reverse path)
(if have-descendants 0 tail-count))))
(shorten-tree-insert result-out (cons fullname joined))))
;; if this entry has descendants, recurse
(when have-descendants
(shorten-walk-internal descendants path
(if fullname -1 tail-count)
result-out))))))
(defun shorten-walk (tree)
"Takes a tree of the type made by `shorten-make-tree' and
returns an alist ((STRING . SHORTENED-STRING) ...). Uses
`shorten-join-function' to join shortened components back
together into SHORTENED-STRING. See also
`shorten-validate-component-function'."
(let ((result-out (shorten-make-tree-root)))
(shorten-walk-internal tree '() -1 result-out)
(if (equal '(nil) result-out) nil result-out)))
(defun shorten-make-tree (strings)
"Takes a list of strings and returns a tree of the type used by
`shorten-walk' to generate shortened strings. Uses
`shorten-split-function' to split the strings."
(let ((tree (shorten-make-tree-root)))
(dolist (s strings)
(let ((node tree)
(tokens (funcall shorten-split-function s))
(entry nil))
;; create a path in tree for tokens
(dolist (token tokens)
(setq entry (assoc token node))
(when (not entry)
(setq entry (shorten-tree-make-entry token nil nil))
(shorten-tree-insert node entry))
(setq node (shorten-tree-descendants entry)))
;; for the last token, set 'fullname'
(shorten-tree-set-fullname entry s)))
(if (equal tree '(nil)) nil tree)))
;;;###autoload
(defun shorten-strings (strings)
"Takes a list of strings and returns an alist ((STRING
. SHORTENED-STRING) ...). Uses `shorten-split-function' to split
the strings, and `shorten-join-function' to join shortened
components back together into SHORTENED-STRING. See also
`shorten-validate-component-function'."
(shorten-walk (shorten-make-tree strings)))
(provide 'shorten)
;;; shorten.el ends here