forked from phishman3579/java-algorithms-implementation
-
Notifications
You must be signed in to change notification settings - Fork 1
/
SuffixTrie.java
138 lines (127 loc) · 3.95 KB
/
SuffixTrie.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
package com.jwetherell.algorithms.data_structures;
import java.util.Set;
import java.util.TreeSet;
import com.jwetherell.algorithms.data_structures.Trie.Node;
import com.jwetherell.algorithms.data_structures.interfaces.ISuffixTree;
/**
* A suffix trie is a data structure that presents the suffixes of a given
* string in a way that allows for a particularly fast implementation of many
* important string operations.
* <p>
* This implementation is based upon a Trie which is NOT a compact trie.
* <p>
* @see <a href="https://en.wikipedia.org/wiki/Suffix_trie">Suffix Trie (Wikipedia)</a>
* <br>
* @author Justin Wetherell <phishman3579@gmail.com>
*/
@SuppressWarnings("unchecked")
public class SuffixTrie<C extends CharSequence> implements ISuffixTree<C> {
private Trie<C> tree;
/**
* Create a suffix trie from sequence
*
* @param sequence
* to create a suffix trie from.
*/
public SuffixTrie(C sequence) {
tree = new Trie<C>();
int length = sequence.length();
for (int i = 0; i < length; i++) {
CharSequence seq = sequence.subSequence(i, length);
tree.add((C) seq);
}
}
/**
* Add character sequence to the suffix trie.
*
* @param sequence
* to add to trie.
* @return True if added successfully.
*/
public boolean add(C sequence) {
int length = sequence.length();
for (int i = 0; i < length; i++) {
CharSequence seq = sequence.subSequence(i, length);
tree.add((C) seq);
}
return true;
}
/**
* Does the sequence exists in the trie.
*
* @param sequence
* to locate in the trie.
* @return True if sequence exists in trie.
*/
@Override
public boolean doesSubStringExist(C sequence) {
char[] chars = sequence.toString().toCharArray();
int length = chars.length;
Trie.Node current = tree.root;
for (int i = 0; i < length; i++) {
int idx = current.childIndex(chars[i]);
if (idx < 0)
return false;
current = current.getChild(idx);
}
return true;
}
/**
* Get all suffixes in the trie.
*
* @return set of suffixes in trie.
*/
@Override
public Set<String> getSuffixes() {
return this.getSuffixes(tree.root);
}
/**
* Get all suffixes at node.
*
* @param node
* to get all suffixes at.
* @return set of suffixes in trie at node.
*/
private Set<String> getSuffixes(Trie.Node node) {
StringBuilder builder = new StringBuilder();
if (node.character != Node.SENTINAL) builder.append(node.character);
Set<String> set = new TreeSet<String>();
if (node.isWord) {
set.add(builder.toString());
}
for (int i = 0; i < node.getChildrenSize(); i++) {
Trie.Node c = node.getChild(i);
set.addAll(getSuffixes(c, builder.toString()));
}
return set;
}
/**
* Get all suffixes at node and prepend the prefix.
*
* @param node
* to get all suffixes from.
* @param prefix
* to prepend to suffixes.
* @return set of suffixes in trie at node.
*/
private Set<String> getSuffixes(Trie.Node node, String prefix) {
StringBuilder builder = new StringBuilder(prefix);
if (node.character != Node.SENTINAL) builder.append(node.character);
Set<String> set = new TreeSet<String>();
if (node.isWord) {
set.add(builder.toString());
}
for (int i = 0; i < node.getChildrenSize(); i++) {
Trie.Node c = node.getChild(i);
set.addAll(getSuffixes(c, builder.toString()));
}
return set;
}
/**
* {@inheritDoc}
*/
@Override
public String toString() {
return Trie.TriePrinter.getString(tree);
}
}