-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSuffixArray.java
103 lines (86 loc) · 2.63 KB
/
SuffixArray.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
/**
* Abstract class that captures the behavior of a suffix array.
*
* @author William Fiset, [email protected]
*/
public abstract class SuffixArray {
// Length of the suffix array
protected final int N;
// T is the text
protected int[] T;
// The sorted suffix array values.
protected int[] sa;
// Longest Common Prefix array
protected int[] lcp;
private boolean constructedSa = false;
private boolean constructedLcpArray = false;
public SuffixArray(int[] text) {
if (text == null) throw new IllegalArgumentException("Text cannot be null.");
this.T = text;
this.N = text.length;
}
public int getTextLength() {
return T.length;
}
// Returns the suffix array.
public int[] getSa() {
buildSuffixArray();
return sa;
}
// Returns the LCP array.
public int[] getLcpArray() {
buildLcpArray();
return lcp;
}
// Builds the suffix array by calling the construct() method.
protected void buildSuffixArray() {
if (constructedSa) return;
construct();
constructedSa = true;
}
// Builds the LCP array by first creating the SA and then running the kasai algorithm.
protected void buildLcpArray() {
if (constructedLcpArray) return;
buildSuffixArray();
kasai();
constructedLcpArray = true;
}
protected static int[] toIntArray(String s) {
if (s == null) return null;
int[] t = new int[s.length()];
for (int i = 0; i < s.length(); i++) t[i] = s.charAt(i);
return t;
}
// The suffix array construction algorithm is left undefined
// as there are multiple ways to do this.
protected abstract void construct();
// Use Kasai algorithm to build LCP array
// http://www.mi.fu-berlin.de/wiki/pub/ABI/RnaSeqP4/suffix-array.pdf
private void kasai() {
lcp = new int[N];
int[] inv = new int[N];
for (int i = 0; i < N; i++) inv[sa[i]] = i;
for (int i = 0, len = 0; i < N; i++) {
if (inv[i] > 0) {
int k = sa[inv[i] - 1];
while ((i + len < N) && (k + len < N) && T[i + len] == T[k + len]) len++;
lcp[inv[i]] = len;
if (len > 0) len--;
}
}
}
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
sb.append("-----i-----SA-----LCP---Suffix\n");
for (int i = 0; i < N; i++) {
int suffixLen = N - sa[i];
char[] suffixArray = new char[suffixLen];
for (int j = sa[i], k = 0; j < N; j++, k++) suffixArray[k] = (char) T[j];
String suffix = new String(suffixArray);
String formattedStr = String.format("% 7d % 7d % 7d %s\n", i, sa[i], lcp[i], suffix);
sb.append(formattedStr);
}
return sb.toString();
}
}