suffix_array/
lib.rs

1fn sort_cyclic_shifts(s: &[char]) -> Vec<usize> {
2    let n = s.len();
3    const ALPHABET: usize = 256;
4    let mut cnt = vec![0; n.max(ALPHABET)];
5    for &ch in s {
6        cnt[ch as usize] += 1;
7    }
8    for i in 1..ALPHABET {
9        cnt[i] += cnt[i - 1];
10    }
11    let mut p = vec![!0; n];
12    // p[i] := the index of the i-th substring (starting at i and with length 2^k) in the sorted order
13    for (i, &ch) in s.iter().enumerate().rev() {
14        cnt[ch as usize] -= 1;
15        p[cnt[ch as usize]] = i;
16    }
17    let mut c = vec![!0; n];
18    // c[i] := the equivalence class to which the substring belongs
19    c[p[0]] = 0;
20    let mut classes = 1;
21    for w in p.windows(2) {
22        let (prev, cur) = (w[0], w[1]);
23        if s[prev] != s[cur] {
24            classes += 1;
25        }
26        c[cur] = classes - 1;
27    }
28    for h in (0..).take_while(|&h| 1 << h < n) {
29        let pn: Vec<usize> = p.iter().copied().map(|x| (n + x - (1 << h)) % n).collect();
30        #[allow(clippy::needless_range_loop)]
31        for i in 0..classes {
32            cnt[i] = 0;
33        }
34        for &x in &pn {
35            cnt[c[x]] += 1;
36        }
37        for i in 1..classes {
38            cnt[i] += cnt[i - 1];
39        }
40        for &x in pn.iter().rev() {
41            cnt[c[x]] -= 1;
42            p[cnt[c[x]]] = x;
43        }
44        let mut cn = vec![!0; n];
45        cn[p[0]] = 0;
46        classes = 1;
47        for w in p.windows(2) {
48            let (prev, cur) = (
49                (c[w[0]], c[(w[0] + (1 << h)) % n]),
50                (c[w[1]], c[(w[1] + (1 << h)) % n]),
51            );
52            if prev != cur {
53                classes += 1;
54            }
55            cn[w[1]] = classes - 1;
56        }
57        c = cn;
58    }
59    p
60}
61
62/// 文字列 `s` の suffix array を O(|s|log|s|) で求めます。
63///
64/// 返り値は `s.len()` を `n` としたとき、長さ `n` のベクタ `sa` であり次の条件を満たすものです。
65///
66/// - `s[sa[i]..]` が `s` の `n` 個ある suffix のうち辞書順で `i` 番目である
67///
68/// original: [CP-Algorithms](https://cp-algorithms.com/string/suffix-array.html)
69///
70/// # Examples
71/// ```
72/// use suffix_array::suffix_array;
73/// let s: Vec<char> = "mississippi".chars().collect();
74/// let sa = suffix_array(&s);
75/// assert_eq!(sa, vec![10, 7, 4, 1, 0, 9, 8, 6, 3, 5, 2]);
76/// // i
77/// // ippi
78/// // issippi
79/// // ississippi
80/// // mississippi
81/// // pi
82/// // ppi
83/// // sippi
84/// // sissippi
85/// // ssippi
86/// // ssissippi
87/// ```
88pub fn suffix_array(s: &[char]) -> Vec<usize> {
89    let mut s = s.to_vec();
90    s.push('$');
91    let sorted_shifts = sort_cyclic_shifts(&s);
92    sorted_shifts[1..].to_vec()
93}
94
95/// LCP 配列を O(|s|) で求めます。
96///
97/// 返り値は長さ `s.len() - 1` のベクタ `lcp` であり `lcp[i]` := `s[sa[i]..]` と `s[sa[i+1]..]` との最長共通接頭辞の長さ、です。
98///
99/// # Examples
100/// ```
101/// use suffix_array::{suffix_array, lcp_array};
102/// let s: Vec<char> = "mississippi".chars().collect();
103/// let sa = suffix_array(&s);
104/// let lcp = lcp_array(&s, &sa);
105/// assert_eq!(lcp, vec![1, 1, 4, 0, 0, 1, 0, 2, 1, 3]);
106pub fn lcp_array(s: &[char], sa: &[usize]) -> Vec<usize> {
107    let n = sa.len();
108    if n == 1 {
109        return vec![];
110    }
111    let mut rank = vec![!0; n];
112    for i in 0..n {
113        rank[sa[i]] = i;
114    }
115    let mut k = 0;
116    let mut lcp = vec![0; n - 1];
117    for i in 0..n {
118        if rank[i] + 1 == n {
119            k = 0;
120            continue;
121        }
122        if k >= 1 {
123            k -= 1;
124        }
125        let j = sa[rank[i] + 1];
126        while i + k < n && j + k < n && s[i + k] == s[j + k] {
127            k += 1;
128        }
129        lcp[rank[i]] = k;
130    }
131    lcp
132}
133
134#[cfg(test)]
135mod tests {
136    use crate::{lcp_array, suffix_array};
137
138    #[test]
139    fn test_small() {
140        let tests = vec![
141            ("a", vec![0], vec![]),
142            ("aa", vec![1, 0], vec![1]),
143            ("abc", vec![0, 1, 2], vec![0, 0]),
144            ("aaba", vec![3, 0, 1, 2], vec![1, 1, 0]),
145            ("abaab", vec![2, 3, 0, 4, 1], vec![1, 2, 0, 1]),
146            ("dabbb", vec![1, 4, 3, 2, 0], vec![0, 1, 2, 0]),
147        ];
148        for (s, sa, lcp) in tests {
149            let s: Vec<char> = s.chars().collect();
150            assert_eq!(suffix_array(&s), sa);
151            assert_eq!(lcp_array(&s, &suffix_array(&s)), lcp);
152        }
153    }
154}