Codeforces Gym 103409 J. Suffix Automaton


Codeforces Gym 103409 J. Suffix Automaton

容易将询问转化为求给定长度的本质不同字典序第 \(k\) 大的子串, 对于每一个长度离线处理所有询问即可.

这里使用后缀数组去解决该问题. 求出 height 数组后, 长度为 \(l\) 的本质不同子串数量等于 height 数组中小于 \(l\) 的位置中除去长度小于 \(l\) 的后缀的个数. 于是可以维护 height 数组中小于 \(l\) 且后缀长度大于等于 \(l\) 的位置集合 \(S\) , 由于 height 数组按字典序排列, 于是找到集合 \(S\) 中的第 \(k\) 小和第 \(k+1\) 小的 \(S_k,\,S_{k+1}\) , 答案就是区间 \([\,S_k,\,S_{k+1})\) 中长度等于 \(l\) 对应的字符串. 由于需要找下标最小, 因此需要求该区间中后缀起点的最小值, 用一个 ST 表维护即可.

至于维护区间第 \(k\) 小, 区间插入, 区间删除, 只需一个平衡树/树状数组即可, 复杂度为 \(\mathcal O(n\log n)\) .

于是总时间复杂度为 \(\mathcal O(n\log n)\) .

参考代码

#include 
using namespace std;

static constexpr int Maxn = 1e6 + 5, LOG = 21;

int n, Q;
pair ans[Maxn];
char s[Maxn];
struct query {
  int64_t k;
  int id;
  query() = default;
  query(int64_t k, int id) : k(k), id(id) { }
  friend bool operator <(const query &lhs, const query &rhs) {
    return make_pair(lhs.k, lhs.id) < make_pair(rhs.k, rhs.id);
  }
} q[Maxn];

class suffix_array {
 public:
  typedef int index_t;
 private:
 template
  static int sz(T &&arg) {
    using std::size; return int(size(std::forward(arg)));
  }
 public:
  suffix_array() { }
  const index_t &sa(int k) const { return _sa[k]; }
  const index_t &rank(int k) const { return _rank[k]; }
  const index_t &lcp(int k) const { return _lcp[k]; }
 std::tuple, std::vector, std::vector>
  to() { return std::make_tuple(_sa, _rank, _lcp); }
 template
  static suffix_array construct(const string_t &S) {
    int N = sz(S);
    suffix_array _sa(N);
    _sa.build_sa(S);
    _sa.build_rank();
    _sa.build_lcp(S);
    return _sa;
  }
 template
  static suffix_array map_and_construct(const string_t &S, const F &f) {
    std::vector mapped(sz(S));
    for (int i = 0; i < sz(S); i++) mapped[i] = f(S[i]);
    return construct(mapped);
  }
 template
  static suffix_array compress_and_construct(const string_t &S) {
    using std::begin; using std::end;
    std::vector vals(begin(S), end(S));
    std::sort(vals.begin(), vals.end());
    vals.resize(unique(vals.begin(), vals.end()) - vals.begin());
    std::vector compressed_s(sz(S));
    for (int i = 0; i < sz(S); i++)
      compressed_s[i] = lower_bound(vals.begin(), vals.end(), S[i]) - vals.begin();
    return construct(compressed_s);
  }
  static suffix_array construct_lower_alpha(const std::string &s) {
    return suffix_array::map_and_construct(s, [](char c) -> char { return char(c - 'a'); });
  }
  static suffix_array construct_upper_alpha(const std::string &s) {
    return suffix_array::map_and_construct(s, [](char c) -> char { return char(c - 'A'); });
  }
 private:
  explicit suffix_array(int N_) : N(N_) { }
 template
  void build_sa(const string_t &S) {
    _sa = std::vector(N + 1);
    for (auto s : S) assert(index_t(s) >= 0);
    int sigma = N ? *max_element(S.begin(), S.end()) + 1 : 0;
    std::vector tmp(std::max(N, sigma * 2));
    suffix_array::sais(N, S, _sa.data(), sigma, tmp.data());
    _sa.erase(_sa.begin());
  }
 template
  static void sais(int N, const string_t &S, index_t *_sa, int sigma, index_t *tmp) {
    if (N == 0) return void(_sa[0] = 0);
    else if (N == 1) return _sa[0] = 1, _sa[1] = 0, void();
    index_t *freq = tmp; tmp += sigma;
    memset(freq, 0, sizeof(*freq) * sigma);
    for (int i = 0; i < N; i++) ++freq[index_t(S[i])];
    auto build_bucket_start = [&]() {
      for (int v = 0, cur = 1; v < sigma; v++) tmp[v] = cur, cur += freq[v];
    };
    auto build_bucket_end = [&]() {
      for (int v = 0, cur = 1; v < sigma; v++) cur += freq[v], tmp[v] = cur;
    };
    int num_pieces = 0, first_endpoint = 0;
    build_bucket_end(); _sa[0] = N;
    index_t c0 = S[N - 1], c1 = -1;
    bool isS = false;
    for (int i = N - 2; i >= 0; i--) {
      c1 = c0; c0 = S[i];
      if (c0 < c1) isS = true;
      else if (c0 > c1 && isS)
        isS = false, _sa[first_endpoint = --tmp[c1]] = i + 1, ++num_pieces;
    }
    if (num_pieces > 1) {
      _sa[first_endpoint] = 0;
      build_bucket_start();
      for (int z = 0; z <= N; z++) {
        int v = _sa[z];
        if (v <= 0) continue;
        _sa[z] = 0; --v;
        index_t c0 = S[v - 1], c1 = S[v];
        _sa[tmp[c1]++] = (c0 < c1) ? ~v : v;
      }
      index_t *const sa_end = _sa + N + 1;
      index_t *pieces = sa_end;
      build_bucket_end();
      for (int z = N; z >= 0; z--) {
        int v = _sa[z];
        if (!v) continue;
        _sa[z] = 0;
        if (v > 0) { *--pieces = v; continue; }
        v = ~v; --v;
        index_t c0 = S[v - 1], c1 = S[v];
        _sa[--tmp[c1]] = (c0 > c1) ? v : ~v;
      }
      int prv_start = N;
      index_t c0 = S[N - 1], c1 = -1;
      bool isS = false;
      for (int i = N - 2; i >= 0; i--) {
        c1 = c0; c0 = S[i];
        if (c0 < c1) isS = true;
        else if (c0 > c1 && isS) {
          isS = false;
          int v = i + 1;
          _sa[v >> 1] = prv_start == N ? 0 : prv_start - v;
          prv_start = v;
        }
      }
      int next_sigma = 0;
      int prv_len = -1, prv_v = 0;
      for (int i = 0; i < num_pieces; i++) {
        int v = pieces[i];
        int len = _sa[v >> 1];
        bool eq = prv_len == len;
        for (int a = 0; eq && a < len; ++a) eq = S[v + a] == S[prv_v + a];
        if (!eq) next_sigma++, prv_len = len, prv_v = v;
        _sa[v >> 1] = next_sigma;
      }
      if (next_sigma == num_pieces) {
        _sa[0] = N;
        memcpy(_sa + 1, pieces, sizeof(*_sa) * num_pieces);
      } else {
        index_t *next_S = sa_end;
        for (int i = (N - 1) >> 1; i >= 0; i--) {
          int v = _sa[i];
          if (v) *--next_S = v - 1;
          _sa[i] = 0;
        }
        memset(_sa, 0, sizeof(*_sa) * (num_pieces + 1));
        sais(num_pieces, next_S, _sa, next_sigma, tmp);
        next_S = sa_end;
        index_t c0 = S[N - 1], c1 = -1;
        bool isS = false;
        for (int i = N - 2; i >= 0; i--) {
          c1 = c0; c0 = S[i];
          if (c0 < c1) isS = true;
          else if (c0 > c1 && isS)
            isS = false, *--next_S = i + 1;
        }
        _sa[0] = N;
        for (int i = 1; i <= num_pieces; i++)
          _sa[i] = next_S[_sa[i]];
      }
      memset(_sa + num_pieces + 1, 0, sizeof(*_sa) * (N - num_pieces));
      build_bucket_end();
      for (int i = num_pieces; i > 0; i--) {
        int v = _sa[i];
        _sa[i] = 0;
        index_t c1 = S[v];
        _sa[--tmp[c1]] = v;
      }
    }
    build_bucket_start();
    for (int z = 0; z <= N; z++) {
      int v = _sa[z];
      if (v <= 0) continue;
      index_t c1 = S[--v];
      index_t c0 = v ? S[v - 1] : c1;
      _sa[tmp[c1]++] = (c0 < c1) ? ~v : v;
    }
    build_bucket_end();
    for (int z = N; z >= 0; z--) {
      int v = _sa[z];
      if (v >= 0) continue;
      _sa[z] = v = ~v;
      index_t c1 = S[--v];
      index_t c0 = v ? S[v - 1] : c1 + 1;
      _sa[--tmp[c1]] = (c0 > c1) ? v : ~v;
    }
  }
  void build_rank() {
    _rank = std::vector(N);
    for (int i = 0; i < N; i++) _rank[_sa[i]] = i;
  }
 template
  void build_lcp(const string_t &S) {
    assert(sz(S) == N);
    _lcp = std::vector(N);
    for (int i = 0, j = 0; i < N; ++i) {
      if (_rank[i]) {
        if (j) --j;
        for (; S[_sa[_rank[i]] + j] == S[_sa[_rank[i] - 1] + j]; ++j);
        _lcp[_rank[i]] = j;
      }
    }
  }
 private:
  int N;
  std::vector _sa;
  std::vector _rank;
  std::vector _lcp;
};
vector sa, rnk, lcp;
int S[LOG][Maxn], lg2[Maxn], S2[LOG][Maxn];
int ask(int l, int r) {
  int k = lg2[r - l + 1];
  return min(S[k][l], S[k][r - (1 << k) + 1]);
} // ask
int bit[Maxn];
inline void upd(int x, int y) {
  for (++x; x <= n; x += x & -x) bit[x] += y;
} // upd
inline int qry(int x) {
  int r = 0;
  for (++x; x; x -= x & -x) r += bit[x];
  return r;
} // qry
inline int get_rank(int k) {
  int x = 0, i;
  for (i = LOG - 1, ++k; i >= 0; --i)
    if (x + (1 << i) <= n && k > bit[x + (1 << i)])
      x += (1 << i), k -= bit[x];
  return x;
} // get_rank

int main (void) {

  static char ss[Maxn];
  string s;
  scanf("%s", ss); s = ss;
  n = s.length();
  tie(sa, rnk, lcp) = suffix_array::construct(s).to();
  scanf("%d", &Q);
  for (int64_t i = 0, k; i < Q; ++i)
    scanf("%lld", &k), q[i] = query(k, i), q[i].id = i;
  sort(q, q + Q);
  static vector hlcp[Maxn];
  for (int i = 0; i < n; ++i)
    hlcp[lcp[i]].push_back(i);
  lg2[0] = -1;
  for (int i = 1; i <= n; ++i) lg2[i] = lg2[i >> 1] + 1;
  memset(S, 0x3f, sizeof(S));
  for (int i = 0; i < n; ++i) S[0][i] = sa[i];
  for (int j = 1; j < LOG; ++j)
    for (int i = 0; i + (1 << j) <= n; ++i)
      S[j][i] = min(S[j - 1][i], S[j - 1][i + (1 << j - 1)]);
  int j = 0;
  int64_t now = 0;
  for (int i = 0; i < n; ++i) {
    for (const int &x: hlcp[i]) if (n - sa[x] >= i) upd(x, 1);
    if (i > 0 && lcp[rnk[n - i]] <= i) upd(rnk[n - i], -1);
    int sz = qry(n - 1);
    int64_t ns = sz + now;
    for (; j < Q && q[j].k <= ns; ++j) {
      int k = get_rank(q[j].k - now - 1);
      int r = (q[j].k - now == sz ? n - 1 : get_rank(q[j].k - now) - 1);
      k = ask(k, r);
      ans[q[j].id] = {k, k + i};
    }
    now = ns;
  }
  while (j < Q) ans[q[j].id] = {-2, -2}, ++j;
  for (int i = 0; i < Q; ++i)
    printf("%d %d\n", ans[i].first + 1, ans[i].second + 1);

  exit(EXIT_SUCCESS);
} // main