Codeforces Gym 103409H. Popcount Words


Codeforces Gym 103409H. Popcount Words

首先得先观察 \(w[\,l,\,r\,]\) 是个什么玩意儿.

通过枚举可发现, \(w[\,0,\,+\infty)=\texttt{01101001100101101001011001101001...}\) 其满足:

  • \(w[\,0,\,1)=\texttt{0};\;\hat w[\,0,\,1)=\texttt{1}\)
  • \(\forall n\in\N^+,\,\begin{cases}w[\,0,\,2^n)=w[\,0,\,2^{n-1})+\hat w[\,0,\,2^{n-1})\\\hat w[\,0,\,2^n)=\hat w[\,0,\,2^{n-1})+w[\,0,\,2^{n-1})\end{cases}\)

\(W_{n,\,0}=w[\,0,\,2^n),\,W_{n,\,1}=\hat w[\,0,\,2^n)\) . 于是上面的规律可以表示为:

  • \(W_{0,\,0}=\texttt{0},\,W_{0,\,1}=\texttt{1}\)
  • \(\forall n\in\N^+,i\in \{0,1\},\,W_{n,\,i}=W_{n-1,\,i}+W_{n-1,\,1-i}\)

\(\operatorname{parity}(x)\) 为数 \(x\) 在二进制下 \(\texttt{1}\) 的个数的奇偶性, 则有 \(w[\,k2^n,\,(k+1)2^n)=W_{n,\,\operatorname{parity}(k)}\)

根据以上几条性质, 便可以得出一个结论:

结论 对于每一个 \(w[\,l,\,r)\) 可以由 \(\mathcal O(\log r)\)\(W_{n,\,i}\) 拼接而成.

构造 构建一个长度为 \(\infty\) 的 0-base线段树, 然后将该区间 \([\,l,\,r)\) 放置在线段树上, 会有 \(2\log r\) 个区间 \([\,l_i,\,r_i)\) , 且这些区间都满足 \(r_i-l_i=2^t\,\and\,2^t\,|\,l_i\) . 故该 \(2\log r\) 个区间即为所求.

于是我们将原题 \(n\) 个区间 \([\,l,\,r\,]\) 按照上述方式分解, 可得到共 \(\mathcal O(n\log V)\)\(W_{k,\,i}\) .

回到原题. 多串匹配可以尝试建立询问串 \(p_i\) 的 AC自动机. 如果能求出串 \(S\) 经过每个节点的次数, 那么询问串 \(p_i\) 的答案就是对应节点 \(\text{fail}\) 子树的次数和.

\(f_{u,\,n,\,i}\) 表示AC自动机上的节点 \(u\) 经过串 \(W_{n,\,i}\) 后到达的节点. 那么根据 \(W_{n,\,i}\) 的转移可知, \(f\) 有状态转移方程 \(\forall n\in\N^+,\,f_{u,\,n,\,i}=f_{f_{u,\,n-1,\,i},\,n-1,\,1-i}\) . 这里是一个朴素的 \(\text{DP}\) .

但我们要求的是每个节点被经过了多少次, 而并不是最后会到达哪个节点. 于是可以再记 \(g_{u,\,n,\,i}\) 为节点 \(u\) 经过串 \(W_{n,\,i}\) 上沿路的 \(2^n\) 个节点 ( 不包括 \(u\) ) 一起出现的次数. 那么最后原串 \(S\) 经过节点 \(u\) 的次数为 \(g_{u_0,\,0,\,0}+g_{u_1,\,0,\,1}\) , 其中节点 \(u_0,\,u_1\) 分别满足 \(\operatorname {ch}_{u_0,\,0}=u,\,\operatorname{ch}_{u_1,\,1}=u\) .

\(c_{u,\,n,\,i}\) 为节点 \(i\) 经过串 \(W_{n,\,i}\) 的次数. 按照从大到小的顺序转移 \(g\) , 则可得出其状态转移方程为 \(g_{u,\,n,\,i}=c_{u,\,n,\,i}+g_{u,\,n+1,\,i}+\sum\limits_{f_{v,\,n,\,1-i}=u}g_{v,\,n+1,\,1-i}\) . 在求出 \(g\) 之后, 便可求出 \(S\) 经过每个节点的次数, 最后在 \(\text{fail}\) 树上统计子树之和就可以得到答案.

计算 \(f\)\(g\) 的时空复杂度均为 \(\mathcal O(\sum|p_i|\log V)\) . 预处理 \(S\) 的时间复杂度为 \(\mathcal O(n\log V)\). 于是总时间复杂度为 \(\mathcal O((n+\sum|p_i|)\log V)\) .

参考代码

#include 
using namespace std;

static constexpr int Maxn = 1e5 + 5, Maxm = 5e5 + 5, MaxC = 2, Maxq = 1e5 + 5;
static constexpr int LOG = 30;

int n, q;
int L[Maxn], R[Maxn];
int endpos[Maxq];
int64_t S[Maxm];

int m, N[LOG * 2], I[LOG * 2];
void get_build(int ql, int qr, int l, int r) {
  if (l >= qr || ql >= r) return ;
  if (ql <= l && r <= qr) {
    int k = 31 - __builtin_clz(r - l);
    int q = l >> k;
    N[m] = k, I[m] = __builtin_parity(q); ++m;
    return ;
  }
  int mid = (l + r) >> 1;
  get_build(ql, qr, l, mid);
  get_build(ql, qr, mid, r);
} // get_build

int ch[Maxm][MaxC], fail[Maxm];
int root, tot;
static int que[Maxm];
int insert(int len, const char *s) {
  int u = root;
  for (int i = 0; i < len; ++i) {
    int c = s[i] - '0';
    if (!ch[u][c])
      ch[u][c] = ++tot;
    u = ch[u][c];
  }
  return u;
} // insert
void build(void) {
  int qh = 0, qe = 0;
  fail[root] = root;
  for (int j = 0; j < 2; ++j) {
    if (ch[root][j]) {
      fail[ch[root][j]] = root;
      que[qe++] = ch[root][j];
    } else {
      ch[root][j] = root;
    }
  }
  while (qh < qe) {
    int u = que[qh++];
    for (int j = 0; j < 2; ++j) {
      if (ch[u][j]) {
        fail[ch[u][j]] = ch[fail[u]][j];
        que[qe++] = ch[u][j];
      } else {
        ch[u][j] = ch[fail[u]][j];
      }
    }
  }
} // build

int f[2][LOG][Maxm], c[2][LOG][Maxm];
int64_t g[2][LOG + 1][Maxm];

int main(void) {

  scanf("%d%d", &n, &q);
  for (int i = 1; i <= n; ++i)
    scanf("%d%d", &L[i], &R[i]);
  root = 0; tot = 0;
  for (int i = 1; i <= q; ++i) {
    static char str[Maxm];
    scanf("%s", str);
    endpos[i] = insert(strlen(str), str);
  }
  build();
  for (int i = 0; i <= tot; ++i)
    f[0][0][i] = ch[i][0], f[1][0][i] = ch[i][1];
  for (int j = 1; j < LOG; ++j)
    for (int i = 0; i <= tot; ++i) {
      f[0][j][i] = f[1][j - 1][f[0][j - 1][i]];
      f[1][j][i] = f[0][j - 1][f[1][j - 1][i]];
    }
  int cur = 0;
  for (int i = 1; i <= n; ++i) {
    m = 0, get_build(L[i], R[i] + 1, 0, 1 << 30);
    for (int j = 0; j < m; ++j) {
      c[I[j]][N[j]][cur]++;
      cur = f[I[j]][N[j]][cur];
    }
  }
  for (int j = LOG - 1; j >= 0; --j)
    for (int i = 0; i <= tot; ++i) {
      g[0][j][i] += c[0][j][i] + g[0][j + 1][i];
      g[1][j][i] += c[1][j][i] + g[1][j + 1][i];
      g[1][j][f[0][j][i]] += g[0][j + 1][i];
      g[0][j][f[1][j][i]] += g[1][j + 1][i];
    }
  memset(S, 0, sizeof(S));
  for (int i = 0; i <= tot; ++i) {
    S[ch[i][0]] += g[0][0][i];
    S[ch[i][1]] += g[1][0][i];
  }
  for (int i = tot; i >= 0; --i)
    S[fail[que[i]]] += S[que[i]];
  for (int i = 1; i <= q; ++i)
    printf("%lld\n", S[endpos[i]]);

  exit(EXIT_SUCCESS);
} // main