Description
给定一个 $n$ 阶排列 $a=(a_1,a_2,\cdots,a_n)$.
$m$ 次询问 $(l,r)$,求 $\sum\limits_{l\le i<j\le r} [a_i>a_j]$.
强制在线,每次给定 $l_0,r_0$,则 $l=l_0\oplus \textit{lastans}$,$r=r_0\oplus \textit{lastans}$(其中 $\textit{lastans}$ 为上一次询问的答案,初始为 $0$).
Limitations
$1\le n,m\le 10^5$
$1\le a_i\le n$
$1\le l\le r\le n$
$\textcolor{red}{0.75\text{s}},512\text{MB}$
Solution
下记 $f(l,r)=\sum\limits_{l\le i<j\le r} [a_i>a_j]$, $g(l,r,L,R)=\sum\limits_{l\le i\le r}\sum\limits_{L\le j\le R}[a_i>a_j]$.
考虑直接对 $a$ 分块,记第 $b$ 块的左右端点为 $L_b,R_b$,第 $b$ 块内预处理出:
- $\textit{pre}_i=f(L_b,i)$(第 $b$ 块内前缀的答案)
- $\textit{suf}_i=f(i,R_b)$(第 $b$ 块内后缀的答案)
- $\textit{cnt}_{b,v}=\sum\limits_{i=1}^{R_b} [a_i< v]$.(第 $1\sim b$ 块内 $<v$ 的元素个数;代码中实际统计的是 $\le v$ 的个数,由于 $a$ 是排列且只会用块外元素的值 $v=a_i$ 去查询,两者等价)
- $p_i$:$a_{L_b}\sim a_{R_b}$ 排序后的结果,以(实际值,原数组下标)形式保存.
其中 $\textit{pre}$ 和 $\textit{suf}$ 可用 BIT 求出.
接下来需要预处理出整块答案 $h(i,j)$(即第 $i\sim j$ 块构成的区间的答案),显然 $h(i,i)=\textit{pre}_{R_i}$,由容斥原理可得:
$$h(i,j)=h(i+1,j)+h(i,j-1)-h(i+1,j-1)+g(L_i,R_i,L_j,R_j)$$
用区间 dp 即可求出 $h$,但是如果现在 $g$ 用 BIT 求,会多出来一个 $\log$.
注意到每块内 $p_i$ 单调递增,所以可以双指针求,复杂度降至 $O(B)$:
inline int _query(int bl, int br, int l, int r, int _l, int _r) {
int p = L[br] - 1, tot = 0, res = 0;
for (int i = L[bl]; i <= R[bl]; i++) {
if (sorted[i].second < l || r < sorted[i].second) continue;
while (p < R[br] && sorted[i].first > sorted[p + 1].first) {
p++;
if (_l <= sorted[p].second && sorted[p].second <= _r) tot++;
}
res += tot;
}
return res;
}
接下来考虑查询,设 $p=\textit{bel}_l,q=\textit{bel}_r$.
若 $p=q$,则容斥一下可得 $\textit{ans}=\textit{pre}_r-\textit{pre}_{l-1}-g(L_p,l-1,l,r)$,注意特判 $l=L_p$(此时 $\textit{ans}=\textit{pre}_r$).
若 $p\ne q$,则贡献可分为:
- 左右散块各自的贡献.
- 整块间的贡献.
- 左右散块之间的贡献.
- 左散块和整块间的贡献.
- 整块和右散块间的贡献.
前三种加起来显然是 $\textit{suf}_l+\textit{pre}_r+h(p+1,q-1)+g(l,R_p,L_q,r)$.
对于第四种,考虑左散块内每个数 $a_i$,则整块内每个 $<a_i$ 的数都会和 $a_i$ 产生贡献,第五种同理,所以这两种贡献之和是:
$$\left(\sum\limits_{l\le i\le R_p}\left(\textit{cnt}_{q-1,a_i}-\textit{cnt}_{p,a_i}\right)\right)+\left(\sum\limits_{L_q\le i\le r}\left(R_{q-1}-L_{p+1}+1-\textit{cnt}_{q-1,a_i}+\textit{cnt}_{p,a_i}\right)\right)$$
这样就做完了……吗?
Optimise
时限很紧,需要作如下卡常:
- 使用内存池分配空间(对于 STL 党)
- 块长取 $0.5\sqrt n$
- 快读快写
- 开 O2
然后就能卡过了.
Code
已删 fastio.
$6.15\text{KB},4.58\text{s},250.54\text{MB}\;\texttt{(C++20 with O2)}$
#include <bits/stdc++.h>
using namespace std;
using i64 = long long;
using ui64 = unsigned long long;
using i128 = __int128;
using ui128 = unsigned __int128;
using f4 = float;
using f8 = double;
using f16 = long double;
/// Raises `a` to `b` when `a < b`.
/// @return true if `a` was changed, false otherwise.
template<class T>
bool chmax(T &a, const T &b){
    const bool raised = a < b;
    if (raised) a = b;
    return raised;
}
/// Lowers `a` to `b` when `a > b`.
/// @return true if `a` was changed, false otherwise.
template<class T>
bool chmin(T &a, const T &b){
    const bool lowered = a > b;
    if (lowered) a = b;
    return lowered;
}
namespace mem {
constexpr int L = 7.5e7 + 10;
int pool[L], *ptr = pool;
inline int* alloci(int n) {
int* res = ptr;
ptr += n;
return res;
}
inline i64* allocl(int n) {
return (i64*)alloci(2 * n);
}
}
using mem::alloci;
using mem::allocl;
// The fast I/O implementation has been removed from this listing, so the
// namespace is empty here and the two using-declarations below do not resolve
// until it is supplied. The rest of the file expects it to provide:
//   - qin : used with operator>> to read integers;
//   - qout: used with operator<< to write a 64-bit answer and a '\n' character.
namespace Fastio {}
using Fastio::qin;
using Fastio::qout;
/// Returns the value of the lowest set bit of `x` (0 when `x` is 0).
inline int lowbit(int x) { return x & -x; }

/// Fenwick (binary indexed) tree over positions 0 .. n-1.
/// T needs value-initialisation, operator+ and operator-.
template<class T>
struct fenwick {
    int n = 0;    // number of positions; 0 for a default-constructed (empty) tree
    vector<T> c;  // internal 1-based array: position x is stored starting at slot x + 1

    /// Empty tree: every query returns T{}.
    inline fenwick() = default;

    /// Tree over `_n` positions, all holding T{}.
    inline fenwick(int _n): n(_n) { c.resize(n + 1); }

    /// Tree whose position i initially holds a[i]; built in linear time.
    inline fenwick(const vector<T> &a): n(static_cast<int>(a.size())) {
        c.resize(n + 1);
        for(int i = 1; i <= n; i++){
            c[i] = c[i] + a[i - 1];
            // Push the finished partial sum up to the direct parent slot.
            int j = i + lowbit(i);
            if(j <= n) c[j] = c[j] + c[i];
        }
    }

    /// Adds `v` to position `x`. Requires 0 <= x < n.
    inline void add(int x, const T& v) {
        for (int i = x + 1; i <= n; i += lowbit(i)) c[i] = c[i] + v;
    }

    /// Sum of positions 0 .. x. Out-of-range arguments are clamped:
    /// x < 0 yields T{}, x >= n yields the total over all positions.
    inline T ask(int x) const {
        T ans{};
        for (int i = min(x + 1, n); i > 0; i -= lowbit(i)) ans = ans + c[i];
        return ans;
    }

    /// Sum of positions l .. r (inclusive); T{} when the range is empty.
    inline T ask(int l, int r) const { return ask(r) - ask(l - 1); }
};
using pii = pair<int, int>;
struct Block {
int n, B, blocks;
int *pre, *suf, *bel, *L, *R;
vector<int*> cnt;
vector<i64*> ans;
vector<pii> sorted;
fenwick<int> fwk;
inline Block() {}
inline Block(int _n) : n(_n), fwk(_n) {
B = max(1, int(sqrt(n) / 2));
blocks = (n + B - 1) / B;
pre = alloci(n), suf = alloci(n);
bel = alloci(n), L = alloci(blocks), R = alloci(blocks);
cnt.resize(blocks), ans.resize(blocks), sorted.resize(n);
for (int i = 0; i < blocks; i++) {
L[i] = i * B;
R[i] = min(L[i] + B, n) - 1;
cnt[i] = alloci(n);
ans[i] = allocl(blocks);
}
}
inline void init_block(int i, const int* a) {
const int bl = L[i], br = R[i];
for (int j = bl; j <= br; j++) {
bel[j] = i, cnt[i][a[j]]++;
if (j ^ bl) pre[j] = pre[j - 1] + fwk.ask(a[j] + 1, n - 1);
fwk.add(a[j], 1);
}
for (int j = bl; j <= br; j++) fwk.add(a[j], -1);
for (int j = br; j >= bl; j--) {
if (j ^ br) suf[j] = suf[j + 1] + fwk.ask(a[j]);
fwk.add(a[j], 1);
}
for (int j = bl; j <= br; j++) fwk.add(a[j], -1);
int res = 0;
for (int j = 0; j < n; j++) {
res += cnt[i][j];
cnt[i][j] = res + (i > 0 ? cnt[i - 1][j] : 0);
}
ans[i][i] = pre[br];
for (int j = bl; j <= br; j++) sorted[j] = pii(a[j], j);
sort(sorted.begin() + bl, sorted.begin() + br + 1);
}
inline void init() {
for (int len = 2; len <= blocks; len++)
for (int bl = 1; bl + len <= blocks; bl++) {
const int br = bl + len - 1;
ans[bl][br] = (
ans[bl + 1][br] + ans[bl][br - 1]
- ans[bl + 1][br - 1]
+ _query(bl, br, L[bl], R[bl], L[br], R[br])
);
}
}
inline int _query(int bl, int br, int l, int r, int _l, int _r) {
int p = L[br] - 1, tot = 0, res = 0;
for (int i = L[bl]; i <= R[bl]; i++) {
if (sorted[i].second < l || r < sorted[i].second) continue;
while (p < R[br] && sorted[i].first > sorted[p + 1].first) {
p++;
if (_l <= sorted[p].second && sorted[p].second <= _r) tot++;
}
res += tot;
}
return res;
}
inline i64 query(int l, int r, const int* a) {
const int bl = bel[l], br = bel[r];
i64 res = 0;
if (bl == br) {
if (l == L[bl]) return pre[r];
res = pre[r] - pre[l - 1] - _query(bl, bl, 0, l - 1, l, r);
return res;
}
res = suf[l] + pre[r] + _query(bl, br, l, R[bl], L[br], r) + ans[bl + 1][br - 1];
for (int i = l; i <= R[bl]; i++)
res += cnt[br - 1][a[i]] - cnt[bl][a[i]];
for (int i = L[br]; i <= r; i++)
res += (R[br - 1] - L[bl + 1] + 1) - cnt[br - 1][a[i]] + cnt[bl][a[i]];
return res;
}
};
signed main() {
ios::sync_with_stdio(0);
cin.tie(0), cout.tie(0);
int n, m;
qin >> n >> m;
int *a = alloci(n);
for (int i = 0; i < n; i++) qin >> a[i], a[i]--;
Block blk(n);
const int blocks = blk.blocks;
for (int i = 0; i < blocks; i++) blk.init_block(i, a);
blk.init();
i64 lst = 0;
for (int i = 0, l, r; i < m; i++) {
qin >> l >> r, l ^= lst, r ^= lst, l--, r--;
qout << (lst = blk.query(l, r, a)) << '\n';
}
return 0;
}