LG P5046 [Ynoi2019 模拟赛] Yuno loves sqrt technology I Solution-CSDN博客

本文链接：https://blog.csdn.net/sblsf/article/details/149485099

Description

给定一个 $n$ 阶排列 $a=(a_1,a_2,\cdots,a_n)$ .
$m$ 次询问 $(l, r)$ ，求 $\sum\limits_{l\le i<j\le r} [a_i>a_j]$ .
强制在线，每次给定 $l_0,r_0$ ，则 $l=l_0\oplus \textit{lastans},r=r_0\oplus \textit{lastans}$ .

Limitations

$1\le n,m\le 10^5$
$1\le a_i\le n$
$1\le l\le r\le n$
$\textcolor{red}{0.75\text{s}},512\text{MB}$

Solution

下记 $f(l,r)=\sum\limits_{l\le i<j\le r} [a_i>a_j]$ , $g(l,r,L,R)=\sum\limits_{l\le i\le r}\sum\limits_{L\le j\le R}[a_i>a_j]$ .

考虑直接对 $a$ 分块，第 $b$ 块内预处理出：

$\textit{pre}_i=f(L_b,i)$ （第 $b$ 块内前缀的答案）
$\textit{suf}_i=f(i,R_b)$ （第 $b$ 块内后缀的答案）
$\textit{cnt}_{b,v}=\sum\limits_{i=1}^{R_b} [a_i< v]$ .（第 $1\sim b$ 块内 $< v$ 的元素个数）
$p_i$ ： $a_{L_b}\sim a_{R_b}$ 排序后的结果，以（实际值，原数组下标）形式保存.

其中 $\textit{pre}$ 和 $\textit{suf}$ 可用 BIT 求出.

接下来需要预处理出整块答案 $h(i, j)$ （第 $i\sim j$ 块内的逆序对数），显然 $h(i,i)=\textit{pre}_{R_{i}}$ ，由容斥原理可得：
$h(i,j)=h(i+1,j)+h(i,j-1)-h(i+1,j-1)+g(L_i,R_i,L_j,R_j)$

用区间 dp 即可求出 $h$ ，但是如果现在 $g$ 用 BIT 求，会多出来一个 $\log$ .
注意到每块内 $p_i$ 单调递增，所以可以双指针求，复杂度降至 $O (B)$ ：

inline int _query(int bl, int br, int l, int r, int _l, int _r) {
    int p = L[br] - 1, tot = 0, res = 0;
    for (int i = L[bl]; i <= R[bl]; i++) {
        if (sorted[i].second < l || r < sorted[i].second) continue;
        while (p < R[br] && sorted[i].first > sorted[p + 1].first) {
            p++;
            if (_l <= sorted[p].second && sorted[p].second <= _r) tot++;
        }
        res += tot;
    }
    return res;
}

接下来考虑查询，设 $p=\textit{bel}_l,q=\textit{bel}_r$ .
若 $p = q$ ，则容斥一下可得 $\textit{ans}=\textit{pre}_r-\textit{pre}_{l-1}-g(L_p,l-1,l,r)$ ，注意特判 $l=L_p$ .

若 $p\ne q$ ，则贡献可分为：

左右散块各自的贡献.
整块间的贡献.
左右散块之间的贡献.
左散块和整块间的贡献.
整块和右散块间的贡献.

前三种加起来显然是 $\textit{suf}_l+\textit{pre}_r+h(p+1,q-1)+g(l,R_p,L_q,r)$ .
对于第四种，考虑左散块内每个数 $a_i$ ，则整块内每个 $<a_i$ 的数都会和 $a_i$ 产生贡献；第五种同理（整块内每个 $>a_i$ 的数都会和右散块内的 $a_i$ 产生贡献），所以这两种贡献之和是：
$\left(\sum\limits_{l\le i\le R_p}\textit{cnt}_{q-1,a_i}-\textit{cnt}_{p,a_i}\right)+\left(\sum\limits_{L_q\le i\le r}R_{q-1}-L_{p+1}+1-\textit{cnt}_{q-1,a_i}+\textit{cnt}_{p,a_i}\right)$

这样就做完了……吗？

Optimise

时限很紧，需要作如下卡常：

使用内存池分配空间（对于 STL 党）
块长取 $0.5\sqrt n$
快读快写
开 O2

然后就能卡过了.

Code

已删 fastio.
$6.15\text{KB},4.58\text{s},250.54\text{MB}\;\texttt{(C++20 with O2)}$

#include <bits/stdc++.h>
using namespace std;

// Short aliases for the built-in numeric types used by this file.
using i64 = long long;
using ui64 = unsigned long long;
// __int128 is a compiler extension, not a standard C++ type.
using i128 = __int128;
using ui128 = unsigned __int128;
// NOTE(review): the numeric suffixes look like intended byte sizes; the real
// size of long double is platform-dependent, so f16 is not guaranteed 16 bytes.
using f4 = float;
using f8 = double;
using f16 = long double;

// Raises `a` to `b` when `b` is strictly greater.
// Returns true exactly when `a` was modified.
template<class T>
bool chmax(T &a, const T &b){
	const bool improved = a < b;
	if(improved) a = b;
	return improved;
}

// Lowers `a` to `b` when `b` is strictly smaller.
// Returns true exactly when `a` was modified.
template<class T>
bool chmin(T &a, const T &b){
	const bool improved = a > b;
	if(improved) a = b;
	return improved;
}

// Bump allocator over one static buffer: memory is handed out front to back
// and never released.
namespace mem {
	// Capacity of the pool, counted in ints.
	constexpr int L = 7.5e7 + 10;
	// The pool has static storage duration, so it starts out zero-filled.
	// It is aligned to two ints so that allocl() can return pointers that are
	// suitably aligned for the 64-bit cells it hands out.
	alignas(2 * sizeof(int)) int pool[L];
	int *ptr = pool;
	
	// Returns the next n ints of the pool and advances the cursor.
	// No capacity check is performed.
	inline int* alloci(int n) {
		int* res = ptr;
		ptr += n;
		return res;
	}
	
	// Returns room for n i64 values, carved out of the pool as 2 * n ints.
	inline i64* allocl(int n) {
		// After an odd number of ints has been handed out the cursor sits on a
		// 4-byte boundary only; skip one int so the i64 cells are not misaligned.
		if ((ptr - pool) & 1) ++ptr;
		return (i64*)alloci(2 * n);
	}
}

using mem::alloci;
using mem::allocl;

// The fast I/O implementation was stripped from this listing, so Fastio is
// left empty here. The two using-declarations below do not resolve until a
// Fastio that defines `qin` (used with operator>>) and `qout` (used with
// operator<<) is put back in.
namespace Fastio {}
using Fastio::qin;
using Fastio::qout;

// Isolates the lowest set bit of x; yields 0 when x is 0.
inline int lowbit(int x) {
	const int negated = -x;
	return x & negated;
}

// Fenwick tree (binary indexed tree) supporting point updates and prefix
// sums. Callers use 0-based positions; they are shifted by one internally.
template<class T>
struct fenwick {
	int n;        // number of positions
	vector<T> c;  // internal nodes, 1-based (c[0] is never touched)
	
	inline fenwick() {}
	inline fenwick(int _n): n(_n) { c.resize(n + 1); }
	
	// Builds the tree from initial values, pushing every node's partial sum
	// once into the next node that covers it.
	inline fenwick(const vector<T> &a): n(a.size()) {
		c.resize(n + 1);
		for (int node = 1; node <= n; node++) {
			c[node] = c[node] + a[node - 1];
			const int parent = node + lowbit(node);
			if (parent > n) continue;
			c[parent] = c[parent] + c[node];
		}
	}
	
	// Adds v to position x.
	inline void add(int x, const T& v) {
		int node = x + 1;
		while (node <= n) {
			c[node] = c[node] + v;
			node += lowbit(node);
		}
	}
	
	// Sum of positions 0..x; x == -1 yields a value-initialised T.
	inline T ask(int x) {
		T total{};
		int node = x + 1;
		while (node) {
			total = total + c[node];
			node -= lowbit(node);
		}
		return total;
	}
	
	// Sum of positions l..r, as a difference of two prefix sums.
	inline T ask(int l, int r) {
		const T upto_r = ask(r);
		const T before_l = ask(l - 1);
		return upto_r - before_l;
	}
};

// Pair of ints; Block stores (value, position) entries in this type.
using pii = pair<int, int>;

struct Block {
	int n, B, blocks;
	int *pre, *suf, *bel, *L, *R;
	vector<int*> cnt;
	vector<i64*> ans;
	vector<pii> sorted;
	fenwick<int> fwk;
	
	inline Block() {}
	inline Block(int _n) : n(_n), fwk(_n) {
        B = max(1, int(sqrt(n) / 2));
		blocks = (n + B - 1) / B;
		pre = alloci(n), suf = alloci(n);
		bel = alloci(n), L = alloci(blocks), R = alloci(blocks);
		
		cnt.resize(blocks), ans.resize(blocks), sorted.resize(n);
		for (int i = 0; i < blocks; i++) {
			L[i] = i * B;
			R[i] = min(L[i] + B, n) - 1;
			cnt[i] = alloci(n);
			ans[i] = allocl(blocks);
		}
	}
	
	inline void init_block(int i, const int* a) {
		const int bl = L[i], br = R[i];
	    for (int j = bl; j <= br; j++) {
	        bel[j] = i, cnt[i][a[j]]++;
	        if (j ^ bl) pre[j] = pre[j - 1] + fwk.ask(a[j] + 1, n - 1);
	        fwk.add(a[j], 1);
	    }
	    for (int j = bl; j <= br; j++) fwk.add(a[j], -1);
	    
	    for (int j = br; j >= bl; j--) {
	        if (j ^ br) suf[j] = suf[j + 1] + fwk.ask(a[j]);
	        fwk.add(a[j], 1);
	    }
	    for (int j = bl; j <= br; j++) fwk.add(a[j], -1);
	    
	    int res = 0;
	    for (int j = 0; j < n; j++) {
	    	res += cnt[i][j];
	    	cnt[i][j] = res + (i > 0 ? cnt[i - 1][j] : 0);
	    }
	    
	    ans[i][i] = pre[br];
	    for (int j = bl; j <= br; j++) sorted[j] = pii(a[j], j);
	    sort(sorted.begin() + bl, sorted.begin() + br + 1);
	}
	
	inline void init() {
		for (int len = 2; len <= blocks; len++)
			for (int bl = 1; bl + len <= blocks; bl++) {
				const int br = bl + len - 1;
				ans[bl][br] = (
				    ans[bl + 1][br] + ans[bl][br - 1] 
				  - ans[bl + 1][br - 1] 
				  + _query(bl, br, L[bl], R[bl], L[br], R[br])
				);
			}
	}
	
	inline int _query(int bl, int br, int l, int r, int _l, int _r) {
	    int p = L[br] - 1, tot = 0, res = 0;
	    for (int i = L[bl]; i <= R[bl]; i++) {
	        if (sorted[i].second < l || r < sorted[i].second) continue;
	        while (p < R[br] && sorted[i].first > sorted[p + 1].first) {
	            p++;
	            if (_l <= sorted[p].second && sorted[p].second <= _r) tot++;
	        }
	        res += tot;
	    }
	    return res;
	}
	
	inline i64 query(int l, int r, const int* a) {
	    const int bl = bel[l], br = bel[r];
	    i64 res = 0;
	    if (bl == br) {
	        if (l == L[bl]) return pre[r];
	        res = pre[r] - pre[l - 1] - _query(bl, bl, 0, l - 1, l, r);
	        return res;
	    }
	    res = suf[l] + pre[r] + _query(bl, br, l, R[bl], L[br], r) + ans[bl + 1][br - 1];
	    for (int i = l; i <= R[bl]; i++)
	        res += cnt[br - 1][a[i]] - cnt[bl][a[i]];
	    for (int i = L[br]; i <= r; i++)
	        res += (R[br - 1] - L[bl + 1] + 1) - cnt[br - 1][a[i]] + cnt[bl][a[i]];
	    return res;
	}
};

signed main() {
	ios::sync_with_stdio(0);
	cin.tie(0), cout.tie(0);
	
	int n, m;
	qin >> n >> m;
	
	int *a = alloci(n);
	for (int i = 0; i < n; i++) qin >> a[i], a[i]--;
	
	Block blk(n);
	const int blocks = blk.blocks;
	for (int i = 0; i < blocks; i++) blk.init_block(i, a);
	blk.init();

	i64 lst = 0;
	for (int i = 0, l, r; i < m; i++) {
		qin >> l >> r, l ^= lst, r ^= lst, l--, r--;
		qout << (lst = blk.query(l, r, a)) << '\n';
	}
	return 0;
}