LG P5046 [Ynoi2019 模拟赛] Yuno loves sqrt technology I Solution

Description

给定一个 $n$ 阶排列 $a=(a_1,a_2,\cdots,a_n)$.
$m$ 次询问 $(l,r)$,求 $\sum\limits_{l\le i<j\le r} [a_i>a_j]$.
强制在线,每次给定 $l_0,r_0$,则 $l=l_0\oplus \textit{lastans},r=r_0\oplus \textit{lastans}$.

Limitations

$1\le n,m\le 10^5$
$1\le a_i\le n$
$1\le l\le r\le n$
$\textcolor{red}{0.75\text{s}},512\text{MB}$

Solution

下记 $f(l,r)=\sum\limits_{l\le i<j\le r} [a_i>a_j]$, $g(l,r,L,R)=\sum\limits_{l\le i\le r}\sum\limits_{L\le j\le R}[a_i>a_j]$.

考虑直接对 $a$ 分块,第 $b$ 块内预处理出:

  • $\textit{pre}_i=f(L_b,i)$(第 $b$ 块内前缀的答案)
  • $\textit{suf}_i=f(i,R_b)$(第 $b$ 块内后缀的答案)
  • $\textit{cnt}_{b,v}=\sum\limits_{i=1}^{R_b} [a_i< v]$.(第 $1\sim b$ 块内 $<v$ 的元素个数)
  • $p_i$:$a_{L_b}\sim a_{R_b}$ 排序后的结果,以(实际值,原数组下标)形式保存.

其中 $\textit{pre}$ 和 $\textit{suf}$ 可用 BIT 求出.

接下来需要预处理出整块答案 $h(i,j)$(即 $h(i,j)=f(L_i,R_j)$),显然 $h(i,i)=\textit{pre}_{R_{i}}$,由容斥原理可得:
$$h(i,j)=h(i+1,j)+h(i,j-1)-h(i+1,j-1)+g(L_i,R_i,L_j,R_j)$$

用区间 dp 即可求出 $h$,但是如果现在 $g$ 用 BIT 求,会多出来一个 $\log$.
注意到每块内 $p_i$ 单调递增,所以可以双指针求,复杂度降至 $O(B)$:

inline int _query(int bl, int br, int l, int r, int _l, int _r) {
    int p = L[br] - 1, tot = 0, res = 0;
    for (int i = L[bl]; i <= R[bl]; i++) {
        if (sorted[i].second < l || r < sorted[i].second) continue;
        while (p < R[br] && sorted[i].first > sorted[p + 1].first) {
            p++;
            if (_l <= sorted[p].second && sorted[p].second <= _r) tot++;
        }
        res += tot;
    }
    return res;
}

接下来考虑查询,设 $p=\textit{bel}_l,q=\textit{bel}_r$.
若 $p=q$,则容斥一下可得 $\textit{ans}=\textit{pre}_r-\textit{pre}_{l-1}-g(L_p,l-1,l,r)$,注意特判 $l=L_p$.

若 $p\ne q$,则贡献可分为:

  • 左右散块各自的贡献.
  • 整块间的贡献.
  • 左右散块之间的贡献.
  • 左散块和整块间的贡献.
  • 整块和右散块间的贡献.

前三种加起来显然是 $\textit{suf}_l+\textit{pre}_r+h(p+1,q-1)+g(l,R_p,L_q,r)$.
对于第四种,考虑左散块内每个数 $a_i$,则整块内每个 $<a_i$ 的数都会和 $a_i$ 产生贡献,第五种同理,所以这两种贡献之和是:
$$\Big(\sum\limits_{l\le i\le R_p}\big(\textit{cnt}_{q-1,a_i}-\textit{cnt}_{p,a_i}\big)\Big)+\Big(\sum\limits_{L_q\le i\le r}\big(R_{q-1}-L_{p+1}+1-\textit{cnt}_{q-1,a_i}+\textit{cnt}_{p,a_i}\big)\Big)$$

这样就做完了……吗?

Optimise

时限很紧,需要作如下卡常:

  • 使用内存池分配空间(对于 STL 党)
  • 块长取 $0.5\sqrt n$
  • 快读快写
  • O2

然后就能卡过了.

Code

已删 fastio.
$6.15\text{KB},4.58\text{s},250.54\text{MB}\;\texttt{(C++20 with O2)}$

#include <bits/stdc++.h>
using namespace std;

// Short aliases for the built-in arithmetic types used throughout the file.
using i64 = long long;
using ui64 = unsigned long long;
// __int128 is a compiler extension (GCC/Clang), not standard C++.
using i128 = __int128;
using ui128 = unsigned __int128;
// NOTE(review): the f4/f8/f16 names presumably refer to byte widths; the
// actual size of long double is platform-dependent.
using f4 = float;
using f8 = double;
using f16 = long double;

/// Raises `a` to `b` when `a < b`.
/// @return true if `a` was overwritten, false otherwise.
template<class T>
bool chmax(T &a, const T &b) {
	const bool improved = a < b;
	if (improved) a = b;
	return improved;
}

/// Lowers `a` to `b` when `a > b`.
/// @return true if `a` was overwritten, false otherwise.
template<class T>
bool chmin(T &a, const T &b) {
	const bool improved = a > b;
	if (improved) a = b;
	return improved;
}

namespace mem {
	constexpr int L = 7.5e7 + 10;
	int pool[L], *ptr = pool;
	
	inline int* alloci(int n) {
		int* res = ptr;
		ptr += n;
		return res;
	}
	
	inline i64* allocl(int n) {
		return (i64*)alloci(2 * n);
	}
}

using mem::alloci;
using mem::allocl;

// The article's hand-written fast reader/writer was removed before publishing,
// which left `using Fastio::qin/qout` naming undeclared members. These aliases
// to the standard streams are a drop-in replacement so the file compiles; they
// support the `>>` / `<<` calls made in main().
// NOTE(review): the article states the time limit is tight, so a real fast-IO
// implementation is probably needed for the solution to pass.
namespace Fastio {
	inline std::istream& qin = std::cin;
	inline std::ostream& qout = std::cout;
}
using Fastio::qin;
using Fastio::qout;

/// Isolates the lowest set bit of `x` (returns 0 when `x` is 0).
inline int lowbit(int x) {
	const int negated = -x;
	return negated & x;
}

/// Binary indexed tree over positions 0..n-1 (stored internally as 1..n).
/// Supports point add and prefix / range sum; T only needs `+` and `-`.
template<class T>
struct fenwick {
	int n;
	std::vector<T> c;

	inline fenwick() {}

	/// Creates a tree of `_n` value-initialised entries.
	inline fenwick(int _n): n(_n), c(_n + 1) {}

	/// Builds the tree from `a` in linear time: each node pushes its finished
	/// partial sum to its parent node.
	inline fenwick(const std::vector<T> &a): n(a.size()) {
		c.resize(n + 1);
		for (int node = 1; node <= n; ++node) {
			c[node] = c[node] + a[node - 1];
			const int parent = node + lowbit(node);
			if (parent <= n) c[parent] = c[parent] + c[node];
		}
	}

	/// Adds `v` to position `x` (0-based).
	inline void add(int x, const T& v) {
		int node = x + 1;
		while (node <= n) {
			c[node] = c[node] + v;
			node += lowbit(node);
		}
	}

	/// Sum of positions 0..x (0-based, inclusive); x = -1 yields T{}.
	inline T ask(int x) {
		T total{};
		int node = x + 1;
		while (node) {
			total = total + c[node];
			node -= lowbit(node);
		}
		return total;
	}

	/// Sum of positions l..r (0-based, inclusive).
	inline T ask(int l, int r) {
		const T upper = ask(r);
		return upper - ask(l - 1);
	}
};

// Pair of ints; Block::sorted uses it as (value, original array index).
using pii = pair<int, int>;

struct Block {
	int n, B, blocks;
	int *pre, *suf, *bel, *L, *R;
	vector<int*> cnt;
	vector<i64*> ans;
	vector<pii> sorted;
	fenwick<int> fwk;
	
	inline Block() {}
	inline Block(int _n) : n(_n), fwk(_n) {
        B = max(1, int(sqrt(n) / 2));
		blocks = (n + B - 1) / B;
		pre = alloci(n), suf = alloci(n);
		bel = alloci(n), L = alloci(blocks), R = alloci(blocks);
		
		cnt.resize(blocks), ans.resize(blocks), sorted.resize(n);
		for (int i = 0; i < blocks; i++) {
			L[i] = i * B;
			R[i] = min(L[i] + B, n) - 1;
			cnt[i] = alloci(n);
			ans[i] = allocl(blocks);
		}
	}
	
	inline void init_block(int i, const int* a) {
		const int bl = L[i], br = R[i];
	    for (int j = bl; j <= br; j++) {
	        bel[j] = i, cnt[i][a[j]]++;
	        if (j ^ bl) pre[j] = pre[j - 1] + fwk.ask(a[j] + 1, n - 1);
	        fwk.add(a[j], 1);
	    }
	    for (int j = bl; j <= br; j++) fwk.add(a[j], -1);
	    
	    for (int j = br; j >= bl; j--) {
	        if (j ^ br) suf[j] = suf[j + 1] + fwk.ask(a[j]);
	        fwk.add(a[j], 1);
	    }
	    for (int j = bl; j <= br; j++) fwk.add(a[j], -1);
	    
	    int res = 0;
	    for (int j = 0; j < n; j++) {
	    	res += cnt[i][j];
	    	cnt[i][j] = res + (i > 0 ? cnt[i - 1][j] : 0);
	    }
	    
	    ans[i][i] = pre[br];
	    for (int j = bl; j <= br; j++) sorted[j] = pii(a[j], j);
	    sort(sorted.begin() + bl, sorted.begin() + br + 1);
	}
	
	inline void init() {
		for (int len = 2; len <= blocks; len++)
			for (int bl = 1; bl + len <= blocks; bl++) {
				const int br = bl + len - 1;
				ans[bl][br] = (
				    ans[bl + 1][br] + ans[bl][br - 1] 
				  - ans[bl + 1][br - 1] 
				  + _query(bl, br, L[bl], R[bl], L[br], R[br])
				);
			}
	}
	
	inline int _query(int bl, int br, int l, int r, int _l, int _r) {
	    int p = L[br] - 1, tot = 0, res = 0;
	    for (int i = L[bl]; i <= R[bl]; i++) {
	        if (sorted[i].second < l || r < sorted[i].second) continue;
	        while (p < R[br] && sorted[i].first > sorted[p + 1].first) {
	            p++;
	            if (_l <= sorted[p].second && sorted[p].second <= _r) tot++;
	        }
	        res += tot;
	    }
	    return res;
	}
	
	inline i64 query(int l, int r, const int* a) {
	    const int bl = bel[l], br = bel[r];
	    i64 res = 0;
	    if (bl == br) {
	        if (l == L[bl]) return pre[r];
	        res = pre[r] - pre[l - 1] - _query(bl, bl, 0, l - 1, l, r);
	        return res;
	    }
	    res = suf[l] + pre[r] + _query(bl, br, l, R[bl], L[br], r) + ans[bl + 1][br - 1];
	    for (int i = l; i <= R[bl]; i++)
	        res += cnt[br - 1][a[i]] - cnt[bl][a[i]];
	    for (int i = L[br]; i <= r; i++)
	        res += (R[br - 1] - L[bl + 1] + 1) - cnt[br - 1][a[i]] + cnt[bl][a[i]];
	    return res;
	}
};

// Reads the permutation, builds the block structure, then answers the m
// forced-online queries: each raw (l, r) is XOR-ed with the previous answer.
signed main() {
	ios::sync_with_stdio(0);
	cin.tie(0), cout.tie(0);

	int n, m;
	qin >> n >> m;

	// Values are shifted to 0-based.
	int *a = alloci(n);
	for (int i = 0; i < n; i++) {
		qin >> a[i];
		a[i]--;
	}

	Block blk(n);
	const int blocks = blk.blocks;
	for (int b = 0; b < blocks; b++) blk.init_block(b, a);
	blk.init();

	i64 lst = 0;
	for (int done = 0; done < m; done++) {
		int l, r;
		qin >> l >> r;
		// Decode with the previous answer, then shift to 0-based.
		l ^= lst;
		r ^= lst;
		l--;
		r--;
		lst = blk.query(l, r, a);
		qout << lst << '\n';
	}
	return 0;
}
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值