【LOJ #3058】「HNOI2019」白兔之舞(单位根反演+矩阵快速幂+MTT)

传送门

首先可以发现
ii步,从xxyy的方案很好算
只需要把方案矩阵AAA[x,y]iA^i_{[x,y]}再乘一个(Li){Lchoose i}就可以了

那么对于每个tt的答案就是
m=0L[kmt](A[x,y]m(Lm))sum_{m=0}^L[k|m-t](A^m_{[x,y]}{Lchoose m})
考虑单位根反演
得到
m=0L1ki=0k1wk(mt)t(A[x,y]m(Lm))sum_{m=0}^Lfrac 1 ksum_{i=0}^{k-1}w_k^{(m-t)*t}(A^m_{[x,y]}{Lchoose m})
=1ki=0k1wktim=0LA[x,y]m(Lm)wkmi=frac 1 ksum_{i=0}^{k-1}w_k^{-ti}sum_{m=0}^LA^m_{[x,y]}{Lchoose m}w_{k}^{mi}
=1ki=0k1wkti(Awki+I)[x,y]L=frac 1 ksum_{i=0}^{k-1}w_k^{-ti}(A*w_k^i+I)^L_{[x,y]}
(Awki+I)[x,y]L=ai(A*w_k^i+I)^L_{[x,y]}=a_i
这个可以矩阵快速幂O(klogL)O(klogL)得到

ans=1ki=0k1wktiaians=frac 1 ksum_{i=0}^{k-1}w_k^{-ti}a_i
这时候可以O(k2)O(k^2)做了
但好像一分都没有
考虑怎么构造成卷积的形式

考虑有
ij=(t2)+(i2)(i+t2)-ij={tchoose 2}+{ichoose 2}-{i+tchoose 2}
于是答案就愉快地变成了
1kwk(t2)i=0k1wk(i2)wk(i+t2)aifrac 1 kw_k^{{tchoose 2}}sum_{i=0}^{k-1}w_k^{{ichoose 2}}w_k^{-{i+tchoose 2}}a_i
一个是ii,一个是i+ti+t,把其中一个翻转即可卷积了
复杂度O(klogk+klogL)O(klogk+klogL)

由于模数,要写MTTMTT

zxyzxyloj rk1loj rk1经验:
把矩阵快速幂里的所有循环都展开
实际上可以发现若把第一个i>nii->n-i
最后实际上就是所有k+ik+i
需要的只有[k+1,2k][k+1,2k]中的项
而做的是一个长为kk和长为2k2k的卷积
可以只把长度开到2k2k,这样多出去的是前面去的

#include<bits/stdc++.h>
using namespace std;
#define cs const
#define re regitster
#define pb push_back
#define fi first
#define se second
#define pii pair<int,int>
#define ll long long
#define bg begin
cs int RLEN=1<<20|1;
inline char gc(){
	static char ibuf[RLEN],*ib,*ob;
	(ib==ob)&&(ob=(ib=ibuf)+fread(ibuf,1,RLEN,stdin));
	return (ib==ob)?EOF:*ib++;
}
inline int read(){
	char ch=gc();
	int res=0;bool f=1;
	while(!isdigit(ch))f^=ch=='-',ch=gc();
	while(isdigit(ch))res=(res+(res<<2)<<1)+(ch^48),ch=gc();
	return f?res:-res;
}
inline void readstring(char *s){
	int top=0;
	char ch=gc();
	while(isspace(ch))ch=gc();
	while(!isspace(ch)&&ch!=EOF)s[++top]=ch,ch=gc();
}
template<class tp>inline void chemx(tp &a,tp b){a<b?a=b:0;}
template<class tp>inline void chemn(tp &a,tp b){a>b?a=b:0;}
int mod,G;
inline int add(int a,int b){return (a+=b)>=mod?(a-mod):a;}
inline int dec(int a,int b){a-=b;return a+(a>>31&mod);}
inline int mul(int a,int b){static ll r;r=1ll*a*b;return (r>=mod)?(r%mod):r;}
inline void Add(int &a,int b){(a+=b)>=mod?(a-=mod):0;}
inline void Dec(int &a,int b){a-=b,a+=a>>31&mod;}
inline void Mul(int &a,int b){static ll r;r=1ll*a*b,a=(r>=mod)?(r%mod):r;}
inline int ksm(int a,int b,int res=1){for(;b;b>>=1,Mul(a,a))(b&1)&&(Mul(res,a),1);return res;}
inline int Inv(int x){return ksm(x,mod-2);}
inline int fix(int x){return (x<0)?(x+mod):x;}
int n,k,L,x,y;
struct mat{
	int a[3][3];
	mat(){memset(a,0,sizeof(a));}
	inline void I(){a[0][0]=a[1][1]=a[2][2]=1;}
	friend inline mat operator *(cs mat &a,cs mat &b){
		mat c;
		for(int i=0;i<n;i++)
		for(int k=0;k<n;k++)
		for(int j=0;j<n;j++)
		Add(c.a[i][j],mul(a.a[i][k],b.a[k][j]));
		return c;
	}
	friend inline mat operator *(mat a,int b){
		for(int i=0;i<n;i++)
		for(int j=0;j<n;j++)Mul(a.a[i][j],b);
		return a;
	}
	friend inline mat operator +(cs mat &a,cs mat &b){
		mat c;
		for(int i=0;i<n;i++)
		for(int j=0;j<n;j++)
		c.a[i][j]=add(a.a[i][j],b.a[i][j]);
		return c;
	}
}I,A;
inline mat ksm(mat a,int b){
	mat ret=I;
	for(;b;b>>=1,a=a*a)if(b&1)ret=ret*a;
	return ret;
}
inline void findG(int mod){
	static int pr[100],tot=0;
	int phi=mod-1;
	for(int i=2;i*i<=phi;i++){
		if(phi%i==0){
			pr[++tot]=i;
			while(phi%i==0)phi/=i;
		}
	}
	if(phi>1)pr[++tot]=phi;
	G=2;
	while(0721){
		bool flag=0;
		for(int i=1;i<=tot;i++)if(ksm(G,(mod-1)/pr[i])==1){flag=1;break;}
		if(flag==0)break;
		G++;
	}
}
cs int C=18,N=(1<<C)|1;
namespace Poly{
	struct plx{
		double x,y;
		plx(double a=0,double b=0):x(a),y(b){}
		friend inline plx operator +(cs plx &a,cs plx &b){
			return plx(a.x+b.x,a.y+b.y);
		}
		friend inline plx operator -(cs plx &a,cs plx &b){
			return plx(a.x-b.x,a.y-b.y);
		}
		friend inline plx operator *(cs plx &a,cs plx &b){
			return plx(a.x*b.x-a.y*b.y,a.x*b.y+a.y*b.x);
		}
		friend inline plx operator /(cs plx &a,cs double b){
			return plx(a.x/b,a.y/b);
		}
		inline plx conj(){return plx(x,-y);}
	};
	int rev[N];
	vector<plx>w[C+1];
	cs double pi=acos(-1);
	inline void init_w(int t){
		for(int i=1;i<=t;i++)w[i].resize(1<<(i-1));
		plx wn(cos(pi/(1<<(t-1))),sin(pi/(1<<(t-1))));
		w[t][0]=plx(1,0);
		for(int i=1;i<(1<<(t-1));i++)
		w[t][i]=(i&31)?(w[t][i-1]*wn):plx(cos(pi*i/(1<<(t-1))),sin(pi*i/(1<<(t-1))));
		for(int i=t-1;i;i--)
		for(int j=0;j<(1<<(i-1));j++)w[i][j]=w[i+1][j<<1];
	}
	inline void init_rev(int lim){
		for(int i=0;i<lim;i++)rev[i]=(rev[i>>1]>>1)|((i&1)*(lim>>1));
	}
	inline void fft(plx *f,int lim,int kd){
		for(int i=0;i<lim;i++)if(i>rev[i])swap(f[i],f[rev[i]]);
		plx a0,a1;
		for(int mid=1,l=1;mid<lim;mid<<=1,l++)
		for(int i=0;i<lim;i+=mid<<1)
		for(int j=0;j<mid;j++)
		a0=f[i+j],a1=f[i+j+mid]*w[l][j],f[i+j]=a0+a1,f[i+j+mid]=a0-a1;
		if(kd==-1){
			reverse(f+1,f+lim);
			for(int i=0;i<lim;i++)f[i]=f[i]/lim;
		}
	}
	cs int M=(1<<15)-1;
	inline void Mul(int *A,int *B,int deg,int *ret){
		static plx a[N],b[N],c[N],d[N],da,db,dc,dd;int lim=1,tim=1;
		while(lim<deg)lim<<=1,tim++;
		init_w(tim),init_rev(lim);
		for(int i=0;i<lim;i++)a[i]=plx(A[i]&M,A[i]>>15),b[i]=plx(B[i]&M,B[i]>>15);
		fft(a,lim,1),fft(b,lim,1);
		for(int i=0;i<lim;i++){
			int j=(lim-i)&(lim-1);
			da=(a[i]+a[j].conj())*plx(0.5,0);
			db=(a[j].conj()-a[i])*plx(0,0.5);
			dc=(b[i]+b[j].conj())*plx(0.5,0);
			dd=(b[j].conj()-b[i])*plx(0,0.5);
			c[i]=(da*dc)+(da*dd)*plx(0,1);
			d[i]=(db*dc)+(db*dd)*plx(0,1);
		}
		fft(c,lim,-1),fft(d,lim,-1);
		for(int i=0;i<lim;i++){
			ll da=(ll)(d[i].x+0.5)%mod,db=(ll)(d[i].y+0.5)%mod,dc=(ll)(c[i].x+0.5)%mod,dd=(ll)(c[i].y+0.5)%mod;
			ret[i]=((da<<15)+(db<<30)+(dc)+(dd<<15))%mod;
		}
	}
}
int w[N],v[N],f[N],g[N],tmp[N];
inline int C2(int x){return 1ll*x*(x-1)/2%k;}
int main(){
	#ifdef Stargazer
	freopen("lx.in","r",stdin);
	#endif
	I.I();
	n=read(),k=read(),L=read(),x=read()-1,y=read()-1,mod=read();
	for(int i=0;i<n;i++)for(int j=0;j<n;j++)A.a[i][j]=read();
	findG(mod),w[0]=1;int wk=ksm(G,(mod-1)/k);
	for(int i=1;i<k;i++)w[i]=mul(w[i-1],wk);
	for(int i=0;i<k;i++)f[i]=mul(w[C2(i)],ksm(A*w[i]+I,L).a[x][y]);
	reverse(f,f+k+1);
	for(int i=0;i<2*k;i++)g[i]=w[(k-C2(i))%k];
	Poly::Mul(f,g,2*k,tmp);
	for(int i=0,iv=Inv(k);i<k;i++)cout<<mul(tmp[k+i],mul(w[C2(i)],iv))<<'
';
}
原文地址:https://www.cnblogs.com/stargazer-cyk/p/12328339.html