首先有(varphi(ij) = frac{varphi(i) varphi(j) gcd(i,j)}{varphi(gcd(i,j))}),把欧拉函数的定义式代入即可证明
然后就可以开始推式子(默认(n leq m)):
(egin{align*} sumlimits_{i=1}^n sumlimits_{j=1}^m varphi(ij) &= sumlimits_{i=1}^n sumlimits_{j=1}^m frac{varphi(i) varphi(j) gcd(i,j)}{varphi(gcd(i,j))} \ &= sumlimits_{d=1}^n frac{d}{varphi(d)} sumlimits_{i=1}^{frac{n}{d}} sumlimits_{j=1}^{frac{m}{d}} varphi(id) varphi(jd) sumlimits_{p | i , p | j} mu(p) \ &= sumlimits_{d=1}^n frac{d}{varphi(d)} sumlimits_{p=1}^frac{n}{d} mu(p) sumlimits_{i=1}^frac{n}{dp} varphi(idp) sumlimits_{j=1}^frac{m}{dp} varphi(jdp) \ &= sumlimits_{T=1}^n sumlimits_{d | T}frac{d}{varphi(d)} mu(frac{T}{d}) sumlimits_{i=1}^frac{n}{T} varphi(iT) sumlimits_{j=1}^frac{m}{T} mu(jT) end{align*})
(f(T) = sumlimits_{d | T} frac{d}{varphi(d)} mu(frac{T}{d}))可以在(O(nlogn))的时间内预处理,而(g(p,q) = sumlimits_{i=1}^p varphi(iq))则因为需要满足(pq leq n)所以只有(O(nlogn))对((p,q))合法,使用动态数组可以做到(O(nlogn))的预处理。
我们现在需要求的就是(sumlimits_{T = 1}^n f(T) g(frac{n}{T} , T) g(frac{m}{T} , T)),注意到有两个除法,可以想到数论分块,但是因为是三个东西相乘求和,所以我们需要预处理的是对于(forall i in [1,10^5] , forall j in [1 , 10^5] , forall k in [1,n] , sumlimits_{T=1}^k f(T) g(i,T) g(j,T)),复杂度太高难以接受,而直接暴力只有50pts。
注意到我们现在有预处理和暴力两种做法,虽然它们都会TLE,但是我们可以考虑根号分治,把它们放在一起做。
考虑预处理(forall i in [1,B] , forall j in [1,B] , forall k in [1,n] , sumlimits_{T=1}^k f(T) g(i,T) g(j,T)),其中(B)是一个常数。那么我们预处理的复杂度就是(O(nB^2)),而在数论分块的过程中,如果(frac{n}{T} , frac{m}{T}leq B)则直接调用答案,否则暴力计算,因为(frac{n}{T} geq B)意味着(T leq frac{n}{B}),所以复杂度是(O(Tfrac{n}{B}))的。
那么总的复杂度就是(O(nB^2 + frac{Tn}{B})),当(B = T^frac{1}{3})时有最优复杂度(O(nT^frac{2}{3}))。
#include<bits/stdc++.h>
//this code is written by Itst
using namespace std;
const int _ = 1e5 + 7 , MOD = 998244353 , B = pow(10000 , 1.0 / 3) + 1;
int phi[_] , mu[_] , prm[_] , cnt , T , N , M;
int *g[_] , f[_] , ans[B + 3][B + 3][_];
bool nprm[_];
int poww(long long a , int b){
int times = 1;
while(b){
if(b & 1) times = times * a % MOD;
a = a * a % MOD;
b >>= 1;
}
return times;
}
void init(){
mu[1] = phi[1] = 1;
for(int i = 2 ; i <= 1e5 ; ++i){
if(!nprm[i]){
prm[++cnt] = i; phi[i] = i - 1; mu[i] = -1;
}
for(int j = 1 ; i * prm[j] <= 1e5 ; ++j){
nprm[i * prm[j]] = 1;
if(i % prm[j] == 0){
phi[i * prm[j]] = phi[i] * prm[j];
break;
}
phi[i * prm[j]] = phi[i] * (prm[j] - 1);
mu[i * prm[j]] = -1 * mu[i];
}
}
for(int i = 1 ; i <= 1e5 ; ++i){
int tms = 1ll * i * poww(phi[i] , MOD - 2) % MOD;
for(int j = 1 ; j * i <= 1e5 ; ++j)
f[i * j] = (f[i * j] + 1ll * mu[j] * tms + MOD) % MOD;
}
for(int i = 1 ; i <= 1e5 ; ++i){
g[i] = new int[(int)(1e5 / i) + 1];
g[i][0] = 0;
for(int j = 1 ; j * i <= 1e5 ; ++j)
g[i][j] = (g[i][j - 1] + phi[i * j]) % MOD;
}
for(int i = 1 ; i <= B ; ++i)
for(int j = i ; j <= B ; ++j)
for(int k = 1 ; j * k <= 1e5 ; ++k)
ans[i][j][k] = (ans[i][j][k - 1] + 1ll * f[k] * g[k][i] % MOD * g[k][j]) % MOD;
}
void work(){
cin >> N >> M;
if(N > M) swap(N , M);
int sum = 0;
for(int i = 1 , pi; i <= N ; i = pi + 1){
pi = min(N / (N / i) , M / (M / i));
if(N / i <= B && M / i <= B)
sum = (0ll + sum + ans[N / i][M / i][pi] - ans[N / i][M / i][i - 1] + MOD) % MOD;
else
for(int j = i ; j <= pi ; ++j)
sum = (sum + 1ll * f[j] * g[j][N / j] % MOD * g[j][M / j]) % MOD;
}
cout << sum << endl;
}
signed main(){
#ifndef ONLINE_JUDGE
freopen("in","r",stdin);
freopen("out","w",stdout);
#endif
ios::sync_with_stdio(0);
init();
cin >> T;
while(T--) work();
return 0;
}