[一本通学习笔记] 字符串哈希

做字符串哈希我们通常要选定一个模数和一个基底。这样我们就可以快速处理出一个字符串的hash，并且可以做到log时间内求出一个子串的Hash。

哈希表虽然好写并且跑得快但心里总有点发虚。

题目很裸，注意getline(cin,...)的使用。

#include <bits/stdc++.h>
using namespace std;

#define modulo 10000007
#define base 131

int buck[10000009];

int getHash(string s) {
    int res = 0;
    for(int i=0;i<s.length();i++)
        res = ((res*base)+s[i])%modulo;
    return res;
}

int main() { ios::sync_with_stdio(false);
    int n;
    cin>>n;
    string tmp;
    getline(cin,tmp);
    for(int i=1;i<=n;i++) {
        getline(cin,tmp);
        if(tmp[0]=='a')
            buck[getHash(tmp.substr(4,tmp.length()-4))]=1;
        else
            cout<<(buck[getHash(tmp.substr(5,tmp.length()-5))]?"yes":"no")<<endl;
    }
}

这题有SA或者KMP的经典解法，当然没有hash简单方便了。

暴力枚举循环节长度即可。

#include <bits/stdc++.h>
using namespace std;
#define ll long long
const ll modulo = 1000000007ll;
const ll base = 131ll;
ll qpow(ll p,ll q) {
    ll ret=1,bas=p;
    while(q) {
        if(q&1) ret*=bas;
        ret%=modulo;
        bas*=bas;
        bas%=modulo;
        q>>=1;
    }
    return ret;
}
ll h[1000005];
string str;
void prehash() {
    h[0]=str[0];
    for(ll i=1;i<str.length();i++)
        h[i]=(str[i]+h[i-1]*base)%modulo;
}
ll gethash(ll l,ll r) {
    if(l==0) return h[r];
    return (((h[r]-h[l-1]*qpow(base,r-l+1)%modulo))%modulo+modulo)%modulo;
}

int main() { ios::sync_with_stdio(false);
    while(cin>>str) {
        if(str[0]=='.') return 0;
        prehash();
        int n=str.length();
        for(int i=1;i<=n;i++) {
            if(n%i) continue;
            int flag = 1,
                val = gethash(1-1,i-1);
            for(int j=i+1;j<=n;j+=i)
                if(gethash(j-1,j+i-1-1)!=val) {
                    flag=0;
                    break;
                }
            if(flag) {
                cout<<n/i<<endl;
                break;
            }
        }
    }
}

看起来更像是KMP，当然暴力枚举hash判断也是轻松的。

#include <bits/stdc++.h>
using namespace std;
#define ll long long
const ll modulo = 1000000007;
const ll base = 131;

string str;
ll h[1000005];

ll qpow(ll p,ll q) {
    ll r = 1;
    for(; q; p*=p, p%=modulo, q>>=1) if(q&1) r*=p, r%=modulo;
    return r;
}

void prehash() {
    for(int i=0;i<=str.length();i++) h[i]=0;
    h[0] = str[0];
    for(int i=1;i<str.length();i++)
        h[i] = (str[i] + h[i-1]*base) % modulo;
}

ll gethash(int l,int r) { // Warning: index = 0..n-1
    if(l==0) return h[r];
    return ((h[r] - h[l-1]*qpow(base,r-l+1))%modulo + modulo)%modulo;
}

int main() {
    ios::sync_with_stdio(false);
    while(cin>>str) {
        prehash();
        int n = str.length();
        for(int i=1;i<=n;i++)
            if(gethash(0,i-1) == gethash(n-i,n-1))
                cout<<i<<" ";
        cout<<endl;
    }
}

如果直接枚举区间长度的所有因子作为循环倍率，那么显然会TLE。

考虑到循环次数一定是每个字母出现次数的倍数。所以循环次数一定同时还是是每个字母出现次数最小公倍数的因数。

一开始脑抽写错了判断边界WA了一堆。后来发现自己傻逼，并得到了80分代码。

#include <bits/stdc++.h>
using namespace std;
#define ll unsigned long long
const ll base = 131;
ll n,q,h[1000005],c[30][500005],pw[1000005];
char str[500005];
string s;
ll gethash(int l,int r) {
    return ((h[r]-(l?h[l-1]:0)*pw[r-l+1]));
}
ll gcd(ll x,ll y) {
    return (!y)?x:gcd(y,x%y);
}

int main() {
    pw[0]=1;
    for(int i=1;i<=1000000;i++) pw[i]=pw[i-1]*base;
    cin>>n>>s>>q;
    h[0]=s[0];
    c[s[0]-'a'][0]=1;
    for(int i=1;i<n;i++)
        for(int j=0;j<26;j++)
            c[j][i]=c[j][i-1]+((s[i]==j+'a')?1:0);
    for(int i=1;i<n;i++)
        h[i]=(h[i-1]*base+(ll)s[i]);
    for(int i=1;i<=q;i++) {
        ll l,r;
        cin>>l>>r; --l,--r;
        // siz * tim = len
        // siz | len
        // tim | gcd
        int len = r-l+1;
        ll g=0;
        for(int i=0;i<26;i++)
            g=gcd(c[i][r]-(l?c[i][l-1]:0),g);
        //cout<<"g "<<g<<endl;
        int flag = 0;
        for(int t=1;t*t<=g;t++) {
            int tim = g/t;
            if(g%t) continue;
            if(len%tim) continue;
            int siz=len/tim;
            if(gethash(l,r-siz)==gethash(l+siz,r)) {
                printf("%d
",siz); flag=1;
                break;
            }
        }
        if(flag) continue;
        for(int tim=sqrt(g);tim>=1;tim--) {
            int siz=len/tim;
            if(len%tim) continue;
            if(gethash(l,r-siz)==gethash(l+siz,r)) {
                printf("%d
",siz); flag = 1;
                break;
            }
        }
        if(flag) continue;
        cout<<"error"<<endl;
    }
}

在添加了快读并手工开方后得到了86分代码

#include <bits/stdc++.h>
using namespace std;
#define ll unsigned long long
const ll base = 131;
ll n,q,h[1000005],c[30][500005],pw[1000005],sq[500005];
char str[500005];
string s;
inline int read() {
    int x=0,f=1; char c=getchar();
    while(c<'0'||c>'9') { if(c=='-') f=-1; c=getchar(); }
    while(c>='0'&&c<='9') { x=x*10+c-'0'; c=getchar(); }
    return x*f;
}
ll gethash(int l,int r) {
    return ((h[r]-(l?h[l-1]:0)*pw[r-l+1]));
}
ll gcd(ll x,ll y) {
    return (!y)?x:gcd(y,x%y);
}
int main() {
    pw[0]=1;
    for(int i=1;i<=1000000;i++) pw[i]=pw[i-1]*base;
    int _sqt = 1;
    for(int i=1;i<=500000;i++)  {
        if(_sqt*_sqt >= i) sq[i]=_sqt;
        else sq[i]=++_sqt;
    }
    n=read();
    gets(str);
    s=str;
    q=read();
    h[0]=s[0];
    c[s[0]-'a'][0]=1;
    for(int i=1;i<n;i++)
        for(int j=0;j<26;j++)
            c[j][i]=c[j][i-1]+((s[i]==j+'a')?1:0);
    for(int i=1;i<n;i++)
        h[i]=(h[i-1]*base+(ll)s[i]);
    for(int i=1;i<=q;i++) {
        ll l,r;
        l=read(); r=read(); --l,--r;
        // siz * tim = len
        // siz | len
        // tim | gcd
        int len = r-l+1;
        ll g=0;
        for(int i=0;i<26;i++)
            g=gcd(c[i][r]-(l?c[i][l-1]:0),g);
        //cout<<"g "<<g<<endl;
        int flag = 0;
        for(int t=1;t*t<=g;t++) {
            int tim = g/t;
            if(g%t) continue;
            if(len%tim) continue;
            int siz=len/tim;
            if(gethash(l,r-siz)==gethash(l+siz,r)) {
                printf("%d
",siz); flag=1;
                break;
            }
        }
        if(flag) continue;
        for(int tim=sq[g];tim>=1;tim--) {
            int siz=len/tim;
            if(len%tim) continue;
            if(gethash(l,r-siz)==gethash(l+siz,r)) {
                printf("%d
",siz); flag = 1;
                break;
            }
        }
        if(flag) continue;
    }
}

显然我们需要一些诡异的优化

设最短的循环节长度为siz，那么原串长度len = siz*tim

对tim,我们将它的质因子分解。

我们每次用质因数i去试除len，如果除去以后仍然是循环节，就说明i是k的一个因子，将他除去。得到的结果就是len。

#include <bits/stdc++.h>
using namespace std;
#define ll unsigned long long
const ll base = 131;
ll n, q, h[1000005], c[30][500005], pw[1000005], sq[500005], prime[500005], tot, mp[500005];
char str[500005];
string s;
inline ll read() {
    ll x = 0, f = 1;
    char c = getchar();
    while (c < '0' || c > '9') {
        if (c == '-')
            f = -1;
        c = getchar();
    }
    while (c >= '0' && c <= '9') {
        x = x * 10 + c - '0';
        c = getchar();
    }
    return x * f;
}
ll gethash(ll l, ll r) { return ((h[r] - (l ? h[l - 1] : 0) * pw[r - l + 1])); }
bool check(ll a, ll b, ll l) {
    return h[b] - h[a + l - 1] * pw[b - a - l + 1] ==
           h[a + ((b - a + 1) / l - 1) * l - 1] - (a ? h[a - 1] : 0) * pw[b - a - l + 1];
}
int main() {
    pw[0] = 1;
    for (ll i = 1; i <= 1000000; i++) pw[i] = pw[i - 1] * base;
    n = read();
    gets(str);
    s = str;
    q = read();
    for (ll i = 2; i <= n; i++) {
        if (mp[i] == 0) {
            prime[++tot] = i;
            mp[i] = i;
        }
        for (ll j = 1; j <= tot; j++) {
            if (prime[j] > mp[i] || prime[j] * i > n)
                break;
            mp[prime[j] * i] = prime[j];
        }
    }
    h[0] = s[0];
    for (ll i = 1; i < n; i++) h[i] = (h[i - 1] * base + (ll)s[i]);
    for (ll i = 1; i <= q; i++) {
        ll l, r;
        l = read();
        r = read();
        --l, --r;
        ll len = r - l + 1;
        ll tmp = len, ans = len;
        while (tmp > 1) {
            ll t = mp[tmp];  // Min prime divisor
            while (tmp % t == 0 && check(l, r, ans / mp[tmp])) tmp /= t, ans /= t;
            while (tmp % t == 0) tmp /= t;
        }
        printf("%d
", ans);
    }
}

#include <bits/stdc++.h>
using namespace std;

#define modulo 100000007
bool u[modulo];

#define ll long long

int main() {
    ll A,B,C;
    cin>>A>>B>>C;
    ll x=1; u[1]=1;
    for(int i=1;i<=2000000;i++) {
        x=(A*x+x%B)%C;
        if(u[x%modulo]) {
            cout<<i<<endl;
            return 0;
        }
        u[x%modulo]++;
    }
    cout<<-1<<endl;
    return 0;
}

理论上这种题是不敢用哈希表做的。

当然还是比离散化好写一点而且快很多的

#include <bits/stdc++.h>
using namespace std;
#define modulo 100000007
bool u[modulo];
int ans,pin,n,a[1000005];

int main() {
    ios::sync_with_stdio(false);
    scanf("%d",&n);
    for(int i=1;i<=n;i++) scanf("%d",&a[i]);
    pin=1;
    for(int i=1;i<=n;i++) {
        while(u[a[i]%modulo])
            u[(a[pin++])%modulo]=0;
        u[a[i]%modulo]=1;
        ans=max(ans,i-pin+1);
    }
    cout<<ans<<endl;
}

[一本通学习笔记] 字符串哈希

#10034. 「一本通 2.1 例 2」图书管理

#10035. 「一本通 2.1 练习 1」Power Strings

#10036. 「一本通 2.1 练习 2」Seek the Name, Seek the Fame

#10038. 「一本通 2.1 练习 4」A Horrible Poem

#10041. 「一本通 2.1 练习 7」门票

#10042. 「一本通 2.1 练习 8」收集雪花